• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*!
2  * \copy
3  *     Copyright (c)  2009-2013, Cisco Systems
4  *     All rights reserved.
5  *
6  *     Redistribution and use in source and binary forms, with or without
7  *     modification, are permitted provided that the following conditions
8  *     are met:
9  *
10  *        * Redistributions of source code must retain the above copyright
11  *          notice, this list of conditions and the following disclaimer.
12  *
13  *        * Redistributions in binary form must reproduce the above copyright
14  *          notice, this list of conditions and the following disclaimer in
15  *          the documentation and/or other materials provided with the
16  *          distribution.
17  *
18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21  *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22  *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26  *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28  *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *     POSSIBILITY OF SUCH DAMAGE.
30  *
31  *
32  * \file  svc motion estimate.c
33  *
34  * \brief  Interfaces introduced in svc mb motion estimation
35  *
36  * \date  08/11/2009 Created
37  *
38  *************************************************************************************
39  */
40 
41 #include "cpu_core.h"
42 #include "ls_defines.h"
43 #include "svc_motion_estimate.h"
44 #include "wels_transpose_matrix.h"
45 
46 namespace WelsEnc {
47 
/*!
 * \brief  Quantization step size per QP, stored as QStep << 4 so that it fits
 *         an int32_t.
 *
 * The table has 52 entries laid out six per row; every entry is exactly twice
 * the entry six positions before it.
 */
const int32_t QStepx16ByQp[52] = {
  /* QP  0.. 5 */ 10,   11,   13,   14,   16,   18,
  /* QP  6..11 */ 20,   22,   26,   28,   32,   36,
  /* QP 12..17 */ 40,   44,   52,   56,   64,   72,
  /* QP 18..23 */ 80,   88,   104,  112,  128,  144,
  /* QP 24..29 */ 160,  176,  208,  224,  256,  288,
  /* QP 30..35 */ 320,  352,  416,  448,  512,  576,
  /* QP 36..41 */ 640,  704,  832,  896,  1024, 1152,
  /* QP 42..47 */ 1280, 1408, 1664, 1792, 2048, 2304,
  /* QP 48..51 */ 2560, 2816, 3328, 3584
};
59 
UpdateMeResults(const SMVUnitXY ksBestMv,const uint32_t kiBestSadCost,uint8_t * pRef,SWelsME * pMe)60 static inline void UpdateMeResults (const SMVUnitXY ksBestMv, const uint32_t kiBestSadCost, uint8_t* pRef,
61                                     SWelsME* pMe) {
62   pMe->sMv = ksBestMv;
63   pMe->pRefMb = pRef;
64   pMe->uiSadCost = kiBestSadCost;
65 }
MeEndIntepelSearch(SWelsME * pMe)66 static inline void MeEndIntepelSearch (SWelsME* pMe) {
67   /* -> qpel mv */
68   pMe->sMv.iMvX *= (1 << 2);
69   pMe->sMv.iMvY *= (1 << 2);
70   pMe->uiSatdCost = pMe->uiSadCost;
71 }
72 
WelsInitMeFunc(SWelsFuncPtrList * pFuncList,uint32_t uiCpuFlag,bool bScreenContent)73 void WelsInitMeFunc (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag, bool bScreenContent) {
74   pFuncList->pfUpdateFMESwitch = UpdateFMESwitchNull;
75 
76   if (!bScreenContent) {
77     pFuncList->pfCheckDirectionalMv = CheckDirectionalMvFalse;
78     pFuncList->pfCalculateBlockFeatureOfFrame[0] =
79       pFuncList->pfCalculateBlockFeatureOfFrame[1] = NULL;
80     pFuncList->pfCalculateSingleBlockFeature[0] =
81       pFuncList->pfCalculateSingleBlockFeature[1] = NULL;
82 
83   } else {
84     pFuncList->pfCheckDirectionalMv = CheckDirectionalMv;
85 
86     //for cross serarch
87     pFuncList->pfVerticalFullSearch = LineFullSearch_c;
88     pFuncList->pfHorizontalFullSearch = LineFullSearch_c;
89 
90 #if defined (X86_ASM)
91     if (uiCpuFlag & WELS_CPU_SSE41) {
92       pFuncList->pfSampleSadHor8[0] = SampleSad8x8Hor8_sse41;
93       pFuncList->pfSampleSadHor8[1] = SampleSad16x16Hor8_sse41;
94       pFuncList->pfVerticalFullSearch = VerticalFullSearchUsingSSE41;
95       pFuncList->pfHorizontalFullSearch = HorizontalFullSearchUsingSSE41;
96     }
97 #endif
98 
99     //for feature search
100     pFuncList->pfInitializeHashforFeature = InitializeHashforFeature_c;
101     pFuncList->pfFillQpelLocationByFeatureValue = FillQpelLocationByFeatureValue_c;
102     pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_c;
103     pFuncList->pfCalculateBlockFeatureOfFrame[1] = SumOf16x16BlockOfFrame_c;
104     //TODO: it is possible to differentiate width that is times of 8, so as to accelerate the speed when width is times of 8?
105     pFuncList->pfCalculateSingleBlockFeature[0] = SumOf8x8SingleBlock_c;
106     pFuncList->pfCalculateSingleBlockFeature[1] = SumOf16x16SingleBlock_c;
107 #if defined (X86_ASM)
108     if (uiCpuFlag & WELS_CPU_SSE2) {
109       //for feature search
110       pFuncList->pfInitializeHashforFeature = InitializeHashforFeature_sse2;
111       pFuncList->pfFillQpelLocationByFeatureValue = FillQpelLocationByFeatureValue_sse2;
112       pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_sse2;
113       pFuncList->pfCalculateBlockFeatureOfFrame[1] = SumOf16x16BlockOfFrame_sse2;
114       //TODO: it is possible to differentiate width that is times of 8, so as to accelerate the speed when width is times of 8?
115       pFuncList->pfCalculateSingleBlockFeature[0] = SumOf8x8SingleBlock_sse2;
116       pFuncList->pfCalculateSingleBlockFeature[1] = SumOf16x16SingleBlock_sse2;
117     }
118     if (uiCpuFlag & WELS_CPU_SSE41) {
119       //for feature search
120       pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_sse4;
121       pFuncList->pfCalculateBlockFeatureOfFrame[1] = SumOf16x16BlockOfFrame_sse4;
122     }
123 #endif
124 
125 #if defined (HAVE_NEON)
126     if (uiCpuFlag & WELS_CPU_NEON) {
127       //for feature search
128       pFuncList->pfInitializeHashforFeature = InitializeHashforFeature_neon;
129       pFuncList->pfFillQpelLocationByFeatureValue = FillQpelLocationByFeatureValue_neon;
130       pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_neon;
131       pFuncList->pfCalculateBlockFeatureOfFrame[1] = SumOf16x16BlockOfFrame_neon;
132       //TODO: it is possible to differentiate width that is times of 8, so as to accelerate the speed when width is times of 8?
133       pFuncList->pfCalculateSingleBlockFeature[0] = SumOf8x8SingleBlock_neon;
134       pFuncList->pfCalculateSingleBlockFeature[1] = SumOf16x16SingleBlock_neon;
135     }
136 #endif
137 
138 #if defined (HAVE_NEON_AARCH64)
139     if (uiCpuFlag & WELS_CPU_NEON) {
140       //for feature search
141       pFuncList->pfInitializeHashforFeature = InitializeHashforFeature_AArch64_neon;
142       pFuncList->pfFillQpelLocationByFeatureValue = FillQpelLocationByFeatureValue_AArch64_neon;
143       pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_AArch64_neon;
144       pFuncList->pfCalculateBlockFeatureOfFrame[1] = SumOf16x16BlockOfFrame_AArch64_neon;
145       //TODO: it is possible to differentiate width that is times of 8, so as to accelerate the speed when width is times of 8?
146       pFuncList->pfCalculateSingleBlockFeature[0] = SumOf8x8SingleBlock_AArch64_neon;
147       pFuncList->pfCalculateSingleBlockFeature[1] = SumOf16x16SingleBlock_AArch64_neon;
148     }
149 #endif
150 
151 #if defined (HAVE_LSX)
152     if (uiCpuFlag & WELS_CPU_LSX) {
153       pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_lsx;
154       //TODO: it is possible to differentiate width that is times of 8, so as to accelerate the speed when width is times of 8?
155       pFuncList->pfCalculateSingleBlockFeature[0] = SumOf8x8SingleBlock_lsx;
156     }
157 #endif
158   }
159 }
160 
161 /*!
162  * \brief  BL mb motion estimate search
163  *
164  * \param  enc      Wels encoder context
165  * \param  pMe          Wels me information
166  *
167  * \return  NONE
168  */
169 
WelsMotionEstimateSearch(SWelsFuncPtrList * pFuncList,SDqLayer * pCurDqLayer,SWelsME * pMe,SSlice * pSlice)170 void WelsMotionEstimateSearch (SWelsFuncPtrList* pFuncList, SDqLayer* pCurDqLayer, SWelsME* pMe, SSlice* pSlice) {
171   const int32_t kiStrideEnc = pCurDqLayer->iEncStride[0];
172   const int32_t kiStrideRef = pCurDqLayer->pRefPic->iLineSize[0];
173 
174   //  Step 1: Initial point prediction
175   if (!WelsMotionEstimateInitialPoint (pFuncList, pMe, pSlice, kiStrideEnc, kiStrideRef)) {
176     pFuncList->pfSearchMethod[pMe->uiBlockSize] (pFuncList, pMe, pSlice, kiStrideEnc, kiStrideRef);
177     MeEndIntepelSearch (pMe);
178   }
179 
180   pFuncList->pfCalculateSatd (pFuncList->sSampleDealingFuncs.pfSampleSatd[pMe->uiBlockSize], pMe, kiStrideEnc,
181                               kiStrideRef);
182 }
183 
WelsMotionEstimateSearchStatic(SWelsFuncPtrList * pFuncList,SDqLayer * pCurDqLayer,SWelsME * pMe,SSlice * pLpslice)184 void WelsMotionEstimateSearchStatic (SWelsFuncPtrList* pFuncList, SDqLayer* pCurDqLayer, SWelsME* pMe,
185                                      SSlice* pLpslice) {
186   const int32_t kiStrideEnc = pCurDqLayer->iEncStride[0];
187   const int32_t kiStrideRef = pCurDqLayer->pRefPic->iLineSize[0];
188 
189   pMe->sMv.iMvX = pMe->sMv.iMvY = 0;
190   pMe->uiSadCost =
191     pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize] (pMe->pEncMb, kiStrideEnc, pMe->pRefMb, kiStrideRef) ;
192   pMe->uiSadCost += COST_MVD (pMe->pMvdCost, - pMe->sMvp.iMvX, - pMe->sMvp.iMvY);
193   MeEndIntepelSearch (pMe);
194   pFuncList->pfCalculateSatd (pFuncList->sSampleDealingFuncs.pfSampleSatd[pMe->uiBlockSize], pMe, kiStrideEnc,
195                               kiStrideRef);
196 }
197 
WelsMotionEstimateSearchScrolled(SWelsFuncPtrList * pFuncList,SDqLayer * pCurDqLayer,SWelsME * pMe,SSlice * pSlice)198 void WelsMotionEstimateSearchScrolled (SWelsFuncPtrList* pFuncList, SDqLayer* pCurDqLayer, SWelsME* pMe,
199                                        SSlice* pSlice) {
200   const int32_t kiStrideEnc = pCurDqLayer->iEncStride[0];
201   const int32_t kiStrideRef = pCurDqLayer->pRefPic->iLineSize[0];
202 
203   pMe->sMv = pMe->sDirectionalMv;
204   pMe->pRefMb = pMe->pColoRefMb + pMe->sMv.iMvY * kiStrideRef + pMe->sMv.iMvX;
205   pMe->uiSadCost =
206     pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize] (pMe->pEncMb, kiStrideEnc, pMe->pRefMb, kiStrideRef)
207     + COST_MVD (pMe->pMvdCost, (pMe->sMv.iMvX * (1 << 2)) - pMe->sMvp.iMvX, (pMe->sMv.iMvY * (1 << 2)) - pMe->sMvp.iMvY);
208   MeEndIntepelSearch (pMe);
209   pFuncList->pfCalculateSatd (pFuncList->sSampleDealingFuncs.pfSampleSatd[pMe->uiBlockSize], pMe, kiStrideEnc,
210                               kiStrideRef);
211 }
212 /*!
213  * \brief  EL mb motion estimate initial point testing
214  *
215  * \param  pix_pFuncList  SSampleDealingFunc
216  * \param  pMe          Wels me information
217  * \param  mv_range  search range in motion estimate
218  * \param  point      the best match point in motion estimation
219  *
220  * \return  NONE
221  */
WelsMotionEstimateInitialPoint(SWelsFuncPtrList * pFuncList,SWelsME * pMe,SSlice * pSlice,int32_t iStrideEnc,int32_t iStrideRef)222 bool WelsMotionEstimateInitialPoint (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice, int32_t iStrideEnc,
223                                      int32_t iStrideRef) {
224   PSampleSadSatdCostFunc pSad    = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
225   const uint16_t* kpMvdCost  = pMe->pMvdCost;
226   uint8_t* const kpEncMb    = pMe->pEncMb;
227   int16_t iMvc0, iMvc1;
228   int32_t iSadCost;
229   int32_t iBestSadCost;
230   uint8_t* pRefMb;
231   uint8_t* pFref2;
232   uint32_t i;
233   const uint32_t kuiMvcNum    = pSlice->uiMvcNum;
234   const SMVUnitXY* kpMvcList  = &pSlice->sMvc[0];
235   const SMVUnitXY ksMvStartMin    = pSlice->sMvStartMin;
236   const SMVUnitXY ksMvStartMax    = pSlice->sMvStartMax;
237   const SMVUnitXY ksMvp    = pMe->sMvp;
238   SMVUnitXY sMv;
239 
240   //  Step 1: Initial point prediction
241   // init with sMvp
242   sMv.iMvX  = WELS_CLIP3 ((2 + ksMvp.iMvX) >> 2, ksMvStartMin.iMvX, ksMvStartMax.iMvX);
243   sMv.iMvY  = WELS_CLIP3 ((2 + ksMvp.iMvY) >> 2, ksMvStartMin.iMvY, ksMvStartMax.iMvY);
244 
245   pRefMb = &pMe->pRefMb[sMv.iMvY * iStrideRef + sMv.iMvX];
246 
247   iBestSadCost = pSad (kpEncMb, iStrideEnc, pRefMb, iStrideRef);
248   iBestSadCost += COST_MVD (kpMvdCost, ((sMv.iMvX) * (1 << 2)) - ksMvp.iMvX, ((sMv.iMvY) * (1 << 2)) - ksMvp.iMvY);
249 
250   for (i = 0; i < kuiMvcNum; i++) {
251     //clipping here is essential since some pOut-of-range MVC may happen here (i.e., refer to baseMV)
252     iMvc0 = WELS_CLIP3 ((2 + kpMvcList[i].iMvX) >> 2, ksMvStartMin.iMvX, ksMvStartMax.iMvX);
253     iMvc1 = WELS_CLIP3 ((2 + kpMvcList[i].iMvY) >> 2, ksMvStartMin.iMvY, ksMvStartMax.iMvY);
254 
255     if (((iMvc0 - sMv.iMvX) || (iMvc1 - sMv.iMvY))) {
256       pFref2 = &pMe->pRefMb[iMvc1 * iStrideRef + iMvc0];
257 
258       iSadCost = pSad (kpEncMb, iStrideEnc, pFref2, iStrideRef) +
259                  COST_MVD (kpMvdCost, (iMvc0 * (1 << 2)) - ksMvp.iMvX, (iMvc1 * (1 << 2)) - ksMvp.iMvY);
260 
261       if (iSadCost < iBestSadCost) {
262         sMv.iMvX = iMvc0;
263         sMv.iMvY = iMvc1;
264         pRefMb = pFref2;
265         iBestSadCost = iSadCost;
266       }
267     }
268   }
269 
270   if (pFuncList->pfCheckDirectionalMv
271       (pSad, pMe, ksMvStartMin, ksMvStartMax, iStrideEnc, iStrideRef, iSadCost)) {
272     sMv = pMe->sDirectionalMv;
273     pRefMb =  &pMe->pColoRefMb[sMv.iMvY * iStrideRef + sMv.iMvX];
274     iBestSadCost = iSadCost;
275   }
276 
277   UpdateMeResults (sMv, iBestSadCost, pRefMb, pMe);
278   if (iBestSadCost < static_cast<int32_t> (pMe->uSadPredISatd.uiSadPred)) {
279     //Initial point early Stop
280     MeEndIntepelSearch (pMe);
281     return true;
282   }
283   return false;
284 }
285 
CalculateSatdCost(PSampleSadSatdCostFunc pSatd,SWelsME * pMe,const int32_t kiEncStride,const int32_t kiRefStride)286 void CalculateSatdCost (PSampleSadSatdCostFunc pSatd, SWelsME* pMe,
287                         const int32_t kiEncStride, const int32_t kiRefStride) {
288   pMe->uSadPredISatd.uiSatd = pSatd (pMe->pEncMb, kiEncStride, pMe->pRefMb, kiRefStride);
289   pMe->uiSatdCost = pMe->uSadPredISatd.uiSatd + COST_MVD (pMe->pMvdCost, pMe->sMv.iMvX - pMe->sMvp.iMvX,
290                     pMe->sMv.iMvY - pMe->sMvp.iMvY);
291 }
NotCalculateSatdCost(PSampleSadSatdCostFunc pSatd,SWelsME * pMe,const int32_t kiEncStride,const int32_t kiRefStride)292 void NotCalculateSatdCost (PSampleSadSatdCostFunc pSatd, SWelsME* pMe,
293                            const int32_t kiEncStride, const int32_t kiRefStride) {
294 }
295 
296 
297 /////////////////////////
298 // Diamond Search Basics
299 /////////////////////////
WelsMeSadCostSelect(int32_t * iSadCost,const uint16_t * kpMvdCost,int32_t * pBestCost,const int32_t kiDx,const int32_t kiDy,int32_t * pIx,int32_t * pIy)300 bool WelsMeSadCostSelect (int32_t* iSadCost, const uint16_t* kpMvdCost, int32_t* pBestCost, const int32_t kiDx,
301                           const int32_t kiDy, int32_t* pIx, int32_t* pIy) {
302   int32_t iTempSadCost[4];
303   int32_t iInputSadCost = *pBestCost;
304   iTempSadCost[0] = iSadCost[0] + COST_MVD (kpMvdCost, kiDx, kiDy - 4);
305   iTempSadCost[1] = iSadCost[1] + COST_MVD (kpMvdCost, kiDx, kiDy + 4);
306   iTempSadCost[2] = iSadCost[2] + COST_MVD (kpMvdCost, kiDx - 4, kiDy);
307   iTempSadCost[3] = iSadCost[3] + COST_MVD (kpMvdCost, kiDx + 4, kiDy);
308 
309   if (iTempSadCost[0] < *pBestCost) {
310     *pBestCost = iTempSadCost[0];
311     *pIx = 0;
312     *pIy = 1;
313   }
314 
315   if (iTempSadCost[1] < *pBestCost) {
316     *pBestCost = iTempSadCost[1];
317     *pIx = 0;
318     *pIy = -1;
319   }
320 
321   if (iTempSadCost[2] < *pBestCost) {
322     *pBestCost = iTempSadCost[2];
323     *pIx = 1;
324     *pIy = 0;
325   }
326 
327   if (iTempSadCost[3] < *pBestCost) {
328     *pBestCost = iTempSadCost[3];
329     *pIx = -1;
330     *pIy = 0;
331   }
332   return (*pBestCost == iInputSadCost);
333 }
334 
WelsDiamondSearch(SWelsFuncPtrList * pFuncList,SWelsME * pMe,SSlice * pSlice,const int32_t kiStrideEnc,const int32_t kiStrideRef)335 void WelsDiamondSearch (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice,
336                         const int32_t kiStrideEnc,  const int32_t kiStrideRef) {
337   PSample4SadCostFunc      pSad          =  pFuncList->sSampleDealingFuncs.pfSample4Sad[pMe->uiBlockSize];
338 
339   uint8_t* pFref = pMe->pRefMb;
340   uint8_t* const kpEncMb = pMe->pEncMb;
341   const uint16_t* kpMvdCost = pMe->pMvdCost;
342 
343   const SMVUnitXY ksMvStartMin    = pSlice->sMvStartMin;
344   const SMVUnitXY ksMvStartMax    = pSlice->sMvStartMax;
345 
346   int32_t iMvDx = ((pMe->sMv.iMvX) * (1 << 2)) - pMe->sMvp.iMvX;
347   int32_t iMvDy = ((pMe->sMv.iMvY) * (1 << 2)) - pMe->sMvp.iMvY;
348 
349   uint8_t* pRefMb = pFref;
350   int32_t iBestCost = (pMe->uiSadCost);
351 
352   int32_t iTimeThreshold = ITERATIVE_TIMES;
353   ENFORCE_STACK_ALIGN_1D (int32_t, iSadCosts, 4, 16)
354 
355   while (iTimeThreshold--) {
356     pMe->sMv.iMvX = (iMvDx + pMe->sMvp.iMvX) >> 2;
357     pMe->sMv.iMvY = (iMvDy + pMe->sMvp.iMvY) >> 2;
358     if (!CheckMvInRange (pMe->sMv, ksMvStartMin, ksMvStartMax))
359       continue;
360     pSad (kpEncMb, kiStrideEnc, pRefMb, kiStrideRef, &iSadCosts[0]);
361 
362     int32_t iX, iY;
363 
364     const bool kbIsBestCostWorse = WelsMeSadCostSelect (iSadCosts, kpMvdCost, &iBestCost, iMvDx, iMvDy, &iX, &iY);
365     if (kbIsBestCostWorse)
366       break;
367 
368     iMvDx -= (iX * (1 << 2)) ;
369     iMvDy -= (iY * (1 << 2)) ;
370 
371     pRefMb -= (iX + iY * kiStrideRef);
372 
373   }
374 
375   /* integer-pel mv */
376   pMe->sMv.iMvX = (iMvDx + pMe->sMvp.iMvX) >> 2;
377   pMe->sMv.iMvY = (iMvDy + pMe->sMvp.iMvY) >> 2;
378   pMe->uiSatdCost = pMe->uiSadCost = (iBestCost);
379   pMe->pRefMb = pRefMb;
380 }
381 
382 /////////////////////////
383 // DirectionalMv Basics
384 /////////////////////////
CheckDirectionalMv(PSampleSadSatdCostFunc pSad,SWelsME * pMe,const SMVUnitXY ksMinMv,const SMVUnitXY ksMaxMv,const int32_t kiEncStride,const int32_t kiRefStride,int32_t & iBestSadCost)385 bool CheckDirectionalMv (PSampleSadSatdCostFunc pSad, SWelsME* pMe,
386                          const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride,
387                          int32_t& iBestSadCost) {
388   const int16_t kiMvX = pMe->sDirectionalMv.iMvX;
389   const int16_t kiMvY = pMe->sDirectionalMv.iMvY;
390 
391   //Check MV from scrolling detection
392   if ((BLOCK_16x16 != pMe->uiBlockSize) //scrolled_MV with P16x16 is checked SKIP checking function
393       && (kiMvX | kiMvY)   //(0,0) checked in ordinary initial point checking
394       && CheckMvInRange (pMe->sDirectionalMv, ksMinMv, ksMaxMv)) {
395     uint8_t* pRef = &pMe->pColoRefMb[kiMvY * kiRefStride + kiMvX];
396     uint32_t uiCurrentSadCost = pSad (pMe->pEncMb, kiEncStride,  pRef, kiRefStride) +
397                                 COST_MVD (pMe->pMvdCost, (kiMvX * (1 << 2)) - pMe->sMvp.iMvX, (kiMvY * (1 << 2)) - pMe->sMvp.iMvY);
398     if (uiCurrentSadCost < pMe->uiSadCost) {
399       iBestSadCost = uiCurrentSadCost;
400       return true;
401     }
402   }
403   return false;
404 }
405 
CheckDirectionalMvFalse(PSampleSadSatdCostFunc pSad,SWelsME * vpMe,const SMVUnitXY ksMinMv,const SMVUnitXY ksMaxMv,const int32_t kiEncStride,const int32_t kiRefStride,int32_t & iBestSadCost)406 bool CheckDirectionalMvFalse (PSampleSadSatdCostFunc pSad, SWelsME* vpMe,
407                               const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride,
408                               int32_t& iBestSadCost) {
409   return false;
410 }
411 
412 /////////////////////////
413 // Cross Search Basics
414 /////////////////////////
415 #if defined (X86_ASM)
/*!
 * \brief  Build eight consecutive MVD costs.
 *
 * For i = 0..7 writes pMvdCost[i] = pMvdTable[(kiStartMv + i) * 4] + kiFixedCost,
 * i.e. every fourth table entry starting at kiStartMv * 4.
 *
 * \param  pMvdCost     out: eight costs
 * \param  kiStartMv    first MV index; multiplied by 4 to index the table
 * \param  pMvdTable    MVD cost table (indexed relative to this pointer, so the
 *                      offset may be negative)
 * \param  kiFixedCost  constant added to every entry
 *
 * \return NONE
 */
void CalcMvdCostx8_c (uint16_t* pMvdCost, const int32_t kiStartMv, uint16_t* pMvdTable, const uint16_t kiFixedCost) {
  const uint16_t* kpFirstEntry = pMvdTable + (kiStartMv * (1 << 2));

  for (int32_t iIdx = 0; iIdx < 8; ++iIdx) {
    pMvdCost[iIdx] = (uint16_t) (kpFirstEntry[iIdx * 4] + kiFixedCost);
  }
}
VerticalFullSearchUsingSSE41(SWelsFuncPtrList * pFuncList,SWelsME * pMe,uint16_t * pMvdTable,const int32_t kiEncStride,const int32_t kiRefStride,const int16_t kiMinMv,const int16_t kiMaxMv,const bool bVerticalSearch)425 void VerticalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
426                                    uint16_t* pMvdTable,
427                                    const int32_t kiEncStride, const int32_t kiRefStride,
428                                    const int16_t kiMinMv, const int16_t kiMaxMv,
429                                    const bool bVerticalSearch) {
430   uint8_t*  kpEncMb = pMe->pEncMb;
431   const int32_t kiCurMeBlockPix = pMe->iCurMeBlockPixY;
432   uint8_t* pRef         = &pMe->pColoRefMb[kiMinMv * kiRefStride];
433 
434   const int32_t kiCurMeBlockPixY = pMe->iCurMeBlockPixY;
435 
436   int32_t iMinPos = kiCurMeBlockPixY + kiMinMv;
437   int32_t iMaxPos = kiCurMeBlockPixY + kiMaxMv;
438   int32_t iFixedMvd = * (pMvdTable - pMe->sMvp.iMvX);
439   uint16_t* pMvdCost  = & (pMvdTable[ (kiMinMv * (1 << 2)) - pMe->sMvp.iMvY]);
440   int16_t iStartMv = 0;
441 
442 
443   const int32_t kIsBlock16x16 = pMe->uiBlockSize == BLOCK_16x16;
444   const int32_t kiEdgeBlocks = kIsBlock16x16 ? 16 : 8;
445   PSampleSadHor8Func pSampleSadHor8 = pFuncList->pfSampleSadHor8[kIsBlock16x16];
446   PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
447   PTransposeMatrixBlockFunc TransposeMatrixBlock = kIsBlock16x16 ? TransposeMatrixBlock16x16_sse2 :
448       TransposeMatrixBlock8x8_mmx;
449   PTransposeMatrixBlocksFunc TransposeMatrixBlocks = kIsBlock16x16 ? TransposeMatrixBlocksx16_sse2 :
450       TransposeMatrixBlocksx8_mmx;
451 
452   const int32_t kiDiff   = iMaxPos - iMinPos;
453   const int32_t kiRowNum  = WELS_ALIGN ((kiDiff - kiEdgeBlocks + 1), kiEdgeBlocks);
454   const int32_t kiBlocksNum  = kIsBlock16x16 ? (kiRowNum >> 4) : (kiRowNum >> 3);
455   int32_t iCountLoop8  = (kiRowNum - kiEdgeBlocks) >> 3;
456   const int32_t kiRemainingVectors  = kiDiff - (iCountLoop8 << 3);
457   const int32_t kiMatrixStride  = MAX_VERTICAL_MV_RANGE;
458   ENFORCE_STACK_ALIGN_2D (uint8_t, uiMatrixRef, 16, kiMatrixStride, 16);  // transpose matrix result for ref
459   ENFORCE_STACK_ALIGN_2D (uint8_t, uiMatrixEnc, 16, 16, 16);     // transpose matrix result for enc
460   assert (kiRowNum <= kiMatrixStride); // make sure effective memory
461 
462   TransposeMatrixBlock (&uiMatrixEnc[0][0], 16, kpEncMb, kiEncStride);
463   TransposeMatrixBlocks (&uiMatrixRef[0][0], kiMatrixStride, pRef, kiRefStride, kiBlocksNum);
464   ENFORCE_STACK_ALIGN_1D (uint16_t, uiBaseCost, 8, 16);
465   int32_t iTargetPos   = iMinPos;
466   int16_t iBestPos    = pMe->sMv.iMvX;
467   uint32_t uiBestCost   = pMe->uiSadCost;
468   uint32_t uiCostMin;
469   int32_t iIndexMinPos;
470   kpEncMb = &uiMatrixEnc[0][0];
471   pRef = &uiMatrixRef[0][0];
472 
473   while (iCountLoop8 > 0) {
474     CalcMvdCostx8_c (uiBaseCost, iStartMv, pMvdCost, iFixedMvd);
475     uiCostMin = pSampleSadHor8 (kpEncMb, 16, pRef, kiMatrixStride, uiBaseCost, &iIndexMinPos);
476     if (uiCostMin < uiBestCost) {
477       uiBestCost = uiCostMin;
478       iBestPos  = iTargetPos + iIndexMinPos;
479     }
480     iTargetPos += 8;
481     pRef += 8;
482     iStartMv += 8;
483     -- iCountLoop8;
484   }
485   if (kiRemainingVectors > 0) {
486     kpEncMb = pMe->pEncMb;
487     pRef = &pMe->pColoRefMb[ (iTargetPos - kiCurMeBlockPix) * kiRefStride];
488     while (iTargetPos < iMaxPos) {
489       const uint16_t uiMvdCost = pMvdCost[iStartMv * (1 << 2)];
490       uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (iFixedMvd + uiMvdCost);
491       if (uiSadCost < uiBestCost) {
492         uiBestCost = uiSadCost;
493         iBestPos = iTargetPos;
494       }
495       iStartMv++;
496       pRef += kiRefStride;
497       ++iTargetPos;
498     }
499   }
500   if (uiBestCost < pMe->uiSadCost) {
501     SMVUnitXY sBestMv;
502     sBestMv.iMvX = 0;
503     sBestMv.iMvY = iBestPos - kiCurMeBlockPix;
504     UpdateMeResults (sBestMv, uiBestCost, &pMe->pColoRefMb[sBestMv.iMvY * kiRefStride], pMe);
505   }
506 }
507 
HorizontalFullSearchUsingSSE41(SWelsFuncPtrList * pFuncList,SWelsME * pMe,uint16_t * pMvdTable,const int32_t kiEncStride,const int32_t kiRefStride,const int16_t kiMinMv,const int16_t kiMaxMv,const bool bVerticalSearch)508 void HorizontalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
509                                      uint16_t* pMvdTable,
510                                      const int32_t kiEncStride, const int32_t kiRefStride,
511                                      const int16_t kiMinMv, const int16_t kiMaxMv,
512                                      const bool bVerticalSearch) {
513   uint8_t* kpEncMb = pMe->pEncMb;
514 
515   const int32_t iCurMeBlockPixX = pMe->iCurMeBlockPixX;
516   int32_t iMinPos = iCurMeBlockPixX + kiMinMv;
517   int32_t iMaxPos = iCurMeBlockPixX + kiMaxMv;
518   int32_t iFixedMvd = * (pMvdTable - pMe->sMvp.iMvY);
519   uint16_t* pMvdCost  = & (pMvdTable[ (kiMinMv * (1 << 2)) - pMe->sMvp.iMvX]);
520   int16_t iStartMv = 0;
521   uint8_t* pRef         = &pMe->pColoRefMb[kiMinMv];
522   const int32_t kIsBlock16x16 = pMe->uiBlockSize == BLOCK_16x16;
523   PSampleSadHor8Func pSampleSadHor8 = pFuncList->pfSampleSadHor8[kIsBlock16x16];
524   PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
525   ENFORCE_STACK_ALIGN_1D (uint16_t, uiBaseCost, 8, 16);
526   const int32_t kiNumVector = iMaxPos - iMinPos;
527   int32_t iCountLoop8 = kiNumVector >> 3;
528   const int32_t kiRemainingLoop8 = kiNumVector & 7;
529   int32_t iTargetPos   = iMinPos;
530   int16_t iBestPos    = pMe->sMv.iMvX;
531   uint32_t uiBestCost   = pMe->uiSadCost;
532   uint32_t uiCostMin;
533   int32_t iIndexMinPos;
534 
535   while (iCountLoop8 > 0) {
536     CalcMvdCostx8_c (uiBaseCost, iStartMv, pMvdCost, iFixedMvd);
537     uiCostMin = pSampleSadHor8 (kpEncMb, kiEncStride, pRef, kiRefStride, uiBaseCost, &iIndexMinPos);
538     if (uiCostMin < uiBestCost) {
539       uiBestCost = uiCostMin;
540       iBestPos  = iTargetPos + iIndexMinPos;
541     }
542     iTargetPos += 8;
543     pRef += 8;
544     iStartMv += 8;
545     -- iCountLoop8;
546   }
547   if (kiRemainingLoop8 > 0) {
548     while (iTargetPos < iMaxPos) {
549       const uint16_t uiMvdCost = pMvdCost[iStartMv * (1 << 2)];
550       uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (iFixedMvd + uiMvdCost);
551       if (uiSadCost < uiBestCost) {
552         uiBestCost = uiSadCost;
553         iBestPos = iTargetPos;
554       }
555       iStartMv++;
556       ++pRef;
557       ++iTargetPos;
558     }
559   }
560   if (uiBestCost < pMe->uiSadCost) {
561     SMVUnitXY sBestMv;
562     sBestMv.iMvX = iBestPos - iCurMeBlockPixX;
563     sBestMv.iMvY = 0;
564     UpdateMeResults (sBestMv, uiBestCost, &pMe->pColoRefMb[sBestMv.iMvX], pMe);
565   }
566 }
567 #endif
LineFullSearch_c(SWelsFuncPtrList * pFuncList,SWelsME * pMe,uint16_t * pMvdTable,const int32_t kiEncStride,const int32_t kiRefStride,const int16_t iMinMv,const int16_t iMaxMv,const bool bVerticalSearch)568 void LineFullSearch_c (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
569                        uint16_t* pMvdTable,
570                        const int32_t kiEncStride, const int32_t kiRefStride,
571                        const int16_t iMinMv, const int16_t iMaxMv,
572                        const bool bVerticalSearch) {
573   PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
574   const int32_t kiCurMeBlockPixX = pMe->iCurMeBlockPixX;
575   const int32_t kiCurMeBlockPixY = pMe->iCurMeBlockPixY;
576   int32_t iMinPos, iMaxPos;
577   int32_t iFixedMvd;
578   int32_t iCurMeBlockPix;
579   int32_t iStride;
580   uint16_t* pMvdCost;
581 
582   if (bVerticalSearch) {
583     iMinPos = kiCurMeBlockPixY + iMinMv;
584     iMaxPos = kiCurMeBlockPixY + iMaxMv;
585     iFixedMvd = * (pMvdTable - pMe->sMvp.iMvX);
586     iCurMeBlockPix = pMe->iCurMeBlockPixY;
587     iStride = kiRefStride;
588     pMvdCost  = & (pMvdTable[ (iMinMv * (1 << 2)) - pMe->sMvp.iMvY]);
589   } else {
590     iMinPos = kiCurMeBlockPixX + iMinMv;
591     iMaxPos = kiCurMeBlockPixX + iMaxMv;
592     iFixedMvd = * (pMvdTable - pMe->sMvp.iMvY);
593     iCurMeBlockPix = pMe->iCurMeBlockPixX;
594     iStride = 1;
595     pMvdCost  = & (pMvdTable[ (iMinMv * (1 << 2)) - pMe->sMvp.iMvX]);
596   }
597   uint8_t* pRef            = &pMe->pColoRefMb[ iMinMv * iStride];
598   uint32_t uiBestCost    = 0xFFFFFFFF;
599   int32_t iBestPos       = 0;
600 
601   for (int32_t iTargetPos = iMinPos; iTargetPos < iMaxPos; ++ iTargetPos) {
602     uint8_t* const kpEncMb  = pMe->pEncMb;
603     uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (iFixedMvd + *pMvdCost);
604     if (uiSadCost < uiBestCost) {
605       uiBestCost  = uiSadCost;
606       iBestPos  = iTargetPos;
607     }
608     pRef += iStride;
609     pMvdCost += 4;
610   }
611 
612   if (uiBestCost < pMe->uiSadCost) {
613     SMVUnitXY sBestMv;
614     sBestMv.iMvX = bVerticalSearch ? 0 : (iBestPos - iCurMeBlockPix);
615     sBestMv.iMvY = bVerticalSearch ? (iBestPos - iCurMeBlockPix) : 0;
616     UpdateMeResults (sBestMv, uiBestCost, &pMe->pColoRefMb[sBestMv.iMvY * kiRefStride + sBestMv.iMvX], pMe);
617   }
618 }
619 
WelsMotionCrossSearch(SWelsFuncPtrList * pFuncList,SWelsME * pMe,SSlice * pSlice,const int32_t kiEncStride,const int32_t kiRefStride)620 void WelsMotionCrossSearch (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice,
621                             const int32_t kiEncStride,  const int32_t kiRefStride) {
622   PLineFullSearchFunc pfVerticalFullSearchFunc = pFuncList->pfVerticalFullSearch;
623   PLineFullSearchFunc pfHorizontalFullSearchFunc = pFuncList->pfHorizontalFullSearch;
624 
625   //vertical search
626   pfVerticalFullSearchFunc (pFuncList, pMe,
627                             pMe->pMvdCost,
628                             kiEncStride, kiRefStride,
629                             pSlice->sMvStartMin.iMvY,
630                             pSlice->sMvStartMax.iMvY, true);
631 
632   //horizontal search
633   if (pMe->uiSadCost >= pMe->uiSadCostThreshold) {
634     pfHorizontalFullSearchFunc (pFuncList, pMe,
635                                 pMe->pMvdCost,
636                                 kiEncStride, kiRefStride,
637                                 pSlice->sMvStartMin.iMvX,
638                                 pSlice->sMvStartMax.iMvX,
639                                 false);
640   }
641 }
642 
643 
644 /////////////////////////
645 // Feature Search Basics
646 /////////////////////////
647 //memory related
RequestFeatureSearchPreparation(CMemoryAlign * pMa,const int32_t kiFrameWidth,const int32_t kiFrameHeight,const int32_t iNeedFeatureStorage,SFeatureSearchPreparation * pFeatureSearchPreparation)648 int32_t RequestFeatureSearchPreparation (CMemoryAlign* pMa, const int32_t kiFrameWidth,  const int32_t kiFrameHeight,
649     const int32_t iNeedFeatureStorage,
650     SFeatureSearchPreparation* pFeatureSearchPreparation) {
651   const int32_t kiFeatureStrategyIndex = iNeedFeatureStorage >> 16;
652   const bool bFme8x8 = ((iNeedFeatureStorage & 0x0000FF & ME_FME) == ME_FME);
653   const int32_t kiMarginSize = bFme8x8 ? 8 : 16;
654   const int32_t kiFrameSize = (kiFrameWidth - kiMarginSize) * (kiFrameHeight - kiMarginSize);
655   int32_t iListOfFeatureOfBlock;
656 
657   if (0 == kiFeatureStrategyIndex) {
658     iListOfFeatureOfBlock = sizeof (uint16_t) * kiFrameSize;
659   } else {
660     iListOfFeatureOfBlock = sizeof (uint16_t) * kiFrameSize +
661                             (kiFrameWidth - kiMarginSize) * sizeof (uint32_t) + kiFrameWidth * 8 * sizeof (uint8_t);
662   }
663   pFeatureSearchPreparation->pFeatureOfBlock =
664     (uint16_t*)pMa->WelsMallocz (iListOfFeatureOfBlock, "pFeatureOfBlock");
665   WELS_VERIFY_RETURN_IF (ENC_RETURN_MEMALLOCERR, NULL == (pFeatureSearchPreparation->pFeatureOfBlock))
666 
667   pFeatureSearchPreparation->uiFeatureStrategyIndex = kiFeatureStrategyIndex;
668   pFeatureSearchPreparation->bFMESwitchFlag = true;
669   pFeatureSearchPreparation->uiFMEGoodFrameCount = FMESWITCH_DEFAULT_GOODFRAME_NUM;
670   pFeatureSearchPreparation->iHighFreMbCount = 0;
671 
672   return ENC_RETURN_SUCCESS;
673 }
ReleaseFeatureSearchPreparation(CMemoryAlign * pMa,uint16_t * & pFeatureOfBlock)674 int32_t ReleaseFeatureSearchPreparation (CMemoryAlign* pMa, uint16_t*& pFeatureOfBlock) {
675   if (pMa && pFeatureOfBlock) {
676     pMa->WelsFree (pFeatureOfBlock, "pFeatureOfBlock");
677     pFeatureOfBlock = NULL;
678     return ENC_RETURN_SUCCESS;
679   }
680   return ENC_RETURN_UNEXPECTED;
681 }
682 
RequestScreenBlockFeatureStorage(CMemoryAlign * pMa,const int32_t kiFrameWidth,const int32_t kiFrameHeight,const int32_t iNeedFeatureStorage,SScreenBlockFeatureStorage * pScreenBlockFeatureStorage)683 int32_t RequestScreenBlockFeatureStorage (CMemoryAlign* pMa, const int32_t kiFrameWidth,  const int32_t kiFrameHeight,
684     const int32_t iNeedFeatureStorage,
685     SScreenBlockFeatureStorage* pScreenBlockFeatureStorage) {
686 
687   const int32_t kiFeatureStrategyIndex = iNeedFeatureStorage >> 16;
688   const int32_t kiMe8x8FME = iNeedFeatureStorage & 0x0000FF & ME_FME;
689   const int32_t kiMe16x16FME = ((iNeedFeatureStorage & 0x00FF00) >> 8) & ME_FME;
690   if ((kiMe8x8FME == ME_FME) && (kiMe16x16FME == ME_FME)) {
691     return ENC_RETURN_UNSUPPORTED_PARA;
692     //the following memory allocation cannot support when FME at both size
693   }
694 
695   const bool bIsBlock8x8 = (kiMe8x8FME == ME_FME);
696   const int32_t kiMarginSize = bIsBlock8x8 ? 8 : 16;
697   const int32_t kiFrameSize = (kiFrameWidth - kiMarginSize) * (kiFrameHeight - kiMarginSize);
698   const int32_t kiListSize  = (0 == kiFeatureStrategyIndex) ? (bIsBlock8x8 ? LIST_SIZE_SUM_8x8 : LIST_SIZE_SUM_16x16) :
699                               256;
700 
701   pScreenBlockFeatureStorage->pTimesOfFeatureValue = (uint32_t*)pMa->WelsMallocz (kiListSize * sizeof (uint32_t),
702       "pScreenBlockFeatureStorage->pTimesOfFeatureValue");
703   WELS_VERIFY_RETURN_IF (ENC_RETURN_MEMALLOCERR, NULL == pScreenBlockFeatureStorage->pTimesOfFeatureValue)
704 
705   pScreenBlockFeatureStorage->pLocationOfFeature = (uint16_t**)pMa->WelsMallocz (kiListSize * sizeof (uint16_t*),
706       "pScreenBlockFeatureStorage->pLocationOfFeature");
707   WELS_VERIFY_RETURN_IF (ENC_RETURN_MEMALLOCERR, NULL == pScreenBlockFeatureStorage->pLocationOfFeature)
708 
709   pScreenBlockFeatureStorage->pLocationPointer = (uint16_t*)pMa->WelsMallocz (2 * kiFrameSize * sizeof (uint16_t),
710       "pScreenBlockFeatureStorage->pLocationPointer");
711   WELS_VERIFY_RETURN_IF (ENC_RETURN_MEMALLOCERR, NULL == pScreenBlockFeatureStorage->pLocationPointer)
712   //  uint16_t* pFeatureValuePointerList[WELS_MAX (LIST_SIZE_SUM_16x16, LIST_SIZE_MSE_16x16)] = {0};
713   pScreenBlockFeatureStorage->pFeatureValuePointerList = (uint16_t**)pMa->WelsMallocz (WELS_MAX (LIST_SIZE_SUM_16x16,
714       LIST_SIZE_MSE_16x16) * sizeof (uint16_t*),
715       "pScreenBlockFeatureStorage->pFeatureValuePointerList");
716   WELS_VERIFY_RETURN_IF (ENC_RETURN_MEMALLOCERR, NULL == pScreenBlockFeatureStorage->pFeatureValuePointerList)
717 
718   pScreenBlockFeatureStorage->pFeatureOfBlockPointer = NULL;
719   pScreenBlockFeatureStorage->iIs16x16 = !bIsBlock8x8;
720   pScreenBlockFeatureStorage->uiFeatureStrategyIndex = kiFeatureStrategyIndex;
721   pScreenBlockFeatureStorage->iActualListSize = kiListSize;
722   WelsSetMemMultiplebytes_c (pScreenBlockFeatureStorage->uiSadCostThreshold, UINT_MAX, BLOCK_SIZE_ALL, sizeof (uint32_t));
723   pScreenBlockFeatureStorage->bRefBlockFeatureCalculated = false;
724 
725   return ENC_RETURN_SUCCESS;
726 }
ReleaseScreenBlockFeatureStorage(CMemoryAlign * pMa,SScreenBlockFeatureStorage * pScreenBlockFeatureStorage)727 int32_t ReleaseScreenBlockFeatureStorage (CMemoryAlign* pMa, SScreenBlockFeatureStorage* pScreenBlockFeatureStorage) {
728   if (pMa && pScreenBlockFeatureStorage) {
729     if (pScreenBlockFeatureStorage->pTimesOfFeatureValue) {
730       pMa->WelsFree (pScreenBlockFeatureStorage->pTimesOfFeatureValue, "pScreenBlockFeatureStorage->pTimesOfFeatureValue");
731       pScreenBlockFeatureStorage->pTimesOfFeatureValue = NULL;
732     }
733 
734     if (pScreenBlockFeatureStorage->pLocationOfFeature) {
735       pMa->WelsFree (pScreenBlockFeatureStorage->pLocationOfFeature, "pScreenBlockFeatureStorage->pLocationOfFeature");
736       pScreenBlockFeatureStorage->pLocationOfFeature = NULL;
737     }
738 
739     if (pScreenBlockFeatureStorage->pLocationPointer) {
740       pMa->WelsFree (pScreenBlockFeatureStorage->pLocationPointer, "pScreenBlockFeatureStorage->pLocationPointer");
741       pScreenBlockFeatureStorage->pLocationPointer = NULL;
742     }
743 
744     if (pScreenBlockFeatureStorage->pFeatureValuePointerList) {
745       pMa->WelsFree (pScreenBlockFeatureStorage->pFeatureValuePointerList,
746                      "pScreenBlockFeatureStorage->pFeatureValuePointerList");
747       pScreenBlockFeatureStorage->pFeatureValuePointerList = NULL;
748     }
749 
750     return ENC_RETURN_SUCCESS;
751   }
752   return ENC_RETURN_UNEXPECTED;
753 }
754 
755 //preprocess related
/*!
 * \brief  Sum of the 64 pixels of an 8x8 block.
 *
 * \param  pRef         top-left pixel of the block
 * \param  kiRefStride  distance (in bytes) between two consecutive rows
 * \return sum of the pixel values (0 .. 64 * 255)
 */
int32_t SumOf8x8SingleBlock_c (uint8_t* pRef, const int32_t kiRefStride) {
  int32_t iTotal = 0;
  for (int32_t iRow = 0; iRow < 8; ++iRow) {
    const uint8_t* pRow = pRef + iRow * kiRefStride;
    for (int32_t iCol = 0; iCol < 8; ++iCol) {
      iTotal += pRow[iCol];
    }
  }
  return iTotal;
}
/*!
 * \brief  Sum of the 256 pixels of a 16x16 block.
 *
 * \param  pRef         top-left pixel of the block
 * \param  kiRefStride  distance (in bytes) between two consecutive rows
 * \return sum of the pixel values (0 .. 256 * 255)
 */
int32_t SumOf16x16SingleBlock_c (uint8_t* pRef, const int32_t kiRefStride) {
  int32_t iTotal = 0;
  for (int32_t iRow = 0; iRow < 16; ++iRow) {
    const uint8_t* pRow = pRef + iRow * kiRefStride;
    for (int32_t iCol = 0; iCol < 16; ++iCol) {
      iTotal += pRow[iCol];
    }
  }
  return iTotal;
}
776 
SumOf8x8BlockOfFrame_c(uint8_t * pRefPicture,const int32_t kiWidth,const int32_t kiHeight,const int32_t kiRefStride,uint16_t * pFeatureOfBlock,uint32_t pTimesOfFeatureValue[])777 void SumOf8x8BlockOfFrame_c (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
778                              const int32_t kiRefStride,
779                              uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]) {
780   int32_t x, y;
781   uint8_t* pRef;
782   uint16_t* pBuffer;
783   int32_t iSum;
784   for (y = 0; y < kiHeight; y++) {
785     pRef = pRefPicture  + kiRefStride * y;
786     pBuffer  = pFeatureOfBlock + kiWidth * y;
787     for (x = 0; x < kiWidth; x++) {
788       iSum = SumOf8x8SingleBlock_c (pRef + x, kiRefStride);
789 
790       pBuffer[x] = iSum;
791       pTimesOfFeatureValue[iSum]++;
792     }
793   }
794 }
795 
SumOf16x16BlockOfFrame_c(uint8_t * pRefPicture,const int32_t kiWidth,const int32_t kiHeight,const int32_t kiRefStride,uint16_t * pFeatureOfBlock,uint32_t pTimesOfFeatureValue[])796 void SumOf16x16BlockOfFrame_c (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
797                                const int32_t kiRefStride,
798                                uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]) {
799   //TODO: this is similar to SumOf8x8BlockOfFrame_c expect the calling of single block func, refactor-able?
800   int32_t x, y;
801   uint8_t* pRef;
802   uint16_t* pBuffer;
803   int32_t iSum;
804   for (y = 0; y < kiHeight; y++) {
805     pRef = pRefPicture  + kiRefStride * y;
806     pBuffer  = pFeatureOfBlock + kiWidth * y;
807     for (x = 0; x < kiWidth; x++) {
808       iSum = SumOf16x16SingleBlock_c (pRef + x, kiRefStride);
809 
810       pBuffer[x] = iSum;
811       pTimesOfFeatureValue[iSum]++;
812     }
813   }
814 }
815 
/*!
 * \brief  Carve pBuf into consecutive per-feature regions.
 *
 * Entry i of both pLocationOfFeature and pFeatureValuePointerList is set to the start
 * of region i; region i is 2 * pTimesOfFeatureValue[i] uint16_t long.
 *
 * \param  pTimesOfFeatureValue      number of occurrences of each feature value
 * \param  pBuf                      start of the buffer to partition
 * \param  kiListSize                number of feature values
 * \param  pLocationOfFeature        [out] region start per feature value
 * \param  pFeatureValuePointerList  [out] same region starts, second copy
 */
void InitializeHashforFeature_c (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
                                 uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList) {
  uint16_t* pRegionStart = pBuf;
  int32_t iFeature = 0;
  while (iFeature < kiListSize) {
    pFeatureValuePointerList[iFeature] = pRegionStart;
    pLocationOfFeature[iFeature]       = pRegionStart;
    // Two uint16_t slots are reserved per occurrence.
    pRegionStart += pTimesOfFeatureValue[iFeature] * 2u;
    ++iFeature;
  }
}
/*!
 * \brief  Append the position of every block to the list belonging to its feature value.
 *
 * The kiWidth x kiHeight feature map is walked row by row. For each position the pair
 * (x * 4, y * 4) is written at the current write pointer of its feature value, and that
 * pointer in pFeatureValuePointerList is advanced by two entries.
 *
 * \param  pFeatureOfBlock           feature value per position, row-major, kiWidth per row
 * \param  kiWidth                   number of positions per row
 * \param  kiHeight                  number of rows
 * \param  pFeatureValuePointerList  [in,out] write pointer per feature value
 */
void FillQpelLocationByFeatureValue_c (uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight,
                                       uint16_t** pFeatureValuePointerList) {
  for (int32_t iRow = 0; iRow < kiHeight; ++iRow) {
    const uint16_t* pRowFeatures = pFeatureOfBlock + iRow * kiWidth;
    const int32_t kiRowQpel = iRow * 4;
    for (int32_t iCol = 0; iCol < kiWidth; ++iCol) {
      uint16_t** ppWritePos = &pFeatureValuePointerList[pRowFeatures[iCol]];
      uint16_t* pEntry = *ppWritePos;
      pEntry[0] = iCol * 4;
      pEntry[1] = kiRowQpel;
      *ppWritePos = pEntry + 2;
    }
  }
}
842 
CalculateFeatureOfBlock(SWelsFuncPtrList * pFunc,SPicture * pRef,SScreenBlockFeatureStorage * pScreenBlockFeatureStorage)843 bool CalculateFeatureOfBlock (SWelsFuncPtrList* pFunc, SPicture* pRef,
844                               SScreenBlockFeatureStorage* pScreenBlockFeatureStorage) {
845   uint16_t* pFeatureOfBlock = pScreenBlockFeatureStorage->pFeatureOfBlockPointer;
846   uint32_t* pTimesOfFeatureValue = pScreenBlockFeatureStorage->pTimesOfFeatureValue;
847   uint16_t** pLocationOfFeature  = pScreenBlockFeatureStorage->pLocationOfFeature;
848   uint16_t* pBuf = pScreenBlockFeatureStorage->pLocationPointer;
849 
850   if (NULL == pFeatureOfBlock || NULL == pTimesOfFeatureValue || NULL == pLocationOfFeature || NULL == pBuf
851       || NULL == pRef->pData[0]) {
852     return false;
853   }
854 
855   uint8_t* pRefData = pRef->pData[0];
856   const int32_t iRefStride = pRef->iLineSize[0];
857   int32_t iIs16x16 = pScreenBlockFeatureStorage->iIs16x16;
858   const int32_t iEdgeDiscard = (iIs16x16 ? 16 : 8); //this is to save complexity of padding on pRef
859   const int32_t iWidth = pRef->iWidthInPixel - iEdgeDiscard;
860   const int32_t kiHeight = pRef->iHeightInPixel - iEdgeDiscard;
861   const int32_t kiActualListSize = pScreenBlockFeatureStorage->iActualListSize;
862 
863   memset (pTimesOfFeatureValue, 0, sizeof (int32_t)*kiActualListSize);
864   (pFunc->pfCalculateBlockFeatureOfFrame[iIs16x16]) (pRefData, iWidth, kiHeight, iRefStride, pFeatureOfBlock,
865       pTimesOfFeatureValue);
866 
867   //assign pLocationOfFeature pointer
868   pFunc->pfInitializeHashforFeature (pTimesOfFeatureValue, pBuf, kiActualListSize,
869                                      pLocationOfFeature, pScreenBlockFeatureStorage->pFeatureValuePointerList);
870 
871   //assign each pixel's pLocationOfFeature
872   pFunc->pfFillQpelLocationByFeatureValue (pFeatureOfBlock, iWidth, kiHeight,
873       pScreenBlockFeatureStorage->pFeatureValuePointerList);
874   return true;
875 }
876 
PerformFMEPreprocess(SWelsFuncPtrList * pFunc,SPicture * pRef,uint16_t * pFeatureOfBlock,SScreenBlockFeatureStorage * pScreenBlockFeatureStorage)877 void PerformFMEPreprocess (SWelsFuncPtrList* pFunc, SPicture* pRef, uint16_t* pFeatureOfBlock,
878                            SScreenBlockFeatureStorage* pScreenBlockFeatureStorage) {
879   pScreenBlockFeatureStorage->pFeatureOfBlockPointer = pFeatureOfBlock;
880   pScreenBlockFeatureStorage->bRefBlockFeatureCalculated = CalculateFeatureOfBlock (pFunc, pRef,
881       pScreenBlockFeatureStorage);
882 
883   if (pScreenBlockFeatureStorage->bRefBlockFeatureCalculated) {
884     uint32_t uiRefPictureAvgQstepx16 = QStepx16ByQp[WelsMedian (0, pRef->iFrameAverageQp, 51)];
885     uint32_t uiSadCostThreshold16x16 = ((30 * (uiRefPictureAvgQstepx16 + 160)) >> 3);
886     pScreenBlockFeatureStorage->uiSadCostThreshold[BLOCK_16x16] = uiSadCostThreshold16x16;
887     pScreenBlockFeatureStorage->uiSadCostThreshold[BLOCK_8x8] = (uiSadCostThreshold16x16 >> 2);
888     pScreenBlockFeatureStorage->uiSadCostThreshold[BLOCK_16x8]
889       = pScreenBlockFeatureStorage->uiSadCostThreshold[BLOCK_8x16]
890         = pScreenBlockFeatureStorage->uiSadCostThreshold[BLOCK_4x4] = UINT_MAX;
891   }
892 }
893 
894 //search related
SetFeatureSearchIn(SWelsFuncPtrList * pFunc,const SWelsME & sMe,const SSlice * pSlice,SScreenBlockFeatureStorage * pRefFeatureStorage,const int32_t kiEncStride,const int32_t kiRefStride,SFeatureSearchIn * pFeatureSearchIn)895 bool SetFeatureSearchIn (SWelsFuncPtrList* pFunc,  const SWelsME& sMe,
896                          const SSlice* pSlice, SScreenBlockFeatureStorage* pRefFeatureStorage,
897                          const int32_t kiEncStride, const int32_t kiRefStride,
898                          SFeatureSearchIn* pFeatureSearchIn) {
899   pFeatureSearchIn->pSad = pFunc->sSampleDealingFuncs.pfSampleSad[sMe.uiBlockSize];
900   pFeatureSearchIn->iFeatureOfCurrent = pFunc->pfCalculateSingleBlockFeature[BLOCK_16x16 == sMe.uiBlockSize] (sMe.pEncMb,
901                                         kiEncStride);
902 
903   pFeatureSearchIn->pEnc       = sMe.pEncMb;
904   pFeatureSearchIn->pColoRef = sMe.pColoRefMb;
905   pFeatureSearchIn->iEncStride = kiEncStride;
906   pFeatureSearchIn->iRefStride = kiRefStride;
907   pFeatureSearchIn->uiSadCostThresh = sMe.uiSadCostThreshold;
908 
909   pFeatureSearchIn->iCurPixX = sMe.iCurMeBlockPixX;
910   pFeatureSearchIn->iCurPixXQpel = (pFeatureSearchIn->iCurPixX << 2);
911   pFeatureSearchIn->iCurPixY = sMe.iCurMeBlockPixY;
912   pFeatureSearchIn->iCurPixYQpel = (pFeatureSearchIn->iCurPixY << 2);
913 
914   pFeatureSearchIn->pTimesOfFeature = pRefFeatureStorage->pTimesOfFeatureValue;
915   pFeatureSearchIn->pQpelLocationOfFeature = pRefFeatureStorage->pLocationOfFeature;
916   pFeatureSearchIn->pMvdCostX = sMe.pMvdCost - pFeatureSearchIn->iCurPixXQpel - sMe.sMvp.iMvX;
917   pFeatureSearchIn->pMvdCostY = sMe.pMvdCost - pFeatureSearchIn->iCurPixYQpel - sMe.sMvp.iMvY;
918 
919   pFeatureSearchIn->iMinQpelX = pFeatureSearchIn->iCurPixXQpel + ((pSlice->sMvStartMin.iMvX) * (1 << 2));
920   pFeatureSearchIn->iMinQpelY = pFeatureSearchIn->iCurPixYQpel + ((pSlice->sMvStartMin.iMvY) * (1 << 2));
921   pFeatureSearchIn->iMaxQpelX = pFeatureSearchIn->iCurPixXQpel + ((pSlice->sMvStartMax.iMvX) * (1 << 2));
922   pFeatureSearchIn->iMaxQpelY = pFeatureSearchIn->iCurPixYQpel + ((pSlice->sMvStartMax.iMvY) * (1 << 2));
923 
924   if (NULL == pFeatureSearchIn->pSad || NULL == pFeatureSearchIn->pTimesOfFeature
925       || NULL == pFeatureSearchIn->pQpelLocationOfFeature) {
926     return false;
927   }
928   return true;
929 }
SaveFeatureSearchOut(const SMVUnitXY sBestMv,const uint32_t uiBestSadCost,uint8_t * pRef,SFeatureSearchOut * pFeatureSearchOut)930 void SaveFeatureSearchOut (const SMVUnitXY sBestMv, const uint32_t uiBestSadCost, uint8_t* pRef,
931                            SFeatureSearchOut* pFeatureSearchOut) {
932   pFeatureSearchOut->sBestMv = sBestMv;
933   pFeatureSearchOut->uiBestSadCost = uiBestSadCost;
934   pFeatureSearchOut->pBestRef = pRef;
935 }
936 
FeatureSearchOne(SFeatureSearchIn & sFeatureSearchIn,const int32_t iFeatureDifference,const uint32_t kuiExpectedSearchTimes,SFeatureSearchOut * pFeatureSearchOut)937 bool FeatureSearchOne (SFeatureSearchIn& sFeatureSearchIn, const int32_t iFeatureDifference,
938                        const uint32_t kuiExpectedSearchTimes,
939                        SFeatureSearchOut* pFeatureSearchOut) {
940   const int32_t iFeatureOfRef = (sFeatureSearchIn.iFeatureOfCurrent + iFeatureDifference);
941   if (iFeatureOfRef < 0 || iFeatureOfRef >= LIST_SIZE)
942     return true;
943 
944   PSampleSadSatdCostFunc pSad = sFeatureSearchIn.pSad;
945   uint8_t* pEnc =  sFeatureSearchIn.pEnc;
946   uint8_t* pColoRef = sFeatureSearchIn.pColoRef;
947   const int32_t iEncStride =  sFeatureSearchIn.iEncStride;
948   const int32_t iRefStride =  sFeatureSearchIn.iRefStride;
949   const uint16_t uiSadCostThresh = sFeatureSearchIn.uiSadCostThresh;
950 
951   const int32_t iCurPixX = sFeatureSearchIn.iCurPixX;
952   const int32_t iCurPixY = sFeatureSearchIn.iCurPixY;
953   const int32_t iCurPixXQpel = sFeatureSearchIn.iCurPixXQpel;
954   const int32_t iCurPixYQpel = sFeatureSearchIn.iCurPixYQpel;
955 
956   const int32_t iMinQpelX =  sFeatureSearchIn.iMinQpelX;
957   const int32_t iMinQpelY =  sFeatureSearchIn.iMinQpelY;
958   const int32_t iMaxQpelX =  sFeatureSearchIn.iMaxQpelX;
959   const int32_t iMaxQpelY =  sFeatureSearchIn.iMaxQpelY;
960 
961   const int32_t iSearchTimes = WELS_MIN (sFeatureSearchIn.pTimesOfFeature[iFeatureOfRef], kuiExpectedSearchTimes);
962   const int32_t iSearchTimesx2 = (iSearchTimes << 1);
963   const uint16_t* pQpelPosition = sFeatureSearchIn.pQpelLocationOfFeature[iFeatureOfRef];
964 
965   SMVUnitXY sBestMv;
966   uint32_t uiBestCost, uiTmpCost;
967   uint8_t* pBestRef, *pCurRef;
968   int32_t iQpelX, iQpelY;
969   int32_t iIntepelX, iIntepelY;
970   int32_t i;
971 
972   sBestMv.iMvX = pFeatureSearchOut->sBestMv.iMvX;
973   sBestMv.iMvY = pFeatureSearchOut->sBestMv.iMvY;
974   uiBestCost = pFeatureSearchOut->uiBestSadCost;
975   pBestRef = pFeatureSearchOut->pBestRef;
976 
977   for (i = 0; i < iSearchTimesx2; i += 2) {
978     iQpelX = pQpelPosition[i];
979     iQpelY = pQpelPosition[i + 1];
980 
981     if ((iQpelX > iMaxQpelX) || (iQpelX < iMinQpelX)
982         || (iQpelY > iMaxQpelY) || (iQpelY < iMinQpelY)
983         || (iQpelX == iCurPixXQpel) || (iQpelY == iCurPixYQpel))
984       continue;
985 
986     uiTmpCost = sFeatureSearchIn.pMvdCostX[ iQpelX ] + sFeatureSearchIn.pMvdCostY[ iQpelY ];
987     if (uiTmpCost + iFeatureDifference >= uiBestCost)
988       continue;
989 
990     iIntepelX = (iQpelX >> 2) - iCurPixX;
991     iIntepelY = (iQpelY >> 2) - iCurPixY;
992     pCurRef = &pColoRef[iIntepelX + iIntepelY * iRefStride];
993     uiTmpCost += pSad (pEnc, iEncStride, pCurRef, iRefStride);
994     if (uiTmpCost < uiBestCost) {
995       sBestMv.iMvX = iIntepelX;
996       sBestMv.iMvY = iIntepelY;
997       uiBestCost = uiTmpCost;
998       pBestRef = pCurRef;
999 
1000       if (uiBestCost < uiSadCostThresh)
1001         break;
1002     }
1003   }
1004   SaveFeatureSearchOut (sBestMv, uiBestCost, pBestRef, pFeatureSearchOut);
1005   return (i < iSearchTimesx2);
1006 }
1007 
1008 
MotionEstimateFeatureFullSearch(SFeatureSearchIn & sFeatureSearchIn,const uint32_t kuiMaxSearchPoint,SWelsME * pMe)1009 void MotionEstimateFeatureFullSearch (SFeatureSearchIn& sFeatureSearchIn,
1010                                       const uint32_t kuiMaxSearchPoint,
1011                                       SWelsME* pMe) {
1012   SFeatureSearchOut sFeatureSearchOut = { { 0 } };//TODO: this can be refactored and removed
1013   sFeatureSearchOut.uiBestSadCost = pMe->uiSadCost;
1014   sFeatureSearchOut.sBestMv = pMe->sMv;
1015   sFeatureSearchOut.pBestRef = pMe->pRefMb;
1016 
1017   int32_t iFeatureDifference = 0;//TODO: change it according to computational-complexity setting when needed
1018   FeatureSearchOne (sFeatureSearchIn, iFeatureDifference, kuiMaxSearchPoint, &sFeatureSearchOut);
1019   if (sFeatureSearchOut.uiBestSadCost < pMe->uiSadCost) {  //TODO: this may be refactored and removed
1020     UpdateMeResults (sFeatureSearchOut.sBestMv,
1021                      sFeatureSearchOut.uiBestSadCost, sFeatureSearchOut.pBestRef,
1022                      pMe);
1023   }
1024 }
1025 
1026 //switch related
CountFMECostDown(const SDqLayer * pCurLayer)1027 static uint32_t CountFMECostDown (const SDqLayer* pCurLayer) {
1028   uint32_t uiCostDownSum      = 0;
1029   const int32_t kiSliceCount  = GetCurrentSliceNum (pCurLayer);
1030   if (kiSliceCount >= 1) {
1031     int32_t iSliceIndex  = 0;
1032     SSlice* pSlice    = pCurLayer->ppSliceInLayer[iSliceIndex];
1033     while (iSliceIndex < kiSliceCount) {
1034       pSlice        = pCurLayer->ppSliceInLayer[iSliceIndex];
1035       uiCostDownSum += pSlice->uiSliceFMECostDown;
1036       ++ iSliceIndex;
1037     }
1038   }
1039   return uiCostDownSum;
1040 }
#define FMESWITCH_MBAVERCOSTSAVING_THRESHOLD (2) //empirically set.
#define FMESWITCH_GOODFRAMECOUNT_MAX (5) //empirically set.
/*!
 * \brief  Saturating up/down counter of frames on which FME paid off.
 *
 * The counter is incremented (up to FMESWITCH_GOODFRAMECOUNT_MAX) when the average cost
 * saving exceeds FMESWITCH_MBAVERCOSTSAVING_THRESHOLD, and decremented (down to 0)
 * otherwise. Both numbers are empirical and the strategy may change.
 *
 * \param  iAvMBNormalizedRDcostDown  average per-MB cost saving of the frame
 * \param  uiFMEGoodFrameCount        [in,out] counter, kept in [0, FMESWITCH_GOODFRAMECOUNT_MAX]
 */
static void UpdateFMEGoodFrameCount (const uint32_t iAvMBNormalizedRDcostDown, uint8_t& uiFMEGoodFrameCount) {
  const bool bGoodFrame = (iAvMBNormalizedRDcostDown > FMESWITCH_MBAVERCOSTSAVING_THRESHOLD);
  if (bGoodFrame && uiFMEGoodFrameCount < FMESWITCH_GOODFRAMECOUNT_MAX) {
    ++ uiFMEGoodFrameCount;
  } else if (!bGoodFrame && uiFMEGoodFrameCount > 0) {
    -- uiFMEGoodFrameCount;
  }
}
UpdateFMESwitch(SDqLayer * pCurLayer)1054 void UpdateFMESwitch (SDqLayer* pCurLayer) {
1055   const uint32_t iFMECost = CountFMECostDown (pCurLayer);
1056   const uint32_t iAvMBNormalizedRDcostDown  = iFMECost / (pCurLayer->iMbWidth * pCurLayer->iMbHeight);
1057   UpdateFMEGoodFrameCount (iAvMBNormalizedRDcostDown, pCurLayer->pFeatureSearchPreparation->uiFMEGoodFrameCount);
1058 }
UpdateFMESwitchNull(SDqLayer * pCurLayer)1059 void UpdateFMESwitchNull (SDqLayer* pCurLayer) {
1060 }
1061 /////////////////////////
1062 // Search function options
1063 /////////////////////////
WelsDiamondCrossSearch(SWelsFuncPtrList * pFunc,SWelsME * pMe,SSlice * pSlice,const int32_t kiEncStride,const int32_t kiRefStride)1064 void WelsDiamondCrossSearch (SWelsFuncPtrList* pFunc, SWelsME* pMe, SSlice* pSlice, const int32_t kiEncStride,
1065                              const int32_t kiRefStride) {
1066   //  Step 1: diamond search
1067   WelsDiamondSearch (pFunc, pMe, pSlice, kiEncStride, kiRefStride);
1068 
1069   //  Step 2: CROSS search
1070   pMe->uiSadCostThreshold = pMe->pRefFeatureStorage->uiSadCostThreshold[pMe->uiBlockSize];
1071   if (pMe->uiSadCost >= pMe->uiSadCostThreshold) {
1072     WelsMotionCrossSearch (pFunc, pMe, pSlice, kiEncStride, kiRefStride);
1073   }
1074 }
WelsDiamondCrossFeatureSearch(SWelsFuncPtrList * pFunc,SWelsME * pMe,SSlice * pSlice,const int32_t kiEncStride,const int32_t kiRefStride)1075 void WelsDiamondCrossFeatureSearch (SWelsFuncPtrList* pFunc, SWelsME* pMe, SSlice* pSlice, const int32_t kiEncStride,
1076                                     const int32_t kiRefStride) {
1077   //  Step 1: diamond search + cross
1078   WelsDiamondCrossSearch (pFunc, pMe, pSlice, kiEncStride, kiRefStride);
1079 
1080   // Step 2: FeatureSearch
1081   if (pMe->uiSadCost >= pMe->uiSadCostThreshold) {
1082     pSlice->uiSliceFMECostDown += pMe->uiSadCost;
1083 
1084     uint32_t uiMaxSearchPoint = INT_MAX;//TODO: change it according to computational-complexity setting
1085     SFeatureSearchIn sFeatureSearchIn = {0};
1086     if (SetFeatureSearchIn (pFunc, *pMe, pSlice, pMe->pRefFeatureStorage,
1087                             kiEncStride, kiRefStride,
1088                             &sFeatureSearchIn)) {
1089       MotionEstimateFeatureFullSearch (sFeatureSearchIn, uiMaxSearchPoint, pMe);
1090     }
1091     pSlice->uiSliceFMECostDown -= pMe->uiSadCost;
1092   }
1093 }
1094 
1095 
1096 } // namespace WelsEnc
1097 
1098