1 /*!
2 * \copy
3 * Copyright (c) 2009-2013, Cisco Systems
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 *
31 *
32 * \file svc motion estimate.h
33 *
34 * \brief Interfaces introduced in svc mb motion estimation
35 *
36 * \date 08/11/2009 Created
37 *
38 *************************************************************************************
39 */
40 #ifndef SVC_MOTION_ESTIMATE_
41 #define SVC_MOTION_ESTIMATE_
42
43 #include "typedefs.h"
44 #include "encoder_context.h"
45 #include "wels_func_ptr_def.h"
46
47 namespace WelsEnc {
// Motion-vector (MV) and MV-difference (MVD) range constants.
// Each *_MVD_RANGE is derived from its *_MV_RANGE so the pair cannot drift apart.
#define CAMERA_STARTMV_RANGE (64)
#define ITERATIVE_TIMES (16)
#define CAMERA_MV_RANGE (CAMERA_STARTMV_RANGE + ITERATIVE_TIMES)
#define CAMERA_MVD_RANGE ((CAMERA_MV_RANGE + 1) << 1) // mvd = mv_range * 2
// MB_WIDTH_LUMA is not defined in this header; it must come from an include.
#define BASE_MV_MB_NMB ((2 * CAMERA_MV_RANGE / MB_WIDTH_LUMA) - 1)
#define CAMERA_HIGHLAYER_MVD_RANGE (243) // mvd range
// 504 = 512 - 8 rather than 511: sacrifices some edge points but saves complexity in the assembly code.
#define EXPANDED_MV_RANGE (504)
#define EXPANDED_MVD_RANGE ((EXPANDED_MV_RANGE + 1) << 1)
56
// Motion-estimation method flags. Every basic method owns a distinct bit,
// so the combined values further down are plain bitwise ORs of them.
enum {
  ME_DIA   = 1 << 0, // 0x01: little diamond
  ME_CROSS = 1 << 1, // 0x02: cross
  ME_FME   = 1 << 2, // 0x04: FME
  ME_FULL  = 1 << 4, // 0x10: full (bit 3 is not used by any value here)

  // derived combinations of the basic methods
  ME_DIA_CROSS     = ME_DIA | ME_CROSS,          // DIA+CROSS
  ME_DIA_CROSS_FME = ME_DIA | ME_CROSS | ME_FME  // DIA+CROSS+FME
};
67
// One 32-bit slot with two names: both members have the same type and share
// storage, so a value written through one is what is read through the other.
68 union SadPredISatdUnit {
69 uint32_t uiSadPred; // the slot viewed as the SAD prediction
70 uint32_t uiSatd; // the same slot reused as temporary SATD data
71 };
// Motion-estimation working set for one block. The original section markers
// split it into inputs (everything up to pRefFeatureStorage) and one output (sMv).
// Field order and types are left untouched: the layout may be relied on elsewhere.
72 typedef struct TagWelsME {
73 /* input */
74 uint16_t* pMvdCost; // pointer into a table of 16-bit MVD costs (presumably what COST_MVD indexes - confirm)
75 union SadPredISatdUnit uSadPredISatd; // SAD prediction; the same storage is reused as temporary SATD data
76 uint32_t
77 uiSadCost; // used by ME and RC. Needs 32 bits: max SAD = max_delta*size + lambda*mvdsize = 255*256 + 91*33*2 = 65280 + 6006 = 71286 > (2^16)-1 = 65535
78 uint32_t uiSatdCost; /* satd + lm * nbits */
79 uint32_t uiSadCostThreshold; // 32-bit SAD-cost threshold; how it is applied is decided by the search code, not by this header
80 int32_t iCurMeBlockPixX; // X position of the current ME block (pixel units going by the name - confirm)
81 int32_t iCurMeBlockPixY; // Y position of the current ME block (pixel units going by the name - confirm)
82 uint8_t uiBlockSize; /* BLOCK_WxH */
83 uint8_t uiReserved; // not described in this header
84
85 uint8_t* pEncMb; // presumably the block being encoded - confirm
86 uint8_t* pRefMb; // presumably the candidate block in the reference picture - confirm
87 uint8_t* pColoRefMb; // presumably the co-located block in the reference picture - confirm
88
89 SMVUnitXY sMvp; // presumably the MV predictor - confirm
90 SMVUnitXY sMvBase; // role not visible in this header
91 SMVUnitXY sDirectionalMv; // see CheckDirectionalMv for the consumer of a directional MV
92
93 SScreenBlockFeatureStorage* pRefFeatureStorage; // feature storage of the reference, used by the feature search declared further down
94
95 /* output */
96 SMVUnitXY sMv; // resulting motion vector
97 } SWelsME;
98
// Input bundle for the feature-based search (filled by SetFeatureSearchIn,
// consumed by MotionEstimateFeatureFullSearch, both declared in this header).
99 typedef struct TagFeatureSearchIn {
100 PSampleSadSatdCostFunc pSad; // cost function pointer used for SAD
101
102 uint32_t* pTimesOfFeature; // per-feature-value counters (presumably occurrence counts - confirm)
103 uint16_t** pQpelLocationOfFeature; // per-feature-value location lists (quarter-pel units going by the name - confirm)
104 uint16_t* pMvdCostX; // MVD cost table for the X component
105 uint16_t* pMvdCostY; // MVD cost table for the Y component
106
107 uint8_t* pEnc; // pixels of the block being encoded
108 uint8_t* pColoRef; // presumably the co-located position in the reference - confirm
109 int32_t iEncStride; // stride of pEnc
110 int32_t iRefStride; // stride of the reference
111 uint16_t uiSadCostThresh; // NOTE(review): 16-bit here, while SWelsME::uiSadCostThreshold is 32-bit and SAD costs can exceed 65535 - confirm nothing is truncated when this is filled
112
113 int32_t iFeatureOfCurrent; // feature value of the current block
114
115 int32_t iCurPixX; // current position, X
116 int32_t iCurPixY; // current position, Y
117 int32_t iCurPixXQpel; // current position, X (quarter-pel going by the name - confirm)
118 int32_t iCurPixYQpel; // current position, Y (quarter-pel going by the name - confirm)
119
120 int32_t iMinQpelX; // search window bounds; whether each bound is inclusive is not visible here
121 int32_t iMinQpelY;
122 int32_t iMaxQpelX;
123 int32_t iMaxQpelY;
124 } SFeatureSearchIn;
125
// Result bundle of the feature-based search: best MV, its SAD cost, and a
// pointer to the matching reference pixels.
126 typedef struct TagFeatureSearchOut {
127 SMVUnitXY sBestMv; // best motion vector found
128 uint32_t uiBestSadCost; // SAD cost of sBestMv (32-bit, like SWelsME::uiSadCost)
129 uint8_t* pBestRef; // reference pixels that belong to sBestMv
130 } SFeatureSearchOut;
131
/*
 * Sum of two entries of an MVD cost table: table[mx] + table[my].
 * Every argument is parenthesized so that an expression argument such as
 * `pTable + iOffset` subscripts the whole expression. `mx` and `my` are each
 * evaluated once; `table` is evaluated twice, so it must be free of side effects.
 */
#define COST_MVD(table, mx, my) ((table)[(mx)] + (table)[(my)])
// Defined in an implementation file; 52 entries (presumably one per QP value - confirm).
extern const int32_t QStepx16ByQp[52];
134
135 // Function declarations below
136
// Initializer for the motion-estimation entries of pFuncList. The body is not
// visible here; presumably uiCpuFlag selects SIMD variants and bScreenContent
// selects screen-content search functions - confirm against the definition.
137 void WelsInitMeFunc (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag, bool bScreenContent);
138
139 /*!
 * \brief BL mb motion estimate search
 *
 * The three entry points below share one signature.
 *
 * \param pFuncList function pointer list
 * \param pLplayer  DQ layer
 * \param pLpme     Wels me information
 * \param pLpslice  slice
 *
 * \return NONE
 *
 * NOTE(review): how the Static and Scrolled variants differ from the plain
 * search is not visible in this header - see their definitions.
 */
147 void WelsMotionEstimateSearch (SWelsFuncPtrList* pFuncList, SDqLayer* pLplayer, SWelsME* pLpme, SSlice* pLpslice);
148 void WelsMotionEstimateSearchStatic (SWelsFuncPtrList* pFuncList, SDqLayer* pLplayer, SWelsME* pLpme, SSlice* pLpslice);
149 void WelsMotionEstimateSearchScrolled (SWelsFuncPtrList* pFuncList, SDqLayer* pLplayer, SWelsME* pLpme, SSlice* pLpslice);
150 /*!
 * \brief BL mb motion estimate initial point testing
 *
 * NOTE(review): orphaned comment - no declaration follows it in this header.
 * The parameters it used to list (enc, m, mv_range, point) match nothing
 * declared here.
 */
160
161
162 /*!
 * \brief mb motion estimate initial point testing
 *
 * \param pFuncList   function pointer list
 * \param pMe         Wels me information
 * \param pSlice      slice
 * \param kiStrideEnc stride of the encoded picture buffer
 * \param kiStrideRef stride of the reference picture buffer
 *
 * \return bool; the meaning of true/false is not visible in this header
 *         (earlier versions of this comment said NONE) - confirm against the definition
 */
172
173 bool WelsMotionEstimateInitialPoint (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice,
174 const int32_t kiStrideEnc, const int32_t kiStrideRef);
175
176 /*!
 * \brief mb iterative motion estimate search
 *
 * \param pFuncList   function pointer list
 * \param pMe         Wels me information
 * \param pSlice      slice
 * \param kiEncStride stride of the encoded picture buffer
 * \param kiRefStride stride of the reference picture buffer
 *
 * \return NONE
 */
185 void WelsDiamondSearch (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice, const int32_t kiEncStride,
186 const int32_t kiRefStride);
187
// SAD-cost selection helper. pBestCost, pIx and pIy are non-const pointers, so
// the callee can write through them; the meaning of the bool result is not
// visible in this header.
188 bool WelsMeSadCostSelect (int32_t* pSadCost, const uint16_t* kpMvdCost, int32_t* pBestCost, const int32_t kiDx,
189 const int32_t kiDy, int32_t* pIx, int32_t* pIy);
190
// CalculateSatdCost and NotCalculateSatdCost share one signature, so either can
// be installed behind the same function pointer (presumably the "Not" variant
// skips the SATD work - confirm against the definitions).
191 void CalculateSatdCost (PSampleSadSatdCostFunc pSatd, SWelsME* pMe, const int32_t kiEncStride, const int32_t kiRefStride);
192 void NotCalculateSatdCost (PSampleSadSatdCostFunc pSatd, SWelsME* pMe, const int32_t kiEncStride,
193 const int32_t kiRefStride);
// CheckDirectionalMv and CheckDirectionalMvFalse also share one signature.
// iBestSadCost is a non-const reference: an in/out value the callee may update.
194 bool CheckDirectionalMv (PSampleSadSatdCostFunc pSad, SWelsME* pMe,
195 const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride,
196 int32_t& iBestSadCost);
197 bool CheckDirectionalMvFalse (PSampleSadSatdCostFunc pSad, SWelsME* pMe,
198 const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride,
199 int32_t& iBestSadCost);
200
201 // Cross Search Basics
// Plain C line search over [kiMinMv, kiMaxMv] using pMvdTable for MVD costs;
// bVerticalSearch selects the axis. Whether kiMaxMv is inclusive is not visible here.
202 void LineFullSearch_c (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
203 uint16_t* pMvdTable,
204 const int32_t kiEncStride, const int32_t kiRefStride,
205 const int16_t kiMinMv, const int16_t kiMaxMv,
206 const bool bVerticalSearch);
// x86-only declarations, compiled only when X86_ASM is defined.
207 #ifdef X86_ASM
208 extern "C"
209 {
// C-linkage SAD kernels (SSE4.1 going by the suffix). Parameters are unnamed in
// this header; their roles must be read from the implementation.
210 uint32_t SampleSad8x8Hor8_sse41 (uint8_t*, int32_t, uint8_t*, int32_t, uint16_t*, int32_t*);
211 uint32_t SampleSad16x16Hor8_sse41 (uint8_t*, int32_t, uint8_t*, int32_t, uint16_t*, int32_t*);
212 }
213
// Both searches below use exactly the parameter list of LineFullSearch_c.
214 void VerticalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
215 uint16_t* pMvdTable,
216 const int32_t kiEncStride, const int32_t kiRefStride,
217 const int16_t kiMinMv, const int16_t kiMaxMv,
218 const bool bVerticalSearch);
219 void HorizontalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
220 uint16_t* pMvdTable,
221 const int32_t kiEncStride, const int32_t kiRefStride,
222 const int16_t kiMinMv, const int16_t kiMaxMv,
223 const bool bVerticalSearch);
224 #endif
// Cross search and diamond+cross search entry points; both take the same
// parameter list as WelsDiamondSearch.
225 void WelsMotionCrossSearch (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice,
226 const int32_t kiEncStride, const int32_t kiRefStride);
227 void WelsDiamondCrossSearch (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice,
228 const int32_t kiEncStride, const int32_t kiRefStride);
229
// Feature Search Basics
// List sizes, written as the arithmetic the original notes gave for them.
#define LIST_SIZE_SUM_16x16 (256 * 255 + 1) // 0x0FF01
#define LIST_SIZE_SUM_8x8 (64 * 255 + 1) // 0x03FC1
#define LIST_SIZE_MSE_16x16 (0x00878) // (avg+mse)/2, max= (255+16*255)/2

// These two names are defined a second time later in this header; their
// replacement text is kept exactly as it was so both definitions stay identical.
#define FME_DEFAULT_FEATURE_INDEX (0)
#define FMESWITCH_DEFAULT_GOODFRAME_NUM (2)
#define FMESWITCH_MBSAD_THRESHOLD (30) // empirically set.
238
// Plain C versions of the feature-search primitives. The SIMD variants declared
// further down repeat these signatures with a different suffix.
239 void InitializeHashforFeature_c (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
240 uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList);
241 void FillQpelLocationByFeatureValue_c (uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight,
242 uint16_t** pFeatureValuePointerList);
// Single-block sums: take a reference pointer and its stride, return int32_t.
243 int32_t SumOf8x8SingleBlock_c (uint8_t* pRef, const int32_t kiRefStride);
244 int32_t SumOf16x16SingleBlock_c (uint8_t* pRef, const int32_t kiRefStride);
// Whole-frame versions: results go out through pFeatureOfBlock and
// pTimesOfFeatureValue (both writable; exact contents are defined by the implementation).
245 void SumOf8x8BlockOfFrame_c (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
246 const int32_t kiRefStride,
247 uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
248 void SumOf16x16BlockOfFrame_c (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
249 const int32_t kiRefStride,
250 uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
251
// x86 variants with C linkage, available only when X86_ASM is defined.
// Signatures mirror the _c functions; the frame sums additionally come in an _sse4 flavor.
252 #ifdef X86_ASM
253 extern "C"
254 {
255 void InitializeHashforFeature_sse2 (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
256 uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList);
257 void FillQpelLocationByFeatureValue_sse2 (uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight,
258 uint16_t** pFeatureValuePointerList);
259 int32_t SumOf8x8SingleBlock_sse2 (uint8_t* pRef, const int32_t kiRefStride);
260 int32_t SumOf16x16SingleBlock_sse2 (uint8_t* pRef, const int32_t kiRefStride);
261 void SumOf8x8BlockOfFrame_sse2 (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
262 const int32_t kiRefStride, uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
263 void SumOf16x16BlockOfFrame_sse2 (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
264 const int32_t kiRefStride, uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
265 void SumOf8x8BlockOfFrame_sse4 (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
266 const int32_t kiRefStride, uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
267 void SumOf16x16BlockOfFrame_sse4 (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
268 const int32_t kiRefStride, uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
269 }
270 #endif
// NEON variants with C linkage, available only when HAVE_NEON is defined.
// One declaration per _c primitive, same signatures.
271 #ifdef HAVE_NEON
272 extern "C"
273 {
274 void InitializeHashforFeature_neon (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
275 uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList);
276 void FillQpelLocationByFeatureValue_neon (uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight,
277 uint16_t** pFeatureValuePointerList);
278 int32_t SumOf8x8SingleBlock_neon (uint8_t* pRef, const int32_t kiRefStride);
279 int32_t SumOf16x16SingleBlock_neon (uint8_t* pRef, const int32_t kiRefStride);
280 void SumOf8x8BlockOfFrame_neon (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
281 const int32_t kiRefStride,
282 uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
283 void SumOf16x16BlockOfFrame_neon (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
284 const int32_t kiRefStride,
285 uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
286 }
287 #endif
288
// AArch64 NEON variants with C linkage, available only when HAVE_NEON_AARCH64
// is defined. One declaration per _c primitive, same signatures.
289 #ifdef HAVE_NEON_AARCH64
290 extern "C"
291 {
292 void InitializeHashforFeature_AArch64_neon (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
293 uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList);
294 void FillQpelLocationByFeatureValue_AArch64_neon (uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight,
295 uint16_t** pFeatureValuePointerList);
296 int32_t SumOf8x8SingleBlock_AArch64_neon (uint8_t* pRef, const int32_t kiRefStride);
297 int32_t SumOf16x16SingleBlock_AArch64_neon (uint8_t* pRef, const int32_t kiRefStride);
298 void SumOf8x8BlockOfFrame_AArch64_neon (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
299 const int32_t kiRefStride,
300 uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
301 void SumOf16x16BlockOfFrame_AArch64_neon (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
302 const int32_t kiRefStride,
303 uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
304 }
305 #endif
306
// LSX variants with C linkage, available only when HAVE_LSX is defined.
// Only the two 8x8 sum primitives are declared for this target.
307 #ifdef HAVE_LSX
308 extern "C"
309 {
310 int32_t SumOf8x8SingleBlock_lsx (uint8_t* pRef, const int32_t kiRefStride);
311 void SumOf8x8BlockOfFrame_lsx (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
312 const int32_t kiRefStride, uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
313 }
314 #endif
315
// Request/Release pairs for the feature-search buffers. All four take the
// CMemoryAlign allocator object and return int32_t (presumably a status code;
// the values are not visible in this header).
316 int32_t RequestScreenBlockFeatureStorage (CMemoryAlign* pMa, const int32_t kiFrameWidth, const int32_t kiFrameHeight,
317 const int32_t iNeedFeatureStorage,
318 SScreenBlockFeatureStorage* pScreenBlockFeatureStorage);
319 int32_t ReleaseScreenBlockFeatureStorage (CMemoryAlign* pMa, SScreenBlockFeatureStorage* pScreenBlockFeatureStorage);
320 int32_t RequestFeatureSearchPreparation (CMemoryAlign* pMa, const int32_t kiFrameWidth, const int32_t kiFrameHeight,
321 const int32_t iNeedFeatureStorage,
322 SFeatureSearchPreparation* pFeatureSearchPreparation);
// Unlike its Request counterpart, this takes the feature buffer pointer itself,
// by non-const reference, so the callee can change the caller's pointer.
323 int32_t ReleaseFeatureSearchPreparation (CMemoryAlign* pMa, uint16_t*& pFeatureOfBlock);
324
// Both names are also defined earlier in this header, next to the other feature
// search constants. Guarding this second copy keeps it harmless: it only takes
// effect if the earlier definition is missing, and can never conflict with it.
#ifndef FMESWITCH_DEFAULT_GOODFRAME_NUM
#define FMESWITCH_DEFAULT_GOODFRAME_NUM (2)
#endif
#ifndef FME_DEFAULT_FEATURE_INDEX
#define FME_DEFAULT_FEATURE_INDEX (0)
#endif
327
328
// FME (feature-based motion estimation) entry points.
// Preprocessing step over a reference picture; writes go through pFeatureOfBlock
// and pScreenBlockFeatureStorage (details are in the definition).
329 void PerformFMEPreprocess (SWelsFuncPtrList* pFunc, SPicture* pRef, uint16_t* pFeatureOfBlock,
330 SScreenBlockFeatureStorage* pScreenBlockFeatureStorage);
// Fills *pFeatureSearchIn from the ME state and strides; sMe and pSlice are
// read-only here. The meaning of the bool result is not visible in this header.
331 bool SetFeatureSearchIn (SWelsFuncPtrList* pFunc, const SWelsME& sMe,
332 const SSlice* pSlice, SScreenBlockFeatureStorage* pRefFeatureStorage,
333 const int32_t kiEncStride, const int32_t kiRefStride,
334 SFeatureSearchIn* pFeatureSearchIn);
// Feature search over at most kuiMaxSearchPoint candidates (going by the name -
// confirm); the search input is passed by non-const reference and results go to pMe.
335 void MotionEstimateFeatureFullSearch (SFeatureSearchIn& sFeatureSearchIn,
336 const uint32_t kuiMaxSearchPoint,
337 SWelsME* pMe);
// UpdateFMESwitch and UpdateFMESwitchNull share one signature, so either can be
// installed behind the same function pointer.
338 void UpdateFMESwitch (SDqLayer* pCurLayer);
339 void UpdateFMESwitchNull (SDqLayer* pCurLayer);
340
// Same parameter list as WelsDiamondCrossSearch.
341 void WelsDiamondCrossFeatureSearch (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice,
342 const int32_t kiEncStride, const int32_t kiRefStride);
343
344 //inline functions
SetMvWithinIntegerMvRange(const int32_t kiMbWidth,const int32_t kiMbHeight,const int32_t kiMbX,const int32_t kiMbY,const int32_t kiMaxMvRange,SMVUnitXY * pMvMin,SMVUnitXY * pMvMax)345 inline void SetMvWithinIntegerMvRange (const int32_t kiMbWidth, const int32_t kiMbHeight, const int32_t kiMbX,
346 const int32_t kiMbY,
347 const int32_t kiMaxMvRange,
348 SMVUnitXY* pMvMin, SMVUnitXY* pMvMax) {
349 pMvMin->iMvX = WELS_MAX (-1 * ((kiMbX + 1) * (1 << 4)) + INTPEL_NEEDED_MARGIN, -1 * kiMaxMvRange);
350 pMvMin->iMvY = WELS_MAX (-1 * ((kiMbY + 1) * (1 << 4)) + INTPEL_NEEDED_MARGIN, -1 * kiMaxMvRange);
351 pMvMax->iMvX = WELS_MIN (((kiMbWidth - kiMbX) * (1 << 4)) - INTPEL_NEEDED_MARGIN, kiMaxMvRange);
352 pMvMax->iMvY = WELS_MIN (((kiMbHeight - kiMbY) * (1 << 4)) - INTPEL_NEEDED_MARGIN, kiMaxMvRange);
353 }
354
CheckMvInRange(const SMVUnitXY ksCurrentMv,const SMVUnitXY ksMinMv,const SMVUnitXY ksMaxMv)355 inline bool CheckMvInRange (const SMVUnitXY ksCurrentMv, const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv) {
356 return (CheckInRangeCloseOpen (ksCurrentMv.iMvX, ksMinMv.iMvX, ksMaxMv.iMvX)
357 && CheckInRangeCloseOpen (ksCurrentMv.iMvY, ksMinMv.iMvY, ksMaxMv.iMvY));
358 }
359 //FME switch related
CalcFMESwitchFlag(const uint8_t uiFMEGoodFrameCount,const int32_t iHighFreMbPrecentage,const int32_t iAvgMbSAD,const bool bScrollingDetected)360 inline bool CalcFMESwitchFlag (const uint8_t uiFMEGoodFrameCount, const int32_t iHighFreMbPrecentage,
361 const int32_t iAvgMbSAD, const bool bScrollingDetected) {
362 return (bScrollingDetected || (uiFMEGoodFrameCount > 0 && iAvgMbSAD > FMESWITCH_MBSAD_THRESHOLD));
363 //TODO: add the logic of iHighFreMbPrecentage
364 //return ( iHighFreMbPrecentage > 2
365 // && ( bScrollingDetected || iHighFreMbPrecentage >15
366 // ||( uiFMEGoodFrameCount>0 && iFrameSAD > FMESWITCH_FRAMESAD_THRESHOLD ) ) );
367 }
368 }
369 #endif
370