• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*!
2  * \copy
3  *     Copyright (c)  2009-2013, Cisco Systems
4  *     All rights reserved.
5  *
6  *     Redistribution and use in source and binary forms, with or without
7  *     modification, are permitted provided that the following conditions
8  *     are met:
9  *
10  *        * Redistributions of source code must retain the above copyright
11  *          notice, this list of conditions and the following disclaimer.
12  *
13  *        * Redistributions in binary form must reproduce the above copyright
14  *          notice, this list of conditions and the following disclaimer in
15  *          the documentation and/or other materials provided with the
16  *          distribution.
17  *
18  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21  *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22  *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23  *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24  *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25  *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26  *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28  *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  *     POSSIBILITY OF SUCH DAMAGE.
30  *
31  *
32  * \file  svc motion estimate.h
33  *
34  * \brief  Interfaces introduced in svc mb motion estimation
35  *
36  * \date  08/11/2009 Created
37  *
38  *************************************************************************************
39  */
40 #ifndef SVC_MOTION_ESTIMATE_
41 #define SVC_MOTION_ESTIMATE_
42 
43 #include "typedefs.h"
44 #include "encoder_context.h"
45 #include "wels_func_ptr_def.h"
46 
47 namespace WelsEnc {
// Motion-vector (MV) and motion-vector-difference (MVD) range constants.
48 #define CAMERA_STARTMV_RANGE (64)
49 #define  ITERATIVE_TIMES  (16)
50 #define CAMERA_MV_RANGE (CAMERA_STARTMV_RANGE+ITERATIVE_TIMES)
51 #define CAMERA_MVD_RANGE  ((CAMERA_MV_RANGE+1)<<1) // mvd range = (mv_range + 1) * 2
52 #define  BASE_MV_MB_NMB  ((2*CAMERA_MV_RANGE/MB_WIDTH_LUMA)-1)
53 #define CAMERA_HIGHLAYER_MVD_RANGE (243)// mvd range
54 #define EXPANDED_MV_RANGE (504) // = 512-8 rather than 511: sacrifices some edge points but saves complexity in the assembly routines
55 #define EXPANDED_MVD_RANGE ((504+1)<<1) // (EXPANDED_MV_RANGE + 1) * 2, with the value 504 written out literally
56 
// Motion-estimation method flags. The basic methods occupy distinct bits so
// they can be OR-ed together, as the derived combinations below do.
57 enum {
58 ME_DIA    = 0x01,  // little diamond search
59 ME_CROSS  = 0x02,  // cross search
60 ME_FME    = 0x04,  // FME
61 ME_FULL    = 0x10,  // full search
62 
63 // derived combinations of ME methods
64 ME_DIA_CROSS    = (ME_DIA | ME_CROSS),   // DIA + CROSS (= 0x03)
65 ME_DIA_CROSS_FME  = (ME_DIA_CROSS | ME_FME)  // DIA + CROSS + FME (= 0x07)
66 };
67 
// One 32-bit slot shared by the predicted SAD and a temporary SATD value.
// Both members alias the same storage, so writing one overwrites the other.
68 union SadPredISatdUnit {
69 uint32_t  uiSadPred;
70 uint32_t  uiSatd;    // reuses the uiSadPred storage as temporary SATD data
71 };
// Per-block motion-estimation state: the search inputs, followed by the
// motion vector produced by the search.
72 typedef struct TagWelsME {
73 /* input */
74 uint16_t*          pMvdCost;
75 union SadPredISatdUnit  uSadPredISatd; // predicted SAD; the same storage is reused for temporary SATD data
76 uint32_t
77 uiSadCost;  // used by ME and RC. Kept 32-bit: the max SAD cost is max_delta*size+lambda*mvdsize = 255*256+91*33*2 = 65280 + 6006 = 71286 > (2^16)-1 = 65535
78 uint32_t          uiSatdCost; /* satd + lm * nbits */
79 uint32_t          uiSadCostThreshold;
80 int32_t            iCurMeBlockPixX;
81 int32_t            iCurMeBlockPixY;
82 uint8_t            uiBlockSize;   /* BLOCK_WxH */
83 uint8_t            uiReserved;
84 
85 uint8_t*            pEncMb;
86 uint8_t*            pRefMb;
87 uint8_t*            pColoRefMb;
88 
89 SMVUnitXY          sMvp;
90 SMVUnitXY          sMvBase;
91 SMVUnitXY          sDirectionalMv;
92 
93 SScreenBlockFeatureStorage* pRefFeatureStorage;
94 
95 /* output */
96 SMVUnitXY          sMv;
97 } SWelsME;
98 
// Input bundle for the feature-based search (see SetFeatureSearchIn and
// MotionEstimateFeatureFullSearch, which take this structure).
99 typedef struct TagFeatureSearchIn {
100 PSampleSadSatdCostFunc pSad;
101 
102 uint32_t* pTimesOfFeature;
103 uint16_t** pQpelLocationOfFeature;
104 uint16_t* pMvdCostX;
105 uint16_t* pMvdCostY;
106 
107 uint8_t* pEnc;
108 uint8_t* pColoRef;
109 int32_t iEncStride;
110 int32_t iRefStride;
// NOTE(review): 16-bit here, while SWelsME::uiSadCostThreshold is 32-bit; a copy
// between the two would truncate values above 65535 — confirm the value range.
111 uint16_t uiSadCostThresh;
112 
113 int32_t iFeatureOfCurrent;
114 
115 int32_t iCurPixX;
116 int32_t iCurPixY;
117 int32_t iCurPixXQpel;
118 int32_t iCurPixYQpel;
119 
// Search window bounds; presumably in quarter-pel units, per the names — confirm.
120 int32_t iMinQpelX;
121 int32_t iMinQpelY;
122 int32_t iMaxQpelX;
123 int32_t iMaxQpelY;
124 } SFeatureSearchIn;
125 
// Result bundle of a feature search: best motion vector, its SAD cost, and a
// pointer to the best-matching reference position.
126 typedef struct TagFeatureSearchOut {
127 SMVUnitXY sBestMv;
128 uint32_t uiBestSadCost;
129 uint8_t* pBestRef;
130 } SFeatureSearchOut;
131 
// Sum of the MVD costs looked up in `table` for the x and y components.
// Every macro argument is parenthesized so that expression arguments
// (for example `pTable + iOffset`) expand with the intended precedence.
#define  COST_MVD(table, mx, my)  ((table)[(mx)] + (table)[(my)])
// 52-entry table defined in another translation unit.
// Presumably the quantization step scaled by 16, indexed by QP 0..51 — confirm at the definition.
133 extern const int32_t QStepx16ByQp[52];
134 
135 // Function declarations below
136 
// Initializes the motion-estimation entries of pFuncList.
// Presumably the choice depends on uiCpuFlag and bScreenContent — confirm at the definition.
137 void WelsInitMeFunc (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag, bool bScreenContent);
138 
139 /*!
 * \brief  BL mb motion estimate search
 *
 * \param  pFuncList  function pointer list
 * \param  pLplayer   layer context (SDqLayer)
 * \param  pLpme      Wels me information
 * \param  pLpslice   slice context (SSlice)
 *
 * \return  NONE
 *
 * The Static and Scrolled variants declared below take the same parameters.
 */
147 void WelsMotionEstimateSearch (SWelsFuncPtrList* pFuncList, SDqLayer* pLplayer, SWelsME* pLpme, SSlice* pLpslice);
148 void WelsMotionEstimateSearchStatic (SWelsFuncPtrList* pFuncList, SDqLayer* pLplayer, SWelsME* pLpme, SSlice* pLpslice);
149 void WelsMotionEstimateSearchScrolled (SWelsFuncPtrList* pFuncList, SDqLayer* pLplayer, SWelsME* pLpme, SSlice* pLpslice);
150 /*!
151  * \brief  BL mb motion estimate initial point testing
152  *
153  * \param  enc      Wels encoder context
154  * \param  m          Wels me information
155  * \param  mv_range  search range in motion estimate
156  * \param  point      the best match point in motion estimation
157  *
158  * \return  NONE
159  */
160 
161 
162 /*!
 * \brief  EL mb motion estimate initial point testing
 *
 * \param  pFuncList    function pointer list
 * \param  pMe          Wels me information
 * \param  pSlice       slice context (SSlice)
 * \param  kiStrideEnc  stride of the encode buffer (presumably; confirm at the definition)
 * \param  kiStrideRef  stride of the reference buffer (presumably; confirm at the definition)
 *
 * \return  bool; its meaning is not visible in this header, see the definition
 */
172 
173 bool WelsMotionEstimateInitialPoint (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice,
174                                      const int32_t kiStrideEnc, const int32_t kiStrideRef);
175 
176 /*!
 * \brief  mb iterative motion estimate search
 *
 * \param  pFuncList    function pointer list
 * \param  pMe          Wels me information
 * \param  pSlice       slice context (SSlice)
 * \param  kiEncStride  stride of the encode buffer (presumably; confirm at the definition)
 * \param  kiRefStride  stride of the reference buffer (presumably; confirm at the definition)
 *
 * \return  NONE
 */
185 void WelsDiamondSearch (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice, const int32_t kiEncStride,
186                         const int32_t kiRefStride);
187 
// SAD cost selection helper used by the search routines.
// NOTE(review): pSadCost presumably holds candidate SAD values and pBestCost/pIx/pIy
// look like in-out results; the meaning of the bool return is not visible here —
// confirm against the definition.
188 bool WelsMeSadCostSelect (int32_t* pSadCost, const uint16_t* kpMvdCost, int32_t* pBestCost, const int32_t kiDx,
189                           const int32_t kiDy, int32_t* pIx, int32_t* pIy);
190 
// SATD cost helpers. NotCalculateSatdCost has the same signature as
// CalculateSatdCost; presumably it is the stand-in used when SATD is not wanted — confirm.
191 void CalculateSatdCost (PSampleSadSatdCostFunc pSatd, SWelsME* pMe, const int32_t kiEncStride, const int32_t kiRefStride);
192 void NotCalculateSatdCost (PSampleSadSatdCostFunc pSatd, SWelsME* pMe, const int32_t kiEncStride,
193                            const int32_t kiRefStride);
// Directional-MV checks. iBestSadCost is passed by non-const reference, so the
// callee may update it. CheckDirectionalMvFalse has the same signature as
// CheckDirectionalMv; presumably it is the disabled variant — confirm.
194 bool CheckDirectionalMv (PSampleSadSatdCostFunc pSad, SWelsME* pMe,
195                          const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride,
196                          int32_t& iBestSadCost);
197 bool CheckDirectionalMvFalse (PSampleSadSatdCostFunc pSad, SWelsME* pMe,
198                               const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride,
199                               int32_t& iBestSadCost);
200 
201 // Cross Search Basics
// Plain C++ line search between kiMinMv and kiMaxMv; bVerticalSearch selects the
// direction (presumably vertical when true, horizontal otherwise — confirm).
202 void LineFullSearch_c (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
203                        uint16_t* pMvdTable,
204                        const int32_t kiEncStride, const int32_t kiRefStride,
205                        const int16_t kiMinMv, const int16_t kiMaxMv,
206                        const bool bVerticalSearch);
// SSE4.1 search helpers, declared only when X86_ASM is defined.
207 #ifdef X86_ASM
// C-linkage SAD routines (presumably implemented in assembly — confirm).
208 extern "C"
209 {
210 uint32_t SampleSad8x8Hor8_sse41 (uint8_t*, int32_t, uint8_t*, int32_t, uint16_t*, int32_t*);
211 uint32_t SampleSad16x16Hor8_sse41 (uint8_t*, int32_t, uint8_t*, int32_t, uint16_t*, int32_t*);
212 }
213 
// Both functions below share the parameter list of LineFullSearch_c.
214 void VerticalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
215                                    uint16_t* pMvdTable,
216                                    const int32_t kiEncStride, const int32_t kiRefStride,
217                                    const int16_t kiMinMv, const int16_t kiMaxMv,
218                                    const bool bVerticalSearch);
219 void HorizontalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
220                                      uint16_t* pMvdTable,
221                                      const int32_t kiEncStride, const int32_t kiRefStride,
222                                      const int16_t kiMinMv, const int16_t kiMaxMv,
223                                      const bool bVerticalSearch);
224 #endif
// Cross search, and diamond search combined with cross search (per the names).
// Both take the same parameters as WelsDiamondSearch.
225 void WelsMotionCrossSearch (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice,
226                             const int32_t kiEncStride, const int32_t kiRefStride);
227 void WelsDiamondCrossSearch (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice,
228                              const int32_t kiEncStride, const int32_t kiRefStride);
229 
230 // Feature Search Basics
// List sizes: one slot per possible feature value, 0..max inclusive.
231 #define LIST_SIZE_SUM_16x16 0x0FF01  // 65281 = 256*255+1
232 #define LIST_SIZE_SUM_8x8     0x03FC1  // 16321 = 64*255+1
233 #define LIST_SIZE_MSE_16x16 0x00878  // 2168; feature is (avg+mse)/2, max = (255+16*255)/2
234 
235 #define FME_DEFAULT_FEATURE_INDEX (0)
236 #define FMESWITCH_DEFAULT_GOODFRAME_NUM (2)
237 #define FMESWITCH_MBSAD_THRESHOLD   30 // empirically set; compared against the average MB SAD in CalcFMESwitchFlag
238 
// Plain C++ (_c) versions of the feature-search helpers. The SIMD variants
// declared further down use the same parameter lists.
239 void InitializeHashforFeature_c (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
240                                  uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList);
241 void FillQpelLocationByFeatureValue_c (uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight,
242                                        uint16_t** pFeatureValuePointerList);
// Per the names: pixel sum of a single 8x8 / 16x16 block starting at pRef.
243 int32_t SumOf8x8SingleBlock_c (uint8_t* pRef, const int32_t kiRefStride);
244 int32_t SumOf16x16SingleBlock_c (uint8_t* pRef, const int32_t kiRefStride);
// Frame-wide versions; results go to pFeatureOfBlock and pTimesOfFeatureValue
// (presumably a per-block feature and a per-value occurrence count — confirm).
245 void SumOf8x8BlockOfFrame_c (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
246                              const int32_t kiRefStride,
247                              uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
248 void SumOf16x16BlockOfFrame_c (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
249                                const int32_t kiRefStride,
250                                uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
251 
// x86 (_sse2 / _sse4) variants of the feature-search helpers, declared with
// C linkage and only when X86_ASM is defined. Parameter lists match the _c versions.
252 #ifdef X86_ASM
253 extern "C"
254 {
255 void InitializeHashforFeature_sse2 (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
256                                      uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList);
257 void FillQpelLocationByFeatureValue_sse2 (uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight,
258                                            uint16_t** pFeatureValuePointerList);
259 int32_t SumOf8x8SingleBlock_sse2 (uint8_t* pRef, const int32_t kiRefStride);
260 int32_t SumOf16x16SingleBlock_sse2 (uint8_t* pRef, const int32_t kiRefStride);
261 void SumOf8x8BlockOfFrame_sse2 (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
262                 const int32_t kiRefStride, uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
263 void SumOf16x16BlockOfFrame_sse2 (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
264                 const int32_t kiRefStride, uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
265 void SumOf8x8BlockOfFrame_sse4 (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
266                 const int32_t kiRefStride, uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
267 void SumOf16x16BlockOfFrame_sse4 (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
268                 const int32_t kiRefStride, uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
269 }
270 #endif
// _neon variants of the feature-search helpers, declared with C linkage and
// only when HAVE_NEON is defined. Parameter lists match the _c versions.
271 #ifdef HAVE_NEON
272 extern "C"
273 {
274 void InitializeHashforFeature_neon (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
275                                     uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList);
276 void FillQpelLocationByFeatureValue_neon (uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight,
277                                           uint16_t** pFeatureValuePointerList);
278 int32_t SumOf8x8SingleBlock_neon (uint8_t* pRef, const int32_t kiRefStride);
279 int32_t SumOf16x16SingleBlock_neon (uint8_t* pRef, const int32_t kiRefStride);
280 void SumOf8x8BlockOfFrame_neon (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
281                                 const int32_t kiRefStride,
282                                 uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
283 void SumOf16x16BlockOfFrame_neon (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
284                                   const int32_t kiRefStride,
285                                   uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
286 }
287 #endif
288 
// _AArch64_neon variants of the feature-search helpers, declared with C linkage
// and only when HAVE_NEON_AARCH64 is defined. Parameter lists match the _c versions.
289 #ifdef HAVE_NEON_AARCH64
290 extern "C"
291 {
292 void InitializeHashforFeature_AArch64_neon (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
293                                     uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList);
294 void FillQpelLocationByFeatureValue_AArch64_neon (uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight,
295                                           uint16_t** pFeatureValuePointerList);
296 int32_t SumOf8x8SingleBlock_AArch64_neon (uint8_t* pRef, const int32_t kiRefStride);
297 int32_t SumOf16x16SingleBlock_AArch64_neon (uint8_t* pRef, const int32_t kiRefStride);
298 void SumOf8x8BlockOfFrame_AArch64_neon (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
299                                 const int32_t kiRefStride,
300                                 uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
301 void SumOf16x16BlockOfFrame_AArch64_neon (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
302                                   const int32_t kiRefStride,
303                                   uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
304 }
305 #endif
306 
// _lsx variants, declared with C linkage and only when HAVE_LSX is defined.
// Only the two 8x8 routines are declared with an _lsx suffix in this header.
307 #ifdef HAVE_LSX
308 extern "C"
309 {
310 int32_t SumOf8x8SingleBlock_lsx (uint8_t* pRef, const int32_t kiRefStride);
311 void SumOf8x8BlockOfFrame_lsx (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
312                 const int32_t kiRefStride, uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]);
313 }
314 #endif
315 
// Request/Release pairs for the feature-search buffers, all taking a CMemoryAlign.
// Each returns an int32_t (presumably a status code — confirm at the definitions).
316 int32_t RequestScreenBlockFeatureStorage (CMemoryAlign* pMa, const int32_t kiFrameWidth,  const int32_t kiFrameHeight,
317     const int32_t iNeedFeatureStorage,
318     SScreenBlockFeatureStorage* pScreenBlockFeatureStorage);
319 int32_t ReleaseScreenBlockFeatureStorage (CMemoryAlign* pMa, SScreenBlockFeatureStorage* pScreenBlockFeatureStorage);
320 int32_t RequestFeatureSearchPreparation (CMemoryAlign* pMa, const int32_t kiFrameWidth,  const int32_t kiFrameHeight,
321     const int32_t iNeedFeatureStorage,
322     SFeatureSearchPreparation* pFeatureSearchPreparation);
// Takes the pointer by reference, so the callee can modify the caller's pointer
// (presumably resetting it after the release — confirm).
323 int32_t ReleaseFeatureSearchPreparation (CMemoryAlign* pMa, uint16_t*& pFeatureOfBlock);
324 
// NOTE(review): both macros are already defined, with identical replacement text,
// in the "Feature Search Basics" group earlier in this header. An identical
// redefinition is permitted, but the two copies must be kept in sync.
325 #define FMESWITCH_DEFAULT_GOODFRAME_NUM (2)
326 #define FME_DEFAULT_FEATURE_INDEX (0)
327 
328 
// FME (feature-based ME) entry points.
329 void PerformFMEPreprocess (SWelsFuncPtrList* pFunc, SPicture* pRef, uint16_t* pFeatureOfBlock,
330                            SScreenBlockFeatureStorage* pScreenBlockFeatureStorage);
// Writes through pFeatureSearchIn; sMe and pSlice are taken as const. The meaning
// of the bool return is not visible in this header — confirm at the definition.
331 bool SetFeatureSearchIn (SWelsFuncPtrList* pFunc,  const SWelsME& sMe,
332                          const SSlice* pSlice, SScreenBlockFeatureStorage* pRefFeatureStorage,
333                          const int32_t kiEncStride, const int32_t kiRefStride,
334                          SFeatureSearchIn* pFeatureSearchIn);
335 void MotionEstimateFeatureFullSearch (SFeatureSearchIn& sFeatureSearchIn,
336                                       const uint32_t kuiMaxSearchPoint,
337                                       SWelsME* pMe);
// UpdateFMESwitchNull has the same signature as UpdateFMESwitch; presumably it is
// the no-op stand-in used when the FME switch is disabled — confirm.
338 void UpdateFMESwitch (SDqLayer* pCurLayer);
339 void UpdateFMESwitchNull (SDqLayer* pCurLayer);
340 
341 void WelsDiamondCrossFeatureSearch (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice,
342                                     const int32_t kiEncStride, const int32_t kiRefStride);
343 
344 //inline functions
SetMvWithinIntegerMvRange(const int32_t kiMbWidth,const int32_t kiMbHeight,const int32_t kiMbX,const int32_t kiMbY,const int32_t kiMaxMvRange,SMVUnitXY * pMvMin,SMVUnitXY * pMvMax)345 inline void SetMvWithinIntegerMvRange (const int32_t kiMbWidth, const int32_t kiMbHeight, const int32_t kiMbX,
346                                        const int32_t kiMbY,
347                                        const int32_t kiMaxMvRange,
348                                        SMVUnitXY* pMvMin, SMVUnitXY* pMvMax) {
349 pMvMin->iMvX = WELS_MAX (-1 * ((kiMbX + 1) * (1 << 4)) + INTPEL_NEEDED_MARGIN, -1 * kiMaxMvRange);
350 pMvMin->iMvY = WELS_MAX (-1 * ((kiMbY + 1) * (1 << 4)) + INTPEL_NEEDED_MARGIN, -1 * kiMaxMvRange);
351 pMvMax->iMvX = WELS_MIN (((kiMbWidth - kiMbX) * (1 << 4)) - INTPEL_NEEDED_MARGIN, kiMaxMvRange);
352 pMvMax->iMvY = WELS_MIN (((kiMbHeight - kiMbY) * (1 << 4)) - INTPEL_NEEDED_MARGIN, kiMaxMvRange);
353 }
354 
CheckMvInRange(const SMVUnitXY ksCurrentMv,const SMVUnitXY ksMinMv,const SMVUnitXY ksMaxMv)355 inline bool CheckMvInRange (const SMVUnitXY ksCurrentMv, const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv) {
356 return (CheckInRangeCloseOpen (ksCurrentMv.iMvX, ksMinMv.iMvX, ksMaxMv.iMvX)
357         && CheckInRangeCloseOpen (ksCurrentMv.iMvY, ksMinMv.iMvY, ksMaxMv.iMvY));
358 }
359 //FME switch related
CalcFMESwitchFlag(const uint8_t uiFMEGoodFrameCount,const int32_t iHighFreMbPrecentage,const int32_t iAvgMbSAD,const bool bScrollingDetected)360 inline bool CalcFMESwitchFlag (const uint8_t uiFMEGoodFrameCount, const int32_t iHighFreMbPrecentage,
361                                const int32_t iAvgMbSAD, const bool bScrollingDetected) {
362 return (bScrollingDetected || (uiFMEGoodFrameCount > 0 && iAvgMbSAD > FMESWITCH_MBSAD_THRESHOLD));
363 //TODO: add the logic of iHighFreMbPrecentage
364 //return ( iHighFreMbPrecentage > 2
365 //            && ( bScrollingDetected || iHighFreMbPrecentage >15
366 //            ||( uiFMEGoodFrameCount>0 && iFrameSAD > FMESWITCH_FRAMESAD_THRESHOLD ) ) );
367 }
368 }
369 #endif
370