1 /*!
2 * \copy
3 * Copyright (c) 2009-2013, Cisco Systems
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * * Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 *
31 *
32 * \file svc motion estimate.c
33 *
34 * \brief Interfaces introduced in svc mb motion estimation
35 *
36 * \date 08/11/2009 Created
37 *
38 *************************************************************************************
39 */
40
41 #include "cpu_core.h"
42 #include "ls_defines.h"
43 #include "svc_motion_estimate.h"
44 #include "wels_transpose_matrix.h"
45
46 namespace WelsEnc {
47
// QStep scaled by 16 (QStep << 4) so it can be stored as int32_t, indexed by QP 0..51.
// One row per six QPs; each entry is exactly twice the entry six positions before it.
const int32_t QStepx16ByQp[52] = {
  10,   11,   13,   14,   16,   18,     // QP  0 ..  5
  20,   22,   26,   28,   32,   36,     // QP  6 .. 11
  40,   44,   52,   56,   64,   72,     // QP 12 .. 17
  80,   88,   104,  112,  128,  144,    // QP 18 .. 23
  160,  176,  208,  224,  256,  288,    // QP 24 .. 29
  320,  352,  416,  448,  512,  576,    // QP 30 .. 35
  640,  704,  832,  896,  1024, 1152,   // QP 36 .. 41
  1280, 1408, 1664, 1792, 2048, 2304,   // QP 42 .. 47
  2560, 2816, 3328, 3584                // QP 48 .. 51
};
59
UpdateMeResults(const SMVUnitXY ksBestMv,const uint32_t kiBestSadCost,uint8_t * pRef,SWelsME * pMe)60 static inline void UpdateMeResults (const SMVUnitXY ksBestMv, const uint32_t kiBestSadCost, uint8_t* pRef,
61 SWelsME* pMe) {
62 pMe->sMv = ksBestMv;
63 pMe->pRefMb = pRef;
64 pMe->uiSadCost = kiBestSadCost;
65 }
MeEndIntepelSearch(SWelsME * pMe)66 static inline void MeEndIntepelSearch (SWelsME* pMe) {
67 /* -> qpel mv */
68 pMe->sMv.iMvX *= (1 << 2);
69 pMe->sMv.iMvY *= (1 << 2);
70 pMe->uiSatdCost = pMe->uiSadCost;
71 }
72
WelsInitMeFunc(SWelsFuncPtrList * pFuncList,uint32_t uiCpuFlag,bool bScreenContent)73 void WelsInitMeFunc (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag, bool bScreenContent) {
74 pFuncList->pfUpdateFMESwitch = UpdateFMESwitchNull;
75
76 if (!bScreenContent) {
77 pFuncList->pfCheckDirectionalMv = CheckDirectionalMvFalse;
78 pFuncList->pfCalculateBlockFeatureOfFrame[0] =
79 pFuncList->pfCalculateBlockFeatureOfFrame[1] = NULL;
80 pFuncList->pfCalculateSingleBlockFeature[0] =
81 pFuncList->pfCalculateSingleBlockFeature[1] = NULL;
82
83 } else {
84 pFuncList->pfCheckDirectionalMv = CheckDirectionalMv;
85
86 //for cross serarch
87 pFuncList->pfVerticalFullSearch = LineFullSearch_c;
88 pFuncList->pfHorizontalFullSearch = LineFullSearch_c;
89
90 #if defined (X86_ASM)
91 if (uiCpuFlag & WELS_CPU_SSE41) {
92 pFuncList->pfSampleSadHor8[0] = SampleSad8x8Hor8_sse41;
93 pFuncList->pfSampleSadHor8[1] = SampleSad16x16Hor8_sse41;
94 pFuncList->pfVerticalFullSearch = VerticalFullSearchUsingSSE41;
95 pFuncList->pfHorizontalFullSearch = HorizontalFullSearchUsingSSE41;
96 }
97 #endif
98
99 //for feature search
100 pFuncList->pfInitializeHashforFeature = InitializeHashforFeature_c;
101 pFuncList->pfFillQpelLocationByFeatureValue = FillQpelLocationByFeatureValue_c;
102 pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_c;
103 pFuncList->pfCalculateBlockFeatureOfFrame[1] = SumOf16x16BlockOfFrame_c;
104 //TODO: it is possible to differentiate width that is times of 8, so as to accelerate the speed when width is times of 8?
105 pFuncList->pfCalculateSingleBlockFeature[0] = SumOf8x8SingleBlock_c;
106 pFuncList->pfCalculateSingleBlockFeature[1] = SumOf16x16SingleBlock_c;
107 #if defined (X86_ASM)
108 if (uiCpuFlag & WELS_CPU_SSE2) {
109 //for feature search
110 pFuncList->pfInitializeHashforFeature = InitializeHashforFeature_sse2;
111 pFuncList->pfFillQpelLocationByFeatureValue = FillQpelLocationByFeatureValue_sse2;
112 pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_sse2;
113 pFuncList->pfCalculateBlockFeatureOfFrame[1] = SumOf16x16BlockOfFrame_sse2;
114 //TODO: it is possible to differentiate width that is times of 8, so as to accelerate the speed when width is times of 8?
115 pFuncList->pfCalculateSingleBlockFeature[0] = SumOf8x8SingleBlock_sse2;
116 pFuncList->pfCalculateSingleBlockFeature[1] = SumOf16x16SingleBlock_sse2;
117 }
118 if (uiCpuFlag & WELS_CPU_SSE41) {
119 //for feature search
120 pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_sse4;
121 pFuncList->pfCalculateBlockFeatureOfFrame[1] = SumOf16x16BlockOfFrame_sse4;
122 }
123 #endif
124
125 #if defined (HAVE_NEON)
126 if (uiCpuFlag & WELS_CPU_NEON) {
127 //for feature search
128 pFuncList->pfInitializeHashforFeature = InitializeHashforFeature_neon;
129 pFuncList->pfFillQpelLocationByFeatureValue = FillQpelLocationByFeatureValue_neon;
130 pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_neon;
131 pFuncList->pfCalculateBlockFeatureOfFrame[1] = SumOf16x16BlockOfFrame_neon;
132 //TODO: it is possible to differentiate width that is times of 8, so as to accelerate the speed when width is times of 8?
133 pFuncList->pfCalculateSingleBlockFeature[0] = SumOf8x8SingleBlock_neon;
134 pFuncList->pfCalculateSingleBlockFeature[1] = SumOf16x16SingleBlock_neon;
135 }
136 #endif
137
138 #if defined (HAVE_NEON_AARCH64)
139 if (uiCpuFlag & WELS_CPU_NEON) {
140 //for feature search
141 pFuncList->pfInitializeHashforFeature = InitializeHashforFeature_AArch64_neon;
142 pFuncList->pfFillQpelLocationByFeatureValue = FillQpelLocationByFeatureValue_AArch64_neon;
143 pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_AArch64_neon;
144 pFuncList->pfCalculateBlockFeatureOfFrame[1] = SumOf16x16BlockOfFrame_AArch64_neon;
145 //TODO: it is possible to differentiate width that is times of 8, so as to accelerate the speed when width is times of 8?
146 pFuncList->pfCalculateSingleBlockFeature[0] = SumOf8x8SingleBlock_AArch64_neon;
147 pFuncList->pfCalculateSingleBlockFeature[1] = SumOf16x16SingleBlock_AArch64_neon;
148 }
149 #endif
150
151 #if defined (HAVE_LSX)
152 if (uiCpuFlag & WELS_CPU_LSX) {
153 pFuncList->pfCalculateBlockFeatureOfFrame[0] = SumOf8x8BlockOfFrame_lsx;
154 //TODO: it is possible to differentiate width that is times of 8, so as to accelerate the speed when width is times of 8?
155 pFuncList->pfCalculateSingleBlockFeature[0] = SumOf8x8SingleBlock_lsx;
156 }
157 #endif
158 }
159 }
160
161 /*!
162 * \brief BL mb motion estimate search
163 *
164 * \param enc Wels encoder context
165 * \param pMe Wels me information
166 *
167 * \return NONE
168 */
169
WelsMotionEstimateSearch(SWelsFuncPtrList * pFuncList,SDqLayer * pCurDqLayer,SWelsME * pMe,SSlice * pSlice)170 void WelsMotionEstimateSearch (SWelsFuncPtrList* pFuncList, SDqLayer* pCurDqLayer, SWelsME* pMe, SSlice* pSlice) {
171 const int32_t kiStrideEnc = pCurDqLayer->iEncStride[0];
172 const int32_t kiStrideRef = pCurDqLayer->pRefPic->iLineSize[0];
173
174 // Step 1: Initial point prediction
175 if (!WelsMotionEstimateInitialPoint (pFuncList, pMe, pSlice, kiStrideEnc, kiStrideRef)) {
176 pFuncList->pfSearchMethod[pMe->uiBlockSize] (pFuncList, pMe, pSlice, kiStrideEnc, kiStrideRef);
177 MeEndIntepelSearch (pMe);
178 }
179
180 pFuncList->pfCalculateSatd (pFuncList->sSampleDealingFuncs.pfSampleSatd[pMe->uiBlockSize], pMe, kiStrideEnc,
181 kiStrideRef);
182 }
183
WelsMotionEstimateSearchStatic(SWelsFuncPtrList * pFuncList,SDqLayer * pCurDqLayer,SWelsME * pMe,SSlice * pLpslice)184 void WelsMotionEstimateSearchStatic (SWelsFuncPtrList* pFuncList, SDqLayer* pCurDqLayer, SWelsME* pMe,
185 SSlice* pLpslice) {
186 const int32_t kiStrideEnc = pCurDqLayer->iEncStride[0];
187 const int32_t kiStrideRef = pCurDqLayer->pRefPic->iLineSize[0];
188
189 pMe->sMv.iMvX = pMe->sMv.iMvY = 0;
190 pMe->uiSadCost =
191 pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize] (pMe->pEncMb, kiStrideEnc, pMe->pRefMb, kiStrideRef) ;
192 pMe->uiSadCost += COST_MVD (pMe->pMvdCost, - pMe->sMvp.iMvX, - pMe->sMvp.iMvY);
193 MeEndIntepelSearch (pMe);
194 pFuncList->pfCalculateSatd (pFuncList->sSampleDealingFuncs.pfSampleSatd[pMe->uiBlockSize], pMe, kiStrideEnc,
195 kiStrideRef);
196 }
197
WelsMotionEstimateSearchScrolled(SWelsFuncPtrList * pFuncList,SDqLayer * pCurDqLayer,SWelsME * pMe,SSlice * pSlice)198 void WelsMotionEstimateSearchScrolled (SWelsFuncPtrList* pFuncList, SDqLayer* pCurDqLayer, SWelsME* pMe,
199 SSlice* pSlice) {
200 const int32_t kiStrideEnc = pCurDqLayer->iEncStride[0];
201 const int32_t kiStrideRef = pCurDqLayer->pRefPic->iLineSize[0];
202
203 pMe->sMv = pMe->sDirectionalMv;
204 pMe->pRefMb = pMe->pColoRefMb + pMe->sMv.iMvY * kiStrideRef + pMe->sMv.iMvX;
205 pMe->uiSadCost =
206 pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize] (pMe->pEncMb, kiStrideEnc, pMe->pRefMb, kiStrideRef)
207 + COST_MVD (pMe->pMvdCost, (pMe->sMv.iMvX * (1 << 2)) - pMe->sMvp.iMvX, (pMe->sMv.iMvY * (1 << 2)) - pMe->sMvp.iMvY);
208 MeEndIntepelSearch (pMe);
209 pFuncList->pfCalculateSatd (pFuncList->sSampleDealingFuncs.pfSampleSatd[pMe->uiBlockSize], pMe, kiStrideEnc,
210 kiStrideRef);
211 }
212 /*!
213 * \brief EL mb motion estimate initial point testing
214 *
215 * \param pix_pFuncList SSampleDealingFunc
216 * \param pMe Wels me information
217 * \param mv_range search range in motion estimate
218 * \param point the best match point in motion estimation
219 *
220 * \return NONE
221 */
WelsMotionEstimateInitialPoint(SWelsFuncPtrList * pFuncList,SWelsME * pMe,SSlice * pSlice,int32_t iStrideEnc,int32_t iStrideRef)222 bool WelsMotionEstimateInitialPoint (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice, int32_t iStrideEnc,
223 int32_t iStrideRef) {
224 PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
225 const uint16_t* kpMvdCost = pMe->pMvdCost;
226 uint8_t* const kpEncMb = pMe->pEncMb;
227 int16_t iMvc0, iMvc1;
228 int32_t iSadCost;
229 int32_t iBestSadCost;
230 uint8_t* pRefMb;
231 uint8_t* pFref2;
232 uint32_t i;
233 const uint32_t kuiMvcNum = pSlice->uiMvcNum;
234 const SMVUnitXY* kpMvcList = &pSlice->sMvc[0];
235 const SMVUnitXY ksMvStartMin = pSlice->sMvStartMin;
236 const SMVUnitXY ksMvStartMax = pSlice->sMvStartMax;
237 const SMVUnitXY ksMvp = pMe->sMvp;
238 SMVUnitXY sMv;
239
240 // Step 1: Initial point prediction
241 // init with sMvp
242 sMv.iMvX = WELS_CLIP3 ((2 + ksMvp.iMvX) >> 2, ksMvStartMin.iMvX, ksMvStartMax.iMvX);
243 sMv.iMvY = WELS_CLIP3 ((2 + ksMvp.iMvY) >> 2, ksMvStartMin.iMvY, ksMvStartMax.iMvY);
244
245 pRefMb = &pMe->pRefMb[sMv.iMvY * iStrideRef + sMv.iMvX];
246
247 iBestSadCost = pSad (kpEncMb, iStrideEnc, pRefMb, iStrideRef);
248 iBestSadCost += COST_MVD (kpMvdCost, ((sMv.iMvX) * (1 << 2)) - ksMvp.iMvX, ((sMv.iMvY) * (1 << 2)) - ksMvp.iMvY);
249
250 for (i = 0; i < kuiMvcNum; i++) {
251 //clipping here is essential since some pOut-of-range MVC may happen here (i.e., refer to baseMV)
252 iMvc0 = WELS_CLIP3 ((2 + kpMvcList[i].iMvX) >> 2, ksMvStartMin.iMvX, ksMvStartMax.iMvX);
253 iMvc1 = WELS_CLIP3 ((2 + kpMvcList[i].iMvY) >> 2, ksMvStartMin.iMvY, ksMvStartMax.iMvY);
254
255 if (((iMvc0 - sMv.iMvX) || (iMvc1 - sMv.iMvY))) {
256 pFref2 = &pMe->pRefMb[iMvc1 * iStrideRef + iMvc0];
257
258 iSadCost = pSad (kpEncMb, iStrideEnc, pFref2, iStrideRef) +
259 COST_MVD (kpMvdCost, (iMvc0 * (1 << 2)) - ksMvp.iMvX, (iMvc1 * (1 << 2)) - ksMvp.iMvY);
260
261 if (iSadCost < iBestSadCost) {
262 sMv.iMvX = iMvc0;
263 sMv.iMvY = iMvc1;
264 pRefMb = pFref2;
265 iBestSadCost = iSadCost;
266 }
267 }
268 }
269
270 if (pFuncList->pfCheckDirectionalMv
271 (pSad, pMe, ksMvStartMin, ksMvStartMax, iStrideEnc, iStrideRef, iSadCost)) {
272 sMv = pMe->sDirectionalMv;
273 pRefMb = &pMe->pColoRefMb[sMv.iMvY * iStrideRef + sMv.iMvX];
274 iBestSadCost = iSadCost;
275 }
276
277 UpdateMeResults (sMv, iBestSadCost, pRefMb, pMe);
278 if (iBestSadCost < static_cast<int32_t> (pMe->uSadPredISatd.uiSadPred)) {
279 //Initial point early Stop
280 MeEndIntepelSearch (pMe);
281 return true;
282 }
283 return false;
284 }
285
CalculateSatdCost(PSampleSadSatdCostFunc pSatd,SWelsME * pMe,const int32_t kiEncStride,const int32_t kiRefStride)286 void CalculateSatdCost (PSampleSadSatdCostFunc pSatd, SWelsME* pMe,
287 const int32_t kiEncStride, const int32_t kiRefStride) {
288 pMe->uSadPredISatd.uiSatd = pSatd (pMe->pEncMb, kiEncStride, pMe->pRefMb, kiRefStride);
289 pMe->uiSatdCost = pMe->uSadPredISatd.uiSatd + COST_MVD (pMe->pMvdCost, pMe->sMv.iMvX - pMe->sMvp.iMvX,
290 pMe->sMv.iMvY - pMe->sMvp.iMvY);
291 }
NotCalculateSatdCost(PSampleSadSatdCostFunc pSatd,SWelsME * pMe,const int32_t kiEncStride,const int32_t kiRefStride)292 void NotCalculateSatdCost (PSampleSadSatdCostFunc pSatd, SWelsME* pMe,
293 const int32_t kiEncStride, const int32_t kiRefStride) {
294 }
295
296
297 /////////////////////////
298 // Diamond Search Basics
299 /////////////////////////
WelsMeSadCostSelect(int32_t * iSadCost,const uint16_t * kpMvdCost,int32_t * pBestCost,const int32_t kiDx,const int32_t kiDy,int32_t * pIx,int32_t * pIy)300 bool WelsMeSadCostSelect (int32_t* iSadCost, const uint16_t* kpMvdCost, int32_t* pBestCost, const int32_t kiDx,
301 const int32_t kiDy, int32_t* pIx, int32_t* pIy) {
302 int32_t iTempSadCost[4];
303 int32_t iInputSadCost = *pBestCost;
304 iTempSadCost[0] = iSadCost[0] + COST_MVD (kpMvdCost, kiDx, kiDy - 4);
305 iTempSadCost[1] = iSadCost[1] + COST_MVD (kpMvdCost, kiDx, kiDy + 4);
306 iTempSadCost[2] = iSadCost[2] + COST_MVD (kpMvdCost, kiDx - 4, kiDy);
307 iTempSadCost[3] = iSadCost[3] + COST_MVD (kpMvdCost, kiDx + 4, kiDy);
308
309 if (iTempSadCost[0] < *pBestCost) {
310 *pBestCost = iTempSadCost[0];
311 *pIx = 0;
312 *pIy = 1;
313 }
314
315 if (iTempSadCost[1] < *pBestCost) {
316 *pBestCost = iTempSadCost[1];
317 *pIx = 0;
318 *pIy = -1;
319 }
320
321 if (iTempSadCost[2] < *pBestCost) {
322 *pBestCost = iTempSadCost[2];
323 *pIx = 1;
324 *pIy = 0;
325 }
326
327 if (iTempSadCost[3] < *pBestCost) {
328 *pBestCost = iTempSadCost[3];
329 *pIx = -1;
330 *pIy = 0;
331 }
332 return (*pBestCost == iInputSadCost);
333 }
334
WelsDiamondSearch(SWelsFuncPtrList * pFuncList,SWelsME * pMe,SSlice * pSlice,const int32_t kiStrideEnc,const int32_t kiStrideRef)335 void WelsDiamondSearch (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice,
336 const int32_t kiStrideEnc, const int32_t kiStrideRef) {
337 PSample4SadCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSample4Sad[pMe->uiBlockSize];
338
339 uint8_t* pFref = pMe->pRefMb;
340 uint8_t* const kpEncMb = pMe->pEncMb;
341 const uint16_t* kpMvdCost = pMe->pMvdCost;
342
343 const SMVUnitXY ksMvStartMin = pSlice->sMvStartMin;
344 const SMVUnitXY ksMvStartMax = pSlice->sMvStartMax;
345
346 int32_t iMvDx = ((pMe->sMv.iMvX) * (1 << 2)) - pMe->sMvp.iMvX;
347 int32_t iMvDy = ((pMe->sMv.iMvY) * (1 << 2)) - pMe->sMvp.iMvY;
348
349 uint8_t* pRefMb = pFref;
350 int32_t iBestCost = (pMe->uiSadCost);
351
352 int32_t iTimeThreshold = ITERATIVE_TIMES;
353 ENFORCE_STACK_ALIGN_1D (int32_t, iSadCosts, 4, 16)
354
355 while (iTimeThreshold--) {
356 pMe->sMv.iMvX = (iMvDx + pMe->sMvp.iMvX) >> 2;
357 pMe->sMv.iMvY = (iMvDy + pMe->sMvp.iMvY) >> 2;
358 if (!CheckMvInRange (pMe->sMv, ksMvStartMin, ksMvStartMax))
359 continue;
360 pSad (kpEncMb, kiStrideEnc, pRefMb, kiStrideRef, &iSadCosts[0]);
361
362 int32_t iX, iY;
363
364 const bool kbIsBestCostWorse = WelsMeSadCostSelect (iSadCosts, kpMvdCost, &iBestCost, iMvDx, iMvDy, &iX, &iY);
365 if (kbIsBestCostWorse)
366 break;
367
368 iMvDx -= (iX * (1 << 2)) ;
369 iMvDy -= (iY * (1 << 2)) ;
370
371 pRefMb -= (iX + iY * kiStrideRef);
372
373 }
374
375 /* integer-pel mv */
376 pMe->sMv.iMvX = (iMvDx + pMe->sMvp.iMvX) >> 2;
377 pMe->sMv.iMvY = (iMvDy + pMe->sMvp.iMvY) >> 2;
378 pMe->uiSatdCost = pMe->uiSadCost = (iBestCost);
379 pMe->pRefMb = pRefMb;
380 }
381
382 /////////////////////////
383 // DirectionalMv Basics
384 /////////////////////////
CheckDirectionalMv(PSampleSadSatdCostFunc pSad,SWelsME * pMe,const SMVUnitXY ksMinMv,const SMVUnitXY ksMaxMv,const int32_t kiEncStride,const int32_t kiRefStride,int32_t & iBestSadCost)385 bool CheckDirectionalMv (PSampleSadSatdCostFunc pSad, SWelsME* pMe,
386 const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride,
387 int32_t& iBestSadCost) {
388 const int16_t kiMvX = pMe->sDirectionalMv.iMvX;
389 const int16_t kiMvY = pMe->sDirectionalMv.iMvY;
390
391 //Check MV from scrolling detection
392 if ((BLOCK_16x16 != pMe->uiBlockSize) //scrolled_MV with P16x16 is checked SKIP checking function
393 && (kiMvX | kiMvY) //(0,0) checked in ordinary initial point checking
394 && CheckMvInRange (pMe->sDirectionalMv, ksMinMv, ksMaxMv)) {
395 uint8_t* pRef = &pMe->pColoRefMb[kiMvY * kiRefStride + kiMvX];
396 uint32_t uiCurrentSadCost = pSad (pMe->pEncMb, kiEncStride, pRef, kiRefStride) +
397 COST_MVD (pMe->pMvdCost, (kiMvX * (1 << 2)) - pMe->sMvp.iMvX, (kiMvY * (1 << 2)) - pMe->sMvp.iMvY);
398 if (uiCurrentSadCost < pMe->uiSadCost) {
399 iBestSadCost = uiCurrentSadCost;
400 return true;
401 }
402 }
403 return false;
404 }
405
CheckDirectionalMvFalse(PSampleSadSatdCostFunc pSad,SWelsME * vpMe,const SMVUnitXY ksMinMv,const SMVUnitXY ksMaxMv,const int32_t kiEncStride,const int32_t kiRefStride,int32_t & iBestSadCost)406 bool CheckDirectionalMvFalse (PSampleSadSatdCostFunc pSad, SWelsME* vpMe,
407 const SMVUnitXY ksMinMv, const SMVUnitXY ksMaxMv, const int32_t kiEncStride, const int32_t kiRefStride,
408 int32_t& iBestSadCost) {
409 return false;
410 }
411
412 /////////////////////////
413 // Cross Search Basics
414 /////////////////////////
415 #if defined (X86_ASM)
/*!
 * \brief   Build eight base costs: table entries taken every 4 elements, each plus a fixed cost
 *
 * \param   pMvdCost      out: eight costs
 * \param   kiStartMv     first MV; the first table entry read is pMvdTable[kiStartMv * 4]
 * \param   pMvdTable     table the entries are read from
 * \param   kiFixedCost   value added to every entry
 */
void CalcMvdCostx8_c (uint16_t* pMvdCost, const int32_t kiStartMv, uint16_t* pMvdTable, const uint16_t kiFixedCost) {
  const uint16_t* pFirstEntry = pMvdTable + (kiStartMv * (1 << 2));

  for (int32_t iIdx = 0; iIdx < 8; ++iIdx) {
    // consecutive MVs are 4 table entries apart
    pMvdCost[iIdx] = (pFirstEntry[iIdx * 4] + kiFixedCost);
  }
}
VerticalFullSearchUsingSSE41(SWelsFuncPtrList * pFuncList,SWelsME * pMe,uint16_t * pMvdTable,const int32_t kiEncStride,const int32_t kiRefStride,const int16_t kiMinMv,const int16_t kiMaxMv,const bool bVerticalSearch)425 void VerticalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
426 uint16_t* pMvdTable,
427 const int32_t kiEncStride, const int32_t kiRefStride,
428 const int16_t kiMinMv, const int16_t kiMaxMv,
429 const bool bVerticalSearch) {
430 uint8_t* kpEncMb = pMe->pEncMb;
431 const int32_t kiCurMeBlockPix = pMe->iCurMeBlockPixY;
432 uint8_t* pRef = &pMe->pColoRefMb[kiMinMv * kiRefStride];
433
434 const int32_t kiCurMeBlockPixY = pMe->iCurMeBlockPixY;
435
436 int32_t iMinPos = kiCurMeBlockPixY + kiMinMv;
437 int32_t iMaxPos = kiCurMeBlockPixY + kiMaxMv;
438 int32_t iFixedMvd = * (pMvdTable - pMe->sMvp.iMvX);
439 uint16_t* pMvdCost = & (pMvdTable[ (kiMinMv * (1 << 2)) - pMe->sMvp.iMvY]);
440 int16_t iStartMv = 0;
441
442
443 const int32_t kIsBlock16x16 = pMe->uiBlockSize == BLOCK_16x16;
444 const int32_t kiEdgeBlocks = kIsBlock16x16 ? 16 : 8;
445 PSampleSadHor8Func pSampleSadHor8 = pFuncList->pfSampleSadHor8[kIsBlock16x16];
446 PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
447 PTransposeMatrixBlockFunc TransposeMatrixBlock = kIsBlock16x16 ? TransposeMatrixBlock16x16_sse2 :
448 TransposeMatrixBlock8x8_mmx;
449 PTransposeMatrixBlocksFunc TransposeMatrixBlocks = kIsBlock16x16 ? TransposeMatrixBlocksx16_sse2 :
450 TransposeMatrixBlocksx8_mmx;
451
452 const int32_t kiDiff = iMaxPos - iMinPos;
453 const int32_t kiRowNum = WELS_ALIGN ((kiDiff - kiEdgeBlocks + 1), kiEdgeBlocks);
454 const int32_t kiBlocksNum = kIsBlock16x16 ? (kiRowNum >> 4) : (kiRowNum >> 3);
455 int32_t iCountLoop8 = (kiRowNum - kiEdgeBlocks) >> 3;
456 const int32_t kiRemainingVectors = kiDiff - (iCountLoop8 << 3);
457 const int32_t kiMatrixStride = MAX_VERTICAL_MV_RANGE;
458 ENFORCE_STACK_ALIGN_2D (uint8_t, uiMatrixRef, 16, kiMatrixStride, 16); // transpose matrix result for ref
459 ENFORCE_STACK_ALIGN_2D (uint8_t, uiMatrixEnc, 16, 16, 16); // transpose matrix result for enc
460 assert (kiRowNum <= kiMatrixStride); // make sure effective memory
461
462 TransposeMatrixBlock (&uiMatrixEnc[0][0], 16, kpEncMb, kiEncStride);
463 TransposeMatrixBlocks (&uiMatrixRef[0][0], kiMatrixStride, pRef, kiRefStride, kiBlocksNum);
464 ENFORCE_STACK_ALIGN_1D (uint16_t, uiBaseCost, 8, 16);
465 int32_t iTargetPos = iMinPos;
466 int16_t iBestPos = pMe->sMv.iMvX;
467 uint32_t uiBestCost = pMe->uiSadCost;
468 uint32_t uiCostMin;
469 int32_t iIndexMinPos;
470 kpEncMb = &uiMatrixEnc[0][0];
471 pRef = &uiMatrixRef[0][0];
472
473 while (iCountLoop8 > 0) {
474 CalcMvdCostx8_c (uiBaseCost, iStartMv, pMvdCost, iFixedMvd);
475 uiCostMin = pSampleSadHor8 (kpEncMb, 16, pRef, kiMatrixStride, uiBaseCost, &iIndexMinPos);
476 if (uiCostMin < uiBestCost) {
477 uiBestCost = uiCostMin;
478 iBestPos = iTargetPos + iIndexMinPos;
479 }
480 iTargetPos += 8;
481 pRef += 8;
482 iStartMv += 8;
483 -- iCountLoop8;
484 }
485 if (kiRemainingVectors > 0) {
486 kpEncMb = pMe->pEncMb;
487 pRef = &pMe->pColoRefMb[ (iTargetPos - kiCurMeBlockPix) * kiRefStride];
488 while (iTargetPos < iMaxPos) {
489 const uint16_t uiMvdCost = pMvdCost[iStartMv * (1 << 2)];
490 uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (iFixedMvd + uiMvdCost);
491 if (uiSadCost < uiBestCost) {
492 uiBestCost = uiSadCost;
493 iBestPos = iTargetPos;
494 }
495 iStartMv++;
496 pRef += kiRefStride;
497 ++iTargetPos;
498 }
499 }
500 if (uiBestCost < pMe->uiSadCost) {
501 SMVUnitXY sBestMv;
502 sBestMv.iMvX = 0;
503 sBestMv.iMvY = iBestPos - kiCurMeBlockPix;
504 UpdateMeResults (sBestMv, uiBestCost, &pMe->pColoRefMb[sBestMv.iMvY * kiRefStride], pMe);
505 }
506 }
507
HorizontalFullSearchUsingSSE41(SWelsFuncPtrList * pFuncList,SWelsME * pMe,uint16_t * pMvdTable,const int32_t kiEncStride,const int32_t kiRefStride,const int16_t kiMinMv,const int16_t kiMaxMv,const bool bVerticalSearch)508 void HorizontalFullSearchUsingSSE41 (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
509 uint16_t* pMvdTable,
510 const int32_t kiEncStride, const int32_t kiRefStride,
511 const int16_t kiMinMv, const int16_t kiMaxMv,
512 const bool bVerticalSearch) {
513 uint8_t* kpEncMb = pMe->pEncMb;
514
515 const int32_t iCurMeBlockPixX = pMe->iCurMeBlockPixX;
516 int32_t iMinPos = iCurMeBlockPixX + kiMinMv;
517 int32_t iMaxPos = iCurMeBlockPixX + kiMaxMv;
518 int32_t iFixedMvd = * (pMvdTable - pMe->sMvp.iMvY);
519 uint16_t* pMvdCost = & (pMvdTable[ (kiMinMv * (1 << 2)) - pMe->sMvp.iMvX]);
520 int16_t iStartMv = 0;
521 uint8_t* pRef = &pMe->pColoRefMb[kiMinMv];
522 const int32_t kIsBlock16x16 = pMe->uiBlockSize == BLOCK_16x16;
523 PSampleSadHor8Func pSampleSadHor8 = pFuncList->pfSampleSadHor8[kIsBlock16x16];
524 PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
525 ENFORCE_STACK_ALIGN_1D (uint16_t, uiBaseCost, 8, 16);
526 const int32_t kiNumVector = iMaxPos - iMinPos;
527 int32_t iCountLoop8 = kiNumVector >> 3;
528 const int32_t kiRemainingLoop8 = kiNumVector & 7;
529 int32_t iTargetPos = iMinPos;
530 int16_t iBestPos = pMe->sMv.iMvX;
531 uint32_t uiBestCost = pMe->uiSadCost;
532 uint32_t uiCostMin;
533 int32_t iIndexMinPos;
534
535 while (iCountLoop8 > 0) {
536 CalcMvdCostx8_c (uiBaseCost, iStartMv, pMvdCost, iFixedMvd);
537 uiCostMin = pSampleSadHor8 (kpEncMb, kiEncStride, pRef, kiRefStride, uiBaseCost, &iIndexMinPos);
538 if (uiCostMin < uiBestCost) {
539 uiBestCost = uiCostMin;
540 iBestPos = iTargetPos + iIndexMinPos;
541 }
542 iTargetPos += 8;
543 pRef += 8;
544 iStartMv += 8;
545 -- iCountLoop8;
546 }
547 if (kiRemainingLoop8 > 0) {
548 while (iTargetPos < iMaxPos) {
549 const uint16_t uiMvdCost = pMvdCost[iStartMv * (1 << 2)];
550 uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (iFixedMvd + uiMvdCost);
551 if (uiSadCost < uiBestCost) {
552 uiBestCost = uiSadCost;
553 iBestPos = iTargetPos;
554 }
555 iStartMv++;
556 ++pRef;
557 ++iTargetPos;
558 }
559 }
560 if (uiBestCost < pMe->uiSadCost) {
561 SMVUnitXY sBestMv;
562 sBestMv.iMvX = iBestPos - iCurMeBlockPixX;
563 sBestMv.iMvY = 0;
564 UpdateMeResults (sBestMv, uiBestCost, &pMe->pColoRefMb[sBestMv.iMvX], pMe);
565 }
566 }
567 #endif
LineFullSearch_c(SWelsFuncPtrList * pFuncList,SWelsME * pMe,uint16_t * pMvdTable,const int32_t kiEncStride,const int32_t kiRefStride,const int16_t iMinMv,const int16_t iMaxMv,const bool bVerticalSearch)568 void LineFullSearch_c (SWelsFuncPtrList* pFuncList, SWelsME* pMe,
569 uint16_t* pMvdTable,
570 const int32_t kiEncStride, const int32_t kiRefStride,
571 const int16_t iMinMv, const int16_t iMaxMv,
572 const bool bVerticalSearch) {
573 PSampleSadSatdCostFunc pSad = pFuncList->sSampleDealingFuncs.pfSampleSad[pMe->uiBlockSize];
574 const int32_t kiCurMeBlockPixX = pMe->iCurMeBlockPixX;
575 const int32_t kiCurMeBlockPixY = pMe->iCurMeBlockPixY;
576 int32_t iMinPos, iMaxPos;
577 int32_t iFixedMvd;
578 int32_t iCurMeBlockPix;
579 int32_t iStride;
580 uint16_t* pMvdCost;
581
582 if (bVerticalSearch) {
583 iMinPos = kiCurMeBlockPixY + iMinMv;
584 iMaxPos = kiCurMeBlockPixY + iMaxMv;
585 iFixedMvd = * (pMvdTable - pMe->sMvp.iMvX);
586 iCurMeBlockPix = pMe->iCurMeBlockPixY;
587 iStride = kiRefStride;
588 pMvdCost = & (pMvdTable[ (iMinMv * (1 << 2)) - pMe->sMvp.iMvY]);
589 } else {
590 iMinPos = kiCurMeBlockPixX + iMinMv;
591 iMaxPos = kiCurMeBlockPixX + iMaxMv;
592 iFixedMvd = * (pMvdTable - pMe->sMvp.iMvY);
593 iCurMeBlockPix = pMe->iCurMeBlockPixX;
594 iStride = 1;
595 pMvdCost = & (pMvdTable[ (iMinMv * (1 << 2)) - pMe->sMvp.iMvX]);
596 }
597 uint8_t* pRef = &pMe->pColoRefMb[ iMinMv * iStride];
598 uint32_t uiBestCost = 0xFFFFFFFF;
599 int32_t iBestPos = 0;
600
601 for (int32_t iTargetPos = iMinPos; iTargetPos < iMaxPos; ++ iTargetPos) {
602 uint8_t* const kpEncMb = pMe->pEncMb;
603 uint32_t uiSadCost = pSad (kpEncMb, kiEncStride, pRef, kiRefStride) + (iFixedMvd + *pMvdCost);
604 if (uiSadCost < uiBestCost) {
605 uiBestCost = uiSadCost;
606 iBestPos = iTargetPos;
607 }
608 pRef += iStride;
609 pMvdCost += 4;
610 }
611
612 if (uiBestCost < pMe->uiSadCost) {
613 SMVUnitXY sBestMv;
614 sBestMv.iMvX = bVerticalSearch ? 0 : (iBestPos - iCurMeBlockPix);
615 sBestMv.iMvY = bVerticalSearch ? (iBestPos - iCurMeBlockPix) : 0;
616 UpdateMeResults (sBestMv, uiBestCost, &pMe->pColoRefMb[sBestMv.iMvY * kiRefStride + sBestMv.iMvX], pMe);
617 }
618 }
619
WelsMotionCrossSearch(SWelsFuncPtrList * pFuncList,SWelsME * pMe,SSlice * pSlice,const int32_t kiEncStride,const int32_t kiRefStride)620 void WelsMotionCrossSearch (SWelsFuncPtrList* pFuncList, SWelsME* pMe, SSlice* pSlice,
621 const int32_t kiEncStride, const int32_t kiRefStride) {
622 PLineFullSearchFunc pfVerticalFullSearchFunc = pFuncList->pfVerticalFullSearch;
623 PLineFullSearchFunc pfHorizontalFullSearchFunc = pFuncList->pfHorizontalFullSearch;
624
625 //vertical search
626 pfVerticalFullSearchFunc (pFuncList, pMe,
627 pMe->pMvdCost,
628 kiEncStride, kiRefStride,
629 pSlice->sMvStartMin.iMvY,
630 pSlice->sMvStartMax.iMvY, true);
631
632 //horizontal search
633 if (pMe->uiSadCost >= pMe->uiSadCostThreshold) {
634 pfHorizontalFullSearchFunc (pFuncList, pMe,
635 pMe->pMvdCost,
636 kiEncStride, kiRefStride,
637 pSlice->sMvStartMin.iMvX,
638 pSlice->sMvStartMax.iMvX,
639 false);
640 }
641 }
642
643
644 /////////////////////////
645 // Feature Search Basics
646 /////////////////////////
647 //memory related
RequestFeatureSearchPreparation(CMemoryAlign * pMa,const int32_t kiFrameWidth,const int32_t kiFrameHeight,const int32_t iNeedFeatureStorage,SFeatureSearchPreparation * pFeatureSearchPreparation)648 int32_t RequestFeatureSearchPreparation (CMemoryAlign* pMa, const int32_t kiFrameWidth, const int32_t kiFrameHeight,
649 const int32_t iNeedFeatureStorage,
650 SFeatureSearchPreparation* pFeatureSearchPreparation) {
651 const int32_t kiFeatureStrategyIndex = iNeedFeatureStorage >> 16;
652 const bool bFme8x8 = ((iNeedFeatureStorage & 0x0000FF & ME_FME) == ME_FME);
653 const int32_t kiMarginSize = bFme8x8 ? 8 : 16;
654 const int32_t kiFrameSize = (kiFrameWidth - kiMarginSize) * (kiFrameHeight - kiMarginSize);
655 int32_t iListOfFeatureOfBlock;
656
657 if (0 == kiFeatureStrategyIndex) {
658 iListOfFeatureOfBlock = sizeof (uint16_t) * kiFrameSize;
659 } else {
660 iListOfFeatureOfBlock = sizeof (uint16_t) * kiFrameSize +
661 (kiFrameWidth - kiMarginSize) * sizeof (uint32_t) + kiFrameWidth * 8 * sizeof (uint8_t);
662 }
663 pFeatureSearchPreparation->pFeatureOfBlock =
664 (uint16_t*)pMa->WelsMallocz (iListOfFeatureOfBlock, "pFeatureOfBlock");
665 WELS_VERIFY_RETURN_IF (ENC_RETURN_MEMALLOCERR, NULL == (pFeatureSearchPreparation->pFeatureOfBlock))
666
667 pFeatureSearchPreparation->uiFeatureStrategyIndex = kiFeatureStrategyIndex;
668 pFeatureSearchPreparation->bFMESwitchFlag = true;
669 pFeatureSearchPreparation->uiFMEGoodFrameCount = FMESWITCH_DEFAULT_GOODFRAME_NUM;
670 pFeatureSearchPreparation->iHighFreMbCount = 0;
671
672 return ENC_RETURN_SUCCESS;
673 }
ReleaseFeatureSearchPreparation(CMemoryAlign * pMa,uint16_t * & pFeatureOfBlock)674 int32_t ReleaseFeatureSearchPreparation (CMemoryAlign* pMa, uint16_t*& pFeatureOfBlock) {
675 if (pMa && pFeatureOfBlock) {
676 pMa->WelsFree (pFeatureOfBlock, "pFeatureOfBlock");
677 pFeatureOfBlock = NULL;
678 return ENC_RETURN_SUCCESS;
679 }
680 return ENC_RETURN_UNEXPECTED;
681 }
682
RequestScreenBlockFeatureStorage(CMemoryAlign * pMa,const int32_t kiFrameWidth,const int32_t kiFrameHeight,const int32_t iNeedFeatureStorage,SScreenBlockFeatureStorage * pScreenBlockFeatureStorage)683 int32_t RequestScreenBlockFeatureStorage (CMemoryAlign* pMa, const int32_t kiFrameWidth, const int32_t kiFrameHeight,
684 const int32_t iNeedFeatureStorage,
685 SScreenBlockFeatureStorage* pScreenBlockFeatureStorage) {
686
687 const int32_t kiFeatureStrategyIndex = iNeedFeatureStorage >> 16;
688 const int32_t kiMe8x8FME = iNeedFeatureStorage & 0x0000FF & ME_FME;
689 const int32_t kiMe16x16FME = ((iNeedFeatureStorage & 0x00FF00) >> 8) & ME_FME;
690 if ((kiMe8x8FME == ME_FME) && (kiMe16x16FME == ME_FME)) {
691 return ENC_RETURN_UNSUPPORTED_PARA;
692 //the following memory allocation cannot support when FME at both size
693 }
694
695 const bool bIsBlock8x8 = (kiMe8x8FME == ME_FME);
696 const int32_t kiMarginSize = bIsBlock8x8 ? 8 : 16;
697 const int32_t kiFrameSize = (kiFrameWidth - kiMarginSize) * (kiFrameHeight - kiMarginSize);
698 const int32_t kiListSize = (0 == kiFeatureStrategyIndex) ? (bIsBlock8x8 ? LIST_SIZE_SUM_8x8 : LIST_SIZE_SUM_16x16) :
699 256;
700
701 pScreenBlockFeatureStorage->pTimesOfFeatureValue = (uint32_t*)pMa->WelsMallocz (kiListSize * sizeof (uint32_t),
702 "pScreenBlockFeatureStorage->pTimesOfFeatureValue");
703 WELS_VERIFY_RETURN_IF (ENC_RETURN_MEMALLOCERR, NULL == pScreenBlockFeatureStorage->pTimesOfFeatureValue)
704
705 pScreenBlockFeatureStorage->pLocationOfFeature = (uint16_t**)pMa->WelsMallocz (kiListSize * sizeof (uint16_t*),
706 "pScreenBlockFeatureStorage->pLocationOfFeature");
707 WELS_VERIFY_RETURN_IF (ENC_RETURN_MEMALLOCERR, NULL == pScreenBlockFeatureStorage->pLocationOfFeature)
708
709 pScreenBlockFeatureStorage->pLocationPointer = (uint16_t*)pMa->WelsMallocz (2 * kiFrameSize * sizeof (uint16_t),
710 "pScreenBlockFeatureStorage->pLocationPointer");
711 WELS_VERIFY_RETURN_IF (ENC_RETURN_MEMALLOCERR, NULL == pScreenBlockFeatureStorage->pLocationPointer)
712 // uint16_t* pFeatureValuePointerList[WELS_MAX (LIST_SIZE_SUM_16x16, LIST_SIZE_MSE_16x16)] = {0};
713 pScreenBlockFeatureStorage->pFeatureValuePointerList = (uint16_t**)pMa->WelsMallocz (WELS_MAX (LIST_SIZE_SUM_16x16,
714 LIST_SIZE_MSE_16x16) * sizeof (uint16_t*),
715 "pScreenBlockFeatureStorage->pFeatureValuePointerList");
716 WELS_VERIFY_RETURN_IF (ENC_RETURN_MEMALLOCERR, NULL == pScreenBlockFeatureStorage->pFeatureValuePointerList)
717
718 pScreenBlockFeatureStorage->pFeatureOfBlockPointer = NULL;
719 pScreenBlockFeatureStorage->iIs16x16 = !bIsBlock8x8;
720 pScreenBlockFeatureStorage->uiFeatureStrategyIndex = kiFeatureStrategyIndex;
721 pScreenBlockFeatureStorage->iActualListSize = kiListSize;
722 WelsSetMemMultiplebytes_c (pScreenBlockFeatureStorage->uiSadCostThreshold, UINT_MAX, BLOCK_SIZE_ALL, sizeof (uint32_t));
723 pScreenBlockFeatureStorage->bRefBlockFeatureCalculated = false;
724
725 return ENC_RETURN_SUCCESS;
726 }
ReleaseScreenBlockFeatureStorage(CMemoryAlign * pMa,SScreenBlockFeatureStorage * pScreenBlockFeatureStorage)727 int32_t ReleaseScreenBlockFeatureStorage (CMemoryAlign* pMa, SScreenBlockFeatureStorage* pScreenBlockFeatureStorage) {
728 if (pMa && pScreenBlockFeatureStorage) {
729 if (pScreenBlockFeatureStorage->pTimesOfFeatureValue) {
730 pMa->WelsFree (pScreenBlockFeatureStorage->pTimesOfFeatureValue, "pScreenBlockFeatureStorage->pTimesOfFeatureValue");
731 pScreenBlockFeatureStorage->pTimesOfFeatureValue = NULL;
732 }
733
734 if (pScreenBlockFeatureStorage->pLocationOfFeature) {
735 pMa->WelsFree (pScreenBlockFeatureStorage->pLocationOfFeature, "pScreenBlockFeatureStorage->pLocationOfFeature");
736 pScreenBlockFeatureStorage->pLocationOfFeature = NULL;
737 }
738
739 if (pScreenBlockFeatureStorage->pLocationPointer) {
740 pMa->WelsFree (pScreenBlockFeatureStorage->pLocationPointer, "pScreenBlockFeatureStorage->pLocationPointer");
741 pScreenBlockFeatureStorage->pLocationPointer = NULL;
742 }
743
744 if (pScreenBlockFeatureStorage->pFeatureValuePointerList) {
745 pMa->WelsFree (pScreenBlockFeatureStorage->pFeatureValuePointerList,
746 "pScreenBlockFeatureStorage->pFeatureValuePointerList");
747 pScreenBlockFeatureStorage->pFeatureValuePointerList = NULL;
748 }
749
750 return ENC_RETURN_SUCCESS;
751 }
752 return ENC_RETURN_UNEXPECTED;
753 }
754
755 //preprocess related
/*!
 * \brief   Sum the 64 samples of one 8x8 block.
 *
 * \param   pRef          top-left sample of the block
 * \param   kiRefStride   distance in samples between two consecutive rows
 *
 * \return  sum of all samples of the block
 */
int32_t SumOf8x8SingleBlock_c (uint8_t* pRef, const int32_t kiRefStride) {
  int32_t iTotal = 0;
  const uint8_t* pRow = pRef;
  for (int32_t iRow = 0; iRow < 8; ++iRow) {
    for (int32_t iCol = 0; iCol < 8; ++iCol) {
      iTotal += pRow[iCol];
    }
    pRow += kiRefStride;
  }
  return iTotal;
}
/*!
 * \brief   Sum the 256 samples of one 16x16 block.
 *
 * \param   pRef          top-left sample of the block
 * \param   kiRefStride   distance in samples between two consecutive rows
 *
 * \return  sum of all samples of the block
 */
int32_t SumOf16x16SingleBlock_c (uint8_t* pRef, const int32_t kiRefStride) {
  int32_t iTotal = 0;
  const uint8_t* pRow = pRef;
  for (int32_t iRow = 0; iRow < 16; ++iRow) {
    for (int32_t iCol = 0; iCol < 16; ++iCol) {
      iTotal += pRow[iCol];
    }
    pRow += kiRefStride;
  }
  return iTotal;
}
776
SumOf8x8BlockOfFrame_c(uint8_t * pRefPicture,const int32_t kiWidth,const int32_t kiHeight,const int32_t kiRefStride,uint16_t * pFeatureOfBlock,uint32_t pTimesOfFeatureValue[])777 void SumOf8x8BlockOfFrame_c (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
778 const int32_t kiRefStride,
779 uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]) {
780 int32_t x, y;
781 uint8_t* pRef;
782 uint16_t* pBuffer;
783 int32_t iSum;
784 for (y = 0; y < kiHeight; y++) {
785 pRef = pRefPicture + kiRefStride * y;
786 pBuffer = pFeatureOfBlock + kiWidth * y;
787 for (x = 0; x < kiWidth; x++) {
788 iSum = SumOf8x8SingleBlock_c (pRef + x, kiRefStride);
789
790 pBuffer[x] = iSum;
791 pTimesOfFeatureValue[iSum]++;
792 }
793 }
794 }
795
SumOf16x16BlockOfFrame_c(uint8_t * pRefPicture,const int32_t kiWidth,const int32_t kiHeight,const int32_t kiRefStride,uint16_t * pFeatureOfBlock,uint32_t pTimesOfFeatureValue[])796 void SumOf16x16BlockOfFrame_c (uint8_t* pRefPicture, const int32_t kiWidth, const int32_t kiHeight,
797 const int32_t kiRefStride,
798 uint16_t* pFeatureOfBlock, uint32_t pTimesOfFeatureValue[]) {
799 //TODO: this is similar to SumOf8x8BlockOfFrame_c expect the calling of single block func, refactor-able?
800 int32_t x, y;
801 uint8_t* pRef;
802 uint16_t* pBuffer;
803 int32_t iSum;
804 for (y = 0; y < kiHeight; y++) {
805 pRef = pRefPicture + kiRefStride * y;
806 pBuffer = pFeatureOfBlock + kiWidth * y;
807 for (x = 0; x < kiWidth; x++) {
808 iSum = SumOf16x16SingleBlock_c (pRef + x, kiRefStride);
809
810 pBuffer[x] = iSum;
811 pTimesOfFeatureValue[iSum]++;
812 }
813 }
814 }
815
/*!
 * \brief   Carve one contiguous buffer into per-feature location lists.
 *
 * Feature value i receives a slice of 2 * pTimesOfFeatureValue[i] entries; slices are laid out
 * back to back starting at pBuf.
 *
 * \param   pTimesOfFeatureValue       occurrence count of each feature value
 * \param   pBuf                       start of the shared location buffer
 * \param   kiListSize                 number of feature values to process
 * \param   pLocationOfFeature         output: start of the slice of each feature value
 * \param   pFeatureValuePointerList   output: same start addresses, written as a second copy
 */
void InitializeHashforFeature_c (uint32_t* pTimesOfFeatureValue, uint16_t* pBuf, const int32_t kiListSize,
                                 uint16_t** pLocationOfFeature, uint16_t** pFeatureValuePointerList) {
  uint16_t* pSliceStart = pBuf;
  int32_t iFeature = 0;
  while (iFeature < kiListSize) {
    pFeatureValuePointerList[iFeature] = pSliceStart;
    pLocationOfFeature[iFeature] = pSliceStart;
    // two entries are reserved per occurrence
    pSliceStart += (pTimesOfFeatureValue[iFeature] << 1);
    ++iFeature;
  }
}
/*!
 * \brief   Append the quarter-pel position of every block to the list of its feature value.
 *
 * Positions are visited row by row; for each one the pair (x * 4, y * 4) is written at the
 * current write pointer of the block's feature value, and that pointer advances by two entries.
 *
 * \param   pFeatureOfBlock            feature value of each block position, kiWidth entries per row
 * \param   kiWidth                    number of block positions per row
 * \param   kiHeight                   number of rows
 * \param   pFeatureValuePointerList   per-feature write pointers; advanced in place
 */
void FillQpelLocationByFeatureValue_c (uint16_t* pFeatureOfBlock, const int32_t kiWidth, const int32_t kiHeight,
                                       uint16_t** pFeatureValuePointerList) {
  const uint16_t* pFeatureRow = pFeatureOfBlock;
  for (int32_t iRow = 0; iRow < kiHeight; ++iRow) {
    const int32_t kiQpelY = iRow << 2;
    for (int32_t iCol = 0; iCol < kiWidth; ++iCol) {
      uint16_t** ppWritePos = &pFeatureValuePointerList[pFeatureRow[iCol]];
      uint16_t* pEntry = *ppWritePos;
      pEntry[0] = iCol << 2;
      pEntry[1] = kiQpelY;
      *ppWritePos = pEntry + 2;
    }
    pFeatureRow += kiWidth;
  }
}
842
CalculateFeatureOfBlock(SWelsFuncPtrList * pFunc,SPicture * pRef,SScreenBlockFeatureStorage * pScreenBlockFeatureStorage)843 bool CalculateFeatureOfBlock (SWelsFuncPtrList* pFunc, SPicture* pRef,
844 SScreenBlockFeatureStorage* pScreenBlockFeatureStorage) {
845 uint16_t* pFeatureOfBlock = pScreenBlockFeatureStorage->pFeatureOfBlockPointer;
846 uint32_t* pTimesOfFeatureValue = pScreenBlockFeatureStorage->pTimesOfFeatureValue;
847 uint16_t** pLocationOfFeature = pScreenBlockFeatureStorage->pLocationOfFeature;
848 uint16_t* pBuf = pScreenBlockFeatureStorage->pLocationPointer;
849
850 if (NULL == pFeatureOfBlock || NULL == pTimesOfFeatureValue || NULL == pLocationOfFeature || NULL == pBuf
851 || NULL == pRef->pData[0]) {
852 return false;
853 }
854
855 uint8_t* pRefData = pRef->pData[0];
856 const int32_t iRefStride = pRef->iLineSize[0];
857 int32_t iIs16x16 = pScreenBlockFeatureStorage->iIs16x16;
858 const int32_t iEdgeDiscard = (iIs16x16 ? 16 : 8); //this is to save complexity of padding on pRef
859 const int32_t iWidth = pRef->iWidthInPixel - iEdgeDiscard;
860 const int32_t kiHeight = pRef->iHeightInPixel - iEdgeDiscard;
861 const int32_t kiActualListSize = pScreenBlockFeatureStorage->iActualListSize;
862
863 memset (pTimesOfFeatureValue, 0, sizeof (int32_t)*kiActualListSize);
864 (pFunc->pfCalculateBlockFeatureOfFrame[iIs16x16]) (pRefData, iWidth, kiHeight, iRefStride, pFeatureOfBlock,
865 pTimesOfFeatureValue);
866
867 //assign pLocationOfFeature pointer
868 pFunc->pfInitializeHashforFeature (pTimesOfFeatureValue, pBuf, kiActualListSize,
869 pLocationOfFeature, pScreenBlockFeatureStorage->pFeatureValuePointerList);
870
871 //assign each pixel's pLocationOfFeature
872 pFunc->pfFillQpelLocationByFeatureValue (pFeatureOfBlock, iWidth, kiHeight,
873 pScreenBlockFeatureStorage->pFeatureValuePointerList);
874 return true;
875 }
876
PerformFMEPreprocess(SWelsFuncPtrList * pFunc,SPicture * pRef,uint16_t * pFeatureOfBlock,SScreenBlockFeatureStorage * pScreenBlockFeatureStorage)877 void PerformFMEPreprocess (SWelsFuncPtrList* pFunc, SPicture* pRef, uint16_t* pFeatureOfBlock,
878 SScreenBlockFeatureStorage* pScreenBlockFeatureStorage) {
879 pScreenBlockFeatureStorage->pFeatureOfBlockPointer = pFeatureOfBlock;
880 pScreenBlockFeatureStorage->bRefBlockFeatureCalculated = CalculateFeatureOfBlock (pFunc, pRef,
881 pScreenBlockFeatureStorage);
882
883 if (pScreenBlockFeatureStorage->bRefBlockFeatureCalculated) {
884 uint32_t uiRefPictureAvgQstepx16 = QStepx16ByQp[WelsMedian (0, pRef->iFrameAverageQp, 51)];
885 uint32_t uiSadCostThreshold16x16 = ((30 * (uiRefPictureAvgQstepx16 + 160)) >> 3);
886 pScreenBlockFeatureStorage->uiSadCostThreshold[BLOCK_16x16] = uiSadCostThreshold16x16;
887 pScreenBlockFeatureStorage->uiSadCostThreshold[BLOCK_8x8] = (uiSadCostThreshold16x16 >> 2);
888 pScreenBlockFeatureStorage->uiSadCostThreshold[BLOCK_16x8]
889 = pScreenBlockFeatureStorage->uiSadCostThreshold[BLOCK_8x16]
890 = pScreenBlockFeatureStorage->uiSadCostThreshold[BLOCK_4x4] = UINT_MAX;
891 }
892 }
893
894 //search related
SetFeatureSearchIn(SWelsFuncPtrList * pFunc,const SWelsME & sMe,const SSlice * pSlice,SScreenBlockFeatureStorage * pRefFeatureStorage,const int32_t kiEncStride,const int32_t kiRefStride,SFeatureSearchIn * pFeatureSearchIn)895 bool SetFeatureSearchIn (SWelsFuncPtrList* pFunc, const SWelsME& sMe,
896 const SSlice* pSlice, SScreenBlockFeatureStorage* pRefFeatureStorage,
897 const int32_t kiEncStride, const int32_t kiRefStride,
898 SFeatureSearchIn* pFeatureSearchIn) {
899 pFeatureSearchIn->pSad = pFunc->sSampleDealingFuncs.pfSampleSad[sMe.uiBlockSize];
900 pFeatureSearchIn->iFeatureOfCurrent = pFunc->pfCalculateSingleBlockFeature[BLOCK_16x16 == sMe.uiBlockSize] (sMe.pEncMb,
901 kiEncStride);
902
903 pFeatureSearchIn->pEnc = sMe.pEncMb;
904 pFeatureSearchIn->pColoRef = sMe.pColoRefMb;
905 pFeatureSearchIn->iEncStride = kiEncStride;
906 pFeatureSearchIn->iRefStride = kiRefStride;
907 pFeatureSearchIn->uiSadCostThresh = sMe.uiSadCostThreshold;
908
909 pFeatureSearchIn->iCurPixX = sMe.iCurMeBlockPixX;
910 pFeatureSearchIn->iCurPixXQpel = (pFeatureSearchIn->iCurPixX << 2);
911 pFeatureSearchIn->iCurPixY = sMe.iCurMeBlockPixY;
912 pFeatureSearchIn->iCurPixYQpel = (pFeatureSearchIn->iCurPixY << 2);
913
914 pFeatureSearchIn->pTimesOfFeature = pRefFeatureStorage->pTimesOfFeatureValue;
915 pFeatureSearchIn->pQpelLocationOfFeature = pRefFeatureStorage->pLocationOfFeature;
916 pFeatureSearchIn->pMvdCostX = sMe.pMvdCost - pFeatureSearchIn->iCurPixXQpel - sMe.sMvp.iMvX;
917 pFeatureSearchIn->pMvdCostY = sMe.pMvdCost - pFeatureSearchIn->iCurPixYQpel - sMe.sMvp.iMvY;
918
919 pFeatureSearchIn->iMinQpelX = pFeatureSearchIn->iCurPixXQpel + ((pSlice->sMvStartMin.iMvX) * (1 << 2));
920 pFeatureSearchIn->iMinQpelY = pFeatureSearchIn->iCurPixYQpel + ((pSlice->sMvStartMin.iMvY) * (1 << 2));
921 pFeatureSearchIn->iMaxQpelX = pFeatureSearchIn->iCurPixXQpel + ((pSlice->sMvStartMax.iMvX) * (1 << 2));
922 pFeatureSearchIn->iMaxQpelY = pFeatureSearchIn->iCurPixYQpel + ((pSlice->sMvStartMax.iMvY) * (1 << 2));
923
924 if (NULL == pFeatureSearchIn->pSad || NULL == pFeatureSearchIn->pTimesOfFeature
925 || NULL == pFeatureSearchIn->pQpelLocationOfFeature) {
926 return false;
927 }
928 return true;
929 }
SaveFeatureSearchOut(const SMVUnitXY sBestMv,const uint32_t uiBestSadCost,uint8_t * pRef,SFeatureSearchOut * pFeatureSearchOut)930 void SaveFeatureSearchOut (const SMVUnitXY sBestMv, const uint32_t uiBestSadCost, uint8_t* pRef,
931 SFeatureSearchOut* pFeatureSearchOut) {
932 pFeatureSearchOut->sBestMv = sBestMv;
933 pFeatureSearchOut->uiBestSadCost = uiBestSadCost;
934 pFeatureSearchOut->pBestRef = pRef;
935 }
936
FeatureSearchOne(SFeatureSearchIn & sFeatureSearchIn,const int32_t iFeatureDifference,const uint32_t kuiExpectedSearchTimes,SFeatureSearchOut * pFeatureSearchOut)937 bool FeatureSearchOne (SFeatureSearchIn& sFeatureSearchIn, const int32_t iFeatureDifference,
938 const uint32_t kuiExpectedSearchTimes,
939 SFeatureSearchOut* pFeatureSearchOut) {
940 const int32_t iFeatureOfRef = (sFeatureSearchIn.iFeatureOfCurrent + iFeatureDifference);
941 if (iFeatureOfRef < 0 || iFeatureOfRef >= LIST_SIZE)
942 return true;
943
944 PSampleSadSatdCostFunc pSad = sFeatureSearchIn.pSad;
945 uint8_t* pEnc = sFeatureSearchIn.pEnc;
946 uint8_t* pColoRef = sFeatureSearchIn.pColoRef;
947 const int32_t iEncStride = sFeatureSearchIn.iEncStride;
948 const int32_t iRefStride = sFeatureSearchIn.iRefStride;
949 const uint16_t uiSadCostThresh = sFeatureSearchIn.uiSadCostThresh;
950
951 const int32_t iCurPixX = sFeatureSearchIn.iCurPixX;
952 const int32_t iCurPixY = sFeatureSearchIn.iCurPixY;
953 const int32_t iCurPixXQpel = sFeatureSearchIn.iCurPixXQpel;
954 const int32_t iCurPixYQpel = sFeatureSearchIn.iCurPixYQpel;
955
956 const int32_t iMinQpelX = sFeatureSearchIn.iMinQpelX;
957 const int32_t iMinQpelY = sFeatureSearchIn.iMinQpelY;
958 const int32_t iMaxQpelX = sFeatureSearchIn.iMaxQpelX;
959 const int32_t iMaxQpelY = sFeatureSearchIn.iMaxQpelY;
960
961 const int32_t iSearchTimes = WELS_MIN (sFeatureSearchIn.pTimesOfFeature[iFeatureOfRef], kuiExpectedSearchTimes);
962 const int32_t iSearchTimesx2 = (iSearchTimes << 1);
963 const uint16_t* pQpelPosition = sFeatureSearchIn.pQpelLocationOfFeature[iFeatureOfRef];
964
965 SMVUnitXY sBestMv;
966 uint32_t uiBestCost, uiTmpCost;
967 uint8_t* pBestRef, *pCurRef;
968 int32_t iQpelX, iQpelY;
969 int32_t iIntepelX, iIntepelY;
970 int32_t i;
971
972 sBestMv.iMvX = pFeatureSearchOut->sBestMv.iMvX;
973 sBestMv.iMvY = pFeatureSearchOut->sBestMv.iMvY;
974 uiBestCost = pFeatureSearchOut->uiBestSadCost;
975 pBestRef = pFeatureSearchOut->pBestRef;
976
977 for (i = 0; i < iSearchTimesx2; i += 2) {
978 iQpelX = pQpelPosition[i];
979 iQpelY = pQpelPosition[i + 1];
980
981 if ((iQpelX > iMaxQpelX) || (iQpelX < iMinQpelX)
982 || (iQpelY > iMaxQpelY) || (iQpelY < iMinQpelY)
983 || (iQpelX == iCurPixXQpel) || (iQpelY == iCurPixYQpel))
984 continue;
985
986 uiTmpCost = sFeatureSearchIn.pMvdCostX[ iQpelX ] + sFeatureSearchIn.pMvdCostY[ iQpelY ];
987 if (uiTmpCost + iFeatureDifference >= uiBestCost)
988 continue;
989
990 iIntepelX = (iQpelX >> 2) - iCurPixX;
991 iIntepelY = (iQpelY >> 2) - iCurPixY;
992 pCurRef = &pColoRef[iIntepelX + iIntepelY * iRefStride];
993 uiTmpCost += pSad (pEnc, iEncStride, pCurRef, iRefStride);
994 if (uiTmpCost < uiBestCost) {
995 sBestMv.iMvX = iIntepelX;
996 sBestMv.iMvY = iIntepelY;
997 uiBestCost = uiTmpCost;
998 pBestRef = pCurRef;
999
1000 if (uiBestCost < uiSadCostThresh)
1001 break;
1002 }
1003 }
1004 SaveFeatureSearchOut (sBestMv, uiBestCost, pBestRef, pFeatureSearchOut);
1005 return (i < iSearchTimesx2);
1006 }
1007
1008
MotionEstimateFeatureFullSearch(SFeatureSearchIn & sFeatureSearchIn,const uint32_t kuiMaxSearchPoint,SWelsME * pMe)1009 void MotionEstimateFeatureFullSearch (SFeatureSearchIn& sFeatureSearchIn,
1010 const uint32_t kuiMaxSearchPoint,
1011 SWelsME* pMe) {
1012 SFeatureSearchOut sFeatureSearchOut = { { 0 } };//TODO: this can be refactored and removed
1013 sFeatureSearchOut.uiBestSadCost = pMe->uiSadCost;
1014 sFeatureSearchOut.sBestMv = pMe->sMv;
1015 sFeatureSearchOut.pBestRef = pMe->pRefMb;
1016
1017 int32_t iFeatureDifference = 0;//TODO: change it according to computational-complexity setting when needed
1018 FeatureSearchOne (sFeatureSearchIn, iFeatureDifference, kuiMaxSearchPoint, &sFeatureSearchOut);
1019 if (sFeatureSearchOut.uiBestSadCost < pMe->uiSadCost) { //TODO: this may be refactored and removed
1020 UpdateMeResults (sFeatureSearchOut.sBestMv,
1021 sFeatureSearchOut.uiBestSadCost, sFeatureSearchOut.pBestRef,
1022 pMe);
1023 }
1024 }
1025
1026 //switch related
CountFMECostDown(const SDqLayer * pCurLayer)1027 static uint32_t CountFMECostDown (const SDqLayer* pCurLayer) {
1028 uint32_t uiCostDownSum = 0;
1029 const int32_t kiSliceCount = GetCurrentSliceNum (pCurLayer);
1030 if (kiSliceCount >= 1) {
1031 int32_t iSliceIndex = 0;
1032 SSlice* pSlice = pCurLayer->ppSliceInLayer[iSliceIndex];
1033 while (iSliceIndex < kiSliceCount) {
1034 pSlice = pCurLayer->ppSliceInLayer[iSliceIndex];
1035 uiCostDownSum += pSlice->uiSliceFMECostDown;
1036 ++ iSliceIndex;
1037 }
1038 }
1039 return uiCostDownSum;
1040 }
#define FMESWITCH_MBAVERCOSTSAVING_THRESHOLD (2) //empirically set.
#define FMESWITCH_GOODFRAMECOUNT_MAX (5) //empirically set.
/*!
 * \brief   Move the good-frame counter one step up or down.
 *
 * The counter is incremented when the average per-MB cost saving exceeds
 * FMESWITCH_MBAVERCOSTSAVING_THRESHOLD and decremented otherwise; it saturates at
 * FMESWITCH_GOODFRAMECOUNT_MAX and at 0. Both limits are empirical values.
 *
 * \param   iAvMBNormalizedRDcostDown   average cost saving per macroblock
 * \param   uiFMEGoodFrameCount         counter updated in place
 */
static void UpdateFMEGoodFrameCount (const uint32_t iAvMBNormalizedRDcostDown, uint8_t& uiFMEGoodFrameCount) {
  const bool kbGoodFrame = (iAvMBNormalizedRDcostDown > FMESWITCH_MBAVERCOSTSAVING_THRESHOLD);
  if (kbGoodFrame && uiFMEGoodFrameCount < FMESWITCH_GOODFRAMECOUNT_MAX) {
    ++ uiFMEGoodFrameCount;
  } else if (!kbGoodFrame && uiFMEGoodFrameCount > 0) {
    -- uiFMEGoodFrameCount;
  }
}
UpdateFMESwitch(SDqLayer * pCurLayer)1054 void UpdateFMESwitch (SDqLayer* pCurLayer) {
1055 const uint32_t iFMECost = CountFMECostDown (pCurLayer);
1056 const uint32_t iAvMBNormalizedRDcostDown = iFMECost / (pCurLayer->iMbWidth * pCurLayer->iMbHeight);
1057 UpdateFMEGoodFrameCount (iAvMBNormalizedRDcostDown, pCurLayer->pFeatureSearchPreparation->uiFMEGoodFrameCount);
1058 }
UpdateFMESwitchNull(SDqLayer * pCurLayer)1059 void UpdateFMESwitchNull (SDqLayer* pCurLayer) {
1060 }
1061 /////////////////////////
1062 // Search function options
1063 /////////////////////////
WelsDiamondCrossSearch(SWelsFuncPtrList * pFunc,SWelsME * pMe,SSlice * pSlice,const int32_t kiEncStride,const int32_t kiRefStride)1064 void WelsDiamondCrossSearch (SWelsFuncPtrList* pFunc, SWelsME* pMe, SSlice* pSlice, const int32_t kiEncStride,
1065 const int32_t kiRefStride) {
1066 // Step 1: diamond search
1067 WelsDiamondSearch (pFunc, pMe, pSlice, kiEncStride, kiRefStride);
1068
1069 // Step 2: CROSS search
1070 pMe->uiSadCostThreshold = pMe->pRefFeatureStorage->uiSadCostThreshold[pMe->uiBlockSize];
1071 if (pMe->uiSadCost >= pMe->uiSadCostThreshold) {
1072 WelsMotionCrossSearch (pFunc, pMe, pSlice, kiEncStride, kiRefStride);
1073 }
1074 }
WelsDiamondCrossFeatureSearch(SWelsFuncPtrList * pFunc,SWelsME * pMe,SSlice * pSlice,const int32_t kiEncStride,const int32_t kiRefStride)1075 void WelsDiamondCrossFeatureSearch (SWelsFuncPtrList* pFunc, SWelsME* pMe, SSlice* pSlice, const int32_t kiEncStride,
1076 const int32_t kiRefStride) {
1077 // Step 1: diamond search + cross
1078 WelsDiamondCrossSearch (pFunc, pMe, pSlice, kiEncStride, kiRefStride);
1079
1080 // Step 2: FeatureSearch
1081 if (pMe->uiSadCost >= pMe->uiSadCostThreshold) {
1082 pSlice->uiSliceFMECostDown += pMe->uiSadCost;
1083
1084 uint32_t uiMaxSearchPoint = INT_MAX;//TODO: change it according to computational-complexity setting
1085 SFeatureSearchIn sFeatureSearchIn = {0};
1086 if (SetFeatureSearchIn (pFunc, *pMe, pSlice, pMe->pRefFeatureStorage,
1087 kiEncStride, kiRefStride,
1088 &sFeatureSearchIn)) {
1089 MotionEstimateFeatureFullSearch (sFeatureSearchIn, uiMaxSearchPoint, pMe);
1090 }
1091 pSlice->uiSliceFMECostDown -= pMe->uiSadCost;
1092 }
1093 }
1094
1095
1096 } // namespace WelsEnc
1097
1098