• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*!
 * \copy
 *     Copyright (c)  2009-2013, Cisco Systems
 *     All rights reserved.
 *
 *     Redistribution and use in source and binary forms, with or without
 *     modification, are permitted provided that the following conditions
 *     are met:
 *
 *        * Redistributions of source code must retain the above copyright
 *          notice, this list of conditions and the following disclaimer.
 *
 *        * Redistributions in binary form must reproduce the above copyright
 *          notice, this list of conditions and the following disclaimer in
 *          the documentation and/or other materials provided with the
 *          distribution.
 *
 *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *     POSSIBILITY OF SUCH DAMAGE.
 *
 *
 * \file    sample.c
 *
 * \brief   compute SAD and SATD
 *
 * \date    2009.06.02 Created
 *
 *************************************************************************************
 */
40 
41 #include "sample.h"
42 #include "sad_common.h"
43 #include "intra_pred_common.h"
44 #include "mc.h"
45 #include "cpu_core.h"
46 
47 namespace WelsEnc {
WelsSampleSatd4x4_c(uint8_t * pSample1,int32_t iStride1,uint8_t * pSample2,int32_t iStride2)48 int32_t WelsSampleSatd4x4_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) {
49   int32_t iSatdSum = 0;
50   int32_t pSampleMix[4][4] = {{ 0 }};
51   int32_t iSample0, iSample1, iSample2, iSample3;
52   int32_t i = 0;
53   uint8_t* pSrc1 = pSample1;
54   uint8_t* pSrc2 = pSample2;
55 
56   //step 1: get the difference
57   for (i = 0; i < 4; i++) {
58     pSampleMix[i][0] = pSrc1[0] - pSrc2[0];
59     pSampleMix[i][1] = pSrc1[1] - pSrc2[1];
60     pSampleMix[i][2] = pSrc1[2] - pSrc2[2];
61     pSampleMix[i][3] = pSrc1[3] - pSrc2[3];
62 
63     pSrc1 += iStride1;
64     pSrc2 += iStride2;
65   }
66 
67   //step 2: horizontal transform
68   for (i = 0; i < 4; i++) {
69     iSample0 = pSampleMix[i][0] + pSampleMix[i][2];
70     iSample1 = pSampleMix[i][1] + pSampleMix[i][3];
71     iSample2 = pSampleMix[i][0] - pSampleMix[i][2];
72     iSample3 = pSampleMix[i][1] - pSampleMix[i][3];
73 
74     pSampleMix[i][0] = iSample0 + iSample1;
75     pSampleMix[i][1] = iSample2 + iSample3;
76     pSampleMix[i][2] = iSample2 - iSample3;
77     pSampleMix[i][3] = iSample0 - iSample1;
78   }
79 
80   //step 3: vertical transform and get the sum of SATD
81   for (i = 0; i < 4; i++) {
82     iSample0 = pSampleMix[0][i] + pSampleMix[2][i];
83     iSample1 = pSampleMix[1][i] + pSampleMix[3][i];
84     iSample2 = pSampleMix[0][i] - pSampleMix[2][i];
85     iSample3 = pSampleMix[1][i] - pSampleMix[3][i];
86 
87     pSampleMix[0][i] = iSample0 + iSample1;
88     pSampleMix[1][i] = iSample2 + iSample3;
89     pSampleMix[2][i] = iSample2 - iSample3;
90     pSampleMix[3][i] = iSample0 - iSample1;
91 
92     iSatdSum += (WELS_ABS (pSampleMix[0][i]) + WELS_ABS (pSampleMix[1][i]) + WELS_ABS (pSampleMix[2][i]) + WELS_ABS (
93                    pSampleMix[3][i]));
94   }
95 
96   return ((iSatdSum + 1) >> 1);
97 }
98 
WelsSampleSatd8x4_c(uint8_t * pSample1,int32_t iStride1,uint8_t * pSample2,int32_t iStride2)99 int32_t WelsSampleSatd8x4_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) {
100   int32_t iSatdSum = 0;
101   iSatdSum += WelsSampleSatd4x4_c (pSample1,   iStride1, pSample2,   iStride2);
102   iSatdSum += WelsSampleSatd4x4_c (pSample1 + 4, iStride1, pSample2 + 4, iStride2);
103   return iSatdSum;
104 }
105 
WelsSampleSatd4x8_c(uint8_t * pSample1,int32_t iStride1,uint8_t * pSample2,int32_t iStride2)106 int32_t WelsSampleSatd4x8_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) {
107   int32_t iSatdSum = 0;
108   iSatdSum += WelsSampleSatd4x4_c (pSample1,                   iStride1, pSample2,                   iStride2);
109   iSatdSum += WelsSampleSatd4x4_c (pSample1 + (iStride1 << 2), iStride1, pSample2 + (iStride2 << 2), iStride2);
110   return iSatdSum;
111 }
112 
WelsSampleSatd8x8_c(uint8_t * pSample1,int32_t iStride1,uint8_t * pSample2,int32_t iStride2)113 int32_t WelsSampleSatd8x8_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) {
114   int32_t iSatdSum = 0;
115 
116   iSatdSum += WelsSampleSatd4x4_c (pSample1,                     iStride1, pSample2,                     iStride2);
117   iSatdSum += WelsSampleSatd4x4_c (pSample1 + 4,                   iStride1, pSample2 + 4,                   iStride2);
118   iSatdSum += WelsSampleSatd4x4_c (pSample1 + (iStride1 << 2),   iStride1, pSample2 + (iStride2 << 2),   iStride2);
119   iSatdSum += WelsSampleSatd4x4_c (pSample1 + (iStride1 << 2) + 4, iStride1, pSample2 + (iStride2 << 2) + 4, iStride2);
120 
121   return iSatdSum;
122 }
WelsSampleSatd16x8_c(uint8_t * pSample1,int32_t iStride1,uint8_t * pSample2,int32_t iStride2)123 int32_t WelsSampleSatd16x8_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) {
124   int32_t iSatdSum = 0;
125 
126   iSatdSum += WelsSampleSatd8x8_c (pSample1,   iStride1, pSample2,   iStride2);
127   iSatdSum += WelsSampleSatd8x8_c (pSample1 + 8, iStride1, pSample2 + 8, iStride2);
128 
129   return iSatdSum;
130 }
WelsSampleSatd8x16_c(uint8_t * pSample1,int32_t iStride1,uint8_t * pSample2,int32_t iStride2)131 int32_t WelsSampleSatd8x16_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) {
132   int32_t iSatdSum = 0;
133 
134   iSatdSum += WelsSampleSatd8x8_c (pSample1,                   iStride1, pSample2,                   iStride2);
135   iSatdSum += WelsSampleSatd8x8_c (pSample1 + (iStride1 << 3), iStride1, pSample2 + (iStride2 << 3), iStride2);
136 
137   return iSatdSum;
138 }
WelsSampleSatd16x16_c(uint8_t * pSample1,int32_t iStride1,uint8_t * pSample2,int32_t iStride2)139 int32_t WelsSampleSatd16x16_c (uint8_t* pSample1, int32_t iStride1, uint8_t* pSample2, int32_t iStride2) {
140   int32_t iSatdSum = 0;
141 
142   iSatdSum += WelsSampleSatd8x8_c (pSample1,                     iStride1, pSample2,                     iStride2);
143   iSatdSum += WelsSampleSatd8x8_c (pSample1 + 8,                   iStride1, pSample2 + 8,                   iStride2);
144   iSatdSum += WelsSampleSatd8x8_c (pSample1 + (iStride1 << 3),   iStride1, pSample2 + (iStride2 << 3),   iStride2);
145   iSatdSum += WelsSampleSatd8x8_c (pSample1 + (iStride1 << 3) + 8, iStride1, pSample2 + (iStride2 << 3) + 8, iStride2);
146 
147   return iSatdSum;
148 }
149 
150 
151 extern void WelsI4x4LumaPredDc_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride);
152 extern void WelsI4x4LumaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride);
153 extern void WelsI4x4LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride);
154 
WelsSampleSatdIntra4x4Combined3_c(uint8_t * pDec,int32_t iDecStride,uint8_t * pEnc,int32_t iEncStride,uint8_t * pDst,int32_t * pBestMode,int32_t iLambda2,int32_t iLambda1,int32_t iLambda0)155 int32_t WelsSampleSatdIntra4x4Combined3_c (uint8_t* pDec, int32_t iDecStride, uint8_t* pEnc, int32_t iEncStride,
156     uint8_t* pDst,
157     int32_t* pBestMode, int32_t iLambda2, int32_t iLambda1, int32_t iLambda0) {
158   int32_t iBestMode = -1;
159   int32_t iCurCost, iBestCost = INT_MAX;
160   ENFORCE_STACK_ALIGN_2D (uint8_t, uiLocalBuffer, 3, 16, 16)
161 
162   WelsI4x4LumaPredDc_c (uiLocalBuffer[2], pDec, iDecStride);
163   iCurCost = WelsSampleSatd4x4_c (uiLocalBuffer[2], 4, pEnc, iEncStride) + iLambda2;
164   if (iCurCost < iBestCost) {
165     iBestMode = 2;
166     iBestCost = iCurCost;
167   }
168 
169   WelsI4x4LumaPredH_c (uiLocalBuffer[1], pDec, iDecStride);
170   iCurCost = WelsSampleSatd4x4_c (uiLocalBuffer[1], 4, pEnc, iEncStride) + iLambda1;
171   if (iCurCost < iBestCost) {
172     iBestMode = 1;
173     iBestCost = iCurCost;
174   }
175   WelsI4x4LumaPredV_c (uiLocalBuffer[0], pDec, iDecStride);
176   iCurCost = WelsSampleSatd4x4_c (uiLocalBuffer[0], 4, pEnc, iEncStride) + iLambda0;
177   if (iCurCost < iBestCost) {
178     iBestMode = 0;
179     iBestCost = iCurCost;
180   }
181 
182   memcpy (pDst, uiLocalBuffer[iBestMode], 16 * sizeof (uint8_t)); // confirmed_safe_unsafe_usage
183   *pBestMode = iBestMode;
184 
185   return iBestCost;
186 }
187 extern void WelsIChromaPredDc_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride);
188 extern void WelsIChromaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride);
189 extern void WelsIChromaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride);
190 
WelsSampleSatdIntra8x8Combined3_c(uint8_t * pDecCb,int32_t iDecStride,uint8_t * pEncCb,int32_t iEncStride,int32_t * pBestMode,int32_t iLambda,uint8_t * pDstChroma,uint8_t * pDecCr,uint8_t * pEncCr)191 int32_t WelsSampleSatdIntra8x8Combined3_c (uint8_t* pDecCb, int32_t iDecStride, uint8_t* pEncCb, int32_t iEncStride,
192     int32_t* pBestMode, int32_t iLambda, uint8_t* pDstChroma, uint8_t* pDecCr, uint8_t* pEncCr) {
193   int32_t iBestMode = -1;
194   int32_t iCurCost, iBestCost = INT_MAX;
195 
196   WelsIChromaPredV_c (pDstChroma, pDecCb, iDecStride);
197   WelsIChromaPredV_c (pDstChroma + 64, pDecCr, iDecStride);
198   iCurCost = WelsSampleSatd8x8_c (pDstChroma, 8, pEncCb, iEncStride);
199   iCurCost += WelsSampleSatd8x8_c (pDstChroma + 64, 8, pEncCr, iEncStride) + iLambda * 2;
200 
201   if (iCurCost < iBestCost) {
202     iBestMode = 2;
203     iBestCost = iCurCost;
204   }
205 
206   WelsIChromaPredH_c (pDstChroma, pDecCb, iDecStride);
207   WelsIChromaPredH_c (pDstChroma + 64, pDecCr, iDecStride);
208   iCurCost = WelsSampleSatd8x8_c (pDstChroma, 8, pEncCb, iEncStride);
209   iCurCost += WelsSampleSatd8x8_c (pDstChroma + 64, 8, pEncCr, iEncStride) + iLambda * 2;
210   if (iCurCost < iBestCost) {
211     iBestMode = 1;
212     iBestCost = iCurCost;
213   }
214   WelsIChromaPredDc_c (pDstChroma, pDecCb, iDecStride);
215   WelsIChromaPredDc_c (pDstChroma + 64, pDecCr, iDecStride);
216   iCurCost = WelsSampleSatd8x8_c (pDstChroma, 8, pEncCb, iEncStride);
217   iCurCost += WelsSampleSatd8x8_c (pDstChroma + 64, 8, pEncCr, iEncStride);
218   if (iCurCost < iBestCost) {
219     iBestMode = 0;
220     iBestCost = iCurCost;
221   }
222 
223   *pBestMode = iBestMode;
224 
225   return iBestCost;
226 
227 
228 }
WelsSampleSadIntra8x8Combined3_c(uint8_t * pDecCb,int32_t iDecStride,uint8_t * pEncCb,int32_t iEncStride,int32_t * pBestMode,int32_t iLambda,uint8_t * pDstChroma,uint8_t * pDecCr,uint8_t * pEncCr)229 int32_t WelsSampleSadIntra8x8Combined3_c (uint8_t* pDecCb, int32_t iDecStride, uint8_t* pEncCb, int32_t iEncStride,
230     int32_t* pBestMode, int32_t iLambda, uint8_t* pDstChroma, uint8_t* pDecCr, uint8_t* pEncCr) {
231   int32_t iBestMode = -1;
232   int32_t iCurCost, iBestCost = INT_MAX;
233 
234   WelsIChromaPredV_c (pDstChroma, pDecCb, iDecStride);
235   WelsIChromaPredV_c (pDstChroma + 64, pDecCr, iDecStride);
236   iCurCost = WelsSampleSad8x8_c (pDstChroma, 8, pEncCb, iEncStride);
237   iCurCost += WelsSampleSad8x8_c (pDstChroma + 64, 8, pEncCr, iEncStride) + iLambda * 2;
238 
239   if (iCurCost < iBestCost) {
240     iBestMode = 2;
241     iBestCost = iCurCost;
242   }
243 
244   WelsIChromaPredH_c (pDstChroma, pDecCb, iDecStride);
245   WelsIChromaPredH_c (pDstChroma + 64, pDecCr, iDecStride);
246   iCurCost = WelsSampleSad8x8_c (pDstChroma, 8, pEncCb, iEncStride);
247   iCurCost += WelsSampleSad8x8_c (pDstChroma + 64, 8, pEncCr, iEncStride) + iLambda * 2;
248   if (iCurCost < iBestCost) {
249     iBestMode = 1;
250     iBestCost = iCurCost;
251   }
252   WelsIChromaPredDc_c (pDstChroma, pDecCb, iDecStride);
253   WelsIChromaPredDc_c (pDstChroma + 64, pDecCr, iDecStride);
254   iCurCost = WelsSampleSad8x8_c (pDstChroma, 8, pEncCb, iEncStride);
255   iCurCost += WelsSampleSad8x8_c (pDstChroma + 64, 8, pEncCr, iEncStride);
256   if (iCurCost < iBestCost) {
257     iBestMode = 0;
258     iBestCost = iCurCost;
259   }
260 
261   *pBestMode = iBestMode;
262 
263   return iBestCost;
264 
265 }
266 
267 extern void WelsI16x16LumaPredDc_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride);
268 //extern void WelsI16x16LumaPredH_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride);
269 //extern void WelsI16x16LumaPredV_c (uint8_t* pPred, uint8_t* pRef, const int32_t iStride);
270 
WelsSampleSatdIntra16x16Combined3_c(uint8_t * pDec,int32_t iDecStride,uint8_t * pEnc,int32_t iEncStride,int32_t * pBestMode,int32_t iLambda,uint8_t * pDst)271 int32_t WelsSampleSatdIntra16x16Combined3_c (uint8_t* pDec, int32_t iDecStride, uint8_t* pEnc, int32_t iEncStride,
272     int32_t* pBestMode, int32_t iLambda, uint8_t* pDst) {
273   int32_t iBestMode = -1;
274   int32_t iCurCost, iBestCost = INT_MAX;
275 
276   WelsI16x16LumaPredV_c (pDst, pDec, iDecStride);
277   iCurCost = WelsSampleSatd16x16_c (pDst, 16, pEnc, iEncStride);
278 
279   if (iCurCost < iBestCost) {
280     iBestMode = 0;
281     iBestCost = iCurCost;
282   }
283 
284   WelsI16x16LumaPredH_c (pDst, pDec, iDecStride);
285   iCurCost = WelsSampleSatd16x16_c (pDst, 16, pEnc, iEncStride) + iLambda * 2;
286   if (iCurCost < iBestCost) {
287     iBestMode = 1;
288     iBestCost = iCurCost;
289   }
290   WelsI16x16LumaPredDc_c (pDst, pDec, iDecStride);
291   iCurCost = WelsSampleSatd16x16_c (pDst, 16, pEnc, iEncStride) + iLambda * 2;
292   if (iCurCost < iBestCost) {
293     iBestMode = 2;
294     iBestCost = iCurCost;
295   }
296 
297   *pBestMode = iBestMode;
298 
299   return iBestCost;
300 
301 
302 }
WelsSampleSadIntra16x16Combined3_c(uint8_t * pDec,int32_t iDecStride,uint8_t * pEnc,int32_t iEncStride,int32_t * pBestMode,int32_t iLambda,uint8_t * pDst)303 int32_t WelsSampleSadIntra16x16Combined3_c (uint8_t* pDec, int32_t iDecStride, uint8_t* pEnc, int32_t iEncStride,
304     int32_t* pBestMode, int32_t iLambda, uint8_t* pDst) {
305   int32_t iBestMode = -1;
306   int32_t iCurCost, iBestCost = INT_MAX;
307 
308   WelsI16x16LumaPredV_c (pDst, pDec, iDecStride);
309   iCurCost = WelsSampleSad16x16_c (pDst, 16, pEnc, iEncStride);
310 
311   if (iCurCost < iBestCost) {
312     iBestMode = 0;
313     iBestCost = iCurCost;
314   }
315 
316   WelsI16x16LumaPredH_c (pDst, pDec, iDecStride);
317   iCurCost = WelsSampleSad16x16_c (pDst, 16, pEnc, iEncStride) + iLambda * 2;
318   if (iCurCost < iBestCost) {
319     iBestMode = 1;
320     iBestCost = iCurCost;
321   }
322   WelsI16x16LumaPredDc_c (pDst, pDec, iDecStride);
323   iCurCost = WelsSampleSad16x16_c (pDst, 16, pEnc, iEncStride) + iLambda * 2;
324   if (iCurCost < iBestCost) {
325     iBestMode = 2;
326     iBestCost = iCurCost;
327   }
328 
329   *pBestMode = iBestMode;
330 
331   return iBestCost;
332 
333 
334 }
335 
WelsInitSampleSadFunc(SWelsFuncPtrList * pFuncList,uint32_t uiCpuFlag)336 void WelsInitSampleSadFunc (SWelsFuncPtrList* pFuncList, uint32_t uiCpuFlag) {
337   //pfSampleSad init
338   pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_16x16] = WelsSampleSad16x16_c;
339   pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_16x8 ] = WelsSampleSad16x8_c;
340   pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x16 ] = WelsSampleSad8x16_c;
341   pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x8  ] = WelsSampleSad8x8_c;
342   pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_4x4  ] = WelsSampleSad4x4_c;
343   pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x4  ] = WelsSampleSad8x4_c;
344   pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_4x8  ] = WelsSampleSad4x8_c;
345 
346   //pfSampleSatd init
347   pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x16] = WelsSampleSatd16x16_c;
348   pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x8 ] = WelsSampleSatd16x8_c;
349   pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x16 ] = WelsSampleSatd8x16_c;
350   pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x8  ] = WelsSampleSatd8x8_c;
351   pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_4x4  ] = WelsSampleSatd4x4_c;
352   pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x4  ] = WelsSampleSatd8x4_c;
353   pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_4x8  ] = WelsSampleSatd4x8_c;
354 
355   pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_16x16] = WelsSampleSadFour16x16_c;
356   pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_16x8] = WelsSampleSadFour16x8_c;
357   pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x16] = WelsSampleSadFour8x16_c;
358   pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x8] = WelsSampleSadFour8x8_c;
359   pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_4x4] = WelsSampleSadFour4x4_c;
360   pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x4] = WelsSampleSadFour8x4_c;
361   pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_4x8] = WelsSampleSadFour4x8_c;
362 
363   pFuncList->sSampleDealingFuncs.pfIntra4x4Combined3Satd   = NULL;
364   pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3Satd   = NULL;
365   pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3Sad    = NULL;
366   pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3Satd = NULL;
367   pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3Sad  = NULL;
368 
369 #if defined (X86_ASM)
370   if (uiCpuFlag & WELS_CPU_MMXEXT) {
371     pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_4x4  ] = WelsSampleSad4x4_mmx;
372   }
373 
374   if (uiCpuFlag & WELS_CPU_SSE2) {
375     pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_16x16] = WelsSampleSad16x16_sse2;
376     pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_16x8 ] = WelsSampleSad16x8_sse2;
377     pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x16] = WelsSampleSad8x16_sse2;
378     pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x8] = WelsSampleSad8x8_sse21;
379 
380     pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_16x16] = WelsSampleSadFour16x16_sse2;
381     pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_16x8] = WelsSampleSadFour16x8_sse2;
382     pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x16] = WelsSampleSadFour8x16_sse2;
383     pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x8] = WelsSampleSadFour8x8_sse2;
384     pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_4x4] = WelsSampleSadFour4x4_sse2;
385 
386     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_4x4  ] = WelsSampleSatd4x4_sse2;
387     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x8  ] = WelsSampleSatd8x8_sse2;
388     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x16 ] = WelsSampleSatd8x16_sse2;
389     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x8 ] = WelsSampleSatd16x8_sse2;
390     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x16] = WelsSampleSatd16x16_sse2;
391     pFuncList->sSampleDealingFuncs.pfIntra4x4Combined3Satd = WelsSampleSatdThree4x4_sse2;
392   }
393 
394   if (uiCpuFlag & WELS_CPU_SSSE3) {
395     pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3Sad = WelsIntra16x16Combined3Sad_ssse3;
396   }
397 
398   if (uiCpuFlag & WELS_CPU_SSE41) {
399     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x16] = WelsSampleSatd16x16_sse41;
400     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x8] = WelsSampleSatd16x8_sse41;
401     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x16] = WelsSampleSatd8x16_sse41;
402     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x8] = WelsSampleSatd8x8_sse41;
403     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_4x4] = WelsSampleSatd4x4_sse41;
404     pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3Satd = WelsIntra16x16Combined3Satd_sse41;
405     pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3Satd = WelsIntraChroma8x8Combined3Satd_sse41;
406   }
407 #if defined(HAVE_AVX2)
408   if (uiCpuFlag & WELS_CPU_AVX2) {
409     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x16] = WelsSampleSatd16x16_avx2;
410     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x8]  = WelsSampleSatd16x8_avx2;
411     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x16]  = WelsSampleSatd8x16_avx2;
412     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x8]   = WelsSampleSatd8x8_avx2;
413   }
414 #endif
415 #endif //(X86_ASM)
416 
417 #if defined (HAVE_NEON)
418   if (uiCpuFlag & WELS_CPU_NEON) {
419     pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_4x4  ] = WelsSampleSad4x4_neon;
420     pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_16x16] = WelsSampleSad16x16_neon;
421     pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_16x8 ] = WelsSampleSad16x8_neon;
422     pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x16] = WelsSampleSad8x16_neon;
423     pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x8] = WelsSampleSad8x8_neon;
424 
425     pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_16x16] = WelsSampleSadFour16x16_neon;
426     pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_16x8] = WelsSampleSadFour16x8_neon;
427     pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x16] = WelsSampleSadFour8x16_neon;
428     pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x8] = WelsSampleSadFour8x8_neon;
429     pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_4x4] = WelsSampleSadFour4x4_neon;
430 
431     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_4x4  ] = WelsSampleSatd4x4_neon;
432     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x8  ] = WelsSampleSatd8x8_neon;
433     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x16 ] = WelsSampleSatd8x16_neon;
434     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x8 ] = WelsSampleSatd16x8_neon;
435     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x16] = WelsSampleSatd16x16_neon;
436 
437     pFuncList->sSampleDealingFuncs.pfIntra4x4Combined3Satd   = WelsIntra4x4Combined3Satd_neon;
438     pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3Satd   = WelsIntra8x8Combined3Satd_neon;
439     pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3Sad    = WelsIntra8x8Combined3Sad_neon;
440     pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3Satd = WelsIntra16x16Combined3Satd_neon;
441     pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3Sad  = WelsIntra16x16Combined3Sad_neon;
442   }
443 #endif
444 
445 #if defined (HAVE_NEON_AARCH64)
446   if (uiCpuFlag & WELS_CPU_NEON) {
447     pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_4x4  ] = WelsSampleSad4x4_AArch64_neon;
448     pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_16x16] = WelsSampleSad16x16_AArch64_neon;
449     pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_16x8 ] = WelsSampleSad16x8_AArch64_neon;
450     pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x16] = WelsSampleSad8x16_AArch64_neon;
451     pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x8] = WelsSampleSad8x8_AArch64_neon;
452 
453     pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_16x16] = WelsSampleSadFour16x16_AArch64_neon;
454     pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_16x8] = WelsSampleSadFour16x8_AArch64_neon;
455     pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x16] = WelsSampleSadFour8x16_AArch64_neon;
456     pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x8] = WelsSampleSadFour8x8_AArch64_neon;
457     pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_4x4] = WelsSampleSadFour4x4_AArch64_neon;
458 
459     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_4x4  ] = WelsSampleSatd4x4_AArch64_neon;
460     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x8  ] = WelsSampleSatd8x8_AArch64_neon;
461     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x16 ] = WelsSampleSatd8x16_AArch64_neon;
462     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x8 ] = WelsSampleSatd16x8_AArch64_neon;
463     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x16] = WelsSampleSatd16x16_AArch64_neon;
464 
465     pFuncList->sSampleDealingFuncs.pfIntra4x4Combined3Satd   = WelsIntra4x4Combined3Satd_AArch64_neon;
466     pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3Satd   = WelsIntra8x8Combined3Satd_AArch64_neon;
467     pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3Sad    = WelsIntra8x8Combined3Sad_AArch64_neon;
468     pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3Satd = WelsIntra16x16Combined3Satd_AArch64_neon;
469     pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3Sad  = WelsIntra16x16Combined3Sad_AArch64_neon;
470   }
471 #endif
472 
473 #if defined (HAVE_MMI)
474   if (uiCpuFlag & WELS_CPU_MMI) {
475     pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_16x16] = WelsSampleSad16x16_mmi;
476     pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_16x8 ] = WelsSampleSad16x8_mmi;
477     pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x16] = WelsSampleSad8x16_mmi;
478     pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x8] = WelsSampleSad8x8_mmi;
479     pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_4x4  ] = WelsSampleSad4x4_mmi;
480 
481     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_4x4  ] = WelsSampleSatd4x4_mmi;
482     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x8  ] = WelsSampleSatd8x8_mmi;
483     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x16 ] = WelsSampleSatd8x16_mmi;
484     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x8 ] = WelsSampleSatd16x8_mmi;
485     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x16] = WelsSampleSatd16x16_mmi;
486 
487     pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_16x16] = WelsSampleSadFour16x16_mmi;
488     pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_16x8] = WelsSampleSadFour16x8_mmi;
489     pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x16] = WelsSampleSadFour8x16_mmi;
490     pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x8] = WelsSampleSadFour8x8_mmi;
491   }
492 #endif//HAVE_MMI
493 
494 #if defined (HAVE_LASX)
495   if (uiCpuFlag & WELS_CPU_LASX) {
496     pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_4x4] = WelsSampleSad4x4_lasx;
497     pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x8] = WelsSampleSad8x8_lasx;
498     pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_8x16] = WelsSampleSad8x16_lasx;
499     pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_16x8] = WelsSampleSad16x8_lasx;
500     pFuncList->sSampleDealingFuncs.pfSampleSad[BLOCK_16x16] = WelsSampleSad16x16_lasx;
501 
502     pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_16x16] = WelsSampleSadFour16x16_lasx;
503     pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_16x8] = WelsSampleSadFour16x8_lasx;
504     pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x16] = WelsSampleSadFour8x16_lasx;
505     pFuncList->sSampleDealingFuncs.pfSample4Sad[BLOCK_8x8] = WelsSampleSadFour8x8_lasx;
506 
507     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_4x4] = WelsSampleSatd4x4_lasx;
508     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x8] = WelsSampleSatd8x8_lasx;
509     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_8x16] = WelsSampleSatd8x16_lasx;
510     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x8] = WelsSampleSatd16x8_lasx;
511     pFuncList->sSampleDealingFuncs.pfSampleSatd[BLOCK_16x16] = WelsSampleSatd16x16_lasx;
512   }
513 #endif
514 }
515 
516 } // namespace WelsEnc
517