1 #include <gtest/gtest.h>
2 #include <math.h>
3
4 #include "cpu_core.h"
5 #include "cpu.h"
6 #include "sample.h"
7 #include "sad_common.h"
8 #include "get_intra_predictor.h"
9
10 using namespace WelsEnc;
11
// Defines TEST (IntraSadSatdFuncTest, func): feeds identical random buffers to
// the reference routine `ref` and to the routine under test `func`, then
// requires both the returned cost and the chosen best mode to match.
//   func     - routine under test; also used as the test name.
//   ref      - reference routine with the same signature.
//   ASM      - when non-zero, the test returns early (and so reports success)
//              if none of the CPUFLAGS bits are detected on the host.
//   CPUFLAGS - feature bit(s) compared against WelsCPUFeatureDetect().
// Buffers use a 32-byte stride and 32 rows; the decoded pointer handed to the
// routines is offset by 128 bytes, i.e. four rows into its buffer.
// Only block comments may be used inside the body because of the line splices.
#define GENERATE_Intra16x16_UT(func, ref, ASM, CPUFLAGS) \
TEST (IntraSadSatdFuncTest, func) { \
  if (ASM) { \
    int32_t iCoreCount = 0; \
    const uint32_t uiDetectedFlags = WelsCPUFeatureDetect (&iCoreCount); \
    if ((uiDetectedFlags & CPUFLAGS) == 0) \
      return; \
  } \
  const int32_t kiDecStride = 32; \
  const int32_t kiEncStride = 32; \
  ENFORCE_STACK_ALIGN_1D (uint8_t, pDec, kiDecStride << 5, 16); \
  ENFORCE_STACK_ALIGN_1D (uint8_t, pEnc, kiEncStride << 5, 16); \
  ENFORCE_STACK_ALIGN_1D (uint8_t, pDst, 512, 16); \
  for (int32_t n = 0; n < (kiDecStride << 5); ++n) { \
    pDec[n] = rand() % 256; \
  } \
  for (int32_t n = 0; n < (kiEncStride << 5); ++n) { \
    pEnc[n] = rand() % 256; \
  } \
  for (int32_t n = 0; n < 512; ++n) { \
    pDst[n] = rand() % 256; \
  } \
  const int32_t kiLambda = 50; \
  int32_t iModeFromC, iModeFromAsm; \
  const int32_t iCostFromC = ref (pDec + 128, kiDecStride, pEnc, kiEncStride, &iModeFromC, kiLambda, pDst); \
  const int32_t iCostFromAsm = func (pDec + 128, kiDecStride, pEnc, kiEncStride, &iModeFromAsm, kiLambda, pDst); \
  ASSERT_EQ (iCostFromC, iCostFromAsm); \
  ASSERT_EQ (iModeFromC, iModeFromAsm); \
}
38
// Defines TEST (IntraSadSatdFuncTest, func): compares `func` against
// WelsSampleSatdIntra4x4Combined3_c on identical random buffers and requires
// the returned cost and the chosen best mode to match.
//   func     - routine under test; also used as the test name.
//   ASM      - when non-zero, the test returns early (and so reports success)
//              if none of the CPUFLAGS bits are detected on the host.
//   CPUFLAGS - feature bit(s) compared against WelsCPUFeatureDetect().
// A random mode in [0, 2] is drawn first (before the CPU check). The lambda
// argument tied to that mode is 50, the other two are 50 << 2.
// Only block comments may be used inside the body because of the line splices.
#define GENERATE_Intra4x4_UT(func, ASM, CPUFLAGS) \
TEST (IntraSadSatdFuncTest, func) { \
  const int32_t kiPredMode = rand() % 3; \
  if (ASM) { \
    int32_t iCoreCount = 0; \
    const uint32_t uiDetectedFlags = WelsCPUFeatureDetect (&iCoreCount); \
    if ((uiDetectedFlags & CPUFLAGS) == 0) \
      return; \
  } \
  const int32_t kiDecStride = 32; \
  const int32_t kiEncStride = 32; \
  ENFORCE_STACK_ALIGN_1D (uint8_t, pDec, kiDecStride << 5, 16); \
  ENFORCE_STACK_ALIGN_1D (uint8_t, pEnc, kiEncStride << 5, 16); \
  ENFORCE_STACK_ALIGN_1D (uint8_t, pDst, 512, 16); \
  for (int32_t n = 0; n < (kiDecStride << 5); ++n) { \
    pDec[n] = rand() % 256; \
  } \
  for (int32_t n = 0; n < (kiEncStride << 5); ++n) { \
    pEnc[n] = rand() % 256; \
  } \
  for (int32_t n = 0; n < 512; ++n) { \
    pDst[n] = rand() % 256; \
  } \
  const int32_t kiLambda = 50; \
  const int32_t kiLambdaOther = kiLambda << 2; \
  const int32_t kiLambdaMode2 = (kiPredMode == 2) ? kiLambda : kiLambdaOther; \
  const int32_t kiLambdaMode1 = (kiPredMode == 1) ? kiLambda : kiLambdaOther; \
  const int32_t kiLambdaMode0 = (kiPredMode == 0) ? kiLambda : kiLambdaOther; \
  int32_t iModeFromC, iModeFromAsm; \
  const int32_t iCostFromC = WelsSampleSatdIntra4x4Combined3_c (pDec + 128, kiDecStride, pEnc, kiEncStride, \
                             pDst, &iModeFromC, kiLambdaMode2, kiLambdaMode1, kiLambdaMode0); \
  const int32_t iCostFromAsm = func (pDec + 128, kiDecStride, pEnc, kiEncStride, \
                               pDst, &iModeFromAsm, kiLambdaMode2, kiLambdaMode1, kiLambdaMode0); \
  ASSERT_EQ (iCostFromC, iCostFromAsm); \
  ASSERT_EQ (iModeFromC, iModeFromAsm); \
}
69
// Defines TEST (IntraSadSatdFuncTest, func): feeds identical random Cb/Cr
// buffers to the reference routine `ref` and to the routine under test `func`,
// then requires both the returned cost and the chosen best mode to match.
//   func     - routine under test; also used as the test name.
//   ref      - reference routine with the same signature.
//   ASM      - when non-zero, the test returns early (and so reports success)
//              if none of the CPUFLAGS bits are detected on the host.
//   CPUFLAGS - feature bit(s) compared against WelsCPUFeatureDetect().
// Cb and Cr samples are drawn alternately from rand(), element by element.
// Only block comments may be used inside the body because of the line splices.
#define GENERATE_Intra8x8_UT(func, ref, ASM, CPUFLAGS) \
TEST (IntraSadSatdFuncTest, func) { \
  if (ASM) { \
    int32_t iCoreCount = 0; \
    const uint32_t uiDetectedFlags = WelsCPUFeatureDetect (&iCoreCount); \
    if ((uiDetectedFlags & CPUFLAGS) == 0) \
      return; \
  } \
  const int32_t kiDecStride = 32; \
  const int32_t kiEncStride = 32; \
  ENFORCE_STACK_ALIGN_1D (uint8_t, pDecCb, kiDecStride << 5, 16); \
  ENFORCE_STACK_ALIGN_1D (uint8_t, pEncCb, kiEncStride << 5, 16); \
  ENFORCE_STACK_ALIGN_1D (uint8_t, pDecCr, kiDecStride << 5, 16); \
  ENFORCE_STACK_ALIGN_1D (uint8_t, pEncCr, kiEncStride << 5, 16); \
  ENFORCE_STACK_ALIGN_1D (uint8_t, pDstChma, 512, 16); \
  for (int32_t n = 0; n < (kiDecStride << 5); ++n) { \
    pDecCb[n] = rand() % 256; \
    pDecCr[n] = rand() % 256; \
  } \
  for (int32_t n = 0; n < (kiEncStride << 5); ++n) { \
    pEncCb[n] = rand() % 256; \
    pEncCr[n] = rand() % 256; \
  } \
  for (int32_t n = 0; n < 512; ++n) { \
    pDstChma[n] = rand() % 256; \
  } \
  const int32_t kiLambda = 50; \
  int32_t iModeFromC, iModeFromAsm; \
  const int32_t iCostFromC = ref (pDecCb + 128, kiDecStride, pEncCb, kiEncStride, &iModeFromC, kiLambda, \
                             pDstChma, pDecCr + 128, pEncCr); \
  const int32_t iCostFromAsm = func (pDecCb + 128, kiDecStride, pEncCb, kiEncStride, &iModeFromAsm, kiLambda, \
                               pDstChma, pDecCr + 128, pEncCr); \
  ASSERT_EQ (iCostFromC, iCostFromAsm); \
  ASSERT_EQ (iModeFromC, iModeFromAsm); \
}
104
// Intra combined-cost tests, instantiated per instruction set the build enables.
// Each line expands to one TEST named after its first argument.
#if defined(X86_ASM)
GENERATE_Intra16x16_UT (WelsIntra16x16Combined3Sad_ssse3, WelsSampleSadIntra16x16Combined3_c, 1, WELS_CPU_SSSE3)
GENERATE_Intra16x16_UT (WelsIntra16x16Combined3Satd_sse41, WelsSampleSatdIntra16x16Combined3_c, 1, WELS_CPU_SSE41)
GENERATE_Intra8x8_UT (WelsIntraChroma8x8Combined3Satd_sse41, WelsSampleSatdIntra8x8Combined3_c, 1, WELS_CPU_SSE41)
GENERATE_Intra4x4_UT (WelsSampleSatdThree4x4_sse2, 1, WELS_CPU_SSE2)
#endif // X86_ASM

#if defined(HAVE_NEON)
GENERATE_Intra16x16_UT (WelsIntra16x16Combined3Sad_neon, WelsSampleSadIntra16x16Combined3_c, 1, WELS_CPU_NEON)
GENERATE_Intra16x16_UT (WelsIntra16x16Combined3Satd_neon, WelsSampleSatdIntra16x16Combined3_c, 1, WELS_CPU_NEON)
GENERATE_Intra8x8_UT (WelsIntra8x8Combined3Satd_neon, WelsSampleSatdIntra8x8Combined3_c, 1, WELS_CPU_NEON)
GENERATE_Intra8x8_UT (WelsIntra8x8Combined3Sad_neon, WelsSampleSadIntra8x8Combined3_c, 1, WELS_CPU_NEON)
GENERATE_Intra4x4_UT (WelsIntra4x4Combined3Satd_neon, 1, WELS_CPU_NEON)
#endif // HAVE_NEON

#if defined(HAVE_NEON_AARCH64)
GENERATE_Intra16x16_UT (WelsIntra16x16Combined3Sad_AArch64_neon, WelsSampleSadIntra16x16Combined3_c, 1, WELS_CPU_NEON)
GENERATE_Intra16x16_UT (WelsIntra16x16Combined3Satd_AArch64_neon, WelsSampleSatdIntra16x16Combined3_c, 1, WELS_CPU_NEON)
GENERATE_Intra8x8_UT (WelsIntra8x8Combined3Satd_AArch64_neon, WelsSampleSatdIntra8x8Combined3_c, 1, WELS_CPU_NEON)
GENERATE_Intra8x8_UT (WelsIntra8x8Combined3Sad_AArch64_neon, WelsSampleSadIntra8x8Combined3_c, 1, WELS_CPU_NEON)
GENERATE_Intra4x4_UT (WelsIntra4x4Combined3Satd_AArch64_neon, 1, WELS_CPU_NEON)
#endif // HAVE_NEON_AARCH64
127
// Allocation guard for the second of two buffers: when B is NULL, releases A
// through the in-scope `pMemAlign`, resets A to NULL and raises a fatal
// GoogleTest failure (which returns from the enclosing void function).
//   A - already-allocated buffer to release; must be a modifiable pointer lvalue.
//   B - pointer whose allocation is being checked.
// A is cleared after the free because GoogleTest still runs TearDown() when
// SetUp() fails fatally; a stale pointer there would be freed a second time.
// The expansion is intentionally a bare `if` block rather than do/while(0):
// existing call sites invoke the macro without a trailing semicolon. Do not use
// it directly before an `else`.
#define ASSERT_MEMORY_FAIL2X(A, B) \
if (NULL == (B)) { \
  pMemAlign->WelsFree ((A), "Sad_SrcA"); \
  (A) = NULL; \
  ASSERT_TRUE (0); \
}
133
// Allocation guard for the third of three buffers: when C is NULL, releases A
// and B through the in-scope `pMemAlign`, resets both to NULL and raises a
// fatal GoogleTest failure (which returns from the enclosing void function).
//   A, B - already-allocated buffers to release; must be modifiable pointer lvalues.
//   C    - pointer whose allocation is being checked.
// A and B are cleared after the free because GoogleTest still runs TearDown()
// when SetUp() fails fatally; stale pointers there would be freed a second time.
// The expansion is intentionally a bare `if` block rather than do/while(0):
// existing call sites invoke the macro without a trailing semicolon. Do not use
// it directly before an `else`.
#define ASSERT_MEMORY_FAIL3X(A, B, C) \
if (NULL == (C)) { \
  pMemAlign->WelsFree ((A), "Sad_SrcA"); \
  (A) = NULL; \
  pMemAlign->WelsFree ((B), "Sad_SrcB"); \
  (B) = NULL; \
  ASSERT_TRUE (0); \
}
140
141 #define PIXEL_STRIDE 32
142
143 class SadSatdCFuncTest : public testing::Test {
144 public:
SetUp()145 virtual void SetUp() {
146 pMemAlign = new CMemoryAlign (0);
147
148 m_iStrideA = rand() % 256 + PIXEL_STRIDE;
149 m_iStrideB = rand() % 256 + PIXEL_STRIDE;
150 m_pPixSrcA = (uint8_t*)pMemAlign->WelsMalloc (m_iStrideA << 5, "Sad_m_pPixSrcA");
151 ASSERT_TRUE (NULL != m_pPixSrcA);
152 m_pPixSrcB = (uint8_t*)pMemAlign->WelsMalloc (m_iStrideB << 5, "Sad_m_pPixSrcB");
153 ASSERT_MEMORY_FAIL2X (m_pPixSrcA, m_pPixSrcB)
154 m_pSad = (int32_t*)pMemAlign->WelsMalloc (4 * sizeof (int32_t), "m_pSad");
155 ASSERT_MEMORY_FAIL3X (m_pPixSrcA, m_pPixSrcB, m_pSad)
156 }
TearDown()157 virtual void TearDown() {
158 pMemAlign->WelsFree (m_pPixSrcA, "Sad_m_pPixSrcA");
159 pMemAlign->WelsFree (m_pPixSrcB, "Sad_m_pPixSrcB");
160 pMemAlign->WelsFree (m_pSad, "m_pSad");
161 delete pMemAlign;
162 }
163 public:
164 uint8_t* m_pPixSrcA;
165 uint8_t* m_pPixSrcB;
166 int32_t m_iStrideA;
167 int32_t m_iStrideB;
168 int32_t* m_pSad;
169
170 CMemoryAlign* pMemAlign;
171 };
172
// WelsSampleSad4x4_c must equal a direct 4x4 sum of absolute differences.
TEST_F (SadSatdCFuncTest, WelsSampleSad4x4_c) {
  const int32_t kiWidth = 4;
  const int32_t kiHeight = 4;

  // Randomise the rows under test: all of A first, then all of B.
  for (int32_t n = 0; n < m_iStrideA * kiHeight; ++n) {
    m_pPixSrcA[n] = rand() % 256;
  }
  for (int32_t n = 0; n < m_iStrideB * kiHeight; ++n) {
    m_pPixSrcB[n] = rand() % 256;
  }

  int32_t iExpectedSad = 0;
  for (int32_t y = 0; y < kiHeight; ++y) {
    const uint8_t* pRowA = m_pPixSrcA + y * m_iStrideA;
    const uint8_t* pRowB = m_pPixSrcB + y * m_iStrideB;
    for (int32_t x = 0; x < kiWidth; ++x) {
      iExpectedSad += abs (pRowA[x] - pRowB[x]);
    }
  }
  EXPECT_EQ (WelsSampleSad4x4_c (m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), iExpectedSad);
}
190
// WelsSampleSad8x4_c must equal a direct sum of absolute differences over
// 8 columns and 4 rows.
TEST_F (SadSatdCFuncTest, WelsSampleSad8x4_c) {
  const int32_t kiWidth = 8;
  const int32_t kiHeight = 4;

  // Randomise the rows under test: all of A first, then all of B.
  for (int32_t n = 0; n < m_iStrideA * kiHeight; ++n) {
    m_pPixSrcA[n] = rand() % 256;
  }
  for (int32_t n = 0; n < m_iStrideB * kiHeight; ++n) {
    m_pPixSrcB[n] = rand() % 256;
  }

  int32_t iExpectedSad = 0;
  for (int32_t y = 0; y < kiHeight; ++y) {
    const uint8_t* pRowA = m_pPixSrcA + y * m_iStrideA;
    const uint8_t* pRowB = m_pPixSrcB + y * m_iStrideB;
    for (int32_t x = 0; x < kiWidth; ++x) {
      iExpectedSad += abs (pRowA[x] - pRowB[x]);
    }
  }
  EXPECT_EQ (WelsSampleSad8x4_c (m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), iExpectedSad);
}
208
// WelsSampleSad4x8_c must equal a direct sum of absolute differences over
// 4 columns and 8 rows.
TEST_F (SadSatdCFuncTest, WelsSampleSad4x8_c) {
  const int32_t kiWidth = 4;
  const int32_t kiHeight = 8;

  // Randomise the rows under test: all of A first, then all of B.
  for (int32_t n = 0; n < m_iStrideA * kiHeight; ++n) {
    m_pPixSrcA[n] = rand() % 256;
  }
  for (int32_t n = 0; n < m_iStrideB * kiHeight; ++n) {
    m_pPixSrcB[n] = rand() % 256;
  }

  int32_t iExpectedSad = 0;
  for (int32_t y = 0; y < kiHeight; ++y) {
    const uint8_t* pRowA = m_pPixSrcA + y * m_iStrideA;
    const uint8_t* pRowB = m_pPixSrcB + y * m_iStrideB;
    for (int32_t x = 0; x < kiWidth; ++x) {
      iExpectedSad += abs (pRowA[x] - pRowB[x]);
    }
  }
  EXPECT_EQ (WelsSampleSad4x8_c (m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), iExpectedSad);
}
226
// WelsSampleSad8x8_c must equal a direct 8x8 sum of absolute differences.
TEST_F (SadSatdCFuncTest, WelsSampleSad8x8_c) {
  const int32_t kiWidth = 8;
  const int32_t kiHeight = 8;

  // Randomise the rows under test: all of A first, then all of B.
  for (int32_t n = 0; n < m_iStrideA * kiHeight; ++n) {
    m_pPixSrcA[n] = rand() % 256;
  }
  for (int32_t n = 0; n < m_iStrideB * kiHeight; ++n) {
    m_pPixSrcB[n] = rand() % 256;
  }

  int32_t iExpectedSad = 0;
  for (int32_t y = 0; y < kiHeight; ++y) {
    const uint8_t* pRowA = m_pPixSrcA + y * m_iStrideA;
    const uint8_t* pRowB = m_pPixSrcB + y * m_iStrideB;
    for (int32_t x = 0; x < kiWidth; ++x) {
      iExpectedSad += abs (pRowA[x] - pRowB[x]);
    }
  }
  EXPECT_EQ (WelsSampleSad8x8_c (m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), iExpectedSad);
}
245
// WelsSampleSad16x8_c must equal a direct sum of absolute differences over
// 16 columns and 8 rows.
TEST_F (SadSatdCFuncTest, WelsSampleSad16x8_c) {
  const int32_t kiWidth = 16;
  const int32_t kiHeight = 8;

  // Randomise the rows under test: all of A first, then all of B.
  for (int32_t n = 0; n < m_iStrideA * kiHeight; ++n) {
    m_pPixSrcA[n] = rand() % 256;
  }
  for (int32_t n = 0; n < m_iStrideB * kiHeight; ++n) {
    m_pPixSrcB[n] = rand() % 256;
  }

  int32_t iExpectedSad = 0;
  for (int32_t y = 0; y < kiHeight; ++y) {
    const uint8_t* pRowA = m_pPixSrcA + y * m_iStrideA;
    const uint8_t* pRowB = m_pPixSrcB + y * m_iStrideB;
    for (int32_t x = 0; x < kiWidth; ++x) {
      iExpectedSad += abs (pRowA[x] - pRowB[x]);
    }
  }
  EXPECT_EQ (WelsSampleSad16x8_c (m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), iExpectedSad);
}
264
// WelsSampleSad8x16_c must equal a direct sum of absolute differences over
// 8 columns and 16 rows.
TEST_F (SadSatdCFuncTest, WelsSampleSad8x16_c) {
  const int32_t kiWidth = 8;
  const int32_t kiHeight = 16;

  // Randomise the rows under test: all of A first, then all of B.
  for (int32_t n = 0; n < m_iStrideA * kiHeight; ++n) {
    m_pPixSrcA[n] = rand() % 256;
  }
  for (int32_t n = 0; n < m_iStrideB * kiHeight; ++n) {
    m_pPixSrcB[n] = rand() % 256;
  }

  int32_t iExpectedSad = 0;
  for (int32_t y = 0; y < kiHeight; ++y) {
    const uint8_t* pRowA = m_pPixSrcA + y * m_iStrideA;
    const uint8_t* pRowB = m_pPixSrcB + y * m_iStrideB;
    for (int32_t x = 0; x < kiWidth; ++x) {
      iExpectedSad += abs (pRowA[x] - pRowB[x]);
    }
  }
  EXPECT_EQ (WelsSampleSad8x16_c (m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), iExpectedSad);
}
283
// WelsSampleSad16x16_c must equal a direct 16x16 sum of absolute differences.
TEST_F (SadSatdCFuncTest, WelsSampleSad16x16_c) {
  const int32_t kiWidth = 16;
  const int32_t kiHeight = 16;

  // Randomise the rows under test: all of A first, then all of B.
  for (int32_t n = 0; n < m_iStrideA * kiHeight; ++n) {
    m_pPixSrcA[n] = rand() % 256;
  }
  for (int32_t n = 0; n < m_iStrideB * kiHeight; ++n) {
    m_pPixSrcB[n] = rand() % 256;
  }

  int32_t iExpectedSad = 0;
  for (int32_t y = 0; y < kiHeight; ++y) {
    const uint8_t* pRowA = m_pPixSrcA + y * m_iStrideA;
    const uint8_t* pRowB = m_pPixSrcB + y * m_iStrideB;
    for (int32_t x = 0; x < kiWidth; ++x) {
      iExpectedSad += abs (pRowA[x] - pRowB[x]);
    }
  }
  EXPECT_EQ (WelsSampleSad16x16_c (m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), iExpectedSad);
}
302
// WelsSampleSatd4x4_c must equal (S + 1) >> 1, where S is the sum of absolute
// values of the 4x4 difference block after a separable +/-1 transform applied
// first down the columns and then along the rows.
TEST_F (SadSatdCFuncTest, WelsSampleSatd4x4_c) {
  // Sign pattern of the four basis vectors used in both transform passes.
  static const int32_t kiSign[4][4] = {
    { 1,  1,  1,  1 },
    { 1,  1, -1, -1 },
    { 1, -1, -1,  1 },
    { 1, -1,  1, -1 }
  };

  // Randomise the four rows under test: all of A first, then all of B.
  for (int32_t n = 0; n < (m_iStrideA << 2); ++n) {
    m_pPixSrcA[n] = rand() % 256;
  }
  for (int32_t n = 0; n < (m_iStrideB << 2); ++n) {
    m_pPixSrcB[n] = rand() % 256;
  }

  // Row-major 4x4 block of pixel differences.
  int32_t iDiff[16];
  for (int32_t y = 0; y < 4; ++y) {
    const uint8_t* pRowA = m_pPixSrcA + y * m_iStrideA;
    const uint8_t* pRowB = m_pPixSrcB + y * m_iStrideB;
    for (int32_t x = 0; x < 4; ++x) {
      iDiff[4 * y + x] = pRowA[x] - pRowB[x];
    }
  }

  // Column pass: output row p, column c combines the four entries of column c.
  int32_t iColPass[16];
  for (int32_t p = 0; p < 4; ++p) {
    for (int32_t c = 0; c < 4; ++c) {
      int32_t iAcc = 0;
      for (int32_t k = 0; k < 4; ++k) {
        iAcc += kiSign[p][k] * iDiff[4 * k + c];
      }
      iColPass[4 * p + c] = iAcc;
    }
  }

  // Row pass, accumulating the absolute value of each coefficient directly.
  int32_t iSumSatd = 0;
  for (int32_t r = 0; r < 4; ++r) {
    for (int32_t q = 0; q < 4; ++q) {
      int32_t iCoef = 0;
      for (int32_t k = 0; k < 4; ++k) {
        iCoef += kiSign[q][k] * iColPass[4 * r + k];
      }
      iSumSatd += abs (iCoef);
    }
  }

  EXPECT_EQ (WelsSampleSatd4x4_c (m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), (iSumSatd + 1) >> 1);
}
365
// The four values WelsSampleSadFour16x16_c writes to m_pSad must add up to the
// SAD of the 16x16 block in A against the block in B shifted by one pixel
// left, right, up and down.
TEST_F (SadSatdCFuncTest, WelsSampleSadFour16x16_c) {
  const int32_t kiWidth = 16;
  const int32_t kiHeight = 16;

  // Randomise both buffers completely (32 rows each): A first, then B.
  for (int32_t n = 0; n < (m_iStrideA << 5); ++n) {
    m_pPixSrcA[n] = rand() % 256;
  }
  for (int32_t n = 0; n < (m_iStrideB << 5); ++n) {
    m_pPixSrcB[n] = rand() % 256;
  }

  // The B block starts one row in, so the row above it lies inside the buffer.
  uint8_t* pBlockB = m_pPixSrcB + m_iStrideB;

  int32_t iExpectedSum = 0;
  for (int32_t y = 0; y < kiHeight; ++y) {
    const uint8_t* pRowA = m_pPixSrcA + y * m_iStrideA;
    const uint8_t* pRowB = pBlockB + y * m_iStrideB;
    for (int32_t x = 0; x < kiWidth; ++x) {
      const int32_t iCur = pRowA[x];
      iExpectedSum += abs (iCur - pRowB[x - 1]);
      iExpectedSum += abs (iCur - pRowB[x + 1]);
      iExpectedSum += abs (iCur - pRowB[x - m_iStrideB]);
      iExpectedSum += abs (iCur - pRowB[x + m_iStrideB]);
    }
  }

  WelsSampleSadFour16x16_c (m_pPixSrcA, m_iStrideA, pBlockB, m_iStrideB, m_pSad);
  EXPECT_EQ (m_pSad[0] + m_pSad[1] + m_pSad[2] + m_pSad[3], iExpectedSum);
}
388
// The four values WelsSampleSadFour16x8_c writes to m_pSad must add up to the
// SAD of the 16-wide, 8-high block in A against the block in B shifted by one
// pixel left, right, up and down.
TEST_F (SadSatdCFuncTest, WelsSampleSadFour16x8_c) {
  const int32_t kiWidth = 16;
  const int32_t kiHeight = 8;

  // Randomise both buffers completely (32 rows each): A first, then B.
  for (int32_t n = 0; n < (m_iStrideA << 5); ++n) {
    m_pPixSrcA[n] = rand() % 256;
  }
  for (int32_t n = 0; n < (m_iStrideB << 5); ++n) {
    m_pPixSrcB[n] = rand() % 256;
  }

  // The B block starts one row in, so the row above it lies inside the buffer.
  uint8_t* pBlockB = m_pPixSrcB + m_iStrideB;

  int32_t iExpectedSum = 0;
  for (int32_t y = 0; y < kiHeight; ++y) {
    const uint8_t* pRowA = m_pPixSrcA + y * m_iStrideA;
    const uint8_t* pRowB = pBlockB + y * m_iStrideB;
    for (int32_t x = 0; x < kiWidth; ++x) {
      const int32_t iCur = pRowA[x];
      iExpectedSum += abs (iCur - pRowB[x - 1]);
      iExpectedSum += abs (iCur - pRowB[x + 1]);
      iExpectedSum += abs (iCur - pRowB[x - m_iStrideB]);
      iExpectedSum += abs (iCur - pRowB[x + m_iStrideB]);
    }
  }

  WelsSampleSadFour16x8_c (m_pPixSrcA, m_iStrideA, pBlockB, m_iStrideB, m_pSad);
  EXPECT_EQ (m_pSad[0] + m_pSad[1] + m_pSad[2] + m_pSad[3], iExpectedSum);
}
412
// The four values WelsSampleSadFour8x16_c writes to m_pSad must add up to the
// SAD of the 8-wide, 16-high block in A against the block in B shifted by one
// pixel left, right, up and down.
TEST_F (SadSatdCFuncTest, WelsSampleSadFour8x16_c) {
  const int32_t kiWidth = 8;
  const int32_t kiHeight = 16;

  // Randomise both buffers completely (32 rows each): A first, then B.
  for (int32_t n = 0; n < (m_iStrideA << 5); ++n) {
    m_pPixSrcA[n] = rand() % 256;
  }
  for (int32_t n = 0; n < (m_iStrideB << 5); ++n) {
    m_pPixSrcB[n] = rand() % 256;
  }

  // The B block starts one row in, so the row above it lies inside the buffer.
  uint8_t* pBlockB = m_pPixSrcB + m_iStrideB;

  int32_t iExpectedSum = 0;
  for (int32_t y = 0; y < kiHeight; ++y) {
    const uint8_t* pRowA = m_pPixSrcA + y * m_iStrideA;
    const uint8_t* pRowB = pBlockB + y * m_iStrideB;
    for (int32_t x = 0; x < kiWidth; ++x) {
      const int32_t iCur = pRowA[x];
      iExpectedSum += abs (iCur - pRowB[x - 1]);
      iExpectedSum += abs (iCur - pRowB[x + 1]);
      iExpectedSum += abs (iCur - pRowB[x - m_iStrideB]);
      iExpectedSum += abs (iCur - pRowB[x + m_iStrideB]);
    }
  }

  WelsSampleSadFour8x16_c (m_pPixSrcA, m_iStrideA, pBlockB, m_iStrideB, m_pSad);
  EXPECT_EQ (m_pSad[0] + m_pSad[1] + m_pSad[2] + m_pSad[3], iExpectedSum);
}
436
// The four values WelsSampleSadFour8x8_c writes to m_pSad must add up to the
// SAD of the 8x8 block in A against the block in B shifted by one pixel left,
// right, up and down.
TEST_F (SadSatdCFuncTest, WelsSampleSadFour8x8_c) {
  const int32_t kiWidth = 8;
  const int32_t kiHeight = 8;

  // Randomise the first 16 rows of each buffer: A first, then B.
  for (int32_t n = 0; n < (m_iStrideA << 4); ++n) {
    m_pPixSrcA[n] = rand() % 256;
  }
  for (int32_t n = 0; n < (m_iStrideB << 4); ++n) {
    m_pPixSrcB[n] = rand() % 256;
  }

  // The B block starts one row in, so the row above it lies inside the buffer.
  uint8_t* pBlockB = m_pPixSrcB + m_iStrideB;

  int32_t iExpectedSum = 0;
  for (int32_t y = 0; y < kiHeight; ++y) {
    const uint8_t* pRowA = m_pPixSrcA + y * m_iStrideA;
    const uint8_t* pRowB = pBlockB + y * m_iStrideB;
    for (int32_t x = 0; x < kiWidth; ++x) {
      const int32_t iCur = pRowA[x];
      iExpectedSum += abs (iCur - pRowB[x - 1]);
      iExpectedSum += abs (iCur - pRowB[x + 1]);
      iExpectedSum += abs (iCur - pRowB[x - m_iStrideB]);
      iExpectedSum += abs (iCur - pRowB[x + m_iStrideB]);
    }
  }

  WelsSampleSadFour8x8_c (m_pPixSrcA, m_iStrideA, pBlockB, m_iStrideB, m_pSad);
  EXPECT_EQ (m_pSad[0] + m_pSad[1] + m_pSad[2] + m_pSad[3], iExpectedSum);
}
459
// The four values WelsSampleSadFour4x4_c writes to m_pSad must add up to the
// SAD of the 4x4 block in A against the block in B shifted by one pixel left,
// right, up and down.
TEST_F (SadSatdCFuncTest, WelsSampleSadFour4x4_c) {
  const int32_t kiWidth = 4;
  const int32_t kiHeight = 4;

  // Randomise the first 8 rows of each buffer: A first, then B.
  for (int32_t n = 0; n < (m_iStrideA << 3); ++n) {
    m_pPixSrcA[n] = rand() % 256;
  }
  for (int32_t n = 0; n < (m_iStrideB << 3); ++n) {
    m_pPixSrcB[n] = rand() % 256;
  }

  // The B block starts one row in, so the row above it lies inside the buffer.
  uint8_t* pBlockB = m_pPixSrcB + m_iStrideB;

  int32_t iExpectedSum = 0;
  for (int32_t y = 0; y < kiHeight; ++y) {
    const uint8_t* pRowA = m_pPixSrcA + y * m_iStrideA;
    const uint8_t* pRowB = pBlockB + y * m_iStrideB;
    for (int32_t x = 0; x < kiWidth; ++x) {
      const int32_t iCur = pRowA[x];
      iExpectedSum += abs (iCur - pRowB[x - 1]);
      iExpectedSum += abs (iCur - pRowB[x + 1]);
      iExpectedSum += abs (iCur - pRowB[x - m_iStrideB]);
      iExpectedSum += abs (iCur - pRowB[x + m_iStrideB]);
    }
  }

  WelsSampleSadFour4x4_c (m_pPixSrcA, m_iStrideA, pBlockB, m_iStrideB, m_pSad);
  EXPECT_EQ (m_pSad[0] + m_pSad[1] + m_pSad[2] + m_pSad[3], iExpectedSum);
}
482
// The four values WelsSampleSadFour8x4_c writes to m_pSad must add up to the
// SAD of the 8-wide, 4-high block in A against the block in B shifted by one
// pixel left, right, up and down.
TEST_F (SadSatdCFuncTest, WelsSampleSadFour8x4_c) {
  const int32_t kiWidth = 8;
  const int32_t kiHeight = 4;

  // Randomise the first 8 rows of each buffer: A first, then B.
  for (int32_t n = 0; n < (m_iStrideA << 3); ++n) {
    m_pPixSrcA[n] = rand() % 256;
  }
  for (int32_t n = 0; n < (m_iStrideB << 3); ++n) {
    m_pPixSrcB[n] = rand() % 256;
  }

  // The B block starts one row in, so the row above it lies inside the buffer.
  uint8_t* pBlockB = m_pPixSrcB + m_iStrideB;

  int32_t iExpectedSum = 0;
  for (int32_t y = 0; y < kiHeight; ++y) {
    const uint8_t* pRowA = m_pPixSrcA + y * m_iStrideA;
    const uint8_t* pRowB = pBlockB + y * m_iStrideB;
    for (int32_t x = 0; x < kiWidth; ++x) {
      const int32_t iCur = pRowA[x];
      iExpectedSum += abs (iCur - pRowB[x - 1]);
      iExpectedSum += abs (iCur - pRowB[x + 1]);
      iExpectedSum += abs (iCur - pRowB[x - m_iStrideB]);
      iExpectedSum += abs (iCur - pRowB[x + m_iStrideB]);
    }
  }

  WelsSampleSadFour8x4_c (m_pPixSrcA, m_iStrideA, pBlockB, m_iStrideB, m_pSad);
  EXPECT_EQ (m_pSad[0] + m_pSad[1] + m_pSad[2] + m_pSad[3], iExpectedSum);
}
505
// The four values WelsSampleSadFour4x8_c writes to m_pSad must add up to the
// SAD of the 4-wide, 8-high block in A against the block in B shifted by one
// pixel left, right, up and down.
TEST_F (SadSatdCFuncTest, WelsSampleSadFour4x8_c) {
  const int32_t kiWidth = 4;
  const int32_t kiHeight = 8;

  // Randomise the first 16 rows of each buffer: A first, then B.
  for (int32_t n = 0; n < (m_iStrideA << 4); ++n) {
    m_pPixSrcA[n] = rand() % 256;
  }
  for (int32_t n = 0; n < (m_iStrideB << 4); ++n) {
    m_pPixSrcB[n] = rand() % 256;
  }

  // The B block starts one row in, so the row above it lies inside the buffer.
  uint8_t* pBlockB = m_pPixSrcB + m_iStrideB;

  int32_t iExpectedSum = 0;
  for (int32_t y = 0; y < kiHeight; ++y) {
    const uint8_t* pRowA = m_pPixSrcA + y * m_iStrideA;
    const uint8_t* pRowB = pBlockB + y * m_iStrideB;
    for (int32_t x = 0; x < kiWidth; ++x) {
      const int32_t iCur = pRowA[x];
      iExpectedSum += abs (iCur - pRowB[x - 1]);
      iExpectedSum += abs (iCur - pRowB[x + 1]);
      iExpectedSum += abs (iCur - pRowB[x - m_iStrideB]);
      iExpectedSum += abs (iCur - pRowB[x + m_iStrideB]);
    }
  }

  WelsSampleSadFour4x8_c (m_pPixSrcA, m_iStrideA, pBlockB, m_iStrideB, m_pSad);
  EXPECT_EQ (m_pSad[0] + m_pSad[1] + m_pSad[2] + m_pSad[3], iExpectedSum);
}
528
529 class SadSatdAssemblyFuncTest : public testing::Test {
530 public:
SetUp()531 virtual void SetUp() {
532 int32_t iCpuCores = 0;
533 m_uiCpuFeatureFlag = WelsCPUFeatureDetect (&iCpuCores);
534 pMemAlign = new CMemoryAlign (16);
535 m_iStrideA = m_iStrideB = PIXEL_STRIDE;
536 m_pPixSrcA = (uint8_t*)pMemAlign->WelsMalloc (m_iStrideA << 5, "Sad_m_pPixSrcA");
537 ASSERT_TRUE (NULL != m_pPixSrcA);
538 m_pPixSrcB = (uint8_t*)pMemAlign->WelsMalloc (m_iStrideB << 5, "Sad_m_pPixSrcB");
539 ASSERT_MEMORY_FAIL2X (m_pPixSrcA, m_pPixSrcB)
540 m_pSad = (int32_t*)pMemAlign->WelsMalloc (4 * sizeof (int32_t), "m_pSad");
541 ASSERT_MEMORY_FAIL3X (m_pPixSrcA, m_pPixSrcB, m_pSad)
542 }
TearDown()543 virtual void TearDown() {
544 pMemAlign->WelsFree (m_pPixSrcA, "Sad_m_pPixSrcA");
545 pMemAlign->WelsFree (m_pPixSrcB, "Sad_m_pPixSrcB");
546 pMemAlign->WelsFree (m_pSad, "m_pSad");
547 delete pMemAlign;
548 }
549 public:
550 uint32_t m_uiCpuFeatureFlag;
551 uint8_t* m_pPixSrcA;
552 uint8_t* m_pPixSrcB;
553 int32_t m_iStrideA;
554 int32_t m_iStrideB;
555 int32_t* m_pSad;
556
557 CMemoryAlign* pMemAlign;
558 };
559
// Shared body of the GENERATE_Sad*_UT macros below. Defines
// TEST_F (SadSatdAssemblyFuncTest, func), which randomises the first
// (1 << kRowShift) rows of both fixture buffers (A fully before B) and expects
// `ref` and `func` to return the same value for them.
//   func      - routine under test; also used as the test name.
//   ref       - reference routine with the same signature.
//   CPUFLAGS  - feature bit(s); the test returns early (and so reports
//               success) when none of them is set in m_uiCpuFeatureFlag.
//   kRowShift - log2 of the number of rows to randomise.
// Only block comments may be used inside the body because of the line splices.
#define GENERATE_SadRows_UT(func, ref, CPUFLAGS, kRowShift) \
TEST_F (SadSatdAssemblyFuncTest, func) { \
  if ((m_uiCpuFeatureFlag & CPUFLAGS) == 0) \
    return; \
  for (int32_t n = 0; n < (m_iStrideA << kRowShift); ++n) { \
    m_pPixSrcA[n] = rand() % 256; \
  } \
  for (int32_t n = 0; n < (m_iStrideB << kRowShift); ++n) { \
    m_pPixSrcB[n] = rand() % 256; \
  } \
  EXPECT_EQ (ref (m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), \
             func (m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB)); \
}

// 4-row blocks: randomise 4 rows.
#define GENERATE_Sad4x4_UT(func, ref, CPUFLAGS) \
GENERATE_SadRows_UT (func, ref, CPUFLAGS, 2)

// 8-row blocks: randomise 8 rows.
#define GENERATE_Sad8x8_UT(func, ref, CPUFLAGS) \
GENERATE_SadRows_UT (func, ref, CPUFLAGS, 3)

// 16-row blocks: randomise 16 rows.
#define GENERATE_Sad8x16_UT(func, ref, CPUFLAGS) \
GENERATE_SadRows_UT (func, ref, CPUFLAGS, 4)

// 8-row blocks: randomise 8 rows.
#define GENERATE_Sad16x8_UT(func, ref, CPUFLAGS) \
GENERATE_SadRows_UT (func, ref, CPUFLAGS, 3)

// 16-row blocks: randomise 16 rows.
#define GENERATE_Sad16x16_UT(func, ref, CPUFLAGS) \
GENERATE_SadRows_UT (func, ref, CPUFLAGS, 4)
619
// SAD / SATD tests, instantiated per instruction set the build enables.
// Each line expands to one TEST_F named after its first argument and checks it
// against the plain-C routine given as the second argument.
#if defined(X86_ASM)
GENERATE_Sad4x4_UT (WelsSampleSad4x4_mmx, WelsSampleSad4x4_c, WELS_CPU_MMXEXT)
GENERATE_Sad8x8_UT (WelsSampleSad8x8_sse21, WelsSampleSad8x8_c, WELS_CPU_SSE2)
GENERATE_Sad8x16_UT (WelsSampleSad8x16_sse2, WelsSampleSad8x16_c, WELS_CPU_SSE2)
GENERATE_Sad16x8_UT (WelsSampleSad16x8_sse2, WelsSampleSad16x8_c, WELS_CPU_SSE2)
GENERATE_Sad16x16_UT (WelsSampleSad16x16_sse2, WelsSampleSad16x16_c, WELS_CPU_SSE2)

GENERATE_Sad4x4_UT (WelsSampleSatd4x4_sse2, WelsSampleSatd4x4_c, WELS_CPU_SSE2)
GENERATE_Sad8x8_UT (WelsSampleSatd8x8_sse2, WelsSampleSatd8x8_c, WELS_CPU_SSE2)
GENERATE_Sad8x16_UT (WelsSampleSatd8x16_sse2, WelsSampleSatd8x16_c, WELS_CPU_SSE2)
GENERATE_Sad16x8_UT (WelsSampleSatd16x8_sse2, WelsSampleSatd16x8_c, WELS_CPU_SSE2)
GENERATE_Sad16x16_UT (WelsSampleSatd16x16_sse2, WelsSampleSatd16x16_c, WELS_CPU_SSE2)

GENERATE_Sad4x4_UT (WelsSampleSatd4x4_sse41, WelsSampleSatd4x4_c, WELS_CPU_SSE41)
GENERATE_Sad8x8_UT (WelsSampleSatd8x8_sse41, WelsSampleSatd8x8_c, WELS_CPU_SSE41)
GENERATE_Sad8x16_UT (WelsSampleSatd8x16_sse41, WelsSampleSatd8x16_c, WELS_CPU_SSE41)
GENERATE_Sad16x8_UT (WelsSampleSatd16x8_sse41, WelsSampleSatd16x8_c, WELS_CPU_SSE41)
GENERATE_Sad16x16_UT (WelsSampleSatd16x16_sse41, WelsSampleSatd16x16_c, WELS_CPU_SSE41)

#if defined(HAVE_AVX2)
GENERATE_Sad8x8_UT (WelsSampleSatd8x8_avx2, WelsSampleSatd8x8_c, WELS_CPU_AVX2)
GENERATE_Sad8x16_UT (WelsSampleSatd8x16_avx2, WelsSampleSatd8x16_c, WELS_CPU_AVX2)
GENERATE_Sad16x8_UT (WelsSampleSatd16x8_avx2, WelsSampleSatd16x8_c, WELS_CPU_AVX2)
GENERATE_Sad16x16_UT (WelsSampleSatd16x16_avx2, WelsSampleSatd16x16_c, WELS_CPU_AVX2)
#endif // HAVE_AVX2
#endif // X86_ASM

#if defined(HAVE_NEON)
GENERATE_Sad4x4_UT (WelsSampleSad4x4_neon, WelsSampleSad4x4_c, WELS_CPU_NEON)
GENERATE_Sad8x8_UT (WelsSampleSad8x8_neon, WelsSampleSad8x8_c, WELS_CPU_NEON)
GENERATE_Sad8x16_UT (WelsSampleSad8x16_neon, WelsSampleSad8x16_c, WELS_CPU_NEON)
GENERATE_Sad16x8_UT (WelsSampleSad16x8_neon, WelsSampleSad16x8_c, WELS_CPU_NEON)
GENERATE_Sad16x16_UT (WelsSampleSad16x16_neon, WelsSampleSad16x16_c, WELS_CPU_NEON)

GENERATE_Sad4x4_UT (WelsSampleSatd4x4_neon, WelsSampleSatd4x4_c, WELS_CPU_NEON)
GENERATE_Sad8x8_UT (WelsSampleSatd8x8_neon, WelsSampleSatd8x8_c, WELS_CPU_NEON)
GENERATE_Sad8x16_UT (WelsSampleSatd8x16_neon, WelsSampleSatd8x16_c, WELS_CPU_NEON)
GENERATE_Sad16x8_UT (WelsSampleSatd16x8_neon, WelsSampleSatd16x8_c, WELS_CPU_NEON)
GENERATE_Sad16x16_UT (WelsSampleSatd16x16_neon, WelsSampleSatd16x16_c, WELS_CPU_NEON)
#endif // HAVE_NEON

#if defined(HAVE_NEON_AARCH64)
GENERATE_Sad4x4_UT (WelsSampleSad4x4_AArch64_neon, WelsSampleSad4x4_c, WELS_CPU_NEON)
GENERATE_Sad8x8_UT (WelsSampleSad8x8_AArch64_neon, WelsSampleSad8x8_c, WELS_CPU_NEON)
GENERATE_Sad8x16_UT (WelsSampleSad8x16_AArch64_neon, WelsSampleSad8x16_c, WELS_CPU_NEON)
GENERATE_Sad16x8_UT (WelsSampleSad16x8_AArch64_neon, WelsSampleSad16x8_c, WELS_CPU_NEON)
GENERATE_Sad16x16_UT (WelsSampleSad16x16_AArch64_neon, WelsSampleSad16x16_c, WELS_CPU_NEON)

GENERATE_Sad4x4_UT (WelsSampleSatd4x4_AArch64_neon, WelsSampleSatd4x4_c, WELS_CPU_NEON)
GENERATE_Sad8x8_UT (WelsSampleSatd8x8_AArch64_neon, WelsSampleSatd8x8_c, WELS_CPU_NEON)
GENERATE_Sad8x16_UT (WelsSampleSatd8x16_AArch64_neon, WelsSampleSatd8x16_c, WELS_CPU_NEON)
GENERATE_Sad16x8_UT (WelsSampleSatd16x8_AArch64_neon, WelsSampleSatd16x8_c, WELS_CPU_NEON)
GENERATE_Sad16x16_UT (WelsSampleSatd16x16_AArch64_neon, WelsSampleSatd16x16_c, WELS_CPU_NEON)
#endif // HAVE_NEON_AARCH64

#if defined(HAVE_MMI)
GENERATE_Sad4x4_UT (WelsSampleSad4x4_mmi, WelsSampleSad4x4_c, WELS_CPU_MMI)
GENERATE_Sad8x8_UT (WelsSampleSad8x8_mmi, WelsSampleSad8x8_c, WELS_CPU_MMI)
GENERATE_Sad8x16_UT (WelsSampleSad8x16_mmi, WelsSampleSad8x16_c, WELS_CPU_MMI)
GENERATE_Sad16x8_UT (WelsSampleSad16x8_mmi, WelsSampleSad16x8_c, WELS_CPU_MMI)
GENERATE_Sad16x16_UT (WelsSampleSad16x16_mmi, WelsSampleSad16x16_c, WELS_CPU_MMI)

GENERATE_Sad4x4_UT (WelsSampleSatd4x4_mmi, WelsSampleSatd4x4_c, WELS_CPU_MMI)
GENERATE_Sad8x8_UT (WelsSampleSatd8x8_mmi, WelsSampleSatd8x8_c, WELS_CPU_MMI)
GENERATE_Sad8x16_UT (WelsSampleSatd8x16_mmi, WelsSampleSatd8x16_c, WELS_CPU_MMI)
GENERATE_Sad16x8_UT (WelsSampleSatd16x8_mmi, WelsSampleSatd16x8_c, WELS_CPU_MMI)
GENERATE_Sad16x16_UT (WelsSampleSatd16x16_mmi, WelsSampleSatd16x16_c, WELS_CPU_MMI)
#endif // HAVE_MMI

#if defined(HAVE_LASX)
GENERATE_Sad4x4_UT (WelsSampleSad4x4_lasx, WelsSampleSad4x4_c, WELS_CPU_LASX)
GENERATE_Sad8x8_UT (WelsSampleSad8x8_lasx, WelsSampleSad8x8_c, WELS_CPU_LASX)
GENERATE_Sad8x16_UT (WelsSampleSad8x16_lasx, WelsSampleSad8x16_c, WELS_CPU_LASX)
GENERATE_Sad16x8_UT (WelsSampleSad16x8_lasx, WelsSampleSad16x8_c, WELS_CPU_LASX)
GENERATE_Sad16x16_UT (WelsSampleSad16x16_lasx, WelsSampleSad16x16_c, WELS_CPU_LASX)

GENERATE_Sad4x4_UT (WelsSampleSatd4x4_lasx, WelsSampleSatd4x4_c, WELS_CPU_LASX)
GENERATE_Sad8x8_UT (WelsSampleSatd8x8_lasx, WelsSampleSatd8x8_c, WELS_CPU_LASX)
GENERATE_Sad8x16_UT (WelsSampleSatd8x16_lasx, WelsSampleSatd8x16_c, WELS_CPU_LASX)
GENERATE_Sad16x8_UT (WelsSampleSatd16x8_lasx, WelsSampleSatd16x8_c, WELS_CPU_LASX)
GENERATE_Sad16x16_UT (WelsSampleSatd16x16_lasx, WelsSampleSatd16x16_c, WELS_CPU_LASX)
#endif // HAVE_LASX
702
// Defines TEST_F (SadSatdAssemblyFuncTest, func) for a "SadFour" routine: the
// four values `func` writes to m_pSad must add up to the SAD of the
// width x height block in A against the block in B shifted by one pixel left,
// right, up and down.
//   func          - routine under test; also used as the test name.
//   CPUFLAGS      - feature bit(s); the test returns early (and so reports
//                   success) when none of them is set in m_uiCpuFeatureFlag.
//   width, height - block dimensions in pixels.
// Both buffers are randomised completely (32 rows each, A before B). The B
// block starts one row in, so the row above it lies inside the buffer.
// Only block comments may be used inside the body because of the line splices.
#define GENERATE_SadFour_UT(func, CPUFLAGS, width, height) \
TEST_F (SadSatdAssemblyFuncTest, func) { \
  if ((m_uiCpuFeatureFlag & CPUFLAGS) == 0) \
    return; \
  for (int32_t n = 0; n < (m_iStrideA << 5); ++n) { \
    m_pPixSrcA[n] = rand() % 256; \
  } \
  for (int32_t n = 0; n < (m_iStrideB << 5); ++n) { \
    m_pPixSrcB[n] = rand() % 256; \
  } \
  uint8_t* pBlockB = m_pPixSrcB + m_iStrideB; \
  int32_t iExpectedSum = 0; \
  for (int32_t y = 0; y < height; ++y) { \
    const uint8_t* pRowA = m_pPixSrcA + y * m_iStrideA; \
    const uint8_t* pRowB = pBlockB + y * m_iStrideB; \
    for (int32_t x = 0; x < width; ++x) { \
      const int32_t iCur = pRowA[x]; \
      iExpectedSum += abs (iCur - pRowB[x - 1]); \
      iExpectedSum += abs (iCur - pRowB[x + 1]); \
      iExpectedSum += abs (iCur - pRowB[x - m_iStrideB]); \
      iExpectedSum += abs (iCur - pRowB[x + m_iStrideB]); \
    } \
  } \
  func (m_pPixSrcA, m_iStrideA, pBlockB, m_iStrideB, m_pSad); \
  EXPECT_EQ (m_pSad[0] + m_pSad[1] + m_pSad[2] + m_pSad[3], iExpectedSum); \
}
727
// SadFour tests, instantiated per instruction set the build enables.
// Arguments: routine under test, required CPU flag, block width, block height.
#if defined(X86_ASM)
GENERATE_SadFour_UT (WelsSampleSadFour4x4_sse2, WELS_CPU_SSE2, 4, 4)
GENERATE_SadFour_UT (WelsSampleSadFour8x8_sse2, WELS_CPU_SSE2, 8, 8)
GENERATE_SadFour_UT (WelsSampleSadFour8x16_sse2, WELS_CPU_SSE2, 8, 16)
GENERATE_SadFour_UT (WelsSampleSadFour16x8_sse2, WELS_CPU_SSE2, 16, 8)
GENERATE_SadFour_UT (WelsSampleSadFour16x16_sse2, WELS_CPU_SSE2, 16, 16)
#endif // X86_ASM

#if defined(HAVE_NEON)
GENERATE_SadFour_UT (WelsSampleSadFour4x4_neon, WELS_CPU_NEON, 4, 4)
GENERATE_SadFour_UT (WelsSampleSadFour8x8_neon, WELS_CPU_NEON, 8, 8)
GENERATE_SadFour_UT (WelsSampleSadFour8x16_neon, WELS_CPU_NEON, 8, 16)
GENERATE_SadFour_UT (WelsSampleSadFour16x8_neon, WELS_CPU_NEON, 16, 8)
GENERATE_SadFour_UT (WelsSampleSadFour16x16_neon, WELS_CPU_NEON, 16, 16)
#endif // HAVE_NEON

#if defined(HAVE_NEON_AARCH64)
GENERATE_SadFour_UT (WelsSampleSadFour4x4_AArch64_neon, WELS_CPU_NEON, 4, 4)
GENERATE_SadFour_UT (WelsSampleSadFour8x8_AArch64_neon, WELS_CPU_NEON, 8, 8)
GENERATE_SadFour_UT (WelsSampleSadFour8x16_AArch64_neon, WELS_CPU_NEON, 8, 16)
GENERATE_SadFour_UT (WelsSampleSadFour16x8_AArch64_neon, WELS_CPU_NEON, 16, 8)
GENERATE_SadFour_UT (WelsSampleSadFour16x16_AArch64_neon, WELS_CPU_NEON, 16, 16)
#endif // HAVE_NEON_AARCH64

#if defined(HAVE_MMI)
GENERATE_SadFour_UT (WelsSampleSadFour8x8_mmi, WELS_CPU_MMI, 8, 8)
GENERATE_SadFour_UT (WelsSampleSadFour8x16_mmi, WELS_CPU_MMI, 8, 16)
GENERATE_SadFour_UT (WelsSampleSadFour16x8_mmi, WELS_CPU_MMI, 16, 8)
GENERATE_SadFour_UT (WelsSampleSadFour16x16_mmi, WELS_CPU_MMI, 16, 16)
#endif // HAVE_MMI

#if defined(HAVE_LASX)
GENERATE_SadFour_UT (WelsSampleSadFour4x4_lasx, WELS_CPU_LASX, 4, 4)
GENERATE_SadFour_UT (WelsSampleSadFour8x8_lasx, WELS_CPU_LASX, 8, 8)
GENERATE_SadFour_UT (WelsSampleSadFour8x16_lasx, WELS_CPU_LASX, 8, 16)
GENERATE_SadFour_UT (WelsSampleSadFour16x8_lasx, WELS_CPU_LASX, 16, 8)
GENERATE_SadFour_UT (WelsSampleSadFour16x16_lasx, WELS_CPU_LASX, 16, 16)
#endif // HAVE_LASX
766