• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include <gtest/gtest.h>
2 #include <math.h>
3 
4 #include "cpu_core.h"
5 #include "cpu.h"
6 #include "sample.h"
7 #include "sad_common.h"
8 #include "get_intra_predictor.h"
9 
10 using namespace WelsEnc;
11 
// Builds a gtest case named after `func` that feeds identical random buffers to
// `ref` and `func` and requires both the returned cost and the reported best
// mode to agree.
//   func     - function under test (its name doubles as the test name)
//   ref      - reference implementation called with the same arguments
//   ASM      - if non-zero, the test returns early (and therefore passes) when
//              the CPU reports none of the bits in CPUFLAGS
//   CPUFLAGS - mask checked against the result of WelsCPUFeatureDetect()
#define GENERATE_Intra16x16_UT(func, ref, ASM, CPUFLAGS) \
TEST (IntraSadSatdFuncTest, func) { \
  const int32_t iLineSizeDec = 32; \
  const int32_t iLineSizeEnc = 32; \
  int32_t iLambda = 50; \
  if (ASM) { \
    int32_t iCpuCores = 0; \
    const uint32_t uiCpuFlags = WelsCPUFeatureDetect (&iCpuCores); \
    if ((uiCpuFlags & CPUFLAGS) == 0) { \
      return; \
    } \
  } \
  ENFORCE_STACK_ALIGN_1D (uint8_t, pDec, iLineSizeDec << 5, 16); \
  ENFORCE_STACK_ALIGN_1D (uint8_t, pEnc, iLineSizeEnc << 5, 16); \
  ENFORCE_STACK_ALIGN_1D (uint8_t, pDst, 512, 16); \
  /* Buffers are randomised one after another: pDec, then pEnc, then pDst. */ \
  for (int iIdx = 0; iIdx < (iLineSizeDec << 5); ++iIdx) { \
    pDec[iIdx] = rand() % 256; \
  } \
  for (int iIdx = 0; iIdx < (iLineSizeEnc << 5); ++iIdx) { \
    pEnc[iIdx] = rand() % 256; \
  } \
  for (int iIdx = 0; iIdx < 512; ++iIdx) { \
    pDst[iIdx] = rand() % 256; \
  } \
  int32_t iModeRef, iModeTest; \
  /* pDec + 128 points four 32-byte rows into the buffer. */ \
  const int32_t iCostRef = ref (pDec + 128, iLineSizeDec, pEnc, iLineSizeEnc, &iModeRef, iLambda, pDst); \
  const int32_t iCostTest = func (pDec + 128, iLineSizeDec, pEnc, iLineSizeEnc, &iModeTest, iLambda, pDst); \
  ASSERT_EQ (iCostRef, iCostTest); \
  ASSERT_EQ (iModeRef, iModeTest); \
}
38 
// Builds a gtest case named after `func` that compares it with
// WelsSampleSatdIntra4x4Combined3_c on identical random buffers; the returned
// cost and the reported best mode must both match.
//   func     - function under test (its name doubles as the test name)
//   ASM      - if non-zero, the test returns early (and therefore passes) when
//              the CPU reports none of the bits in CPUFLAGS
//   CPUFLAGS - mask checked against the result of WelsCPUFeatureDetect()
// A random mode in [0, 2] is drawn first; the per-mode lambda argument is
// iLambda for that mode and iLambda << 2 for the other two.
#define GENERATE_Intra4x4_UT(func, ASM, CPUFLAGS) \
TEST (IntraSadSatdFuncTest, func) { \
  const int32_t iLineSizeDec = 32; \
  const int32_t iLineSizeEnc = 32; \
  int32_t iLambda = 50; \
  /* Drawn before the CPU check, so rand() is consumed even on early return. */ \
  const int32_t iPredMode = rand() % 3; \
  const int32_t iLambdaMode2 = (iPredMode == 2) ? iLambda : (iLambda << 2); \
  const int32_t iLambdaMode1 = (iPredMode == 1) ? iLambda : (iLambda << 2); \
  const int32_t iLambdaMode0 = (iPredMode == 0) ? iLambda : (iLambda << 2); \
  if (ASM) { \
    int32_t iCpuCores = 0; \
    const uint32_t uiCpuFlags = WelsCPUFeatureDetect (&iCpuCores); \
    if ((uiCpuFlags & CPUFLAGS) == 0) { \
      return; \
    } \
  } \
  ENFORCE_STACK_ALIGN_1D (uint8_t, pDec, iLineSizeDec << 5, 16); \
  ENFORCE_STACK_ALIGN_1D (uint8_t, pEnc, iLineSizeEnc << 5, 16); \
  ENFORCE_STACK_ALIGN_1D (uint8_t, pDst, 512, 16); \
  /* Buffers are randomised one after another: pDec, then pEnc, then pDst. */ \
  for (int iIdx = 0; iIdx < (iLineSizeDec << 5); ++iIdx) { \
    pDec[iIdx] = rand() % 256; \
  } \
  for (int iIdx = 0; iIdx < (iLineSizeEnc << 5); ++iIdx) { \
    pEnc[iIdx] = rand() % 256; \
  } \
  for (int iIdx = 0; iIdx < 512; ++iIdx) { \
    pDst[iIdx] = rand() % 256; \
  } \
  int32_t iModeRef, iModeTest; \
  /* pDec + 128 points four 32-byte rows into the buffer. */ \
  const int32_t iCostRef = WelsSampleSatdIntra4x4Combined3_c (pDec + 128, iLineSizeDec, pEnc, iLineSizeEnc, \
                           pDst, &iModeRef, iLambdaMode2, iLambdaMode1, iLambdaMode0); \
  const int32_t iCostTest = func (pDec + 128, iLineSizeDec, pEnc, iLineSizeEnc, \
                                  pDst, &iModeTest, iLambdaMode2, iLambdaMode1, iLambdaMode0); \
  ASSERT_EQ (iCostRef, iCostTest); \
  ASSERT_EQ (iModeRef, iModeTest); \
}
69 
// Builds a gtest case named after `func` that feeds identical random Cb/Cr
// buffers to `ref` and `func` and requires both the returned cost and the
// reported best mode to agree.
//   func     - function under test (its name doubles as the test name)
//   ref      - reference implementation called with the same arguments
//   ASM      - if non-zero, the test returns early (and therefore passes) when
//              the CPU reports none of the bits in CPUFLAGS
//   CPUFLAGS - mask checked against the result of WelsCPUFeatureDetect()
#define GENERATE_Intra8x8_UT(func, ref, ASM, CPUFLAGS) \
TEST (IntraSadSatdFuncTest, func) { \
  const int32_t iLineSizeDec = 32; \
  const int32_t iLineSizeEnc = 32; \
  int32_t iLambda = 50; \
  if (ASM) { \
    int32_t iCpuCores = 0; \
    const uint32_t uiCpuFlags = WelsCPUFeatureDetect (&iCpuCores); \
    if ((uiCpuFlags & CPUFLAGS) == 0) { \
      return; \
    } \
  } \
  ENFORCE_STACK_ALIGN_1D (uint8_t, pDecCb, iLineSizeDec << 5, 16); \
  ENFORCE_STACK_ALIGN_1D (uint8_t, pEncCb, iLineSizeEnc << 5, 16); \
  ENFORCE_STACK_ALIGN_1D (uint8_t, pDecCr, iLineSizeDec << 5, 16); \
  ENFORCE_STACK_ALIGN_1D (uint8_t, pEncCr, iLineSizeEnc << 5, 16); \
  ENFORCE_STACK_ALIGN_1D (uint8_t, pDstChma, 512, 16); \
  /* Cb and Cr samples are drawn alternately, byte by byte. */ \
  for (int iIdx = 0; iIdx < (iLineSizeDec << 5); ++iIdx) { \
    pDecCb[iIdx] = rand() % 256; \
    pDecCr[iIdx] = rand() % 256; \
  } \
  for (int iIdx = 0; iIdx < (iLineSizeEnc << 5); ++iIdx) { \
    pEncCb[iIdx] = rand() % 256; \
    pEncCr[iIdx] = rand() % 256; \
  } \
  for (int iIdx = 0; iIdx < 512; ++iIdx) { \
    pDstChma[iIdx] = rand() % 256; \
  } \
  int32_t iModeRef, iModeTest; \
  /* The + 128 offsets point four 32-byte rows into each decoded buffer. */ \
  const int32_t iCostRef = ref (pDecCb + 128, iLineSizeDec, pEncCb, iLineSizeEnc, &iModeRef, iLambda, \
                                pDstChma, pDecCr + 128, pEncCr); \
  const int32_t iCostTest = func (pDecCb + 128, iLineSizeDec, pEncCb, iLineSizeEnc, &iModeTest, iLambda, \
                                  pDstChma, pDecCr + 128, pEncCr); \
  ASSERT_EQ (iCostRef, iCostTest); \
  ASSERT_EQ (iModeRef, iModeTest); \
}
104 
// Intra cost tests: each line instantiates one test for the named optimised
// routine. The 16x16 and 8x8 generators take the C reference as their second
// argument; the 4x4 generator has its reference built in.

// x86 builds.
#ifdef X86_ASM
GENERATE_Intra16x16_UT (WelsIntra16x16Combined3Sad_ssse3, WelsSampleSadIntra16x16Combined3_c, 1, WELS_CPU_SSSE3)
GENERATE_Intra16x16_UT (WelsIntra16x16Combined3Satd_sse41, WelsSampleSatdIntra16x16Combined3_c, 1, WELS_CPU_SSE41)
GENERATE_Intra8x8_UT (WelsIntraChroma8x8Combined3Satd_sse41, WelsSampleSatdIntra8x8Combined3_c, 1, WELS_CPU_SSE41)
GENERATE_Intra4x4_UT (WelsSampleSatdThree4x4_sse2, 1, WELS_CPU_SSE2)
#endif // X86_ASM

// NEON builds.
#ifdef HAVE_NEON
GENERATE_Intra16x16_UT (WelsIntra16x16Combined3Sad_neon, WelsSampleSadIntra16x16Combined3_c, 1, WELS_CPU_NEON)
GENERATE_Intra16x16_UT (WelsIntra16x16Combined3Satd_neon, WelsSampleSatdIntra16x16Combined3_c, 1, WELS_CPU_NEON)
GENERATE_Intra8x8_UT (WelsIntra8x8Combined3Satd_neon, WelsSampleSatdIntra8x8Combined3_c, 1, WELS_CPU_NEON)
GENERATE_Intra8x8_UT (WelsIntra8x8Combined3Sad_neon, WelsSampleSadIntra8x8Combined3_c, 1, WELS_CPU_NEON)
GENERATE_Intra4x4_UT (WelsIntra4x4Combined3Satd_neon, 1, WELS_CPU_NEON)
#endif // HAVE_NEON

// AArch64 NEON builds.
#ifdef HAVE_NEON_AARCH64
GENERATE_Intra16x16_UT (WelsIntra16x16Combined3Sad_AArch64_neon, WelsSampleSadIntra16x16Combined3_c, 1, WELS_CPU_NEON)
GENERATE_Intra16x16_UT (WelsIntra16x16Combined3Satd_AArch64_neon, WelsSampleSatdIntra16x16Combined3_c, 1, WELS_CPU_NEON)
GENERATE_Intra8x8_UT (WelsIntra8x8Combined3Satd_AArch64_neon, WelsSampleSatdIntra8x8Combined3_c, 1, WELS_CPU_NEON)
GENERATE_Intra8x8_UT (WelsIntra8x8Combined3Sad_AArch64_neon, WelsSampleSadIntra8x8Combined3_c, 1, WELS_CPU_NEON)
GENERATE_Intra4x4_UT (WelsIntra4x4Combined3Satd_AArch64_neon, 1, WELS_CPU_NEON)
#endif // HAVE_NEON_AARCH64
127 
// Allocation-failure helpers for fixture SetUp(): when the most recent
// allocation (the last argument) is NULL, release the buffers allocated before
// it and abort SetUp() with a fatal gtest failure.
//
// The released pointers are reset to NULL. gtest still invokes TearDown() after
// a fatal failure in SetUp(), and TearDown() frees the same members again, so a
// pointer left dangling here would be freed twice.
//
// A (and B for the 3X form) must be assignable pointer lvalues. The macros
// expand to a bare `if` block and are used without a trailing semicolon; they
// rely on a `pMemAlign` object being in scope.
#define ASSERT_MEMORY_FAIL2X(A, B)          \
  if (NULL == (B)) {                        \
    pMemAlign->WelsFree ((A), "Sad_SrcA");  \
    (A) = NULL;                             \
    ASSERT_TRUE (0);                        \
  }

#define ASSERT_MEMORY_FAIL3X(A, B, C)       \
  if (NULL == (C)) {                        \
    pMemAlign->WelsFree ((A), "Sad_SrcA");  \
    (A) = NULL;                             \
    pMemAlign->WelsFree ((B), "Sad_SrcB");  \
    (B) = NULL;                             \
    ASSERT_TRUE (0);                        \
  }

// Row stride in bytes used for the fixture pixel buffers: the assembly fixture
// uses it as-is, the C fixture adds a random extra of 0..255 bytes on top.
#define PIXEL_STRIDE 32
142 
143 class SadSatdCFuncTest : public testing::Test {
144  public:
SetUp()145   virtual void SetUp() {
146     pMemAlign = new CMemoryAlign (0);
147 
148     m_iStrideA = rand() % 256 + PIXEL_STRIDE;
149     m_iStrideB = rand() % 256 + PIXEL_STRIDE;
150     m_pPixSrcA = (uint8_t*)pMemAlign->WelsMalloc (m_iStrideA << 5, "Sad_m_pPixSrcA");
151     ASSERT_TRUE (NULL != m_pPixSrcA);
152     m_pPixSrcB = (uint8_t*)pMemAlign->WelsMalloc (m_iStrideB << 5, "Sad_m_pPixSrcB");
153     ASSERT_MEMORY_FAIL2X (m_pPixSrcA, m_pPixSrcB)
154     m_pSad = (int32_t*)pMemAlign->WelsMalloc (4 * sizeof (int32_t), "m_pSad");
155     ASSERT_MEMORY_FAIL3X (m_pPixSrcA, m_pPixSrcB, m_pSad)
156   }
TearDown()157   virtual void TearDown() {
158     pMemAlign->WelsFree (m_pPixSrcA, "Sad_m_pPixSrcA");
159     pMemAlign->WelsFree (m_pPixSrcB, "Sad_m_pPixSrcB");
160     pMemAlign->WelsFree (m_pSad, "m_pSad");
161     delete pMemAlign;
162   }
163  public:
164   uint8_t* m_pPixSrcA;
165   uint8_t* m_pPixSrcB;
166   int32_t m_iStrideA;
167   int32_t m_iStrideB;
168   int32_t* m_pSad;
169 
170   CMemoryAlign* pMemAlign;
171 };
172 
// WelsSampleSad4x4_c must equal a directly computed sum of absolute
// differences over the top-left 4x4 block of the two random buffers.
TEST_F (SadSatdCFuncTest, WelsSampleSad4x4_c) {
  // Randomise the first four rows of A, then the first four rows of B.
  for (int iIdx = 0; iIdx < (m_iStrideA << 2); ++iIdx) {
    m_pPixSrcA[iIdx] = rand() % 256;
  }
  for (int iIdx = 0; iIdx < (m_iStrideB << 2); ++iIdx) {
    m_pPixSrcB[iIdx] = rand() % 256;
  }

  int32_t iExpectedSad = 0;
  for (int iRow = 0; iRow < 4; ++iRow) {
    const uint8_t* pRowA = m_pPixSrcA + iRow * m_iStrideA;
    const uint8_t* pRowB = m_pPixSrcB + iRow * m_iStrideB;
    for (int iCol = 0; iCol < 4; ++iCol) {
      iExpectedSad += abs (pRowA[iCol] - pRowB[iCol]);
    }
  }
  EXPECT_EQ (WelsSampleSad4x4_c (m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), iExpectedSad);
}
190 
// WelsSampleSad8x4_c must equal a directly computed sum of absolute
// differences over a block 8 pixels wide and 4 rows high.
TEST_F (SadSatdCFuncTest, WelsSampleSad8x4_c) {
  // Randomise the first four rows of A, then the first four rows of B.
  for (int iIdx = 0; iIdx < (m_iStrideA << 2); ++iIdx) {
    m_pPixSrcA[iIdx] = rand() % 256;
  }
  for (int iIdx = 0; iIdx < (m_iStrideB << 2); ++iIdx) {
    m_pPixSrcB[iIdx] = rand() % 256;
  }

  int32_t iExpectedSad = 0;
  for (int iRow = 0; iRow < 4; ++iRow) {
    const uint8_t* pRowA = m_pPixSrcA + iRow * m_iStrideA;
    const uint8_t* pRowB = m_pPixSrcB + iRow * m_iStrideB;
    for (int iCol = 0; iCol < 8; ++iCol) {
      iExpectedSad += abs (pRowA[iCol] - pRowB[iCol]);
    }
  }
  EXPECT_EQ (WelsSampleSad8x4_c (m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), iExpectedSad);
}
208 
// WelsSampleSad4x8_c must equal a directly computed sum of absolute
// differences over a block 4 pixels wide and 8 rows high.
TEST_F (SadSatdCFuncTest, WelsSampleSad4x8_c) {
  // Randomise the first eight rows of A, then the first eight rows of B.
  for (int iIdx = 0; iIdx < (m_iStrideA << 3); ++iIdx) {
    m_pPixSrcA[iIdx] = rand() % 256;
  }
  for (int iIdx = 0; iIdx < (m_iStrideB << 3); ++iIdx) {
    m_pPixSrcB[iIdx] = rand() % 256;
  }

  int32_t iExpectedSad = 0;
  for (int iRow = 0; iRow < 8; ++iRow) {
    const uint8_t* pRowA = m_pPixSrcA + iRow * m_iStrideA;
    const uint8_t* pRowB = m_pPixSrcB + iRow * m_iStrideB;
    for (int iCol = 0; iCol < 4; ++iCol) {
      iExpectedSad += abs (pRowA[iCol] - pRowB[iCol]);
    }
  }
  EXPECT_EQ (WelsSampleSad4x8_c (m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), iExpectedSad);
}
226 
// WelsSampleSad8x8_c must equal a directly computed sum of absolute
// differences over an 8x8 block.
TEST_F (SadSatdCFuncTest, WelsSampleSad8x8_c) {
  // Randomise the first eight rows of A, then the first eight rows of B.
  for (int iIdx = 0; iIdx < (m_iStrideA << 3); ++iIdx) {
    m_pPixSrcA[iIdx] = rand() % 256;
  }
  for (int iIdx = 0; iIdx < (m_iStrideB << 3); ++iIdx) {
    m_pPixSrcB[iIdx] = rand() % 256;
  }

  int32_t iExpectedSad = 0;
  for (int iRow = 0; iRow < 8; ++iRow) {
    const uint8_t* pRowA = m_pPixSrcA + iRow * m_iStrideA;
    const uint8_t* pRowB = m_pPixSrcB + iRow * m_iStrideB;
    for (int iCol = 0; iCol < 8; ++iCol) {
      iExpectedSad += abs (pRowA[iCol] - pRowB[iCol]);
    }
  }
  EXPECT_EQ (WelsSampleSad8x8_c (m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), iExpectedSad);
}
245 
// WelsSampleSad16x8_c must equal a directly computed sum of absolute
// differences over a block 16 pixels wide and 8 rows high.
TEST_F (SadSatdCFuncTest, WelsSampleSad16x8_c) {
  // Randomise the first eight rows of A, then the first eight rows of B.
  for (int iIdx = 0; iIdx < (m_iStrideA << 3); ++iIdx) {
    m_pPixSrcA[iIdx] = rand() % 256;
  }
  for (int iIdx = 0; iIdx < (m_iStrideB << 3); ++iIdx) {
    m_pPixSrcB[iIdx] = rand() % 256;
  }

  int32_t iExpectedSad = 0;
  for (int iRow = 0; iRow < 8; ++iRow) {
    const uint8_t* pRowA = m_pPixSrcA + iRow * m_iStrideA;
    const uint8_t* pRowB = m_pPixSrcB + iRow * m_iStrideB;
    for (int iCol = 0; iCol < 16; ++iCol) {
      iExpectedSad += abs (pRowA[iCol] - pRowB[iCol]);
    }
  }
  EXPECT_EQ (WelsSampleSad16x8_c (m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), iExpectedSad);
}
264 
// WelsSampleSad8x16_c must equal a directly computed sum of absolute
// differences over a block 8 pixels wide and 16 rows high.
TEST_F (SadSatdCFuncTest, WelsSampleSad8x16_c) {
  // Randomise the first sixteen rows of A, then the first sixteen rows of B.
  for (int iIdx = 0; iIdx < (m_iStrideA << 4); ++iIdx) {
    m_pPixSrcA[iIdx] = rand() % 256;
  }
  for (int iIdx = 0; iIdx < (m_iStrideB << 4); ++iIdx) {
    m_pPixSrcB[iIdx] = rand() % 256;
  }

  int32_t iExpectedSad = 0;
  for (int iRow = 0; iRow < 16; ++iRow) {
    const uint8_t* pRowA = m_pPixSrcA + iRow * m_iStrideA;
    const uint8_t* pRowB = m_pPixSrcB + iRow * m_iStrideB;
    for (int iCol = 0; iCol < 8; ++iCol) {
      iExpectedSad += abs (pRowA[iCol] - pRowB[iCol]);
    }
  }
  EXPECT_EQ (WelsSampleSad8x16_c (m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), iExpectedSad);
}
283 
// WelsSampleSad16x16_c must equal a directly computed sum of absolute
// differences over a 16x16 block.
TEST_F (SadSatdCFuncTest, WelsSampleSad16x16_c) {
  // Randomise the first sixteen rows of A, then the first sixteen rows of B.
  for (int iIdx = 0; iIdx < (m_iStrideA << 4); ++iIdx) {
    m_pPixSrcA[iIdx] = rand() % 256;
  }
  for (int iIdx = 0; iIdx < (m_iStrideB << 4); ++iIdx) {
    m_pPixSrcB[iIdx] = rand() % 256;
  }

  int32_t iExpectedSad = 0;
  for (int iRow = 0; iRow < 16; ++iRow) {
    const uint8_t* pRowA = m_pPixSrcA + iRow * m_iStrideA;
    const uint8_t* pRowB = m_pPixSrcB + iRow * m_iStrideB;
    for (int iCol = 0; iCol < 16; ++iCol) {
      iExpectedSad += abs (pRowA[iCol] - pRowB[iCol]);
    }
  }
  EXPECT_EQ (WelsSampleSad16x16_c (m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), iExpectedSad);
}
302 
TEST_F(SadSatdCFuncTest,WelsSampleSatd4x4_c)303 TEST_F (SadSatdCFuncTest, WelsSampleSatd4x4_c) {
304   for (int i = 0; i < (m_iStrideA << 2); i++)
305     m_pPixSrcA[i] = rand() % 256;
306   for (int i = 0; i < (m_iStrideB << 2); i++)
307     m_pPixSrcB[i] = rand() % 256;
308   uint8_t* pPixA = m_pPixSrcA;
309   uint8_t* pPixB = m_pPixSrcB;
310 
311   int32_t W[16], T[16], Y[16], k = 0;
312   for (int i = 0; i < 4; i++) {
313     for (int j = 0; j < 4; j++)
314       W[k++] = pPixA[j] - pPixB[j];
315     pPixA += m_iStrideA;
316     pPixB += m_iStrideB;
317   }
318 
319   T[0] = W[0] + W[4] + W[8] + W[12];
320   T[1] = W[1] + W[5] + W[9] + W[13];
321   T[2] = W[2] + W[6] + W[10] + W[14];
322   T[3] = W[3] + W[7] + W[11] + W[15];
323 
324   T[4] = W[0] + W[4] - W[8] - W[12];
325   T[5] = W[1] + W[5] - W[9] - W[13];
326   T[6] = W[2] + W[6] - W[10] - W[14];
327   T[7] = W[3] + W[7] - W[11] - W[15];
328 
329   T[8] = W[0] - W[4] - W[8] + W[12];
330   T[9] = W[1] - W[5] - W[9] + W[13];
331   T[10] = W[2] - W[6] - W[10] + W[14];
332   T[11] = W[3] - W[7] - W[11] + W[15];
333 
334   T[12] = W[0] - W[4] + W[8] - W[12];
335   T[13] = W[1] - W[5] + W[9] - W[13];
336   T[14] = W[2] - W[6] + W[10] - W[14];
337   T[15] = W[3] - W[7] + W[11] - W[15];
338 
339   Y[0] = T[0] + T[1] + T[2] + T[3];
340   Y[1] = T[0] + T[1] - T[2] - T[3];
341   Y[2] = T[0] - T[1] - T[2] + T[3];
342   Y[3] = T[0] - T[1] + T[2] - T[3];
343 
344   Y[4] = T[4] + T[5] + T[6] + T[7];
345   Y[5] = T[4] + T[5] - T[6] - T[7];
346   Y[6] = T[4] - T[5] - T[6] + T[7];
347   Y[7] = T[4] - T[5] + T[6] - T[7];
348 
349   Y[8] = T[8] + T[9] + T[10] + T[11];
350   Y[9] = T[8] + T[9] - T[10] - T[11];
351   Y[10] = T[8] - T[9] - T[10] + T[11];
352   Y[11] = T[8] - T[9] + T[10] - T[11];
353 
354   Y[12] = T[12] + T[13] + T[14] + T[15];
355   Y[13] = T[12] + T[13] - T[14] - T[15];
356   Y[14] = T[12] - T[13] - T[14] + T[15];
357   Y[15] = T[12] - T[13] + T[14] - T[15];
358 
359   int32_t iSumSatd = 0;
360   for (int i = 0; i < 16; i++)
361     iSumSatd += abs (Y[i]);
362 
363   EXPECT_EQ (WelsSampleSatd4x4_c (m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), (iSumSatd + 1) >> 1);
364 }
365 
// The four results written by WelsSampleSadFour16x16_c must add up to the sum
// of the 16x16 SADs of A against B shifted one pixel left, one pixel right,
// one row up and one row down.
TEST_F (SadSatdCFuncTest, WelsSampleSadFour16x16_c) {
  // Randomise all 32 rows of A, then all 32 rows of B.
  for (int iIdx = 0; iIdx < (m_iStrideA << 5); ++iIdx) {
    m_pPixSrcA[iIdx] = rand() % 256;
  }
  for (int iIdx = 0; iIdx < (m_iStrideB << 5); ++iIdx) {
    m_pPixSrcB[iIdx] = rand() % 256;
  }
  // Start B one row in so that the "row above" and "pixel to the left" reads
  // stay inside the buffer.
  uint8_t* const pCentreB = m_pPixSrcB + m_iStrideB;

  int32_t iExpectedTotal = 0;
  for (int iRow = 0; iRow < 16; ++iRow) {
    const uint8_t* pRowA = m_pPixSrcA + iRow * m_iStrideA;
    const uint8_t* pRowB = pCentreB + iRow * m_iStrideB;
    for (int iCol = 0; iCol < 16; ++iCol) {
      const int32_t iCur = pRowA[iCol];
      iExpectedTotal += abs (iCur - pRowB[iCol - 1]);
      iExpectedTotal += abs (iCur - pRowB[iCol + 1]);
      iExpectedTotal += abs (iCur - pRowB[iCol - m_iStrideB]);
      iExpectedTotal += abs (iCur - pRowB[iCol + m_iStrideB]);
    }
  }
  WelsSampleSadFour16x16_c (m_pPixSrcA, m_iStrideA, pCentreB, m_iStrideB, m_pSad);
  EXPECT_EQ (m_pSad[0] + m_pSad[1] + m_pSad[2] + m_pSad[3], iExpectedTotal);
}
388 
// The four results written by WelsSampleSadFour16x8_c must add up to the sum
// of the 16-wide, 8-high SADs of A against B shifted one pixel left, one pixel
// right, one row up and one row down.
TEST_F (SadSatdCFuncTest, WelsSampleSadFour16x8_c) {
  // Randomise all 32 rows of A, then all 32 rows of B.
  for (int iIdx = 0; iIdx < (m_iStrideA << 5); ++iIdx) {
    m_pPixSrcA[iIdx] = rand() % 256;
  }
  for (int iIdx = 0; iIdx < (m_iStrideB << 5); ++iIdx) {
    m_pPixSrcB[iIdx] = rand() % 256;
  }
  // Start B one row in so that the "row above" and "pixel to the left" reads
  // stay inside the buffer.
  uint8_t* const pCentreB = m_pPixSrcB + m_iStrideB;

  int32_t iExpectedTotal = 0;
  for (int iRow = 0; iRow < 8; ++iRow) {
    const uint8_t* pRowA = m_pPixSrcA + iRow * m_iStrideA;
    const uint8_t* pRowB = pCentreB + iRow * m_iStrideB;
    for (int iCol = 0; iCol < 16; ++iCol) {
      const int32_t iCur = pRowA[iCol];
      iExpectedTotal += abs (iCur - pRowB[iCol - 1]);
      iExpectedTotal += abs (iCur - pRowB[iCol + 1]);
      iExpectedTotal += abs (iCur - pRowB[iCol - m_iStrideB]);
      iExpectedTotal += abs (iCur - pRowB[iCol + m_iStrideB]);
    }
  }

  WelsSampleSadFour16x8_c (m_pPixSrcA, m_iStrideA, pCentreB, m_iStrideB, m_pSad);
  EXPECT_EQ (m_pSad[0] + m_pSad[1] + m_pSad[2] + m_pSad[3], iExpectedTotal);
}
412 
// The four results written by WelsSampleSadFour8x16_c must add up to the sum
// of the 8-wide, 16-high SADs of A against B shifted one pixel left, one pixel
// right, one row up and one row down.
TEST_F (SadSatdCFuncTest, WelsSampleSadFour8x16_c) {
  // Randomise all 32 rows of A, then all 32 rows of B.
  for (int iIdx = 0; iIdx < (m_iStrideA << 5); ++iIdx) {
    m_pPixSrcA[iIdx] = rand() % 256;
  }
  for (int iIdx = 0; iIdx < (m_iStrideB << 5); ++iIdx) {
    m_pPixSrcB[iIdx] = rand() % 256;
  }
  // Start B one row in so that the "row above" and "pixel to the left" reads
  // stay inside the buffer.
  uint8_t* const pCentreB = m_pPixSrcB + m_iStrideB;

  int32_t iExpectedTotal = 0;
  for (int iRow = 0; iRow < 16; ++iRow) {
    const uint8_t* pRowA = m_pPixSrcA + iRow * m_iStrideA;
    const uint8_t* pRowB = pCentreB + iRow * m_iStrideB;
    for (int iCol = 0; iCol < 8; ++iCol) {
      const int32_t iCur = pRowA[iCol];
      iExpectedTotal += abs (iCur - pRowB[iCol - 1]);
      iExpectedTotal += abs (iCur - pRowB[iCol + 1]);
      iExpectedTotal += abs (iCur - pRowB[iCol - m_iStrideB]);
      iExpectedTotal += abs (iCur - pRowB[iCol + m_iStrideB]);
    }
  }

  WelsSampleSadFour8x16_c (m_pPixSrcA, m_iStrideA, pCentreB, m_iStrideB, m_pSad);
  EXPECT_EQ (m_pSad[0] + m_pSad[1] + m_pSad[2] + m_pSad[3], iExpectedTotal);
}
436 
// The four results written by WelsSampleSadFour8x8_c must add up to the sum of
// the 8x8 SADs of A against B shifted one pixel left, one pixel right, one row
// up and one row down.
TEST_F (SadSatdCFuncTest, WelsSampleSadFour8x8_c) {
  // Randomise the first sixteen rows of A, then the first sixteen rows of B.
  for (int iIdx = 0; iIdx < (m_iStrideA << 4); ++iIdx) {
    m_pPixSrcA[iIdx] = rand() % 256;
  }
  for (int iIdx = 0; iIdx < (m_iStrideB << 4); ++iIdx) {
    m_pPixSrcB[iIdx] = rand() % 256;
  }
  // Start B one row in so that the "row above" and "pixel to the left" reads
  // stay inside the buffer.
  uint8_t* const pCentreB = m_pPixSrcB + m_iStrideB;

  int32_t iExpectedTotal = 0;
  for (int iRow = 0; iRow < 8; ++iRow) {
    const uint8_t* pRowA = m_pPixSrcA + iRow * m_iStrideA;
    const uint8_t* pRowB = pCentreB + iRow * m_iStrideB;
    for (int iCol = 0; iCol < 8; ++iCol) {
      const int32_t iCur = pRowA[iCol];
      iExpectedTotal += abs (iCur - pRowB[iCol - 1]);
      iExpectedTotal += abs (iCur - pRowB[iCol + 1]);
      iExpectedTotal += abs (iCur - pRowB[iCol - m_iStrideB]);
      iExpectedTotal += abs (iCur - pRowB[iCol + m_iStrideB]);
    }
  }
  WelsSampleSadFour8x8_c (m_pPixSrcA, m_iStrideA, pCentreB, m_iStrideB, m_pSad);
  EXPECT_EQ (m_pSad[0] + m_pSad[1] + m_pSad[2] + m_pSad[3], iExpectedTotal);
}
459 
// The four results written by WelsSampleSadFour4x4_c must add up to the sum of
// the 4x4 SADs of A against B shifted one pixel left, one pixel right, one row
// up and one row down.
TEST_F (SadSatdCFuncTest, WelsSampleSadFour4x4_c) {
  // Randomise the first eight rows of A, then the first eight rows of B.
  for (int iIdx = 0; iIdx < (m_iStrideA << 3); ++iIdx) {
    m_pPixSrcA[iIdx] = rand() % 256;
  }
  for (int iIdx = 0; iIdx < (m_iStrideB << 3); ++iIdx) {
    m_pPixSrcB[iIdx] = rand() % 256;
  }
  // Start B one row in so that the "row above" and "pixel to the left" reads
  // stay inside the buffer.
  uint8_t* const pCentreB = m_pPixSrcB + m_iStrideB;

  int32_t iExpectedTotal = 0;
  for (int iRow = 0; iRow < 4; ++iRow) {
    const uint8_t* pRowA = m_pPixSrcA + iRow * m_iStrideA;
    const uint8_t* pRowB = pCentreB + iRow * m_iStrideB;
    for (int iCol = 0; iCol < 4; ++iCol) {
      const int32_t iCur = pRowA[iCol];
      iExpectedTotal += abs (iCur - pRowB[iCol - 1]);
      iExpectedTotal += abs (iCur - pRowB[iCol + 1]);
      iExpectedTotal += abs (iCur - pRowB[iCol - m_iStrideB]);
      iExpectedTotal += abs (iCur - pRowB[iCol + m_iStrideB]);
    }
  }
  WelsSampleSadFour4x4_c (m_pPixSrcA, m_iStrideA, pCentreB, m_iStrideB, m_pSad);
  EXPECT_EQ (m_pSad[0] + m_pSad[1] + m_pSad[2] + m_pSad[3], iExpectedTotal);
}
482 
// The four results written by WelsSampleSadFour8x4_c must add up to the sum of
// the 8-wide, 4-high SADs of A against B shifted one pixel left, one pixel
// right, one row up and one row down.
TEST_F (SadSatdCFuncTest, WelsSampleSadFour8x4_c) {
  // Randomise the first eight rows of A, then the first eight rows of B.
  for (int iIdx = 0; iIdx < (m_iStrideA << 3); ++iIdx) {
    m_pPixSrcA[iIdx] = rand() % 256;
  }
  for (int iIdx = 0; iIdx < (m_iStrideB << 3); ++iIdx) {
    m_pPixSrcB[iIdx] = rand() % 256;
  }
  // Start B one row in so that the "row above" and "pixel to the left" reads
  // stay inside the buffer.
  uint8_t* const pCentreB = m_pPixSrcB + m_iStrideB;

  int32_t iExpectedTotal = 0;
  for (int iRow = 0; iRow < 4; ++iRow) {
    const uint8_t* pRowA = m_pPixSrcA + iRow * m_iStrideA;
    const uint8_t* pRowB = pCentreB + iRow * m_iStrideB;
    for (int iCol = 0; iCol < 8; ++iCol) {
      const int32_t iCur = pRowA[iCol];
      iExpectedTotal += abs (iCur - pRowB[iCol - 1]);
      iExpectedTotal += abs (iCur - pRowB[iCol + 1]);
      iExpectedTotal += abs (iCur - pRowB[iCol - m_iStrideB]);
      iExpectedTotal += abs (iCur - pRowB[iCol + m_iStrideB]);
    }
  }
  WelsSampleSadFour8x4_c (m_pPixSrcA, m_iStrideA, pCentreB, m_iStrideB, m_pSad);
  EXPECT_EQ (m_pSad[0] + m_pSad[1] + m_pSad[2] + m_pSad[3], iExpectedTotal);
}
505 
// The four results written by WelsSampleSadFour4x8_c must add up to the sum of
// the 4-wide, 8-high SADs of A against B shifted one pixel left, one pixel
// right, one row up and one row down.
TEST_F (SadSatdCFuncTest, WelsSampleSadFour4x8_c) {
  // Randomise the first sixteen rows of A, then the first sixteen rows of B.
  for (int iIdx = 0; iIdx < (m_iStrideA << 4); ++iIdx) {
    m_pPixSrcA[iIdx] = rand() % 256;
  }
  for (int iIdx = 0; iIdx < (m_iStrideB << 4); ++iIdx) {
    m_pPixSrcB[iIdx] = rand() % 256;
  }
  // Start B one row in so that the "row above" and "pixel to the left" reads
  // stay inside the buffer.
  uint8_t* const pCentreB = m_pPixSrcB + m_iStrideB;

  int32_t iExpectedTotal = 0;
  for (int iRow = 0; iRow < 8; ++iRow) {
    const uint8_t* pRowA = m_pPixSrcA + iRow * m_iStrideA;
    const uint8_t* pRowB = pCentreB + iRow * m_iStrideB;
    for (int iCol = 0; iCol < 4; ++iCol) {
      const int32_t iCur = pRowA[iCol];
      iExpectedTotal += abs (iCur - pRowB[iCol - 1]);
      iExpectedTotal += abs (iCur - pRowB[iCol + 1]);
      iExpectedTotal += abs (iCur - pRowB[iCol - m_iStrideB]);
      iExpectedTotal += abs (iCur - pRowB[iCol + m_iStrideB]);
    }
  }
  WelsSampleSadFour4x8_c (m_pPixSrcA, m_iStrideA, pCentreB, m_iStrideB, m_pSad);
  EXPECT_EQ (m_pSad[0] + m_pSad[1] + m_pSad[2] + m_pSad[3], iExpectedTotal);
}
528 
529 class SadSatdAssemblyFuncTest : public testing::Test {
530  public:
SetUp()531   virtual void SetUp() {
532     int32_t iCpuCores = 0;
533     m_uiCpuFeatureFlag = WelsCPUFeatureDetect (&iCpuCores);
534     pMemAlign = new CMemoryAlign (16);
535     m_iStrideA = m_iStrideB = PIXEL_STRIDE;
536     m_pPixSrcA = (uint8_t*)pMemAlign->WelsMalloc (m_iStrideA << 5, "Sad_m_pPixSrcA");
537     ASSERT_TRUE (NULL != m_pPixSrcA);
538     m_pPixSrcB = (uint8_t*)pMemAlign->WelsMalloc (m_iStrideB << 5, "Sad_m_pPixSrcB");
539     ASSERT_MEMORY_FAIL2X (m_pPixSrcA, m_pPixSrcB)
540     m_pSad = (int32_t*)pMemAlign->WelsMalloc (4 * sizeof (int32_t), "m_pSad");
541     ASSERT_MEMORY_FAIL3X (m_pPixSrcA, m_pPixSrcB, m_pSad)
542   }
TearDown()543   virtual void TearDown() {
544     pMemAlign->WelsFree (m_pPixSrcA, "Sad_m_pPixSrcA");
545     pMemAlign->WelsFree (m_pPixSrcB, "Sad_m_pPixSrcB");
546     pMemAlign->WelsFree (m_pSad, "m_pSad");
547     delete pMemAlign;
548   }
549  public:
550   uint32_t m_uiCpuFeatureFlag;
551   uint8_t* m_pPixSrcA;
552   uint8_t* m_pPixSrcB;
553   int32_t m_iStrideA;
554   int32_t m_iStrideB;
555   int32_t* m_pSad;
556 
557   CMemoryAlign* pMemAlign;
558 };
559 
// Shared body of the GENERATE_Sad<W>x<H>_UT generators below.
// Produces a SadSatdAssemblyFuncTest case named after `func` which
//   - returns early (and therefore passes) when the CPU reports none of the
//     bits in CPUFLAGS,
//   - randomises the first (1 << rowShift) rows of buffer A and then of
//     buffer B,
//   - expects `ref` and `func` to return the same value for the same arguments.
#define GENERATE_SadBlock_UT(func, ref, CPUFLAGS, rowShift) \
TEST_F (SadSatdAssemblyFuncTest, func) { \
  if ((m_uiCpuFeatureFlag & CPUFLAGS) == 0) { \
    return; \
  } \
  for (int iIdx = 0; iIdx < (m_iStrideA << (rowShift)); ++iIdx) { \
    m_pPixSrcA[iIdx] = rand() % 256; \
  } \
  for (int iIdx = 0; iIdx < (m_iStrideB << (rowShift)); ++iIdx) { \
    m_pPixSrcB[iIdx] = rand() % 256; \
  } \
  EXPECT_EQ (ref (m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB), \
             func (m_pPixSrcA, m_iStrideA, m_pPixSrcB, m_iStrideB)); \
}

// 4 rows of random data.
#define GENERATE_Sad4x4_UT(func, ref, CPUFLAGS) \
  GENERATE_SadBlock_UT (func, ref, CPUFLAGS, 2)

// 8 rows of random data.
#define GENERATE_Sad8x8_UT(func, ref, CPUFLAGS) \
  GENERATE_SadBlock_UT (func, ref, CPUFLAGS, 3)

// 16 rows of random data.
#define GENERATE_Sad8x16_UT(func, ref, CPUFLAGS) \
  GENERATE_SadBlock_UT (func, ref, CPUFLAGS, 4)

// 8 rows of random data.
#define GENERATE_Sad16x8_UT(func, ref, CPUFLAGS) \
  GENERATE_SadBlock_UT (func, ref, CPUFLAGS, 3)

// 16 rows of random data.
#define GENERATE_Sad16x16_UT(func, ref, CPUFLAGS) \
  GENERATE_SadBlock_UT (func, ref, CPUFLAGS, 4)
619 
// SAD / SATD tests for the optimised routines: each line instantiates one test
// that compares the first-named routine with the C reference named second.

// x86 builds (AVX2 variants only when HAVE_AVX2 is also defined).
#ifdef X86_ASM
GENERATE_Sad4x4_UT (WelsSampleSad4x4_mmx, WelsSampleSad4x4_c, WELS_CPU_MMXEXT)
GENERATE_Sad8x8_UT (WelsSampleSad8x8_sse21, WelsSampleSad8x8_c, WELS_CPU_SSE2)
GENERATE_Sad8x16_UT (WelsSampleSad8x16_sse2, WelsSampleSad8x16_c, WELS_CPU_SSE2)
GENERATE_Sad16x8_UT (WelsSampleSad16x8_sse2, WelsSampleSad16x8_c, WELS_CPU_SSE2)
GENERATE_Sad16x16_UT (WelsSampleSad16x16_sse2, WelsSampleSad16x16_c, WELS_CPU_SSE2)

GENERATE_Sad4x4_UT (WelsSampleSatd4x4_sse2, WelsSampleSatd4x4_c, WELS_CPU_SSE2)
GENERATE_Sad8x8_UT (WelsSampleSatd8x8_sse2, WelsSampleSatd8x8_c, WELS_CPU_SSE2)
GENERATE_Sad8x16_UT (WelsSampleSatd8x16_sse2, WelsSampleSatd8x16_c, WELS_CPU_SSE2)
GENERATE_Sad16x8_UT (WelsSampleSatd16x8_sse2, WelsSampleSatd16x8_c, WELS_CPU_SSE2)
GENERATE_Sad16x16_UT (WelsSampleSatd16x16_sse2, WelsSampleSatd16x16_c, WELS_CPU_SSE2)

GENERATE_Sad4x4_UT (WelsSampleSatd4x4_sse41, WelsSampleSatd4x4_c, WELS_CPU_SSE41)
GENERATE_Sad8x8_UT (WelsSampleSatd8x8_sse41, WelsSampleSatd8x8_c, WELS_CPU_SSE41)
GENERATE_Sad8x16_UT (WelsSampleSatd8x16_sse41, WelsSampleSatd8x16_c, WELS_CPU_SSE41)
GENERATE_Sad16x8_UT (WelsSampleSatd16x8_sse41, WelsSampleSatd16x8_c, WELS_CPU_SSE41)
GENERATE_Sad16x16_UT (WelsSampleSatd16x16_sse41, WelsSampleSatd16x16_c, WELS_CPU_SSE41)

#ifdef HAVE_AVX2
GENERATE_Sad8x8_UT (WelsSampleSatd8x8_avx2, WelsSampleSatd8x8_c, WELS_CPU_AVX2)
GENERATE_Sad8x16_UT (WelsSampleSatd8x16_avx2, WelsSampleSatd8x16_c, WELS_CPU_AVX2)
GENERATE_Sad16x8_UT (WelsSampleSatd16x8_avx2, WelsSampleSatd16x8_c, WELS_CPU_AVX2)
GENERATE_Sad16x16_UT (WelsSampleSatd16x16_avx2, WelsSampleSatd16x16_c, WELS_CPU_AVX2)
#endif // HAVE_AVX2
#endif // X86_ASM

// NEON builds.
#ifdef HAVE_NEON
GENERATE_Sad4x4_UT (WelsSampleSad4x4_neon, WelsSampleSad4x4_c, WELS_CPU_NEON)
GENERATE_Sad8x8_UT (WelsSampleSad8x8_neon, WelsSampleSad8x8_c, WELS_CPU_NEON)
GENERATE_Sad8x16_UT (WelsSampleSad8x16_neon, WelsSampleSad8x16_c, WELS_CPU_NEON)
GENERATE_Sad16x8_UT (WelsSampleSad16x8_neon, WelsSampleSad16x8_c, WELS_CPU_NEON)
GENERATE_Sad16x16_UT (WelsSampleSad16x16_neon, WelsSampleSad16x16_c, WELS_CPU_NEON)

GENERATE_Sad4x4_UT (WelsSampleSatd4x4_neon, WelsSampleSatd4x4_c, WELS_CPU_NEON)
GENERATE_Sad8x8_UT (WelsSampleSatd8x8_neon, WelsSampleSatd8x8_c, WELS_CPU_NEON)
GENERATE_Sad8x16_UT (WelsSampleSatd8x16_neon, WelsSampleSatd8x16_c, WELS_CPU_NEON)
GENERATE_Sad16x8_UT (WelsSampleSatd16x8_neon, WelsSampleSatd16x8_c, WELS_CPU_NEON)
GENERATE_Sad16x16_UT (WelsSampleSatd16x16_neon, WelsSampleSatd16x16_c, WELS_CPU_NEON)
#endif // HAVE_NEON

// AArch64 NEON builds.
#ifdef HAVE_NEON_AARCH64
GENERATE_Sad4x4_UT (WelsSampleSad4x4_AArch64_neon, WelsSampleSad4x4_c, WELS_CPU_NEON)
GENERATE_Sad8x8_UT (WelsSampleSad8x8_AArch64_neon, WelsSampleSad8x8_c, WELS_CPU_NEON)
GENERATE_Sad8x16_UT (WelsSampleSad8x16_AArch64_neon, WelsSampleSad8x16_c, WELS_CPU_NEON)
GENERATE_Sad16x8_UT (WelsSampleSad16x8_AArch64_neon, WelsSampleSad16x8_c, WELS_CPU_NEON)
GENERATE_Sad16x16_UT (WelsSampleSad16x16_AArch64_neon, WelsSampleSad16x16_c, WELS_CPU_NEON)

GENERATE_Sad4x4_UT (WelsSampleSatd4x4_AArch64_neon, WelsSampleSatd4x4_c, WELS_CPU_NEON)
GENERATE_Sad8x8_UT (WelsSampleSatd8x8_AArch64_neon, WelsSampleSatd8x8_c, WELS_CPU_NEON)
GENERATE_Sad8x16_UT (WelsSampleSatd8x16_AArch64_neon, WelsSampleSatd8x16_c, WELS_CPU_NEON)
GENERATE_Sad16x8_UT (WelsSampleSatd16x8_AArch64_neon, WelsSampleSatd16x8_c, WELS_CPU_NEON)
GENERATE_Sad16x16_UT (WelsSampleSatd16x16_AArch64_neon, WelsSampleSatd16x16_c, WELS_CPU_NEON)
#endif // HAVE_NEON_AARCH64

// MMI builds.
#ifdef HAVE_MMI
GENERATE_Sad4x4_UT (WelsSampleSad4x4_mmi, WelsSampleSad4x4_c, WELS_CPU_MMI)
GENERATE_Sad8x8_UT (WelsSampleSad8x8_mmi, WelsSampleSad8x8_c, WELS_CPU_MMI)
GENERATE_Sad8x16_UT (WelsSampleSad8x16_mmi, WelsSampleSad8x16_c, WELS_CPU_MMI)
GENERATE_Sad16x8_UT (WelsSampleSad16x8_mmi, WelsSampleSad16x8_c, WELS_CPU_MMI)
GENERATE_Sad16x16_UT (WelsSampleSad16x16_mmi, WelsSampleSad16x16_c, WELS_CPU_MMI)

GENERATE_Sad4x4_UT (WelsSampleSatd4x4_mmi, WelsSampleSatd4x4_c, WELS_CPU_MMI)
GENERATE_Sad8x8_UT (WelsSampleSatd8x8_mmi, WelsSampleSatd8x8_c, WELS_CPU_MMI)
GENERATE_Sad8x16_UT (WelsSampleSatd8x16_mmi, WelsSampleSatd8x16_c, WELS_CPU_MMI)
GENERATE_Sad16x8_UT (WelsSampleSatd16x8_mmi, WelsSampleSatd16x8_c, WELS_CPU_MMI)
GENERATE_Sad16x16_UT (WelsSampleSatd16x16_mmi, WelsSampleSatd16x16_c, WELS_CPU_MMI)
#endif // HAVE_MMI

// LASX builds.
#ifdef HAVE_LASX
GENERATE_Sad4x4_UT (WelsSampleSad4x4_lasx, WelsSampleSad4x4_c, WELS_CPU_LASX)
GENERATE_Sad8x8_UT (WelsSampleSad8x8_lasx, WelsSampleSad8x8_c, WELS_CPU_LASX)
GENERATE_Sad8x16_UT (WelsSampleSad8x16_lasx, WelsSampleSad8x16_c, WELS_CPU_LASX)
GENERATE_Sad16x8_UT (WelsSampleSad16x8_lasx, WelsSampleSad16x8_c, WELS_CPU_LASX)
GENERATE_Sad16x16_UT (WelsSampleSad16x16_lasx, WelsSampleSad16x16_c, WELS_CPU_LASX)

GENERATE_Sad4x4_UT (WelsSampleSatd4x4_lasx, WelsSampleSatd4x4_c, WELS_CPU_LASX)
GENERATE_Sad8x8_UT (WelsSampleSatd8x8_lasx, WelsSampleSatd8x8_c, WELS_CPU_LASX)
GENERATE_Sad8x16_UT (WelsSampleSatd8x16_lasx, WelsSampleSatd8x16_c, WELS_CPU_LASX)
GENERATE_Sad16x8_UT (WelsSampleSatd16x8_lasx, WelsSampleSatd16x8_c, WELS_CPU_LASX)
GENERATE_Sad16x16_UT (WelsSampleSatd16x16_lasx, WelsSampleSatd16x16_c, WELS_CPU_LASX)
#endif // HAVE_LASX
702 
// Builds a SadSatdAssemblyFuncTest case named after `func`.
// The test returns early (and therefore passes) when the CPU reports none of
// the bits in CPUFLAGS. Otherwise it randomises all 32 rows of both buffers,
// computes the combined SAD of a width x height block of A against B shifted
// one pixel left, one pixel right, one row up and one row down, and expects
// the four values `func` writes to m_pSad to add up to that total.
//   func     - function under test (its name doubles as the test name)
//   CPUFLAGS - mask tested against m_uiCpuFeatureFlag
//   width    - block width in pixels
//   height   - block height in rows
#define GENERATE_SadFour_UT(func, CPUFLAGS, width, height) \
TEST_F (SadSatdAssemblyFuncTest, func) { \
  if ((m_uiCpuFeatureFlag & CPUFLAGS) == 0) { \
    return; \
  } \
  for (int iIdx = 0; iIdx < (m_iStrideA << 5); ++iIdx) { \
    m_pPixSrcA[iIdx] = rand() % 256; \
  } \
  for (int iIdx = 0; iIdx < (m_iStrideB << 5); ++iIdx) { \
    m_pPixSrcB[iIdx] = rand() % 256; \
  } \
  /* Start B one row in so the row-above / pixel-left reads stay in the buffer. */ \
  uint8_t* const pCentreB = m_pPixSrcB + m_iStrideB; \
  int32_t iExpectedTotal = 0; \
  for (int iRow = 0; iRow < height; ++iRow) { \
    const uint8_t* pRowA = m_pPixSrcA + iRow * m_iStrideA; \
    const uint8_t* pRowB = pCentreB + iRow * m_iStrideB; \
    for (int iCol = 0; iCol < width; ++iCol) { \
      const int32_t iCur = pRowA[iCol]; \
      iExpectedTotal += abs (iCur - pRowB[iCol - 1]); \
      iExpectedTotal += abs (iCur - pRowB[iCol + 1]); \
      iExpectedTotal += abs (iCur - pRowB[iCol - m_iStrideB]); \
      iExpectedTotal += abs (iCur - pRowB[iCol + m_iStrideB]); \
    } \
  } \
  func (m_pPixSrcA, m_iStrideA, pCentreB, m_iStrideB, m_pSad); \
  EXPECT_EQ (m_pSad[0] + m_pSad[1] + m_pSad[2] + m_pSad[3], iExpectedTotal); \
}
727 
// "SadFour" tests for the optimised routines: each line instantiates one test
// for the named routine with the given block width and height.

// x86 builds.
#ifdef X86_ASM
GENERATE_SadFour_UT (WelsSampleSadFour4x4_sse2, WELS_CPU_SSE2, 4, 4)
GENERATE_SadFour_UT (WelsSampleSadFour8x8_sse2, WELS_CPU_SSE2, 8, 8)
GENERATE_SadFour_UT (WelsSampleSadFour8x16_sse2, WELS_CPU_SSE2, 8, 16)
GENERATE_SadFour_UT (WelsSampleSadFour16x8_sse2, WELS_CPU_SSE2, 16, 8)
GENERATE_SadFour_UT (WelsSampleSadFour16x16_sse2, WELS_CPU_SSE2, 16, 16)
#endif // X86_ASM

// NEON builds.
#ifdef HAVE_NEON
GENERATE_SadFour_UT (WelsSampleSadFour4x4_neon, WELS_CPU_NEON, 4, 4)
GENERATE_SadFour_UT (WelsSampleSadFour8x8_neon, WELS_CPU_NEON, 8, 8)
GENERATE_SadFour_UT (WelsSampleSadFour8x16_neon, WELS_CPU_NEON, 8, 16)
GENERATE_SadFour_UT (WelsSampleSadFour16x8_neon, WELS_CPU_NEON, 16, 8)
GENERATE_SadFour_UT (WelsSampleSadFour16x16_neon, WELS_CPU_NEON, 16, 16)
#endif // HAVE_NEON

// AArch64 NEON builds.
#ifdef HAVE_NEON_AARCH64
GENERATE_SadFour_UT (WelsSampleSadFour4x4_AArch64_neon, WELS_CPU_NEON, 4, 4)
GENERATE_SadFour_UT (WelsSampleSadFour8x8_AArch64_neon, WELS_CPU_NEON, 8, 8)
GENERATE_SadFour_UT (WelsSampleSadFour8x16_AArch64_neon, WELS_CPU_NEON, 8, 16)
GENERATE_SadFour_UT (WelsSampleSadFour16x8_AArch64_neon, WELS_CPU_NEON, 16, 8)
GENERATE_SadFour_UT (WelsSampleSadFour16x16_AArch64_neon, WELS_CPU_NEON, 16, 16)
#endif // HAVE_NEON_AARCH64

// MMI builds (no 4x4 variant is instantiated here).
#ifdef HAVE_MMI
GENERATE_SadFour_UT (WelsSampleSadFour8x8_mmi, WELS_CPU_MMI, 8, 8)
GENERATE_SadFour_UT (WelsSampleSadFour8x16_mmi, WELS_CPU_MMI, 8, 16)
GENERATE_SadFour_UT (WelsSampleSadFour16x8_mmi, WELS_CPU_MMI, 16, 8)
GENERATE_SadFour_UT (WelsSampleSadFour16x16_mmi, WELS_CPU_MMI, 16, 16)
#endif // HAVE_MMI

// LASX builds.
#ifdef HAVE_LASX
GENERATE_SadFour_UT (WelsSampleSadFour4x4_lasx, WELS_CPU_LASX, 4, 4)
GENERATE_SadFour_UT (WelsSampleSadFour8x8_lasx, WELS_CPU_LASX, 8, 8)
GENERATE_SadFour_UT (WelsSampleSadFour8x16_lasx, WELS_CPU_LASX, 8, 16)
GENERATE_SadFour_UT (WelsSampleSadFour16x8_lasx, WELS_CPU_LASX, 16, 8)
GENERATE_SadFour_UT (WelsSampleSadFour16x16_lasx, WELS_CPU_LASX, 16, 16)
#endif // HAVE_LASX
766