1 #include <gtest/gtest.h>
2 #include "cpu.h"
3 #include "ls_defines.h"
4 #include "encode_mb_aux.h"
5 #include "wels_common_basis.h"
6 #include <algorithm>
7 #include <cstddef>
8
9 using namespace WelsEnc;
10
// Declares `name` as a `type*` pointing at `num` elements obtained from the in-scope
// `cMemoryAlign` object; the stringified variable name is passed as the second argument.
// `num` is parenthesized so compound size expressions (e.g. `a + b`) are scaled as a whole.
#define ALLOC_MEMORY(type, name, num) type* name = (type*)cMemoryAlign.WelsMalloc((num)*sizeof(type), #name);
// Releases a buffer through the same in-scope `cMemoryAlign` object, again passing the
// stringified variable name as the second argument.
#define FREE_MEMORY(name) cMemoryAlign.WelsFree(name, #name);
// Checks WelsScan4x4Ac_c: output slot k must equal the input coefficient at
// kAcScanOrder[k] for the first 15 slots, and the last output slot must be zero.
TEST (EncodeMbAuxTest, TestScan_4x4_ac_c) {
  static const int kAcScanOrder[15] = {1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15};
  CMemoryAlign cMemoryAlign (0);
  ALLOC_MEMORY (int16_t, iLevel, 16);
  ALLOC_MEMORY (int16_t, iDctA, 16);
  ALLOC_MEMORY (int16_t, iDctB, 16);
  // iDctB keeps a copy of the input that is never handed to the function under test.
  for (int idx = 0; idx < 16; ++idx) {
    const int16_t iCoeff = rand() % 256 + 1;
    iDctA[idx] = iCoeff;
    iDctB[idx] = iCoeff;
  }
  WelsScan4x4Ac_c (iLevel, iDctA);
  for (int k = 0; k < 15; ++k)
    EXPECT_EQ (iLevel[k], iDctB[kAcScanOrder[k]]);
  EXPECT_EQ (iLevel[15], 0);
  FREE_MEMORY (iLevel);
  FREE_MEMORY (iDctA);
  FREE_MEMORY (iDctB);
}
43
44 #ifdef X86_ASM
// Expects WelsScan4x4Ac_sse2 to produce the same 16 outputs as WelsScan4x4Ac_c
// when both are given the same random input.
TEST (EncodeMbAuxTest, TestScan_4x4_ac_sse2) {
  CMemoryAlign cMemoryAlign (0);
  ALLOC_MEMORY (int16_t, iLevelA, 16);
  ALLOC_MEMORY (int16_t, iLevelB, 16);
  ALLOC_MEMORY (int16_t, iDct, 16);
  int iFill = 0;
  while (iFill < 16) {
    iDct[iFill] = rand() % 256 + 1;
    ++iFill;
  }
  WelsScan4x4Ac_c (iLevelA, iDct);
  WelsScan4x4Ac_sse2 (iLevelB, iDct);
  for (int iPos = 0; iPos != 16; ++iPos) {
    EXPECT_EQ (iLevelA[iPos], iLevelB[iPos]);
  }
  FREE_MEMORY (iLevelA);
  FREE_MEMORY (iLevelB);
  FREE_MEMORY (iDct);
}
// Expects WelsScan4x4DcAc_sse2 and WelsScan4x4DcAc_c to agree on the first 16 outputs
// for the same random signed input.
TEST (EncodeMbAuxTest, WelsScan4x4DcAc_sse2) {
  CMemoryAlign cMemoryAlign (0);
  ALLOC_MEMORY (int16_t, iLevelA, 32);
  ALLOC_MEMORY (int16_t, iLevelB, 32);
  ALLOC_MEMORY (int16_t, iDct, 32);
  for (int iFill = 0; iFill != 32; ++iFill) {
    const int iRaw = rand() & 32767;
    iDct[iFill] = iRaw - 16384;
  }
  WelsScan4x4DcAc_sse2 (iLevelA, iDct);
  WelsScan4x4DcAc_c (iLevelB, iDct);
  int iPos = 0;
  while (iPos < 16) {
    EXPECT_EQ (iLevelA[iPos], iLevelB[iPos]);
    ++iPos;
  }
  FREE_MEMORY (iLevelA);
  FREE_MEMORY (iLevelB);
  FREE_MEMORY (iDct);
}
76 #endif
77 #ifdef HAVE_MMI
// Expects WelsScan4x4Ac_mmi to produce the same 16 outputs as WelsScan4x4Ac_c
// when both are given the same random input.
TEST (EncodeMbAuxTest, WelsScan4x4Ac_mmi) {
  CMemoryAlign cMemoryAlign (0);
  ALLOC_MEMORY (int16_t, iLevelA, 16);
  ALLOC_MEMORY (int16_t, iLevelB, 16);
  ALLOC_MEMORY (int16_t, iDct, 16);
  int iFill = 0;
  while (iFill < 16) {
    iDct[iFill] = rand() % 256 + 1;
    ++iFill;
  }
  WelsScan4x4Ac_c (iLevelA, iDct);
  WelsScan4x4Ac_mmi (iLevelB, iDct);
  for (int iPos = 0; iPos != 16; ++iPos) {
    EXPECT_EQ (iLevelA[iPos], iLevelB[iPos]);
  }
  FREE_MEMORY (iLevelA);
  FREE_MEMORY (iLevelB);
  FREE_MEMORY (iDct);
}
// Expects WelsScan4x4DcAc_mmi and WelsScan4x4DcAc_c to agree on the first 16 outputs
// for the same random signed input.
TEST (EncodeMbAuxTest, WelsScan4x4DcAc_mmi) {
  CMemoryAlign cMemoryAlign (0);
  ALLOC_MEMORY (int16_t, iLevelA, 32);
  ALLOC_MEMORY (int16_t, iLevelB, 32);
  ALLOC_MEMORY (int16_t, iDct, 32);
  for (int iFill = 0; iFill != 32; ++iFill) {
    const int iRaw = rand() & 32767;
    iDct[iFill] = iRaw - 16384;
  }
  WelsScan4x4DcAc_mmi (iLevelA, iDct);
  WelsScan4x4DcAc_c (iLevelB, iDct);
  int iPos = 0;
  while (iPos < 16) {
    EXPECT_EQ (iLevelA[iPos], iLevelB[iPos]);
    ++iPos;
  }
  FREE_MEMORY (iLevelA);
  FREE_MEMORY (iLevelB);
  FREE_MEMORY (iDct);
}
109 #endif
// Checks WelsScan4x4Dc: output slot k must equal the input coefficient at kDcScanOrder[k]
// for all 16 slots.
TEST (EncodeMbAuxTest, TestScan_4x4_dcc) {
  static const int kDcScanOrder[16] = {0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15};
  CMemoryAlign cMemoryAlign (0);
  ALLOC_MEMORY (int16_t, iLevel, 16);
  ALLOC_MEMORY (int16_t, iDctA, 16);
  ALLOC_MEMORY (int16_t, iDctB, 16);
  // iDctB is a copy of the input that is never handed to the function under test.
  for (int idx = 0; idx < 16; ++idx) {
    const int16_t iCoeff = rand() % 256 + 1;
    iDctB[idx] = iCoeff;
    iDctA[idx] = iCoeff;
  }
  WelsScan4x4Dc (iLevel, iDctA);
  for (int k = 0; k < 16; ++k)
    EXPECT_EQ (iLevel[k], iDctB[kDcScanOrder[k]]);
  FREE_MEMORY (iLevel);
  FREE_MEMORY (iDctA);
  FREE_MEMORY (iDctB);
}
// Stores the element-wise difference pPix1 - pPix2 of an iSize x iSize block into iDiff.
// iDiff is written row-major with a row length of iSize; the two inputs are read with
// their own row strides iStride1 and iStride2.
static inline void PixelSubWH (int16_t* iDiff, int iSize, uint8_t* pPix1, int iStride1, uint8_t* pPix2, int iStride2) {
  for (int row = 0; row < iSize; ++row) {
    const uint8_t* pRowA = pPix1 + row * iStride1;
    const uint8_t* pRowB = pPix2 + row * iStride2;
    int16_t* pOut = iDiff + row * iSize;
    for (int col = 0; col < iSize; ++col) {
      pOut[col] = pRowA[col] - pRowB[col];
    }
  }
}
147
148 #define FENC_STRIDE 16
149 #define FDEC_STRIDE 32
Sub4x4DctAnchor(int16_t iDct[4][4],uint8_t * pPix1,uint8_t * pPix2)150 static void Sub4x4DctAnchor (int16_t iDct[4][4], uint8_t* pPix1, uint8_t* pPix2) {
151 int16_t iDiff[4][4];
152 int16_t tmp[4][4];
153 int i;
154 PixelSubWH ((int16_t*)iDiff, 4, pPix1, FENC_STRIDE, pPix2, FDEC_STRIDE);
155 for (i = 0; i < 4; i++) {
156 const int a03 = iDiff[i][0] + iDiff[i][3];
157 const int a12 = iDiff[i][1] + iDiff[i][2];
158 const int s03 = iDiff[i][0] - iDiff[i][3];
159 const int s12 = iDiff[i][1] - iDiff[i][2];
160 tmp[0][i] = a03 + a12;
161 tmp[1][i] = 2 * s03 + s12;
162 tmp[2][i] = a03 - a12;
163 tmp[3][i] = s03 - 2 * s12;
164 }
165 for (i = 0; i < 4; i++) {
166 const int a03 = tmp[i][0] + tmp[i][3];
167 const int a12 = tmp[i][1] + tmp[i][2];
168 const int s03 = tmp[i][0] - tmp[i][3];
169 const int s12 = tmp[i][1] - tmp[i][2];
170 iDct[i][0] = a03 + a12;
171 iDct[i][1] = 2 * s03 + s12;
172 iDct[i][2] = a03 - a12;
173 iDct[i][3] = s03 - 2 * s12;
174 }
175 }
176
Sub8x8DctAnchor(int16_t iDct[4][4][4],uint8_t * pPix1,uint8_t * pPix2)177 static void Sub8x8DctAnchor (int16_t iDct[4][4][4], uint8_t* pPix1, uint8_t* pPix2) {
178 Sub4x4DctAnchor (iDct[0], &pPix1[0], &pPix2[0]);
179 Sub4x4DctAnchor (iDct[1], &pPix1[4], &pPix2[4]);
180 Sub4x4DctAnchor (iDct[2], &pPix1[4 * FENC_STRIDE + 0], &pPix2[4 * FDEC_STRIDE + 0]);
181 Sub4x4DctAnchor (iDct[3], &pPix1[4 * FENC_STRIDE + 4], &pPix2[4 * FDEC_STRIDE + 4]);
182 }
TestDctT4(PDctFunc func)183 static void TestDctT4 (PDctFunc func) {
184 int16_t iDctRef[4][4];
185 CMemoryAlign cMemoryAlign (0);
186 ALLOC_MEMORY (uint8_t, uiPix1, 16 * FENC_STRIDE);
187 ALLOC_MEMORY (uint8_t, uiPix2, 16 * FDEC_STRIDE);
188 ALLOC_MEMORY (int16_t, iDct, 16);
189 for (int i = 0; i < 4; i++) {
190 for (int j = 0; j < 4; j++) {
191 uiPix1[i * FENC_STRIDE + j] = rand() & 255;
192 uiPix2[i * FDEC_STRIDE + j] = rand() & 255;
193 }
194 }
195 Sub4x4DctAnchor (iDctRef, uiPix1, uiPix2);
196 func (iDct, uiPix1, FENC_STRIDE, uiPix2, FDEC_STRIDE);
197 for (int i = 0; i < 4; i++)
198 for (int j = 0; j < 4; j++)
199 EXPECT_EQ (iDctRef[j][i], iDct[i * 4 + j]);
200 FREE_MEMORY (uiPix1);
201 FREE_MEMORY (uiPix2);
202 FREE_MEMORY (iDct);
203 }
TestDctFourT4(PDctFunc func)204 static void TestDctFourT4 (PDctFunc func) {
205 int16_t iDctRef[4][4][4];
206 CMemoryAlign cMemoryAlign (0);
207 ALLOC_MEMORY (uint8_t, uiPix1, 16 * FENC_STRIDE);
208 ALLOC_MEMORY (uint8_t, uiPix2, 16 * FDEC_STRIDE);
209 ALLOC_MEMORY (int16_t, iDct, 16 * 4);
210 for (int i = 0; i < 8; i++) {
211 for (int j = 0; j < 8; j++) {
212 uiPix1[i * FENC_STRIDE + j] = rand() & 255;
213 uiPix2[i * FDEC_STRIDE + j] = rand() & 255;
214 }
215 }
216 Sub8x8DctAnchor (iDctRef, uiPix1, uiPix2);
217 func (iDct, uiPix1, FENC_STRIDE, uiPix2, FDEC_STRIDE);
218 for (int k = 0; k < 4; k++)
219 for (int i = 0; i < 4; i++)
220 for (int j = 0; j < 4; j++)
221 EXPECT_EQ (iDctRef[k][j][i], iDct[k * 16 + i * 4 + j]);
222 FREE_MEMORY (uiPix1);
223 FREE_MEMORY (uiPix2);
224 FREE_MEMORY (iDct);
225 }
// Compares WelsDctT4_c with the 4x4 anchor.
TEST (EncodeMbAuxTest, WelsDctT4_c) {
  PDctFunc pfUnderTest = WelsDctT4_c;
  TestDctT4 (pfUnderTest);
}
// Compares WelsDctFourT4_c with the 8x8 anchor.
TEST (EncodeMbAuxTest, WelsDctFourT4_c) {
  PDctFunc pfUnderTest = WelsDctFourT4_c;
  TestDctFourT4 (pfUnderTest);
}
232
233 #ifdef X86_ASM
// Compares WelsDctT4_mmx with the 4x4 anchor.
TEST (EncodeMbAuxTest, WelsDctT4_mmx) {
  PDctFunc pfUnderTest = WelsDctT4_mmx;
  TestDctT4 (pfUnderTest);
}
237
// Compares WelsDctT4_sse2 with the 4x4 anchor.
TEST (EncodeMbAuxTest, WelsDctT4_sse2) {
  PDctFunc pfUnderTest = WelsDctT4_sse2;
  TestDctT4 (pfUnderTest);
}
241
// Compares WelsDctFourT4_sse2 with the 8x8 anchor.
TEST (EncodeMbAuxTest, WelsDctFourT4_sse2) {
  PDctFunc pfUnderTest = WelsDctFourT4_sse2;
  TestDctFourT4 (pfUnderTest);
}
245
246 #ifdef HAVE_AVX2
// Compares WelsDctT4_avx2 with the 4x4 anchor; does nothing unless
// WelsCPUFeatureDetect reports the WELS_CPU_AVX2 flag.
TEST (EncodeMbAuxTest, WelsDctT4_avx2) {
  const bool bFlagSet = (WelsCPUFeatureDetect (0) & WELS_CPU_AVX2) != 0;
  if (!bFlagSet)
    return;
  TestDctT4 (WelsDctT4_avx2);
}
251
// Compares WelsDctFourT4_avx2 with the 8x8 anchor; does nothing unless
// WelsCPUFeatureDetect reports the WELS_CPU_AVX2 flag.
TEST (EncodeMbAuxTest, WelsDctFourT4_avx2) {
  const bool bFlagSet = (WelsCPUFeatureDetect (0) & WELS_CPU_AVX2) != 0;
  if (!bFlagSet)
    return;
  TestDctFourT4 (WelsDctFourT4_avx2);
}
256 #endif //HAVE_AVX2
257
// Applies WelsCalculateSingleCtr4x4_c and WelsCalculateSingleCtr4x4_sse2 to identical
// random buffers and expects the 16 entries of both buffers to match afterwards.
TEST (EncodeMbAuxTest, WelsCalculateSingleCtr4x4_sse2) {
  CMemoryAlign cMemoryAlign (0);
  ALLOC_MEMORY (int16_t, iDctC, 16);
  ALLOC_MEMORY (int16_t, iDctS, 16);
  for (int iFill = 0; iFill != 16; ++iFill) {
    const int16_t iCoeff = (rand() & 65535) - 32768;
    iDctS[iFill] = iCoeff;
    iDctC[iFill] = iCoeff;
  }
  WelsCalculateSingleCtr4x4_c (iDctC);
  WelsCalculateSingleCtr4x4_sse2 (iDctS);
  int iPos = 0;
  while (iPos < 16) {
    EXPECT_EQ (iDctC[iPos], iDctS[iPos]);
    ++iPos;
  }
  FREE_MEMORY (iDctC);
  FREE_MEMORY (iDctS);
}
271 #endif
272 #ifdef HAVE_MMI
// Compares WelsDctT4_mmi with the 4x4 anchor.
TEST (EncodeMbAuxTest, WelsDctT4_mmi) {
  PDctFunc pfUnderTest = WelsDctT4_mmi;
  TestDctT4 (pfUnderTest);
}
276
// Compares WelsDctFourT4_mmi with the 8x8 anchor.
TEST (EncodeMbAuxTest, WelsDctFourT4_mmi) {
  PDctFunc pfUnderTest = WelsDctFourT4_mmi;
  TestDctFourT4 (pfUnderTest);
}
280
// Applies WelsCalculateSingleCtr4x4_c and WelsCalculateSingleCtr4x4_mmi to identical
// random buffers and expects the 16 entries of both buffers to match afterwards.
TEST (EncodeMbAuxTest, WelsCalculateSingleCtr4x4_mmi) {
  CMemoryAlign cMemoryAlign (0);
  ALLOC_MEMORY (int16_t, iDctC, 16);
  ALLOC_MEMORY (int16_t, iDctS, 16);
  for (int iFill = 0; iFill != 16; ++iFill) {
    const int16_t iCoeff = (rand() & 65535) - 32768;
    iDctS[iFill] = iCoeff;
    iDctC[iFill] = iCoeff;
  }
  WelsCalculateSingleCtr4x4_c (iDctC);
  WelsCalculateSingleCtr4x4_mmi (iDctS);
  int iPos = 0;
  while (iPos < 16) {
    EXPECT_EQ (iDctC[iPos], iDctS[iPos]);
    ++iPos;
  }
  FREE_MEMORY (iDctC);
  FREE_MEMORY (iDctS);
}
294 #endif
295
// Reference block copy: copies iHeight rows of iWidth bytes from pSrc to pDst, where
// consecutive rows are iSStride bytes apart in the source and iDStride bytes apart in
// the destination.
void copy (uint8_t* pDst, int32_t iDStride, uint8_t* pSrc, int32_t iSStride, int32_t iWidth, int32_t iHeight) {
  int iRow = 0;
  while (iRow < iHeight) {
    uint8_t* pTo = &pDst[iRow * iDStride];
    const uint8_t* pFrom = &pSrc[iRow * iSStride];
    memcpy (pTo, pFrom, iWidth);
    ++iRow;
  }
}
300
// Generates a TEST named after `function`. The test fills the top-left width x height
// region of a source buffer (row stride 64) with random bytes, copies it once with
// `function` and once with the reference copy(), and expects both destinations to match
// over that region. Bytes outside the region are neither initialized nor compared.
#define GENERATE_UT_FOR_COPY(width, height, function) \
TEST(EncodeMbAuxTest, function) { \
  const int iSStride = 64; \
  const int iDStride = 64; \
  ENFORCE_STACK_ALIGN_1D (uint8_t, ref_src, iSStride*height, 16); \
  ENFORCE_STACK_ALIGN_1D (uint8_t, ref_dst, iDStride*height, 16); \
  ENFORCE_STACK_ALIGN_1D (uint8_t, dst, iDStride*height, 16); \
  for(int k = 0; k < (width)*(height); k++) { \
    const int y = k / (width); \
    const int x = k % (width); \
    ref_src[y*iSStride+x] = rand() & 255; \
  } \
  function(dst, iDStride, ref_src, iSStride); \
  copy(ref_dst, iDStride, ref_src, iSStride, width, height); \
  for(int k = 0; k < (width)*(height); k++) { \
    const int y = k / (width); \
    const int x = k % (width); \
    EXPECT_EQ(ref_dst[y*iDStride+x], dst[y*iDStride+x]); \
  } \
}
317
318 GENERATE_UT_FOR_COPY (4, 4, WelsCopy4x4_c);
319 GENERATE_UT_FOR_COPY (8, 4, WelsCopy8x4_c);
320 GENERATE_UT_FOR_COPY (4, 8, WelsCopy4x8_c);
321 GENERATE_UT_FOR_COPY (8, 8, WelsCopy8x8_c);
322 GENERATE_UT_FOR_COPY (8, 16, WelsCopy8x16_c);
323 GENERATE_UT_FOR_COPY (16, 8, WelsCopy16x8_c);
324 GENERATE_UT_FOR_COPY (16, 16, WelsCopy16x16_c);
325 #ifdef X86_ASM
326 GENERATE_UT_FOR_COPY (16, 8, WelsCopy16x8NotAligned_sse2);
327 GENERATE_UT_FOR_COPY (16, 16, WelsCopy16x16NotAligned_sse2);
328 GENERATE_UT_FOR_COPY (16, 16, WelsCopy16x16_sse2);
329 #endif
330 #ifdef HAVE_MMI
331 GENERATE_UT_FOR_COPY (16, 8, WelsCopy16x8NotAligned_mmi);
332 GENERATE_UT_FOR_COPY (16, 16, WelsCopy16x16NotAligned_mmi);
333 GENERATE_UT_FOR_COPY (16, 16, WelsCopy16x16_mmi);
334 #endif
335 #ifdef HAVE_MSA
336 GENERATE_UT_FOR_COPY (8, 8, WelsCopy8x8_msa);
337 GENERATE_UT_FOR_COPY (8, 16, WelsCopy8x16_msa);
338 GENERATE_UT_FOR_COPY (16, 8, WelsCopy16x8_msa);
339 GENERATE_UT_FOR_COPY (16, 16, WelsCopy16x16_msa);
340 #endif
341
342 #ifdef HAVE_LSX
343 GENERATE_UT_FOR_COPY (8, 8, WelsCopy8x8_lsx);
344 GENERATE_UT_FOR_COPY (16, 16, WelsCopy16x16_lsx);
345 GENERATE_UT_FOR_COPY (16, 16, WelsCopy16x16NotAligned_lsx);
346 #endif
347
348 namespace {
349
TestGetNoneZeroCount(PGetNoneZeroCountFunc func)350 void TestGetNoneZeroCount (PGetNoneZeroCountFunc func) {
351 ENFORCE_STACK_ALIGN_1D (int16_t, pLevel, 16, 16);
352 const int num_test_runs = 1000;
353 for (int run = 0; run < num_test_runs; run++) {
354 const bool all_zero = run == 0;
355 const bool all_nonzero = run == 1;
356 int result = 0;
357 for (int i = 0; i < 16; i++) {
358 const int r = rand();
359 if (all_zero)
360 pLevel[i] = 0;
361 else if (all_nonzero)
362 pLevel[i] = r % 0xFFFF - 0x8000 ? r % 0xFFFF - 0x8000 : 0x7FFF;
363 else
364 pLevel[i] = (r >> 16 & 1) * ((r & 0xFFFF) - 0x8000);
365 result += pLevel[i] != 0;
366 }
367 const int32_t nnz = func (pLevel);
368 EXPECT_EQ (nnz, result);
369 }
370 }
371
372 } // anon ns.
373
// Runs the non-zero-count check against WelsGetNoneZeroCount_c.
TEST (EncodeMbAuxTest, WelsGetNoneZeroCount_c) {
  PGetNoneZeroCountFunc pfUnderTest = WelsGetNoneZeroCount_c;
  TestGetNoneZeroCount (pfUnderTest);
}
377 #ifdef X86_ASM
// Runs the non-zero-count check against WelsGetNoneZeroCount_sse2.
TEST (EncodeMbAuxTest, WelsGetNoneZeroCount_sse2) {
  PGetNoneZeroCountFunc pfUnderTest = WelsGetNoneZeroCount_sse2;
  TestGetNoneZeroCount (pfUnderTest);
}
// Runs the non-zero-count check against WelsGetNoneZeroCount_sse42; does nothing unless
// WelsCPUFeatureDetect reports the WELS_CPU_SSE42 flag.
TEST (EncodeMbAuxTest, WelsGetNoneZeroCount_sse42) {
  const bool bFlagSet = (WelsCPUFeatureDetect (0) & WELS_CPU_SSE42) != 0;
  if (!bFlagSet)
    return;
  TestGetNoneZeroCount (WelsGetNoneZeroCount_sse42);
}
385 #endif
386 #ifdef HAVE_MMI
// Runs the non-zero-count check against WelsGetNoneZeroCount_mmi.
TEST (EncodeMbAuxTest, WelsGetNoneZeroCount_mmi) {
  PGetNoneZeroCountFunc pfUnderTest = WelsGetNoneZeroCount_mmi;
  TestGetNoneZeroCount (pfUnderTest);
}
390 #endif
// Helper macros for the quantization anchors. WELS_ABS_LC reads a variable named `sign`
// that must be in scope where the macro is used: (sign ^ a) - sign yields a when sign
// is 0 and -a when sign is -1.
#define WELS_ABS_LC(a) ((sign ^ (int32_t)(a)) - sign)
// ((ff + WELS_ABS_LC(pDct)) * mf) >> 16. The whole expansion is parenthesized so the
// macro can be used safely inside a larger expression.
#define NEW_QUANT(pDct, ff, mf) ((((ff) + WELS_ABS_LC(pDct)) * (mf)) >> 16)
// NEW_QUANT followed by a second WELS_ABS_LC with the same `sign`.
#define WELS_NEW_QUANT(pDct,ff,mf) WELS_ABS_LC(NEW_QUANT(pDct, ff, mf))
394 namespace {
WelsQuant4x4MaxAnchor(int16_t * pDct,int16_t * ff,int16_t * mf)395 int16_t WelsQuant4x4MaxAnchor (int16_t* pDct, int16_t* ff, int16_t* mf) {
396 int16_t max_abs = 0;
397 for (int i = 0; i < 16; i++) {
398 const int j = i & 0x07;
399 const int32_t sign = WELS_SIGN (pDct[i]);
400 pDct[i] = NEW_QUANT (pDct[i], ff[j], mf[j]);
401 max_abs = std::max(max_abs, pDct[i]);
402 pDct[i] = WELS_ABS_LC (pDct[i]);
403 }
404 return max_abs;
405 }
WelsQuant4x4DcAnchor(int16_t * pDct,int16_t iFF,int16_t iMF)406 void WelsQuant4x4DcAnchor (int16_t* pDct, int16_t iFF, int16_t iMF) {
407 for (int i = 0; i < 16; i++) {
408 const int32_t sign = WELS_SIGN (pDct[i]);
409 pDct[i] = WELS_NEW_QUANT (pDct[i], iFF, iMF);
410 }
411 }
WelsQuantFour4x4Anchor(int16_t * pDct,int16_t * ff,int16_t * mf)412 void WelsQuantFour4x4Anchor (int16_t* pDct, int16_t* ff, int16_t* mf) {
413 for (int i = 0; i < 4; i++)
414 WelsQuant4x4MaxAnchor (pDct + 16 * i, ff, mf);
415 }
WelsQuantFour4x4MaxAnchor(int16_t * pDct,int16_t * ff,int16_t * mf,int16_t * max)416 void WelsQuantFour4x4MaxAnchor (int16_t* pDct, int16_t* ff, int16_t* mf, int16_t* max) {
417 for (int i = 0; i < 4; i++)
418 max[i] = WelsQuant4x4MaxAnchor (pDct + 16 * i, ff, mf);
419 }
TestWelsQuant4x4(PQuantizationFunc func)420 void TestWelsQuant4x4 (PQuantizationFunc func) {
421 const std::size_t f_size = 8;
422 const std::size_t dct_size = 16;
423 CMemoryAlign cMemoryAlign (0);
424 ALLOC_MEMORY (int16_t, ff, f_size);
425 ALLOC_MEMORY (int16_t, mf, f_size);
426 ALLOC_MEMORY (int16_t, iDctC, dct_size);
427 ALLOC_MEMORY (int16_t, iDctS, dct_size);
428 for (std::size_t i = 0; i < f_size; i++) {
429 ff[i] = rand() & 32767;
430 mf[i] = rand() & 32767;
431 }
432 for (std::size_t i = 0; i < dct_size; i++)
433 iDctC[i] = iDctS[i] = (rand() & 65535) - 32768;
434 WelsQuant4x4MaxAnchor (iDctC, ff, mf);
435 func (iDctS, ff, mf);
436 for (std::size_t i = 0; i < dct_size; i++)
437 EXPECT_EQ (iDctC[i], iDctS[i]);
438 FREE_MEMORY (ff);
439 FREE_MEMORY (mf);
440 FREE_MEMORY (iDctC);
441 FREE_MEMORY (iDctS);
442 }
TestWelsQuant4x4Dc(PQuantizationDcFunc func)443 void TestWelsQuant4x4Dc (PQuantizationDcFunc func) {
444 const std::size_t dct_size = 16;
445 const int16_t ff = rand() & 32767;
446 const int16_t mf = rand() & 32767;
447 CMemoryAlign cMemoryAlign (0);
448 ALLOC_MEMORY (int16_t, iDctC, dct_size);
449 ALLOC_MEMORY (int16_t, iDctS, dct_size);
450 for (std::size_t i = 0; i < dct_size; i++)
451 iDctC[i] = iDctS[i] = (rand() & 65535) - 32768;
452 WelsQuant4x4DcAnchor (iDctC, ff, mf);
453 func (iDctS, ff, mf);
454 for (std::size_t i = 0; i < dct_size; i++)
455 EXPECT_EQ (iDctC[i], iDctS[i]);
456 FREE_MEMORY (iDctC);
457 FREE_MEMORY (iDctS);
458 }
TestWelsQuantFour4x4(PQuantizationFunc func)459 void TestWelsQuantFour4x4 (PQuantizationFunc func) {
460 const std::size_t f_size = 8;
461 const std::size_t dct_size = 4 * 16;
462 CMemoryAlign cMemoryAlign (0);
463 ALLOC_MEMORY (int16_t, ff, f_size);
464 ALLOC_MEMORY (int16_t, mf, f_size);
465 ALLOC_MEMORY (int16_t, iDctC, dct_size);
466 ALLOC_MEMORY (int16_t, iDctS, dct_size);
467 for (std::size_t i = 0; i < f_size; i++) {
468 ff[i] = rand() & 32767;
469 mf[i] = rand() & 32767;
470 }
471 for (std::size_t i = 0; i < dct_size; i++)
472 iDctC[i] = iDctS[i] = (rand() & 65535) - 32768;
473 WelsQuantFour4x4Anchor (iDctC, ff, mf);
474 func (iDctS, ff, mf);
475 for (std::size_t i = 0; i < dct_size; i++)
476 EXPECT_EQ (iDctC[i], iDctS[i]);
477 FREE_MEMORY (ff);
478 FREE_MEMORY (mf);
479 FREE_MEMORY (iDctC);
480 FREE_MEMORY (iDctS);
481 }
TestWelsQuantFour4x4Max(PQuantizationMaxFunc func)482 void TestWelsQuantFour4x4Max (PQuantizationMaxFunc func) {
483 CMemoryAlign cMemoryAlign (0);
484 ALLOC_MEMORY (int16_t, ff, 8);
485 ALLOC_MEMORY (int16_t, mf, 8);
486 ALLOC_MEMORY (int16_t, iDctC, 64);
487 ALLOC_MEMORY (int16_t, iDctS, 64);
488 ALLOC_MEMORY (int16_t, iMaxC, 16);
489 ALLOC_MEMORY (int16_t, iMaxS, 16);
490 for (int i = 0; i < 8; i++) {
491 ff[i] = rand() & 32767;
492 mf[i] = rand() & 32767;
493 }
494 for (int i = 0; i < 64; i++)
495 iDctC[i] = iDctS[i] = (rand() & 65535) - 32767;
496 WelsQuantFour4x4MaxAnchor (iDctC, ff, mf, iMaxC);
497 func (iDctS, ff, mf, iMaxS);
498 for (int i = 0; i < 64; i++)
499 EXPECT_EQ (iDctC[i], iDctS[i]);
500 for (int i = 0; i < 4; i++)
501 EXPECT_EQ (iMaxC[i], iMaxS[i]);
502 FREE_MEMORY (ff);
503 FREE_MEMORY (mf);
504 FREE_MEMORY (iDctC);
505 FREE_MEMORY (iDctS);
506 FREE_MEMORY (iMaxC);
507 FREE_MEMORY (iMaxS);
508 }
509 } // anon ns
// Compares WelsQuant4x4_c with the quantization anchor.
TEST (EncodeMbAuxTest, WelsQuant4x4_c) {
  PQuantizationFunc pfUnderTest = WelsQuant4x4_c;
  TestWelsQuant4x4 (pfUnderTest);
}
// Compares WelsQuant4x4Dc_c with the DC quantization anchor.
TEST (EncodeMbAuxTest, WelsQuant4x4Dc_c) {
  PQuantizationDcFunc pfUnderTest = WelsQuant4x4Dc_c;
  TestWelsQuant4x4Dc (pfUnderTest);
}
// Compares WelsQuantFour4x4_c with the four-block quantization anchor.
TEST (EncodeMbAuxTest, WelsQuantFour4x4_c) {
  PQuantizationFunc pfUnderTest = WelsQuantFour4x4_c;
  TestWelsQuantFour4x4 (pfUnderTest);
}
// Compares WelsQuantFour4x4Max_c with the four-block max quantization anchor.
TEST (EncodeMbAuxTest, WelsQuantFour4x4Max_c) {
  PQuantizationMaxFunc pfUnderTest = WelsQuantFour4x4Max_c;
  TestWelsQuantFour4x4Max (pfUnderTest);
}
522 #ifdef X86_ASM
// Compares WelsQuant4x4_sse2 with the quantization anchor.
TEST (EncodeMbAuxTest, WelsQuant4x4_sse2) {
  PQuantizationFunc pfUnderTest = WelsQuant4x4_sse2;
  TestWelsQuant4x4 (pfUnderTest);
}
// Compares WelsQuant4x4Dc_sse2 with the DC quantization anchor.
TEST (EncodeMbAuxTest, WelsQuant4x4Dc_sse2) {
  PQuantizationDcFunc pfUnderTest = WelsQuant4x4Dc_sse2;
  TestWelsQuant4x4Dc (pfUnderTest);
}
// Compares WelsQuantFour4x4_sse2 with the four-block quantization anchor.
TEST (EncodeMbAuxTest, WelsQuantFour4x4_sse2) {
  PQuantizationFunc pfUnderTest = WelsQuantFour4x4_sse2;
  TestWelsQuantFour4x4 (pfUnderTest);
}
// Compares WelsQuantFour4x4Max_sse2 with the four-block max quantization anchor.
TEST (EncodeMbAuxTest, WelsQuantFour4x4Max_sse2) {
  PQuantizationMaxFunc pfUnderTest = WelsQuantFour4x4Max_sse2;
  TestWelsQuantFour4x4Max (pfUnderTest);
}
535 #ifdef HAVE_AVX2
// Compares WelsQuant4x4_avx2 with the quantization anchor; does nothing unless
// WelsCPUFeatureDetect reports the WELS_CPU_AVX2 flag.
TEST (EncodeMbAuxTest, WelsQuant4x4_avx2) {
  const bool bFlagSet = (WelsCPUFeatureDetect (0) & WELS_CPU_AVX2) != 0;
  if (!bFlagSet)
    return;
  TestWelsQuant4x4 (WelsQuant4x4_avx2);
}
// Compares WelsQuant4x4Dc_avx2 with the DC quantization anchor; does nothing unless
// WelsCPUFeatureDetect reports the WELS_CPU_AVX2 flag.
TEST (EncodeMbAuxTest, WelsQuant4x4Dc_avx2) {
  const bool bFlagSet = (WelsCPUFeatureDetect (0) & WELS_CPU_AVX2) != 0;
  if (!bFlagSet)
    return;
  TestWelsQuant4x4Dc (WelsQuant4x4Dc_avx2);
}
// Compares WelsQuantFour4x4_avx2 with the four-block quantization anchor; does nothing
// unless WelsCPUFeatureDetect reports the WELS_CPU_AVX2 flag.
TEST (EncodeMbAuxTest, WelsQuantFour4x4_avx2) {
  const bool bFlagSet = (WelsCPUFeatureDetect (0) & WELS_CPU_AVX2) != 0;
  if (!bFlagSet)
    return;
  TestWelsQuantFour4x4 (WelsQuantFour4x4_avx2);
}
// Compares WelsQuantFour4x4Max_avx2 with the four-block max quantization anchor; does
// nothing unless WelsCPUFeatureDetect reports the WELS_CPU_AVX2 flag.
TEST (EncodeMbAuxTest, WelsQuantFour4x4Max_avx2) {
  const bool bFlagSet = (WelsCPUFeatureDetect (0) & WELS_CPU_AVX2) != 0;
  if (!bFlagSet)
    return;
  TestWelsQuantFour4x4Max (WelsQuantFour4x4Max_avx2);
}
552 #endif //HAVE_AVX2
553 #endif
554 #ifdef HAVE_MMI
// Compares WelsQuant4x4_mmi with the quantization anchor; does nothing unless
// WelsCPUFeatureDetect reports the WELS_CPU_MMI flag.
TEST (EncodeMbAuxTest, WelsQuant4x4_mmi) {
  const bool bFlagSet = (WelsCPUFeatureDetect (0) & WELS_CPU_MMI) != 0;
  if (!bFlagSet)
    return;
  TestWelsQuant4x4 (WelsQuant4x4_mmi);
}
// Compares WelsQuant4x4Dc_mmi with the DC quantization anchor; does nothing unless
// WelsCPUFeatureDetect reports the WELS_CPU_MMI flag.
TEST (EncodeMbAuxTest, WelsQuant4x4Dc_mmi) {
  const bool bFlagSet = (WelsCPUFeatureDetect (0) & WELS_CPU_MMI) != 0;
  if (!bFlagSet)
    return;
  TestWelsQuant4x4Dc (WelsQuant4x4Dc_mmi);
}
// Compares WelsQuantFour4x4_mmi with the four-block quantization anchor; does nothing
// unless WelsCPUFeatureDetect reports the WELS_CPU_MMI flag.
TEST (EncodeMbAuxTest, WelsQuantFour4x4_mmi) {
  const bool bFlagSet = (WelsCPUFeatureDetect (0) & WELS_CPU_MMI) != 0;
  if (!bFlagSet)
    return;
  TestWelsQuantFour4x4 (WelsQuantFour4x4_mmi);
}
// Compares WelsQuantFour4x4Max_mmi with the four-block max quantization anchor; does
// nothing unless WelsCPUFeatureDetect reports the WELS_CPU_MMI flag.
TEST (EncodeMbAuxTest, WelsQuantFour4x4Max_mmi) {
  const bool bFlagSet = (WelsCPUFeatureDetect (0) & WELS_CPU_MMI) != 0;
  if (!bFlagSet)
    return;
  TestWelsQuantFour4x4Max (WelsQuantFour4x4Max_mmi);
}
571 #endif //HAVE_MMI
572
573 #ifdef HAVE_LSX
// Compares WelsQuantFour4x4Max_lsx with the four-block max quantization anchor; does
// nothing unless WelsCPUFeatureDetect reports the WELS_CPU_LSX flag.
TEST (EncodeMbAuxTest, WelsQuantFour4x4Max_lsx) {
  const bool bFlagSet = (WelsCPUFeatureDetect (0) & WELS_CPU_LSX) != 0;
  if (!bFlagSet)
    return;
  TestWelsQuantFour4x4Max (WelsQuantFour4x4Max_lsx);
}
578 #endif //HAVE_LSX
579
WelsHadamardQuant2x2SkipAnchor(int16_t * rs,int16_t ff,int16_t mf)580 int32_t WelsHadamardQuant2x2SkipAnchor (int16_t* rs, int16_t ff, int16_t mf) {
581 int16_t pDct[4], s[4];
582 int16_t threshold = ((1 << 16) - 1) / mf - ff;
583 s[0] = rs[0] + rs[32];
584 s[1] = rs[0] - rs[32];
585 s[2] = rs[16] + rs[48];
586 s[3] = rs[16] - rs[48];
587 pDct[0] = s[0] + s[2];
588 pDct[1] = s[0] - s[2];
589 pDct[2] = s[1] + s[3];
590 pDct[3] = s[1] - s[3];
591 return ((WELS_ABS (pDct[0]) > threshold) || (WELS_ABS (pDct[1]) > threshold) || (WELS_ABS (pDct[2]) > threshold)
592 || (WELS_ABS (pDct[3]) > threshold));
593 }
594
// Expects WelsHadamardQuant2x2Skip_c to return the same value as
// WelsHadamardQuant2x2SkipAnchor for random input, ff and mf.
TEST (EncodeMbAuxTest, WelsHadamardQuant2x2Skip_c) {
  int16_t iRS[64];
  for (int i = 0; i < 64; i++)
    iRS[i] = (rand() & 32767) - 16384;
  const int16_t ff = rand() & 32767;
  // The anchor divides by mf, so draw it from [1, 32767]; a value of 0 would make the
  // test divide by zero.
  const int16_t mf = rand() % 32767 + 1;
  EXPECT_EQ (WelsHadamardQuant2x2Skip_c (iRS, ff, mf), WelsHadamardQuant2x2SkipAnchor (iRS, ff, mf));
}
604
WelsHadamardQuant2x2Anchor(int16_t * rs,const int16_t ff,int16_t mf,int16_t * pDct,int16_t * block)605 int32_t WelsHadamardQuant2x2Anchor (int16_t* rs, const int16_t ff, int16_t mf, int16_t* pDct, int16_t* block) {
606 int16_t s[4];
607 int32_t sign, i, dc_nzc = 0;
608
609 s[0] = rs[0] + rs[32];
610 s[1] = rs[0] - rs[32];
611 s[2] = rs[16] + rs[48];
612 s[3] = rs[16] - rs[48];
613
614 rs[0] = 0;
615 rs[16] = 0;
616 rs[32] = 0;
617 rs[48] = 0;
618
619 pDct[0] = s[0] + s[2];
620 pDct[1] = s[0] - s[2];
621 pDct[2] = s[1] + s[3];
622 pDct[3] = s[1] - s[3];
623
624 sign = WELS_SIGN (pDct[0]);
625 pDct[0] = WELS_NEW_QUANT (pDct[0], ff, mf);
626 sign = WELS_SIGN (pDct[1]);
627 pDct[1] = WELS_NEW_QUANT (pDct[1], ff, mf);
628 sign = WELS_SIGN (pDct[2]);
629 pDct[2] = WELS_NEW_QUANT (pDct[2], ff, mf);
630 sign = WELS_SIGN (pDct[3]);
631 pDct[3] = WELS_NEW_QUANT (pDct[3], ff, mf);
632 ST64 (block, LD64 (pDct));
633 for (i = 0; i < 4; i++)
634 dc_nzc += (block[i] != 0);
635 return dc_nzc;
636 }
637
// Runs WelsHadamardQuant2x2Anchor and WelsHadamardQuant2x2_c on identical random
// input and expects the same return value and the same four pDct outputs. The block
// outputs and the modified rs buffers are not compared.
TEST (EncodeMbAuxTest, WelsHadamardQuant2x2_c) {
  int16_t iRsC[64], iRsA[64];
  int16_t iBlockA[16], iBlockC[16], iDctA[4], iDctC[4];
  for (int k = 0; k != 64; ++k) {
    const int16_t iVal = (rand() & 32767) - 16384;
    iRsC[k] = iVal;
    iRsA[k] = iVal;
  }
  for (int k = 0; k != 4; ++k) {
    const int16_t iVal = (rand() & 32767) - 16384;
    iDctC[k] = iVal;
    iDctA[k] = iVal;
  }
  const int16_t ff = rand() & 32767;
  const int16_t mf = rand() & 32767;

  const int32_t iRetA = WelsHadamardQuant2x2Anchor (iRsA, ff, mf, iDctA, iBlockA);
  const int32_t iRetC = WelsHadamardQuant2x2_c (iRsC, ff, mf, iDctC, iBlockC);
  EXPECT_EQ (iRetA, iRetC);
  int iPos = 0;
  while (iPos < 4) {
    EXPECT_EQ (iDctA[iPos], iDctC[iPos]);
    ++iPos;
  }
}
655
WelsHadamardT4DcAnchor(int16_t * pLumaDc,int16_t * pDct)656 void WelsHadamardT4DcAnchor (int16_t* pLumaDc, int16_t* pDct) {
657 int32_t p[16], s[4];
658 int32_t i, iIdx;
659 for (i = 0 ; i < 16 ; i += 4) {
660 iIdx = ((i & 0x08) << 4) + ((i & 0x04) << 3);
661 s[0] = pDct[iIdx ] + pDct[iIdx + 80];
662 s[3] = pDct[iIdx ] - pDct[iIdx + 80];
663 s[1] = pDct[iIdx + 16] + pDct[iIdx + 64];
664 s[2] = pDct[iIdx + 16] - pDct[iIdx + 64];
665 p[i ] = s[0] + s[1];
666 p[i + 2] = s[0] - s[1];
667 p[i + 1] = s[3] + s[2];
668 p[i + 3] = s[3] - s[2];
669 }
670 for (i = 0 ; i < 4 ; i ++) {
671 s[0] = p[i ] + p[i + 12];
672 s[3] = p[i ] - p[i + 12];
673 s[1] = p[i + 4] + p[i + 8];
674 s[2] = p[i + 4] - p[i + 8];
675 pLumaDc[i ] = WELS_CLIP3 ((s[0] + s[1] + 1) >> 1, -32768, 32767);
676 pLumaDc[i + 8 ] = WELS_CLIP3 ((s[0] - s[1] + 1) >> 1, -32768, 32767);
677 pLumaDc[i + 4 ] = WELS_CLIP3 ((s[3] + s[2] + 1) >> 1, -32768, 32767);
678 pLumaDc[i + 12] = WELS_CLIP3 ((s[3] - s[2] + 1) >> 1, -32768, 32767);
679 }
680 }
// Expects WelsHadamardT4Dc_c to produce the same 16 outputs as WelsHadamardT4DcAnchor
// for the same random input buffer.
TEST (EncodeMbAuxTest, WelsHadamardT4Dc_c) {
  CMemoryAlign cMemoryAlign (0);
  ALLOC_MEMORY (int16_t, iDct, 128 * 16);
  ALLOC_MEMORY (int16_t, iLumaDcR, 16);
  ALLOC_MEMORY (int16_t, iLumaDcC, 16);
  for (int iFill = 0; iFill != 128 * 16; ++iFill) {
    const int iRaw = rand() & 32767;
    iDct[iFill] = iRaw - 16384;
  }
  WelsHadamardT4DcAnchor (iLumaDcR, iDct);
  WelsHadamardT4Dc_c (iLumaDcC, iDct);
  int iPos = 0;
  while (iPos < 16) {
    EXPECT_EQ (iLumaDcR[iPos], iLumaDcC[iPos]);
    ++iPos;
  }
  FREE_MEMORY (iDct);
  FREE_MEMORY (iLumaDcR);
  FREE_MEMORY (iLumaDcC);
}
696 #ifdef X86_ASM
// Expects WelsHadamardT4Dc_sse2 to produce the same 16 outputs as WelsHadamardT4Dc_c
// for the same random input buffer.
TEST (EncodeMbAuxTest, WelsHadamardT4Dc_sse2) {
  CMemoryAlign cMemoryAlign (0);
  ALLOC_MEMORY (int16_t, iDct, 128 * 16);
  ALLOC_MEMORY (int16_t, iLumaDcC, 16);
  ALLOC_MEMORY (int16_t, iLumaDcS, 16);
  for (int iFill = 0; iFill != 128 * 16; ++iFill) {
    const int iRaw = rand() & 32767;
    iDct[iFill] = iRaw - 16384;
  }
  WelsHadamardT4Dc_c (iLumaDcC, iDct);
  WelsHadamardT4Dc_sse2 (iLumaDcS, iDct);
  int iPos = 0;
  while (iPos < 16) {
    EXPECT_EQ (iLumaDcC[iPos], iLumaDcS[iPos]);
    ++iPos;
  }
  FREE_MEMORY (iDct);
  FREE_MEMORY (iLumaDcC);
  FREE_MEMORY (iLumaDcS);
}
712 #endif
713 #ifdef HAVE_MMI
// Expects WelsHadamardT4Dc_mmi to produce the same 16 outputs as WelsHadamardT4Dc_c
// for the same random input buffer.
TEST (EncodeMbAuxTest, WelsHadamardT4Dc_mmi) {
  CMemoryAlign cMemoryAlign (0);
  ALLOC_MEMORY (int16_t, iDct, 128 * 16);
  ALLOC_MEMORY (int16_t, iLumaDcC, 16);
  ALLOC_MEMORY (int16_t, iLumaDcS, 16);
  for (int iFill = 0; iFill != 128 * 16; ++iFill) {
    const int iRaw = rand() & 32767;
    iDct[iFill] = iRaw - 16384;
  }
  WelsHadamardT4Dc_c (iLumaDcC, iDct);
  WelsHadamardT4Dc_mmi (iLumaDcS, iDct);
  int iPos = 0;
  while (iPos < 16) {
    EXPECT_EQ (iLumaDcC[iPos], iLumaDcS[iPos]);
    ++iPos;
  }
  FREE_MEMORY (iDct);
  FREE_MEMORY (iLumaDcC);
  FREE_MEMORY (iLumaDcS);
}
729 #endif
730