// Unit tests for encoder macroblock helpers: coefficient scan, 4x4 transform, block copy,
// non-zero counting, quantisation and Hadamard routines.
1 #include <gtest/gtest.h>
2 #include "cpu.h"
3 #include "ls_defines.h"
4 #include "encode_mb_aux.h"
5 #include "wels_common_basis.h"
6 #include <algorithm>
7 #include <cstddef>
8 
9 using namespace WelsEnc;
10 
// Declares `name` as a `type*` pointing at `num` elements obtained from the in-scope
// `cMemoryAlign` object; the variable name doubles as the allocation tag string.
// `num` is parenthesised so compound size expressions (e.g. `a + b`) are scaled as a whole.
#define ALLOC_MEMORY(type, name, num) type* name = (type*)cMemoryAlign.WelsMalloc((num)*sizeof(type), #name);
// Returns a buffer created with ALLOC_MEMORY to the same in-scope `cMemoryAlign` object.
#define FREE_MEMORY(name) cMemoryAlign.WelsFree(name, #name);
// Verifies the output ordering of WelsScan4x4Ac_c: the first 15 outputs must be
// the input coefficients at the indices listed below, and the 16th output must be 0.
TEST (EncodeMbAuxTest, TestScan_4x4_ac_c) {
  // Input index expected at each of the first 15 output positions.
  static const int kiAcOrder[15] = {1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15};
  CMemoryAlign cMemoryAlign (0);
  ALLOC_MEMORY (int16_t, iLevel, 16);
  ALLOC_MEMORY (int16_t, iDctA, 16);
  ALLOC_MEMORY (int16_t, iDctB, 16);
  // iDctB holds an untouched copy of what is handed to the function in iDctA.
  for (int iIdx = 0; iIdx < 16; ++iIdx) {
    const int16_t iCoef = rand() % 256 + 1;
    iDctA[iIdx] = iCoef;
    iDctB[iIdx] = iCoef;
  }
  WelsScan4x4Ac_c (iLevel, iDctA);
  for (int iPos = 0; iPos < 15; ++iPos)
    EXPECT_EQ (iLevel[iPos], iDctB[kiAcOrder[iPos]]);
  EXPECT_EQ (iLevel[15], 0);
  FREE_MEMORY (iLevel);
  FREE_MEMORY (iDctA);
  FREE_MEMORY (iDctB);
}
43 
44 #ifdef X86_ASM
// Cross-checks WelsScan4x4Ac_sse2 against WelsScan4x4Ac_c on the same random input;
// all 16 outputs must agree.
TEST (EncodeMbAuxTest, TestScan_4x4_ac_sse2) {
  CMemoryAlign cMemoryAlign (0);
  ALLOC_MEMORY (int16_t, iLevelA, 16);
  ALLOC_MEMORY (int16_t, iLevelB, 16);
  ALLOC_MEMORY (int16_t, iDct, 16);
  for (int iIdx = 0; iIdx != 16; ++iIdx)
    iDct[iIdx] = rand() % 256 + 1;
  // A = C reference output, B = SSE2 output.
  WelsScan4x4Ac_c (iLevelA, iDct);
  WelsScan4x4Ac_sse2 (iLevelB, iDct);
  for (int iPos = 0; iPos != 16; ++iPos) {
    EXPECT_EQ (iLevelA[iPos], iLevelB[iPos]);
  }
  FREE_MEMORY (iLevelA);
  FREE_MEMORY (iLevelB);
  FREE_MEMORY (iDct);
}
// Cross-checks WelsScan4x4DcAc_sse2 against WelsScan4x4DcAc_c on identical random
// input. The buffers hold 32 entries, but only the first 16 outputs are compared.
TEST (EncodeMbAuxTest, WelsScan4x4DcAc_sse2) {
  CMemoryAlign cMemoryAlign (0);
  ALLOC_MEMORY (int16_t, iLevelA, 32);
  ALLOC_MEMORY (int16_t, iLevelB, 32);
  ALLOC_MEMORY (int16_t, iDct, 32);
  for (int iIdx = 0; iIdx != 32; ++iIdx) {
    iDct[iIdx] = (rand() & 32767) - 16384;
  }
  // A = SSE2 output, B = C reference output.
  WelsScan4x4DcAc_sse2 (iLevelA, iDct);
  WelsScan4x4DcAc_c (iLevelB, iDct);
  for (int iPos = 0; iPos != 16; ++iPos) {
    EXPECT_EQ (iLevelA[iPos], iLevelB[iPos]);
  }
  FREE_MEMORY (iLevelA);
  FREE_MEMORY (iLevelB);
  FREE_MEMORY (iDct);
}
76 #endif
77 #ifdef HAVE_MMI
// Cross-checks WelsScan4x4Ac_mmi against WelsScan4x4Ac_c on the same random input;
// all 16 outputs must agree.
TEST (EncodeMbAuxTest, WelsScan4x4Ac_mmi) {
  CMemoryAlign cMemoryAlign (0);
  ALLOC_MEMORY (int16_t, iLevelA, 16);
  ALLOC_MEMORY (int16_t, iLevelB, 16);
  ALLOC_MEMORY (int16_t, iDct, 16);
  for (int iIdx = 0; iIdx != 16; ++iIdx)
    iDct[iIdx] = rand() % 256 + 1;
  // A = C reference output, B = MMI output.
  WelsScan4x4Ac_c (iLevelA, iDct);
  WelsScan4x4Ac_mmi (iLevelB, iDct);
  for (int iPos = 0; iPos != 16; ++iPos) {
    EXPECT_EQ (iLevelA[iPos], iLevelB[iPos]);
  }
  FREE_MEMORY (iLevelA);
  FREE_MEMORY (iLevelB);
  FREE_MEMORY (iDct);
}
// Cross-checks WelsScan4x4DcAc_mmi against WelsScan4x4DcAc_c on identical random
// input. The buffers hold 32 entries, but only the first 16 outputs are compared.
TEST (EncodeMbAuxTest, WelsScan4x4DcAc_mmi) {
  CMemoryAlign cMemoryAlign (0);
  ALLOC_MEMORY (int16_t, iLevelA, 32);
  ALLOC_MEMORY (int16_t, iLevelB, 32);
  ALLOC_MEMORY (int16_t, iDct, 32);
  for (int iIdx = 0; iIdx != 32; ++iIdx) {
    iDct[iIdx] = (rand() & 32767) - 16384;
  }
  // A = MMI output, B = C reference output.
  WelsScan4x4DcAc_mmi (iLevelA, iDct);
  WelsScan4x4DcAc_c (iLevelB, iDct);
  for (int iPos = 0; iPos != 16; ++iPos) {
    EXPECT_EQ (iLevelA[iPos], iLevelB[iPos]);
  }
  FREE_MEMORY (iLevelA);
  FREE_MEMORY (iLevelB);
  FREE_MEMORY (iDct);
}
109 #endif
// Verifies the output ordering of WelsScan4x4Dc: each of the 16 outputs must equal
// the input coefficient at the index listed for that position.
TEST (EncodeMbAuxTest, TestScan_4x4_dcc) {
  // Input index expected at each output position.
  static const int kiDcOrder[16] = {0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15};
  CMemoryAlign cMemoryAlign (0);
  ALLOC_MEMORY (int16_t, iLevel, 16);
  ALLOC_MEMORY (int16_t, iDctA, 16);
  ALLOC_MEMORY (int16_t, iDctB, 16);
  // iDctB holds an untouched copy of what is handed to the function in iDctA.
  for (int iIdx = 0; iIdx < 16; ++iIdx) {
    const int16_t iCoef = rand() % 256 + 1;
    iDctB[iIdx] = iCoef;
    iDctA[iIdx] = iCoef;
  }
  WelsScan4x4Dc (iLevel, iDctA);
  for (int iPos = 0; iPos < 16; ++iPos)
    EXPECT_EQ (iLevel[iPos], iDctB[kiDcOrder[iPos]]);
  FREE_MEMORY (iLevel);
  FREE_MEMORY (iDctA);
  FREE_MEMORY (iDctB);
}
// Writes the iSize x iSize element-wise difference pPix1 - pPix2 into iDiff.
// iDiff is filled densely (row length iSize); the two inputs are read with their
// own row strides iStride1 / iStride2.
static inline void PixelSubWH (int16_t* iDiff, int iSize, uint8_t* pPix1, int iStride1, uint8_t* pPix2, int iStride2) {
  int16_t* pOut = iDiff;
  for (int iRow = 0; iRow < iSize; ++iRow) {
    const uint8_t* pRow1 = pPix1 + iRow * iStride1;
    const uint8_t* pRow2 = pPix2 + iRow * iStride2;
    for (int iCol = 0; iCol < iSize; ++iCol)
      *pOut++ = pRow1[iCol] - pRow2[iCol];
  }
}
147 
148 #define FENC_STRIDE 16
149 #define FDEC_STRIDE 32
Sub4x4DctAnchor(int16_t iDct[4][4],uint8_t * pPix1,uint8_t * pPix2)150 static void Sub4x4DctAnchor (int16_t iDct[4][4], uint8_t* pPix1, uint8_t* pPix2) {
151   int16_t iDiff[4][4];
152   int16_t tmp[4][4];
153   int i;
154   PixelSubWH ((int16_t*)iDiff, 4, pPix1, FENC_STRIDE, pPix2, FDEC_STRIDE);
155   for (i = 0; i < 4; i++) {
156     const int a03 = iDiff[i][0] + iDiff[i][3];
157     const int a12 = iDiff[i][1] + iDiff[i][2];
158     const int s03 = iDiff[i][0] - iDiff[i][3];
159     const int s12 = iDiff[i][1] - iDiff[i][2];
160     tmp[0][i] =   a03 +   a12;
161     tmp[1][i] = 2 * s03 +   s12;
162     tmp[2][i] =   a03 -   a12;
163     tmp[3][i] =   s03 - 2 * s12;
164   }
165   for (i = 0; i < 4; i++) {
166     const int a03 = tmp[i][0] + tmp[i][3];
167     const int a12 = tmp[i][1] + tmp[i][2];
168     const int s03 = tmp[i][0] - tmp[i][3];
169     const int s12 = tmp[i][1] - tmp[i][2];
170     iDct[i][0] =   a03 +   a12;
171     iDct[i][1] = 2 * s03 +   s12;
172     iDct[i][2] =   a03 -   a12;
173     iDct[i][3] =   s03 - 2 * s12;
174   }
175 }
176 
Sub8x8DctAnchor(int16_t iDct[4][4][4],uint8_t * pPix1,uint8_t * pPix2)177 static void Sub8x8DctAnchor (int16_t iDct[4][4][4], uint8_t* pPix1, uint8_t* pPix2) {
178   Sub4x4DctAnchor (iDct[0], &pPix1[0], &pPix2[0]);
179   Sub4x4DctAnchor (iDct[1], &pPix1[4], &pPix2[4]);
180   Sub4x4DctAnchor (iDct[2], &pPix1[4 * FENC_STRIDE + 0], &pPix2[4 * FDEC_STRIDE + 0]);
181   Sub4x4DctAnchor (iDct[3], &pPix1[4 * FENC_STRIDE + 4], &pPix2[4 * FDEC_STRIDE + 4]);
182 }
TestDctT4(PDctFunc func)183 static void TestDctT4 (PDctFunc func) {
184   int16_t iDctRef[4][4];
185   CMemoryAlign cMemoryAlign (0);
186   ALLOC_MEMORY (uint8_t, uiPix1, 16 * FENC_STRIDE);
187   ALLOC_MEMORY (uint8_t, uiPix2, 16 * FDEC_STRIDE);
188   ALLOC_MEMORY (int16_t, iDct, 16);
189   for (int i = 0; i < 4; i++) {
190     for (int j = 0; j < 4; j++) {
191       uiPix1[i * FENC_STRIDE + j] = rand() & 255;
192       uiPix2[i * FDEC_STRIDE + j] = rand() & 255;
193     }
194   }
195   Sub4x4DctAnchor (iDctRef, uiPix1, uiPix2);
196   func (iDct, uiPix1, FENC_STRIDE, uiPix2, FDEC_STRIDE);
197   for (int i = 0; i < 4; i++)
198     for (int j = 0; j < 4; j++)
199       EXPECT_EQ (iDctRef[j][i], iDct[i * 4 + j]);
200   FREE_MEMORY (uiPix1);
201   FREE_MEMORY (uiPix2);
202   FREE_MEMORY (iDct);
203 }
TestDctFourT4(PDctFunc func)204 static void TestDctFourT4 (PDctFunc func) {
205   int16_t iDctRef[4][4][4];
206   CMemoryAlign cMemoryAlign (0);
207   ALLOC_MEMORY (uint8_t, uiPix1, 16 * FENC_STRIDE);
208   ALLOC_MEMORY (uint8_t, uiPix2, 16 * FDEC_STRIDE);
209   ALLOC_MEMORY (int16_t, iDct, 16 * 4);
210   for (int i = 0; i < 8; i++) {
211     for (int j = 0; j < 8; j++) {
212       uiPix1[i * FENC_STRIDE + j] = rand() & 255;
213       uiPix2[i * FDEC_STRIDE + j] = rand() & 255;
214     }
215   }
216   Sub8x8DctAnchor (iDctRef, uiPix1, uiPix2);
217   func (iDct, uiPix1, FENC_STRIDE, uiPix2, FDEC_STRIDE);
218   for (int k = 0; k < 4; k++)
219     for (int i = 0; i < 4; i++)
220       for (int j = 0; j < 4; j++)
221         EXPECT_EQ (iDctRef[k][j][i], iDct[k * 16 + i * 4 + j]);
222   FREE_MEMORY (uiPix1);
223   FREE_MEMORY (uiPix2);
224   FREE_MEMORY (iDct);
225 }
// Runs the single-block transform check on the plain C implementation.
TEST (EncodeMbAuxTest, WelsDctT4_c) {
  PDctFunc pfDct = WelsDctT4_c;
  TestDctT4 (pfDct);
}
// Runs the four-block transform check on the plain C implementation.
TEST (EncodeMbAuxTest, WelsDctFourT4_c) {
  PDctFunc pfDct = WelsDctFourT4_c;
  TestDctFourT4 (pfDct);
}
232 
233 #ifdef X86_ASM
// Runs the single-block transform check on the MMX implementation.
TEST (EncodeMbAuxTest, WelsDctT4_mmx) {
  PDctFunc pfDct = WelsDctT4_mmx;
  TestDctT4 (pfDct);
}

// Runs the single-block transform check on the SSE2 implementation.
TEST (EncodeMbAuxTest, WelsDctT4_sse2) {
  PDctFunc pfDct = WelsDctT4_sse2;
  TestDctT4 (pfDct);
}

// Runs the four-block transform check on the SSE2 implementation.
TEST (EncodeMbAuxTest, WelsDctFourT4_sse2) {
  PDctFunc pfDct = WelsDctFourT4_sse2;
  TestDctFourT4 (pfDct);
}
245 
246 #ifdef HAVE_AVX2
// Runs the single-block transform check on the AVX2 implementation; does nothing
// when the CPU feature flags do not report AVX2.
TEST (EncodeMbAuxTest, WelsDctT4_avx2) {
  if (! (WelsCPUFeatureDetect (0) & WELS_CPU_AVX2))
    return;
  TestDctT4 (WelsDctT4_avx2);
}

// Runs the four-block transform check on the AVX2 implementation; does nothing
// when the CPU feature flags do not report AVX2.
TEST (EncodeMbAuxTest, WelsDctFourT4_avx2) {
  if (! (WelsCPUFeatureDetect (0) & WELS_CPU_AVX2))
    return;
  TestDctFourT4 (WelsDctFourT4_avx2);
}
256 #endif //HAVE_AVX2
257 
// Cross-checks WelsCalculateSingleCtr4x4_sse2 against WelsCalculateSingleCtr4x4_c:
// each is handed its own copy of the same random coefficients and the two buffers
// must be identical afterwards.
TEST (EncodeMbAuxTest, WelsCalculateSingleCtr4x4_sse2) {
  CMemoryAlign cMemoryAlign (0);
  ALLOC_MEMORY (int16_t, iDctC, 16);
  ALLOC_MEMORY (int16_t, iDctS, 16);
  for (int iIdx = 0; iIdx != 16; ++iIdx) {
    const int16_t iCoef = (rand() & 65535) - 32768;
    iDctS[iIdx] = iCoef;
    iDctC[iIdx] = iCoef;
  }
  WelsCalculateSingleCtr4x4_c (iDctC);
  WelsCalculateSingleCtr4x4_sse2 (iDctS);
  for (int iPos = 0; iPos != 16; ++iPos) {
    EXPECT_EQ (iDctC[iPos], iDctS[iPos]);
  }
  FREE_MEMORY (iDctC);
  FREE_MEMORY (iDctS);
}
271 #endif
272 #ifdef HAVE_MMI
// Runs the single-block transform check on the MMI implementation.
TEST (EncodeMbAuxTest, WelsDctT4_mmi) {
  PDctFunc pfDct = WelsDctT4_mmi;
  TestDctT4 (pfDct);
}

// Runs the four-block transform check on the MMI implementation.
TEST (EncodeMbAuxTest, WelsDctFourT4_mmi) {
  PDctFunc pfDct = WelsDctFourT4_mmi;
  TestDctFourT4 (pfDct);
}
280 
// Cross-checks WelsCalculateSingleCtr4x4_mmi against WelsCalculateSingleCtr4x4_c:
// each is handed its own copy of the same random coefficients and the two buffers
// must be identical afterwards.
TEST (EncodeMbAuxTest, WelsCalculateSingleCtr4x4_mmi) {
  CMemoryAlign cMemoryAlign (0);
  ALLOC_MEMORY (int16_t, iDctC, 16);
  ALLOC_MEMORY (int16_t, iDctS, 16);
  for (int iIdx = 0; iIdx != 16; ++iIdx) {
    const int16_t iCoef = (rand() & 65535) - 32768;
    iDctS[iIdx] = iCoef;
    iDctC[iIdx] = iCoef;
  }
  WelsCalculateSingleCtr4x4_c (iDctC);
  WelsCalculateSingleCtr4x4_mmi (iDctS);
  for (int iPos = 0; iPos != 16; ++iPos) {
    EXPECT_EQ (iDctC[iPos], iDctS[iPos]);
  }
  FREE_MEMORY (iDctC);
  FREE_MEMORY (iDctS);
}
294 #endif
295 #ifdef HAVE_LASX
// Runs the single-block transform check on the LASX implementation.
TEST (EncodeMbAuxTest, WelsDctT4_lasx) {
  PDctFunc pfDct = WelsDctT4_lasx;
  TestDctT4 (pfDct);
}

// Runs the four-block transform check on the LASX implementation.
TEST (EncodeMbAuxTest, WelsDctFourT4_lasx) {
  PDctFunc pfDct = WelsDctFourT4_lasx;
  TestDctFourT4 (pfDct);
}
303 #endif
304 
// Reference block copy: copies iHeight rows of iWidth bytes from pSrc to pDst,
// stepping the source by iSStride and the destination by iDStride per row.
void copy (uint8_t* pDst, int32_t iDStride, uint8_t* pSrc, int32_t iSStride, int32_t iWidth, int32_t iHeight) {
  int32_t iRow = 0;
  while (iRow < iHeight) {
    uint8_t* pDstRow = pDst + iRow * iDStride;
    const uint8_t* pSrcRow = pSrc + iRow * iSStride;
    memcpy (pDstRow, pSrcRow, iWidth);
    ++iRow;
  }
}
309 
// Generates TEST(EncodeMbAuxTest, function) for a width x height block-copy routine.
// The test fills a width x height region of ref_src with random bytes, copies it once
// with function(dst, iDStride, ref_src, iSStride) and once with the row-wise reference
// copy(), then compares the width x height region of both destinations.
// Both strides are fixed at 64. width/height are parenthesised at every use so that
// compound expressions passed as arguments keep their intended precedence.
#define GENERATE_UT_FOR_COPY(width, height, function) \
TEST(EncodeMbAuxTest, function) { \
  const int iSStride = 64;  \
  const int iDStride = 64;  \
  ENFORCE_STACK_ALIGN_1D (uint8_t, ref_src, iSStride*(height), 16); \
  ENFORCE_STACK_ALIGN_1D (uint8_t, ref_dst, iDStride*(height), 16); \
  ENFORCE_STACK_ALIGN_1D (uint8_t, dst, iDStride*(height), 16); \
  for(int i = 0; i < (height); i++) \
    for(int j = 0; j < (width); j++) \
      ref_src[i*iSStride+j] = rand() & 255; \
  function(dst, iDStride, ref_src, iSStride); \
  copy(ref_dst, iDStride, ref_src, iSStride, (width), (height)); \
  for(int i = 0; i < (height); i++) \
    for(int j = 0; j < (width); j++) \
      EXPECT_EQ(ref_dst[i*iDStride+j], dst[i*iDStride+j]); \
}
326 
327 GENERATE_UT_FOR_COPY (4, 4, WelsCopy4x4_c);
328 GENERATE_UT_FOR_COPY (8, 4, WelsCopy8x4_c);
329 GENERATE_UT_FOR_COPY (4, 8, WelsCopy4x8_c);
330 GENERATE_UT_FOR_COPY (8, 8, WelsCopy8x8_c);
331 GENERATE_UT_FOR_COPY (8, 16, WelsCopy8x16_c);
332 GENERATE_UT_FOR_COPY (16, 8, WelsCopy16x8_c);
333 GENERATE_UT_FOR_COPY (16, 16, WelsCopy16x16_c);
334 #ifdef X86_ASM
335 GENERATE_UT_FOR_COPY (16, 8, WelsCopy16x8NotAligned_sse2);
336 GENERATE_UT_FOR_COPY (16, 16, WelsCopy16x16NotAligned_sse2);
337 GENERATE_UT_FOR_COPY (16, 16, WelsCopy16x16_sse2);
338 #endif
339 #ifdef HAVE_MMI
340 GENERATE_UT_FOR_COPY (16, 8, WelsCopy16x8NotAligned_mmi);
341 GENERATE_UT_FOR_COPY (16, 16, WelsCopy16x16NotAligned_mmi);
342 GENERATE_UT_FOR_COPY (16, 16, WelsCopy16x16_mmi);
343 #endif
344 #ifdef HAVE_MSA
345 GENERATE_UT_FOR_COPY (8, 8, WelsCopy8x8_msa);
346 GENERATE_UT_FOR_COPY (8, 16, WelsCopy8x16_msa);
347 GENERATE_UT_FOR_COPY (16, 8, WelsCopy16x8_msa);
348 GENERATE_UT_FOR_COPY (16, 16, WelsCopy16x16_msa);
349 #endif
350 
351 #ifdef HAVE_LSX
352 GENERATE_UT_FOR_COPY (8, 8, WelsCopy8x8_lsx);
353 GENERATE_UT_FOR_COPY (16, 16, WelsCopy16x16_lsx);
354 GENERATE_UT_FOR_COPY (16, 16, WelsCopy16x16NotAligned_lsx);
355 #endif
356 
357 namespace {
358 
TestGetNoneZeroCount(PGetNoneZeroCountFunc func)359 void TestGetNoneZeroCount (PGetNoneZeroCountFunc func) {
360   ENFORCE_STACK_ALIGN_1D (int16_t, pLevel, 16, 16);
361   const int num_test_runs = 1000;
362   for (int run = 0; run < num_test_runs; run++) {
363     const bool all_zero = run == 0;
364     const bool all_nonzero = run == 1;
365     int result = 0;
366     for (int i = 0; i < 16; i++) {
367       const int r = rand();
368       if (all_zero)
369         pLevel[i] = 0;
370       else if (all_nonzero)
371         pLevel[i] = r % 0xFFFF - 0x8000 ? r % 0xFFFF - 0x8000 : 0x7FFF;
372       else
373         pLevel[i] = (r >> 16 & 1) * ((r & 0xFFFF) - 0x8000);
374       result += pLevel[i] != 0;
375     }
376     const int32_t nnz = func (pLevel);
377     EXPECT_EQ (nnz, result);
378   }
379 }
380 
381 } // anon ns.
382 
// Runs the non-zero counting check on the plain C implementation.
TEST (EncodeMbAuxTest, WelsGetNoneZeroCount_c) {
  PGetNoneZeroCountFunc pfCount = WelsGetNoneZeroCount_c;
  TestGetNoneZeroCount (pfCount);
}
386 #ifdef X86_ASM
// Runs the non-zero counting check on the SSE2 implementation.
TEST (EncodeMbAuxTest, WelsGetNoneZeroCount_sse2) {
  PGetNoneZeroCountFunc pfCount = WelsGetNoneZeroCount_sse2;
  TestGetNoneZeroCount (pfCount);
}
// Runs the non-zero counting check on the SSE4.2 implementation; does nothing
// when the CPU feature flags do not report SSE4.2.
TEST (EncodeMbAuxTest, WelsGetNoneZeroCount_sse42) {
  if (! (WelsCPUFeatureDetect (0) & WELS_CPU_SSE42))
    return;
  TestGetNoneZeroCount (WelsGetNoneZeroCount_sse42);
}
394 #endif
395 #ifdef HAVE_MMI
// Runs the non-zero counting check on the MMI implementation.
TEST (EncodeMbAuxTest, WelsGetNoneZeroCount_mmi) {
  PGetNoneZeroCountFunc pfCount = WelsGetNoneZeroCount_mmi;
  TestGetNoneZeroCount (pfCount);
}
399 #endif
// Quantisation helper macros used by the reference ("anchor") code below.
// All three read a variable named `sign` from the enclosing scope: with sign == 0
// WELS_ABS_LC(a) yields a, with sign == -1 it yields -a.
#define WELS_ABS_LC(a) ((sign ^ (int32_t)(a)) - sign)
// ((ff + |pDct|) * mf) >> 16, where |pDct| is obtained through WELS_ABS_LC. The whole
// expansion is parenthesised so it can be used safely inside larger expressions.
#define NEW_QUANT(pDct, ff, mf) ((((ff)+ WELS_ABS_LC(pDct))*(mf)) >>16)
// NEW_QUANT with the sign selected by `sign` applied to the result.
#define WELS_NEW_QUANT(pDct,ff,mf) WELS_ABS_LC(NEW_QUANT(pDct, ff, mf))
403 namespace {
WelsQuant4x4MaxAnchor(int16_t * pDct,int16_t * ff,int16_t * mf)404 int16_t WelsQuant4x4MaxAnchor (int16_t* pDct, int16_t* ff, int16_t* mf) {
405   int16_t max_abs = 0;
406   for (int i = 0; i < 16; i++) {
407     const int j = i & 0x07;
408     const int32_t sign = WELS_SIGN (pDct[i]);
409     pDct[i] = NEW_QUANT (pDct[i], ff[j], mf[j]);
410     max_abs = std::max(max_abs, pDct[i]);
411     pDct[i] = WELS_ABS_LC (pDct[i]);
412   }
413   return max_abs;
414 }
WelsQuant4x4DcAnchor(int16_t * pDct,int16_t iFF,int16_t iMF)415 void WelsQuant4x4DcAnchor (int16_t* pDct, int16_t iFF, int16_t iMF) {
416   for (int i = 0; i < 16; i++) {
417     const int32_t sign = WELS_SIGN (pDct[i]);
418     pDct[i] = WELS_NEW_QUANT (pDct[i], iFF, iMF);
419   }
420 }
WelsQuantFour4x4Anchor(int16_t * pDct,int16_t * ff,int16_t * mf)421 void WelsQuantFour4x4Anchor (int16_t* pDct, int16_t* ff,  int16_t* mf) {
422   for (int i = 0; i < 4; i++)
423     WelsQuant4x4MaxAnchor (pDct + 16 * i, ff, mf);
424 }
WelsQuantFour4x4MaxAnchor(int16_t * pDct,int16_t * ff,int16_t * mf,int16_t * max)425 void WelsQuantFour4x4MaxAnchor (int16_t* pDct, int16_t* ff,  int16_t* mf, int16_t* max) {
426   for (int i = 0; i < 4; i++)
427     max[i] = WelsQuant4x4MaxAnchor (pDct + 16 * i, ff, mf);
428 }
TestWelsQuant4x4(PQuantizationFunc func)429 void TestWelsQuant4x4 (PQuantizationFunc func) {
430   const std::size_t f_size = 8;
431   const std::size_t dct_size = 16;
432   CMemoryAlign cMemoryAlign (0);
433   ALLOC_MEMORY (int16_t, ff, f_size);
434   ALLOC_MEMORY (int16_t, mf, f_size);
435   ALLOC_MEMORY (int16_t, iDctC, dct_size);
436   ALLOC_MEMORY (int16_t, iDctS, dct_size);
437   for (std::size_t i = 0; i < f_size; i++) {
438     ff[i] = rand() & 32767;
439     mf[i] = rand() & 32767;
440   }
441   for (std::size_t i = 0; i < dct_size; i++)
442     iDctC[i] = iDctS[i] = (rand() & 65535) - 32768;
443   WelsQuant4x4MaxAnchor (iDctC, ff, mf);
444   func (iDctS, ff, mf);
445   for (std::size_t i = 0; i < dct_size; i++)
446     EXPECT_EQ (iDctC[i], iDctS[i]);
447   FREE_MEMORY (ff);
448   FREE_MEMORY (mf);
449   FREE_MEMORY (iDctC);
450   FREE_MEMORY (iDctS);
451 }
TestWelsQuant4x4Dc(PQuantizationDcFunc func)452 void TestWelsQuant4x4Dc (PQuantizationDcFunc func) {
453   const std::size_t dct_size = 16;
454   const int16_t ff = rand() & 32767;
455   const int16_t mf = rand() & 32767;
456   CMemoryAlign cMemoryAlign (0);
457   ALLOC_MEMORY (int16_t, iDctC, dct_size);
458   ALLOC_MEMORY (int16_t, iDctS, dct_size);
459   for (std::size_t i = 0; i < dct_size; i++)
460     iDctC[i] = iDctS[i] = (rand() & 65535) - 32768;
461   WelsQuant4x4DcAnchor (iDctC, ff, mf);
462   func (iDctS, ff, mf);
463   for (std::size_t i = 0; i < dct_size; i++)
464     EXPECT_EQ (iDctC[i], iDctS[i]);
465   FREE_MEMORY (iDctC);
466   FREE_MEMORY (iDctS);
467 }
TestWelsQuantFour4x4(PQuantizationFunc func)468 void TestWelsQuantFour4x4 (PQuantizationFunc func) {
469   const std::size_t f_size = 8;
470   const std::size_t dct_size = 4 * 16;
471   CMemoryAlign cMemoryAlign (0);
472   ALLOC_MEMORY (int16_t, ff, f_size);
473   ALLOC_MEMORY (int16_t, mf, f_size);
474   ALLOC_MEMORY (int16_t, iDctC, dct_size);
475   ALLOC_MEMORY (int16_t, iDctS, dct_size);
476   for (std::size_t i = 0; i < f_size; i++) {
477     ff[i] = rand() & 32767;
478     mf[i] = rand() & 32767;
479   }
480   for (std::size_t i = 0; i < dct_size; i++)
481     iDctC[i] = iDctS[i] = (rand() & 65535) - 32768;
482   WelsQuantFour4x4Anchor (iDctC, ff, mf);
483   func (iDctS, ff, mf);
484   for (std::size_t i = 0; i < dct_size; i++)
485     EXPECT_EQ (iDctC[i], iDctS[i]);
486   FREE_MEMORY (ff);
487   FREE_MEMORY (mf);
488   FREE_MEMORY (iDctC);
489   FREE_MEMORY (iDctS);
490 }
TestWelsQuantFour4x4Max(PQuantizationMaxFunc func)491 void TestWelsQuantFour4x4Max (PQuantizationMaxFunc func) {
492   CMemoryAlign cMemoryAlign (0);
493   ALLOC_MEMORY (int16_t, ff, 8);
494   ALLOC_MEMORY (int16_t, mf, 8);
495   ALLOC_MEMORY (int16_t, iDctC, 64);
496   ALLOC_MEMORY (int16_t, iDctS, 64);
497   ALLOC_MEMORY (int16_t, iMaxC, 16);
498   ALLOC_MEMORY (int16_t, iMaxS, 16);
499   for (int i = 0; i < 8; i++) {
500     ff[i] = rand() & 32767;
501     mf[i] = rand() & 32767;
502   }
503   for (int i = 0; i < 64; i++)
504     iDctC[i] = iDctS[i] = (rand() & 65535) - 32767;
505   WelsQuantFour4x4MaxAnchor (iDctC, ff, mf, iMaxC);
506   func (iDctS, ff, mf, iMaxS);
507   for (int i = 0; i < 64; i++)
508     EXPECT_EQ (iDctC[i], iDctS[i]);
509   for (int i = 0; i < 4; i++)
510     EXPECT_EQ (iMaxC[i], iMaxS[i]);
511   FREE_MEMORY (ff);
512   FREE_MEMORY (mf);
513   FREE_MEMORY (iDctC);
514   FREE_MEMORY (iDctS);
515   FREE_MEMORY (iMaxC);
516   FREE_MEMORY (iMaxS);
517 }
518 } // anon ns
// Plain C quantisers, each run through the matching shared check.
TEST (EncodeMbAuxTest, WelsQuant4x4_c) {
  PQuantizationFunc pfQuant = WelsQuant4x4_c;
  TestWelsQuant4x4 (pfQuant);
}
TEST (EncodeMbAuxTest, WelsQuant4x4Dc_c) {
  PQuantizationDcFunc pfQuantDc = WelsQuant4x4Dc_c;
  TestWelsQuant4x4Dc (pfQuantDc);
}
TEST (EncodeMbAuxTest, WelsQuantFour4x4_c) {
  PQuantizationFunc pfQuantFour = WelsQuantFour4x4_c;
  TestWelsQuantFour4x4 (pfQuantFour);
}
TEST (EncodeMbAuxTest, WelsQuantFour4x4Max_c) {
  PQuantizationMaxFunc pfQuantMax = WelsQuantFour4x4Max_c;
  TestWelsQuantFour4x4Max (pfQuantMax);
}
531 #ifdef X86_ASM
// SSE2 quantisers, each run through the matching shared check.
TEST (EncodeMbAuxTest, WelsQuant4x4_sse2) {
  PQuantizationFunc pfQuant = WelsQuant4x4_sse2;
  TestWelsQuant4x4 (pfQuant);
}
TEST (EncodeMbAuxTest, WelsQuant4x4Dc_sse2) {
  PQuantizationDcFunc pfQuantDc = WelsQuant4x4Dc_sse2;
  TestWelsQuant4x4Dc (pfQuantDc);
}
TEST (EncodeMbAuxTest, WelsQuantFour4x4_sse2) {
  PQuantizationFunc pfQuantFour = WelsQuantFour4x4_sse2;
  TestWelsQuantFour4x4 (pfQuantFour);
}
TEST (EncodeMbAuxTest, WelsQuantFour4x4Max_sse2) {
  PQuantizationMaxFunc pfQuantMax = WelsQuantFour4x4Max_sse2;
  TestWelsQuantFour4x4Max (pfQuantMax);
}
544 #ifdef HAVE_AVX2
// AVX2 quantisers, each run through the matching shared check. Every test does
// nothing when the CPU feature flags do not report AVX2.
TEST (EncodeMbAuxTest, WelsQuant4x4_avx2) {
  if (! (WelsCPUFeatureDetect (0) & WELS_CPU_AVX2))
    return;
  TestWelsQuant4x4 (WelsQuant4x4_avx2);
}
TEST (EncodeMbAuxTest, WelsQuant4x4Dc_avx2) {
  if (! (WelsCPUFeatureDetect (0) & WELS_CPU_AVX2))
    return;
  TestWelsQuant4x4Dc (WelsQuant4x4Dc_avx2);
}
TEST (EncodeMbAuxTest, WelsQuantFour4x4_avx2) {
  if (! (WelsCPUFeatureDetect (0) & WELS_CPU_AVX2))
    return;
  TestWelsQuantFour4x4 (WelsQuantFour4x4_avx2);
}
TEST (EncodeMbAuxTest, WelsQuantFour4x4Max_avx2) {
  if (! (WelsCPUFeatureDetect (0) & WELS_CPU_AVX2))
    return;
  TestWelsQuantFour4x4Max (WelsQuantFour4x4Max_avx2);
}
561 #endif //HAVE_AVX2
562 #endif
563 #ifdef HAVE_MMI
// MMI quantisers, each run through the matching shared check. Every test does
// nothing when the CPU feature flags do not report MMI.
TEST (EncodeMbAuxTest, WelsQuant4x4_mmi) {
  if (! (WelsCPUFeatureDetect (0) & WELS_CPU_MMI))
    return;
  TestWelsQuant4x4 (WelsQuant4x4_mmi);
}
TEST (EncodeMbAuxTest, WelsQuant4x4Dc_mmi) {
  if (! (WelsCPUFeatureDetect (0) & WELS_CPU_MMI))
    return;
  TestWelsQuant4x4Dc (WelsQuant4x4Dc_mmi);
}
TEST (EncodeMbAuxTest, WelsQuantFour4x4_mmi) {
  if (! (WelsCPUFeatureDetect (0) & WELS_CPU_MMI))
    return;
  TestWelsQuantFour4x4 (WelsQuantFour4x4_mmi);
}
TEST (EncodeMbAuxTest, WelsQuantFour4x4Max_mmi) {
  if (! (WelsCPUFeatureDetect (0) & WELS_CPU_MMI))
    return;
  TestWelsQuantFour4x4Max (WelsQuantFour4x4Max_mmi);
}
580 #endif //HAVE_MMI
581 
582 #ifdef HAVE_LSX
// LSX four-block quantisers, each run through the matching shared check. Every
// test does nothing when the CPU feature flags do not report LSX.
TEST (EncodeMbAuxTest, WelsQuantFour4x4_lsx) {
  if (! (WelsCPUFeatureDetect (0) & WELS_CPU_LSX))
    return;
  TestWelsQuantFour4x4 (WelsQuantFour4x4_lsx);
}
TEST (EncodeMbAuxTest, WelsQuantFour4x4Max_lsx) {
  if (! (WelsCPUFeatureDetect (0) & WELS_CPU_LSX))
    return;
  TestWelsQuantFour4x4Max (WelsQuantFour4x4Max_lsx);
}
591 #endif //HAVE_LSX
592 
WelsHadamardQuant2x2SkipAnchor(int16_t * rs,int16_t ff,int16_t mf)593 int32_t WelsHadamardQuant2x2SkipAnchor (int16_t* rs, int16_t ff,  int16_t mf) {
594   int16_t pDct[4], s[4];
595   int16_t threshold = ((1 << 16) - 1) / mf - ff;
596   s[0] = rs[0]  + rs[32];
597   s[1] = rs[0]  - rs[32];
598   s[2] = rs[16] + rs[48];
599   s[3] = rs[16] - rs[48];
600   pDct[0] = s[0] + s[2];
601   pDct[1] = s[0] - s[2];
602   pDct[2] = s[1] + s[3];
603   pDct[3] = s[1] - s[3];
604   return ((WELS_ABS (pDct[0]) > threshold) || (WELS_ABS (pDct[1]) > threshold) || (WELS_ABS (pDct[2]) > threshold)
605           || (WELS_ABS (pDct[3]) > threshold));
606 }
607 
// Checks WelsHadamardQuant2x2Skip_c against WelsHadamardQuant2x2SkipAnchor on one
// random input.
TEST (EncodeMbAuxTest, WelsHadamardQuant2x2Skip_c) {
  int16_t iRS[64];
  int16_t ff, mf;
  for (int i = 0; i < 64; i++)
    iRS[i] = (rand() & 32767) - 16384;
  ff = rand() & 32767;
  // The anchor divides by mf, so it is drawn from [1, 32767]; a draw of 0 would
  // make the test divide by zero.
  mf = rand() % 32767 + 1;
  EXPECT_EQ (WelsHadamardQuant2x2Skip_c (iRS, ff, mf), WelsHadamardQuant2x2SkipAnchor (iRS, ff, mf));
}
617 
WelsHadamardQuant2x2Anchor(int16_t * rs,const int16_t ff,int16_t mf,int16_t * pDct,int16_t * block)618 int32_t WelsHadamardQuant2x2Anchor (int16_t* rs, const int16_t ff, int16_t mf, int16_t* pDct, int16_t* block) {
619   int16_t s[4];
620   int32_t sign, i, dc_nzc = 0;
621 
622   s[0] = rs[0]  + rs[32];
623   s[1] = rs[0]  - rs[32];
624   s[2] = rs[16] + rs[48];
625   s[3] = rs[16] - rs[48];
626 
627   rs[0] = 0;
628   rs[16] = 0;
629   rs[32] = 0;
630   rs[48] = 0;
631 
632   pDct[0] = s[0] + s[2];
633   pDct[1] = s[0] - s[2];
634   pDct[2] = s[1] + s[3];
635   pDct[3] = s[1] - s[3];
636 
637   sign = WELS_SIGN (pDct[0]);
638   pDct[0] = WELS_NEW_QUANT (pDct[0], ff, mf);
639   sign = WELS_SIGN (pDct[1]);
640   pDct[1] = WELS_NEW_QUANT (pDct[1], ff, mf);
641   sign = WELS_SIGN (pDct[2]);
642   pDct[2] = WELS_NEW_QUANT (pDct[2], ff, mf);
643   sign = WELS_SIGN (pDct[3]);
644   pDct[3] = WELS_NEW_QUANT (pDct[3], ff, mf);
645   ST64 (block, LD64 (pDct));
646   for (i = 0; i < 4; i++)
647     dc_nzc += (block[i] != 0);
648   return dc_nzc;
649 }
650 
// Checks WelsHadamardQuant2x2_c against WelsHadamardQuant2x2Anchor on one random
// input: return value, the four quantised coefficients, the first four block
// entries and the (partially cleared) source buffer must all match.
TEST (EncodeMbAuxTest, WelsHadamardQuant2x2_c) {
  int16_t iRsC[64], iRsA[64];
  int16_t ff, mf;
  int16_t iBlockA[16], iBlockC[16], iDctA[4], iDctC[4];
  for (int i = 0; i < 64; i++)
    iRsA[i] = iRsC[i] = (rand() & 32767) - 16384;
  for (int i = 0; i < 4; i++)
    iDctA[i] = iDctC[i] = (rand() & 32767) - 16384;
  // Start both block buffers from a known state so the comparison below never
  // reads indeterminate values.
  for (int i = 0; i < 16; i++)
    iBlockA[i] = iBlockC[i] = 0;
  ff = rand() & 32767;
  mf = rand() & 32767;

  int32_t iRetA = WelsHadamardQuant2x2Anchor (iRsA, ff, mf, iDctA, iBlockA);
  int32_t iRetC = WelsHadamardQuant2x2_c (iRsC, ff, mf, iDctC,    iBlockC);
  EXPECT_EQ (iRetA, iRetC);
  for (int i = 0; i < 4; i++) {
    EXPECT_EQ (iDctA[i], iDctC[i]);
    // The anchor mirrors the four coefficients into the block buffer.
    EXPECT_EQ (iBlockA[i], iBlockC[i]);
  }
  // The anchor zeroes the four source entries it consumed; the rest stay untouched.
  for (int i = 0; i < 64; i++)
    EXPECT_EQ (iRsA[i], iRsC[i]);
}
668 
WelsHadamardT4DcAnchor(int16_t * pLumaDc,int16_t * pDct)669 void WelsHadamardT4DcAnchor (int16_t* pLumaDc, int16_t* pDct) {
670   int32_t p[16], s[4];
671   int32_t i, iIdx;
672   for (i = 0 ; i < 16 ; i += 4) {
673     iIdx = ((i & 0x08) << 4) + ((i & 0x04) << 3);
674     s[0] = pDct[iIdx ]     + pDct[iIdx + 80];
675     s[3] = pDct[iIdx ]     - pDct[iIdx + 80];
676     s[1] = pDct[iIdx + 16] + pDct[iIdx + 64];
677     s[2] = pDct[iIdx + 16] - pDct[iIdx + 64];
678     p[i  ] = s[0] + s[1];
679     p[i + 2] = s[0] - s[1];
680     p[i + 1] = s[3] + s[2];
681     p[i + 3] = s[3] - s[2];
682   }
683   for (i = 0 ; i < 4 ; i ++) {
684     s[0] = p[i ]  + p[i + 12];
685     s[3] = p[i ]  - p[i + 12];
686     s[1] = p[i + 4] + p[i + 8];
687     s[2] = p[i + 4] - p[i + 8];
688     pLumaDc[i  ]  = WELS_CLIP3 ((s[0] + s[1] + 1) >> 1, -32768, 32767);
689     pLumaDc[i + 8 ] = WELS_CLIP3 ((s[0] - s[1] + 1) >> 1, -32768, 32767);
690     pLumaDc[i + 4 ] = WELS_CLIP3 ((s[3] + s[2] + 1) >> 1, -32768, 32767);
691     pLumaDc[i + 12] = WELS_CLIP3 ((s[3] - s[2] + 1) >> 1, -32768, 32767);
692   }
693 }
// Checks WelsHadamardT4Dc_c against WelsHadamardT4DcAnchor: both read the same
// random coefficient buffer and their 16 outputs must agree.
TEST (EncodeMbAuxTest, WelsHadamardT4Dc_c) {
  const int kiDctCount = 128 * 16;
  CMemoryAlign cMemoryAlign (0);
  ALLOC_MEMORY (int16_t, iDct, 128 * 16);
  ALLOC_MEMORY (int16_t, iLumaDcR, 16);
  ALLOC_MEMORY (int16_t, iLumaDcC, 16);
  for (int iIdx = 0; iIdx != kiDctCount; ++iIdx) {
    iDct[iIdx] = (rand() & 32767) - 16384;
  }
  // R = reference (anchor) output, C = plain C implementation output.
  WelsHadamardT4DcAnchor (iLumaDcR, iDct);
  WelsHadamardT4Dc_c (iLumaDcC, iDct);
  for (int iPos = 0; iPos != 16; ++iPos) {
    EXPECT_EQ (iLumaDcR[iPos], iLumaDcC[iPos]);
  }
  FREE_MEMORY (iDct);
  FREE_MEMORY (iLumaDcR);
  FREE_MEMORY (iLumaDcC);
}
709 #ifdef X86_ASM
// Cross-checks WelsHadamardT4Dc_sse2 against WelsHadamardT4Dc_c: both read the same
// random coefficient buffer and their 16 outputs must agree.
TEST (EncodeMbAuxTest, WelsHadamardT4Dc_sse2) {
  const int kiDctCount = 128 * 16;
  CMemoryAlign cMemoryAlign (0);
  ALLOC_MEMORY (int16_t, iDct, 128 * 16);
  ALLOC_MEMORY (int16_t, iLumaDcC, 16);
  ALLOC_MEMORY (int16_t, iLumaDcS, 16);
  for (int iIdx = 0; iIdx != kiDctCount; ++iIdx) {
    iDct[iIdx] = (rand() & 32767) - 16384;
  }
  // C = plain C output, S = SSE2 output.
  WelsHadamardT4Dc_c (iLumaDcC, iDct);
  WelsHadamardT4Dc_sse2 (iLumaDcS, iDct);
  for (int iPos = 0; iPos != 16; ++iPos) {
    EXPECT_EQ (iLumaDcC[iPos], iLumaDcS[iPos]);
  }
  FREE_MEMORY (iDct);
  FREE_MEMORY (iLumaDcC);
  FREE_MEMORY (iLumaDcS);
}
725 #endif
726 #ifdef HAVE_MMI
// Cross-checks WelsHadamardT4Dc_mmi against WelsHadamardT4Dc_c: both read the same
// random coefficient buffer and their 16 outputs must agree.
TEST (EncodeMbAuxTest, WelsHadamardT4Dc_mmi) {
  const int kiDctCount = 128 * 16;
  CMemoryAlign cMemoryAlign (0);
  ALLOC_MEMORY (int16_t, iDct, 128 * 16);
  ALLOC_MEMORY (int16_t, iLumaDcC, 16);
  ALLOC_MEMORY (int16_t, iLumaDcS, 16);
  for (int iIdx = 0; iIdx != kiDctCount; ++iIdx) {
    iDct[iIdx] = (rand() & 32767) - 16384;
  }
  // C = plain C output, S = MMI output.
  WelsHadamardT4Dc_c (iLumaDcC, iDct);
  WelsHadamardT4Dc_mmi (iLumaDcS, iDct);
  for (int iPos = 0; iPos != 16; ++iPos) {
    EXPECT_EQ (iLumaDcC[iPos], iLumaDcS[iPos]);
  }
  FREE_MEMORY (iDct);
  FREE_MEMORY (iLumaDcC);
  FREE_MEMORY (iLumaDcS);
}
742 #endif
743