// Unit tests for the decoder helpers IdctResAddPred_*, IdctFourResAddPred_* and WelsNonZeroCount_*.
#include <stdlib.h>
#include <string.h>

#include <gtest/gtest.h>
#include "macros.h"
#include "decode_mb_aux.h"
#include "deblocking.h"
#include "cpu.h"
using namespace WelsDec;

8 namespace {
9 
IdctResAddPred_ref(uint8_t * pPred,const int32_t kiStride,int16_t * pRs)10 void IdctResAddPred_ref (uint8_t* pPred, const int32_t kiStride, int16_t* pRs) {
11   int16_t iSrc[16];
12 
13   uint8_t* pDst             = pPred;
14   const int32_t kiStride2   = kiStride << 1;
15   const int32_t kiStride3   = kiStride + kiStride2;
16   int32_t i;
17 
18   for (i = 0; i < 4; i++) {
19     const int32_t kiY  = i << 2;
20     const int32_t kiT0 = pRs[kiY] + pRs[kiY + 2];
21     const int32_t kiT1 = pRs[kiY] - pRs[kiY + 2];
22     const int32_t kiT2 = (pRs[kiY + 1] >> 1) - pRs[kiY + 3];
23     const int32_t kiT3 = pRs[kiY + 1] + (pRs[kiY + 3] >> 1);
24 
25     iSrc[kiY] = kiT0 + kiT3;
26     iSrc[kiY + 1] = kiT1 + kiT2;
27     iSrc[kiY + 2] = kiT1 - kiT2;
28     iSrc[kiY + 3] = kiT0 - kiT3;
29   }
30 
31   for (i = 0; i < 4; i++) {
32     int32_t kT1 = iSrc[i]     +  iSrc[i + 8];
33     int32_t kT2 = iSrc[i + 4] + (iSrc[i + 12] >> 1);
34     int32_t kT3 = (32 + kT1 + kT2) >> 6;
35     int32_t kT4 = (32 + kT1 - kT2) >> 6;
36 
37     pDst[i] = WelsClip1 (kT3 + pPred[i]);
38     pDst[i + kiStride3] = WelsClip1 (kT4 + pPred[i + kiStride3]);
39 
40     kT1 =  iSrc[i]           - iSrc[i + 8];
41     kT2 = (iSrc[i + 4] >> 1) - iSrc[i + 12];
42     pDst[i + kiStride] = WelsClip1 (((32 + kT1 + kT2) >> 6) + pDst[i + kiStride]);
43     pDst[i + kiStride2] = WelsClip1 (((32 + kT1 - kT2) >> 6) + pDst[i + kiStride2]);
44   }
45 }
46 
SetNonZeroCount_ref(int8_t * pNonZeroCount)47 void SetNonZeroCount_ref (int8_t* pNonZeroCount) {
48   int32_t i;
49 
50   for (i = 0; i < 24; i++) {
51     pNonZeroCount[i] = !!pNonZeroCount[i];
52   }
53 }
54 
55 #if defined(X86_ASM)
56 #if defined(HAVE_AVX2)
IdctFourResAddPred_ref(uint8_t * pPred,int32_t iStride,int16_t * pRs)57 void IdctFourResAddPred_ref (uint8_t* pPred, int32_t iStride, int16_t* pRs) {
58   IdctResAddPred_ref (pPred + 0 * iStride + 0, iStride, pRs + 0 * 16);
59   IdctResAddPred_ref (pPred + 0 * iStride + 4, iStride, pRs + 1 * 16);
60   IdctResAddPred_ref (pPred + 4 * iStride + 0, iStride, pRs + 2 * 16);
61   IdctResAddPred_ref (pPred + 4 * iStride + 4, iStride, pRs + 3 * 16);
62 }
63 #endif
64 #endif
65 
66 } // anon ns
67 
// GENERATE_IDCTRESADDPRED(pred, flag)
//
// Defines TEST(DecoderDecodeMbAux, pred), which checks the implementation
// `pred` against IdctResAddPred_ref.
//   pred - function under test, called as pred(uint8_t*, int32_t, int16_t*).
//   flag - CPU feature bit required by `pred`; 0 means "always run". When the
//          bit is not reported by WelsCPUFeatureDetect the test returns early.
//
// Each of the 1000 rounds fills 16 coefficients with rand() values in
// [-2048, 2047] and a 4x4 block (row pitch 32) with rand() bytes, runs both
// implementations on identical copies and expects the 4x4 results to match.
#define GENERATE_IDCTRESADDPRED(pred, flag) \
TEST(DecoderDecodeMbAux, pred) {\
  const int32_t kiStride = 32;\
  const int iBits = 12;\
  const int iMask = (1 << iBits) - 1;\
  const int iOffset = 1 << (iBits - 1);\
  ENFORCE_STACK_ALIGN_1D (int16_t, iRS, 16, 16);\
  ENFORCE_STACK_ALIGN_1D (uint8_t, uiPred, 16 * kiStride, 16);\
  int16_t iRefRS[16];\
  uint8_t uiRefPred[16 * kiStride];\
  const uint32_t uiCPUFlags = WelsCPUFeatureDetect (NULL);\
  if ((flag) != 0 && (uiCPUFlags & (flag)) == 0)\
    return;\
  for (int32_t iRun = 0; iRun < 1000; iRun++) {\
    for (int i = 0; i < 16; i++)\
      iRefRS[i] = iRS[i] = (rand() & iMask) - iOffset;\
    for (int i = 0; i < 4; i++)\
      for (int j = 0; j < 4; j++)\
        uiRefPred[i * kiStride + j] = uiPred[i * kiStride + j] = rand() & 255;\
    pred (uiPred, kiStride, iRS);\
    IdctResAddPred_ref (uiRefPred, kiStride, iRefRS);\
    bool ok = true;\
    for (int i = 0; ok && i < 4; i++)\
      for (int j = 0; ok && j < 4; j++)\
        ok = (uiRefPred[i * kiStride + j] == uiPred[i * kiStride + j]);\
    EXPECT_TRUE (ok);\
  }\
}

// GENERATE_IDCTFOURRESADDPRED(pred, flag)
//
// Defines TEST(DecoderDecodeMbAux, pred), which checks the four-block
// implementation `pred` against IdctFourResAddPred_ref.
//   pred - function under test, called as
//          pred(uint8_t* pPred, int32_t iStride, int16_t* pRs, int8_t* pNzc).
//   flag - CPU feature bit required by `pred`; 0 means "always run". When the
//          bit is not reported by WelsCPUFeatureDetect the test returns early.
//
// Each of the 1000 rounds draws 4 x 16 coefficients in [-2048, 2047] and an
// 8x8 block (row pitch 32) of rand() bytes. iNzc[0], [1], [4] and [5] receive
// the number of non-zero coefficients of blocks 0..3. The counters are cleared
// at the start of every round so they describe only the coefficients of that
// round (at most 16 each, which fits int8_t) instead of accumulating and
// overflowing across rounds.
#define GENERATE_IDCTFOURRESADDPRED(pred, flag) \
TEST(DecoderDecodeMbAux, pred) {\
  const int32_t kiStride = 32;\
  const int iBits = 12;\
  const int iMask = (1 << iBits) - 1;\
  const int iOffset = 1 << (iBits - 1);\
  ENFORCE_STACK_ALIGN_1D (int16_t, iRS, 4 * 16, 16);\
  ENFORCE_STACK_ALIGN_1D (uint8_t, uiPred, 4 * 16 * kiStride, 16);\
  int16_t iRefRS[4 * 16];\
  uint8_t uiRefPred[4 * 16 * kiStride];\
  int8_t iNzc[6];\
  const uint32_t uiCPUFlags = WelsCPUFeatureDetect (NULL);\
  if ((flag) != 0 && (uiCPUFlags & (flag)) == 0)\
    return;\
  for (int32_t iRun = 0; iRun < 1000; iRun++) {\
    memset (iNzc, 0, sizeof (iNzc));\
    for (int i = 0; i < 4; i++)\
      for (int j = 0; j < 16; j++)\
        iNzc[i / 2 * 4 + i % 2] += !!(iRefRS[16 * i + j] = iRS[16 * i + j] = (rand() & iMask) - iOffset);\
    for (int i = 0; i < 8; i++)\
      for (int j = 0; j < 8; j++)\
        uiRefPred[i * kiStride + j] = uiPred[i * kiStride + j] = rand() & 255;\
    pred (uiPred, kiStride, iRS, iNzc);\
    IdctFourResAddPred_ref (uiRefPred, kiStride, iRefRS);\
    bool ok = true;\
    for (int i = 0; ok && i < 8; i++)\
      for (int j = 0; ok && j < 8; j++)\
        ok = (uiRefPred[i * kiStride + j] == uiPred[i * kiStride + j]);\
    EXPECT_TRUE (ok);\
  }\
}

139 GENERATE_IDCTRESADDPRED (IdctResAddPred_c, 0)
140 #if defined(X86_ASM)
141 GENERATE_IDCTRESADDPRED (IdctResAddPred_mmx, WELS_CPU_MMXEXT)
142 GENERATE_IDCTRESADDPRED (IdctResAddPred_sse2, WELS_CPU_SSE2)
143 #if defined(HAVE_AVX2)
144 GENERATE_IDCTRESADDPRED (IdctResAddPred_avx2, WELS_CPU_AVX2)
145 GENERATE_IDCTFOURRESADDPRED (IdctFourResAddPred_avx2, WELS_CPU_AVX2)
146 #endif
147 #endif
148 
149 #if defined(HAVE_NEON)
150 GENERATE_IDCTRESADDPRED (IdctResAddPred_neon, WELS_CPU_NEON)
151 #endif
152 
153 #if defined(HAVE_NEON_AARCH64)
154 GENERATE_IDCTRESADDPRED (IdctResAddPred_AArch64_neon, WELS_CPU_NEON)
155 #endif
156 
157 #if defined(HAVE_MMI)
158 GENERATE_IDCTRESADDPRED (IdctResAddPred_mmi, WELS_CPU_MMI)
159 #endif
160 
// GENERATE_SETNONZEROCOUNT(method, flag)
//
// Defines TEST(DecoderDecodeMbAux, method), which checks `method` against
// SetNonZeroCount_ref on three inputs of 24 counters each:
//   case 0 - rand() % 25 per entry (values 0..24),
//   case 1 - all entries 0,
//   case 2 - all entries 16.
//   method - function under test, called as method(int8_t*).
//   flag   - CPU feature bit required by `method`; 0 means "always run". When
//            the bit is not reported by WelsCPUFeatureDetect the test returns
//            early.
// ASSERT_EQ is used, so the first mismatching entry ends the test.
#define GENERATE_SETNONZEROCOUNT(method, flag) \
TEST(DecoderDecodeMbAux, method) \
{\
    const uint32_t uiCPUFlags = WelsCPUFeatureDetect (NULL);\
    if ((flag) != 0 && (uiCPUFlags & (flag)) == 0)\
        return;\
    int8_t iNonZeroCount[2][24];\
    for (int32_t iCase = 0; iCase < 3; iCase++) {\
        for (int32_t i = 0; i < 24; i++) {\
            const int iValue = (iCase == 0) ? (rand() % 25) : ((iCase == 1) ? 0 : 16);\
            iNonZeroCount[0][i] = iNonZeroCount[1][i] = static_cast<int8_t> (iValue);\
        }\
        method (iNonZeroCount[0]);\
        SetNonZeroCount_ref (iNonZeroCount[1]);\
        for (int32_t i = 0; i < 24; i++) {\
            ASSERT_EQ (iNonZeroCount[0][i], iNonZeroCount[1][i]);\
        }\
    }\
}

194 GENERATE_SETNONZEROCOUNT (WelsNonZeroCount_c, 0)
195 
196 #if defined(X86_ASM)
197 GENERATE_SETNONZEROCOUNT (WelsNonZeroCount_sse2, WELS_CPU_SSE2)
198 #endif
199 
200 #if defined(HAVE_NEON)
201 GENERATE_SETNONZEROCOUNT (WelsNonZeroCount_neon, WELS_CPU_NEON)
202 #endif
203 
204 #if defined(HAVE_NEON_AARCH64)
205 GENERATE_SETNONZEROCOUNT (WelsNonZeroCount_AArch64_neon, WELS_CPU_NEON)
206 #endif
207 
208 #if defined(HAVE_MSA)
209 GENERATE_SETNONZEROCOUNT (WelsNonZeroCount_msa, WELS_CPU_MSA)
210 #endif
211