1 #include <gtest/gtest.h>
2 #include "macros.h"
3 #include "decode_mb_aux.h"
4 #include "deblocking.h"
5 #include "cpu.h"
6 using namespace WelsDec;
7
8 namespace {
9
IdctResAddPred_ref(uint8_t * pPred,const int32_t kiStride,int16_t * pRs)10 void IdctResAddPred_ref (uint8_t* pPred, const int32_t kiStride, int16_t* pRs) {
11 int16_t iSrc[16];
12
13 uint8_t* pDst = pPred;
14 const int32_t kiStride2 = kiStride << 1;
15 const int32_t kiStride3 = kiStride + kiStride2;
16 int32_t i;
17
18 for (i = 0; i < 4; i++) {
19 const int32_t kiY = i << 2;
20 const int32_t kiT0 = pRs[kiY] + pRs[kiY + 2];
21 const int32_t kiT1 = pRs[kiY] - pRs[kiY + 2];
22 const int32_t kiT2 = (pRs[kiY + 1] >> 1) - pRs[kiY + 3];
23 const int32_t kiT3 = pRs[kiY + 1] + (pRs[kiY + 3] >> 1);
24
25 iSrc[kiY] = kiT0 + kiT3;
26 iSrc[kiY + 1] = kiT1 + kiT2;
27 iSrc[kiY + 2] = kiT1 - kiT2;
28 iSrc[kiY + 3] = kiT0 - kiT3;
29 }
30
31 for (i = 0; i < 4; i++) {
32 int32_t kT1 = iSrc[i] + iSrc[i + 8];
33 int32_t kT2 = iSrc[i + 4] + (iSrc[i + 12] >> 1);
34 int32_t kT3 = (32 + kT1 + kT2) >> 6;
35 int32_t kT4 = (32 + kT1 - kT2) >> 6;
36
37 pDst[i] = WelsClip1 (kT3 + pPred[i]);
38 pDst[i + kiStride3] = WelsClip1 (kT4 + pPred[i + kiStride3]);
39
40 kT1 = iSrc[i] - iSrc[i + 8];
41 kT2 = (iSrc[i + 4] >> 1) - iSrc[i + 12];
42 pDst[i + kiStride] = WelsClip1 (((32 + kT1 + kT2) >> 6) + pDst[i + kiStride]);
43 pDst[i + kiStride2] = WelsClip1 (((32 + kT1 - kT2) >> 6) + pDst[i + kiStride2]);
44 }
45 }
46
// Reference implementation: collapses each of the 24 entries of pNonZeroCount
// to 1 when it is non-zero and to 0 otherwise, in place.
void SetNonZeroCount_ref (int8_t* pNonZeroCount) {
  int8_t* const pEnd = pNonZeroCount + 24;

  for (int8_t* pCur = pNonZeroCount; pCur != pEnd; ++pCur) {
    *pCur = (*pCur != 0) ? 1 : 0;
  }
}
54
55 #if defined(X86_ASM)
56 #if defined(HAVE_AVX2)
IdctFourResAddPred_ref(uint8_t * pPred,int32_t iStride,int16_t * pRs)57 void IdctFourResAddPred_ref (uint8_t* pPred, int32_t iStride, int16_t* pRs) {
58 IdctResAddPred_ref (pPred + 0 * iStride + 0, iStride, pRs + 0 * 16);
59 IdctResAddPred_ref (pPred + 0 * iStride + 4, iStride, pRs + 1 * 16);
60 IdctResAddPred_ref (pPred + 4 * iStride + 0, iStride, pRs + 2 * 16);
61 IdctResAddPred_ref (pPred + 4 * iStride + 4, iStride, pRs + 3 * 16);
62 }
63 #endif
64 #endif
65
66 } // anon ns
67
// Defines TEST(DecoderDecodeMbAux, pred): feeds `pred` and IdctResAddPred_ref the same
// random 4x4 residuals and prediction pixels, 1000 times, and expects identical 4x4 output.
// `flag` is the CPU feature bit `pred` requires; when it is non-zero and not reported by
// WelsCPUFeatureDetect the test returns without checking anything.
// NOTE: only block comments may be used inside the macro body because of the line continuations.
#define GENERATE_IDCTRESADDPRED(pred, flag) \
TEST(DecoderDecodeMbAux, pred) {\
  const int32_t kiStride = 32;\
  const int iBits = 12;\
  const int iMask = (1 << iBits) - 1;\
  const int iOffset = 1 << (iBits - 1);\
  ENFORCE_STACK_ALIGN_1D (int16_t, iRS, 16, 16);\
  ENFORCE_STACK_ALIGN_1D (uint8_t, uiPred, 16 * kiStride, 16);\
  int16_t iRefRS[16];\
  uint8_t uiRefPred[16*kiStride];\
  const uint32_t uiCPUFlags = WelsCPUFeatureDetect(NULL); \
  if ((uiCPUFlags & flag) == 0 && flag != 0) \
    return; \
  for (int32_t iRun = 0; iRun < 1000; ++iRun) {\
    /* Residuals in [-iOffset, iOffset - 1], mirrored into both buffers. */\
    for (int k = 0; k < 16; ++k) {\
      const int16_t iCoef = (rand() & iMask) - iOffset;\
      iRS[k] = iCoef;\
      iRefRS[k] = iCoef;\
    }\
    /* Prediction pixels for the 4x4 block, mirrored into both buffers. */\
    for (int iRow = 0; iRow < 4; ++iRow) {\
      for (int iCol = 0; iCol < 4; ++iCol) {\
        const uint8_t uiPix = rand() & 255;\
        uiPred[iRow * kiStride + iCol] = uiPix;\
        uiRefPred[iRow * kiStride + iCol] = uiPix;\
      }\
    }\
    pred(uiPred, kiStride, iRS);\
    IdctResAddPred_ref(uiRefPred, kiStride, iRefRS);\
    /* Stop scanning at the first mismatching pixel. */\
    bool ok = true;\
    for (int iRow = 0; ok && iRow < 4; ++iRow) {\
      for (int iCol = 0; ok && iCol < 4; ++iCol) {\
        ok = (uiRefPred[iRow * kiStride + iCol] == uiPred[iRow * kiStride + iCol]);\
      }\
    }\
    EXPECT_EQ(ok, true);\
  }\
}
102
// Defines TEST(DecoderDecodeMbAux, pred) for a four-block (8x8) routine: feeds `pred` and
// IdctFourResAddPred_ref the same random residuals (4 blocks x 16 coefficients) and 8x8
// prediction pixels, 1000 times, and expects identical 8x8 output.
// `pred` additionally receives iNzc, the per-block count of non-zero coefficients of the
// CURRENT run, stored at indices 0, 1, 4, 5 (i / 2 * 4 + i % 2 for block i).
// `flag` is the CPU feature bit `pred` requires; when it is non-zero and not reported by
// WelsCPUFeatureDetect the test returns without checking anything.
// NOTE: only block comments may be used inside the macro body because of the line continuations.
#define GENERATE_IDCTFOURRESADDPRED(pred, flag) \
TEST(DecoderDecodeMbAux, pred) {\
  const int32_t kiStride = 32;\
  const int iBits = 12;\
  const int iMask = (1 << iBits) - 1;\
  const int iOffset = 1 << (iBits - 1);\
  ENFORCE_STACK_ALIGN_1D (int16_t, iRS, 4 * 16, 16);\
  ENFORCE_STACK_ALIGN_1D (uint8_t, uiPred, 4 * 16 * kiStride, 16);\
  int16_t iRefRS[4 * 16];\
  uint8_t uiRefPred[4 * 16 * kiStride];\
  int32_t iRunTimes = 1000;\
  uint32_t uiCPUFlags = WelsCPUFeatureDetect(NULL); \
  if ((uiCPUFlags & flag) == 0 && flag != 0) \
    return; \
  while (iRunTimes--) {\
    /* Re-zeroed every run: previously the counts accumulated over all 1000 runs and */\
    /* wrapped in int8_t, so they no longer matched the residuals handed to pred.    */\
    int8_t iNzc[6] = { 0 };\
    for (int i = 0; i < 4; i++)\
      for (int j = 0; j < 16; j++)\
        iNzc[i / 2 * 4 + i % 2] += !!(iRefRS[16 * i + j] = iRS[16 * i + j] = (rand() & iMask) - iOffset);\
    for (int i = 0; i < 8; i++)\
      for (int j = 0; j < 8; j++)\
        uiRefPred[i * kiStride + j] = uiPred[i * kiStride + j] = rand() & 255;\
    pred (uiPred, kiStride, iRS, iNzc);\
    IdctFourResAddPred_ref (uiRefPred, kiStride, iRefRS);\
    /* Stop scanning at the first mismatching pixel. */\
    bool ok = true;\
    for (int i = 0; ok && i < 8; i++)\
      for (int j = 0; ok && j < 8; j++)\
        ok = (uiRefPred[i * kiStride + j] == uiPred[i * kiStride + j]);\
    EXPECT_EQ(ok, true);\
  }\
}
138
// Test instantiations for the IDCT-residual-add-prediction routines.
// Each line expands to one TEST in suite DecoderDecodeMbAux named after the routine.
// Flag 0 (plain C) is never skipped by the generated CPU-feature check.
139 GENERATE_IDCTRESADDPRED (IdctResAddPred_c, 0)
// x86 assembly variants; each generated test returns early when the CPU does not report the given flag.
140 #if defined(X86_ASM)
141 GENERATE_IDCTRESADDPRED (IdctResAddPred_mmx, WELS_CPU_MMXEXT)
142 GENERATE_IDCTRESADDPRED (IdctResAddPred_sse2, WELS_CPU_SSE2)
143 #if defined(HAVE_AVX2)
144 GENERATE_IDCTRESADDPRED (IdctResAddPred_avx2, WELS_CPU_AVX2)
// Four-block (8x8) AVX2 routine, checked against IdctFourResAddPred_ref.
145 GENERATE_IDCTFOURRESADDPRED (IdctFourResAddPred_avx2, WELS_CPU_AVX2)
146 #endif
147 #endif
148
// 32-bit ARM NEON variant.
149 #if defined(HAVE_NEON)
150 GENERATE_IDCTRESADDPRED (IdctResAddPred_neon, WELS_CPU_NEON)
151 #endif
152
// AArch64 NEON variant.
153 #if defined(HAVE_NEON_AARCH64)
154 GENERATE_IDCTRESADDPRED (IdctResAddPred_AArch64_neon, WELS_CPU_NEON)
155 #endif
156
// MMI variant.
157 #if defined(HAVE_MMI)
158 GENERATE_IDCTRESADDPRED (IdctResAddPred_mmi, WELS_CPU_MMI)
159 #endif
160
// Defines TEST(DecoderDecodeMbAux, method): runs `method` and SetNonZeroCount_ref on
// identical 24-entry arrays and asserts that every entry matches, for three inputs:
// random values in [0, 24], all zeros, and all 16.
// `flag` is the CPU feature bit `method` requires; when it is non-zero and not reported
// by WelsCPUFeatureDetect the test returns without checking anything.
// NOTE: only block comments may be used inside the macro body because of the line continuations.
#define GENERATE_SETNONZEROCOUNT(method, flag) \
TEST(DecoderDecodeMbAux, method) \
{\
  uint32_t uiCPUFlags = WelsCPUFeatureDetect(NULL); \
  if ((uiCPUFlags & flag) == 0 && flag != 0) \
    return; \
  int8_t iNonZeroCount[2][24];\
  /* Phase 0: random counts; phase 1: all zeros; phase 2: all 16. */\
  for (int32_t iPhase = 0; iPhase < 3; ++iPhase) {\
    for (int32_t i = 0; i < 24; ++i) {\
      const int8_t iFill = (iPhase == 0) ? (rand() % 25) : ((iPhase == 1) ? 0 : 16);\
      iNonZeroCount[1][i] = iFill;\
      iNonZeroCount[0][i] = iFill;\
    }\
    method(iNonZeroCount[0]);\
    SetNonZeroCount_ref(iNonZeroCount[1]);\
    for (int32_t i = 0; i < 24; ++i) {\
      ASSERT_EQ (iNonZeroCount[0][i], iNonZeroCount[1][i]);\
    }\
  }\
}
193
// Test instantiations for the non-zero-count normalisation routines.
// Each line expands to one TEST in suite DecoderDecodeMbAux named after the routine.
// Flag 0 (plain C) is never skipped by the generated CPU-feature check.
194 GENERATE_SETNONZEROCOUNT (WelsNonZeroCount_c, 0)
195
// x86 SSE2 variant; the generated test returns early when the CPU does not report the flag.
196 #if defined(X86_ASM)
197 GENERATE_SETNONZEROCOUNT (WelsNonZeroCount_sse2, WELS_CPU_SSE2)
198 #endif
199
// 32-bit ARM NEON variant.
200 #if defined(HAVE_NEON)
201 GENERATE_SETNONZEROCOUNT (WelsNonZeroCount_neon, WELS_CPU_NEON)
202 #endif
203
// AArch64 NEON variant.
204 #if defined(HAVE_NEON_AARCH64)
205 GENERATE_SETNONZEROCOUNT (WelsNonZeroCount_AArch64_neon, WELS_CPU_NEON)
206 #endif
207
// MSA variant.
208 #if defined(HAVE_MSA)
209 GENERATE_SETNONZEROCOUNT (WelsNonZeroCount_msa, WELS_CPU_MSA)
210 #endif
211