1 #include <gtest/gtest.h>
2 #include "decode_mb_aux.h"
3 #include "wels_common_basis.h"
4 #include "macros.h"
5 #include "cpu.h"
6
7 using namespace WelsEnc;
8
9
// Verifies WelsIHadamard4x4Dc against a two-pass reference butterfly that is
// computed here from the same random input.
TEST (DecodeMbAuxTest, TestIhdm_4x4_dc) {
  // Signs applied to the four inputs of each butterfly output.
  static const int kiSign[4][4] = {
    { 1,  1,  1,  1},
    { 1,  1, -1, -1},
    { 1, -1, -1,  1},
    { 1, -1,  1, -1}
  };
  short W[16], T[16], Y[16];
  for (int i = 0; i < 16; i++)
    W[i] = rand() % 256 + 1;

  // First pass: T[4 * k + c] combines the inputs W[c], W[4 + c], W[8 + c], W[12 + c].
  for (int k = 0; k < 4; k++) {
    for (int c = 0; c < 4; c++) {
      T[4 * k + c] = kiSign[k][0] * W[c] + kiSign[k][1] * W[4 + c]
                     + kiSign[k][2] * W[8 + c] + kiSign[k][3] * W[12 + c];
    }
  }

  // Second pass: Y[4 * r + k] combines the four consecutive values T[4 * r .. 4 * r + 3].
  for (int r = 0; r < 4; r++) {
    const short* pT = &T[4 * r];
    for (int k = 0; k < 4; k++) {
      Y[4 * r + k] = kiSign[k][0] * pT[0] + kiSign[k][1] * pT[1]
                     + kiSign[k][2] * pT[2] + kiSign[k][3] * pT[3];
    }
  }

  // The function under test transforms W in place.
  WelsIHadamard4x4Dc (W);
  for (int i = 0; i < 16; i++)
    EXPECT_EQ (Y[i], W[i]);
}
59
// Verifies WelsDequantLumaDc4x4 for qp = 0..11 against the expected value
// (coef * g_kuiDequantCoeff[qp % 6][0] + rounding) >> shift, computed inline.
TEST (DecodeMbAuxTest, TestDequant_4x4_luma_dc) {
  short T[16], W[16];

  for (int qp = 0; qp < 12; qp++) {
    // Shift is 2 for qp < 6 and 1 for qp in 6..11; rounding adds half of the divisor.
    const int iShift = 2 - qp / 6;
    const int iRound = 1 << (iShift - 1);

    // T keeps the untouched input, W is handed to the function under test.
    for (int i = 0; i < 16; i++) {
      const short iCoef = rand() % 256 + 1;
      T[i] = iCoef;
      W[i] = iCoef;
    }

    WelsDequantLumaDc4x4 (W, qp);

    for (int i = 0; i < 16; i++) {
      T[i] = (T[i] * g_kuiDequantCoeff[qp % 6][0] + iRound) >> iShift;
      EXPECT_EQ (T[i], W[i]);
    }
  }
}
75
// Verifies WelsDequantIHadamard4x4_c against a two-pass reference butterfly
// whose outputs are scaled by the same random factor mf.
TEST (DecodeMbAuxTest, TestDequant_ihdm_4x4_c) {
  // Signs applied to the four inputs of each butterfly output.
  static const int kiSign[4][4] = {
    { 1,  1,  1,  1},
    { 1,  1, -1, -1},
    { 1, -1, -1,  1},
    { 1, -1,  1, -1}
  };
  short W[16], T[16], Y[16];
  // mf is drawn before the coefficients; the order of rand() calls matters for reproducibility.
  const unsigned short mf = rand() % 16 + 1;
  for (int i = 0; i < 16; i++)
    W[i] = rand() % 256 + 1;

  // First pass: T[4 * k + c] combines the inputs W[c], W[4 + c], W[8 + c], W[12 + c].
  for (int k = 0; k < 4; k++) {
    for (int c = 0; c < 4; c++) {
      T[4 * k + c] = kiSign[k][0] * W[c] + kiSign[k][1] * W[4 + c]
                     + kiSign[k][2] * W[8 + c] + kiSign[k][3] * W[12 + c];
    }
  }

  // Second pass: combine four consecutive T values, then scale by mf.
  // The product is stored into a short, exactly as the expected values are compared.
  for (int r = 0; r < 4; r++) {
    const short* pT = &T[4 * r];
    for (int k = 0; k < 4; k++) {
      Y[4 * r + k] = (kiSign[k][0] * pT[0] + kiSign[k][1] * pT[1]
                      + kiSign[k][2] * pT[2] + kiSign[k][3] * pT[3]) * mf;
    }
  }

  // The function under test transforms W in place.
  WelsDequantIHadamard4x4_c (W, mf);
  for (int i = 0; i < 16; i++)
    EXPECT_EQ (Y[i], W[i]);
}
126
// Verifies WelsDequant4x4_c: each of the 16 coefficients is expected to be
// multiplied by mf[i % 8].
TEST (DecodeMbAuxTest, TestDequant_4x4_c) {
  short W[16], T[16];
  unsigned short mf[16];   // only the first 8 entries are filled and checked

  // Random coefficients; T keeps a copy of the input handed over in W.
  for (int i = 0; i < 16; i++) {
    const short iCoef = rand() % 256 + 1;
    W[i] = iCoef;
    T[i] = iCoef;
  }

  // Random scaling factors in 1..16.
  for (int k = 0; k < 8; k++) {
    mf[k] = rand() % 16 + 1;
  }

  WelsDequant4x4_c (W, mf);

  for (int i = 0; i < 16; i++) {
    EXPECT_EQ (T[i]*mf[i % 8], W[i]);
  }
}
// Verifies WelsDequantFour4x4_c: each of the 64 coefficients is expected to
// be multiplied by mf[i % 8].
TEST (DecodeMbAuxTest, TestDequant_4_4x4_c) {
  short W[64], T[64];
  unsigned short mf[16];   // only the first 8 entries are filled and checked

  // Random coefficients; T keeps a copy of the input handed over in W.
  for (int i = 0; i < 64; i++) {
    const short iCoef = rand() % 256 + 1;
    W[i] = iCoef;
    T[i] = iCoef;
  }

  // Random scaling factors in 1..16.
  for (int k = 0; k < 8; k++) {
    mf[k] = rand() % 16 + 1;
  }

  WelsDequantFour4x4_c (W, mf);

  for (int i = 0; i < 64; i++) {
    EXPECT_EQ (T[i]*mf[i % 8], W[i]);
  }
}
// Reference 2x2 butterfly with scaling, operating in place on pDct[0..3].
// Each output is ((sum or difference of the four inputs) * iMF) >> 1.
// The first-stage sums/differences are deliberately stored as int16_t, so
// they wrap at 16 bits before the multiplication.
void WelsDequantHadamard2x2DcAnchor (int16_t* pDct, int16_t iMF) {
  // First stage, truncated to 16 bits.
  const int16_t iEvenSum  = static_cast<int16_t> (pDct[0] + pDct[2]);
  const int16_t iEvenDiff = static_cast<int16_t> (pDct[0] - pDct[2]);
  const int16_t iOddSum   = static_cast<int16_t> (pDct[1] + pDct[3]);
  const int16_t iOddDiff  = static_cast<int16_t> (pDct[1] - pDct[3]);

  // Second stage: combine, scale in int precision, halve, store back as 16 bits.
  const int iScale = iMF;
  pDct[0] = static_cast<int16_t> (((iEvenSum + iOddSum) * iScale) >> 1);
  pDct[1] = static_cast<int16_t> (((iEvenSum - iOddSum) * iScale) >> 1);
  pDct[2] = static_cast<int16_t> (((iEvenDiff + iOddDiff) * iScale) >> 1);
  pDct[3] = static_cast<int16_t> (((iEvenDiff - iOddDiff) * iScale) >> 1);
}
// Verifies WelsDequantIHadamard2x2Dc against WelsDequantHadamard2x2DcAnchor
// on identical random input and a random scaling factor in 0..127.
TEST (DecodeMbAuxTest, WelsDequantIHadamard2x2Dc) {
  int16_t iDct[4], iRefDct[4];
  int16_t iMF = rand() & 127;

  // Same random coefficient goes to both the reference and the tested buffer.
  for (int k = 0; k < 4; k++) {
    const int16_t iCoef = (rand() & 65535) - 32768;
    iRefDct[k] = iCoef;
    iDct[k] = iCoef;
  }

  WelsDequantHadamard2x2DcAnchor (iRefDct, iMF);
  WelsDequantIHadamard2x2Dc (iDct, iMF);

  // A single expectation: all four outputs must agree.
  bool ok = true;
  for (int k = 0; ok && k < 4; k++)
    ok = (iDct[k] == iRefDct[k]);
  EXPECT_TRUE (ok);
}
// Row stride, in bytes, of the pixel buffers used by the tests in this file.
#define FDEC_STRIDE 32

// Reference 4x4 inverse transform that adds the result onto the pixels at
// p_dst (4 rows, FDEC_STRIDE bytes apart) and clips each via WelsClip1.
// clip_t is the integer type the final sum is converted to before the
// ">> 6"; the tests in this file use int32_t for the C implementation and
// int16_t for the SIMD implementations.
template<typename clip_t>
void WelsIDctT4Anchor (uint8_t* p_dst, int16_t dct[16]) {
  int16_t iTmp[16];

  // First pass: transform each group of four consecutive coefficients.
  for (int iRow = 0; iRow < 4; iRow++) {
    const int16_t* pIn = dct + iRow * 4;
    int16_t* pOut = iTmp + iRow * 4;
    pOut[0] = pIn[0] + pIn[1] + pIn[2] + (pIn[3] >> 1);
    pOut[1] = pIn[0] + (pIn[1] >> 1) - pIn[2] - pIn[3];
    pOut[2] = pIn[0] - (pIn[1] >> 1) - pIn[2] + pIn[3];
    pOut[3] = pIn[0] - pIn[1] + pIn[2] - (pIn[3] >> 1);
  }

  // Second pass: combine values four apart, round (+32, >> 6), add to the
  // existing pixel and clip.
  for (int iCol = 0; iCol < 4; iCol++) {
    const int16_t iA = iTmp[iCol];
    const int16_t iB = iTmp[4 + iCol];
    const int16_t iC = iTmp[8 + iCol];
    const int16_t iD = iTmp[12 + iCol];
    uint8_t* pPix = p_dst + iCol;
    uint8_t uiCur;

    uiCur = pPix[0];
    pPix[0] = WelsClip1 (uiCur + (static_cast<clip_t> (iA + iB + iC + (iD >> 1) + 32) >> 6));

    uiCur = pPix[FDEC_STRIDE];
    pPix[FDEC_STRIDE] = WelsClip1 (uiCur + (static_cast<clip_t> (iA + (iB >> 1) - iC - iD + 32) >> 6));

    uiCur = pPix[2 * FDEC_STRIDE];
    pPix[2 * FDEC_STRIDE] = WelsClip1 (uiCur + (static_cast<clip_t> (iA - (iB >> 1) - iC + iD + 32) >> 6));

    uiCur = pPix[3 * FDEC_STRIDE];
    pPix[3 * FDEC_STRIDE] = WelsClip1 (uiCur + (static_cast<clip_t> (iA - iB + iC - (iD >> 1) + 32) >> 6));
  }
}
206 template<typename clip_t>
TestIDctT4Rec(PIDctFunc func)207 void TestIDctT4Rec (PIDctFunc func) {
208 int16_t iRefDct[16];
209 uint8_t iRefDst[16 * FDEC_STRIDE];
210 ENFORCE_STACK_ALIGN_1D (int16_t, iDct, 16, 16);
211 ENFORCE_STACK_ALIGN_1D (uint8_t, iPred, 16 * FDEC_STRIDE, 16);
212 ENFORCE_STACK_ALIGN_1D (uint8_t, iRec, 16 * FDEC_STRIDE, 16);
213 for (int i = 0; i < 4; i++) {
214 for (int j = 0; j < 4; j++) {
215 iRefDct[i * 4 + j] = iDct[i * 4 + j] = (rand() & 65535) - 32768;
216 iPred[i * FDEC_STRIDE + j] = iRefDst[i * FDEC_STRIDE + j] = rand() & 255;
217 }
218 }
219 WelsIDctT4Anchor<clip_t> (iRefDst, iRefDct);
220 func (iRec, FDEC_STRIDE, iPred, FDEC_STRIDE, iDct);
221 int ok = -1;
222 for (int i = 0; i < 4; i++) {
223 for (int j = 0; j < 4; j++) {
224 if (iRec[i * FDEC_STRIDE + j] != iRefDst[i * FDEC_STRIDE + j]) {
225 ok = i * 4 + j;
226 break;
227 }
228 }
229 }
230 EXPECT_EQ (ok, -1);
231 }
// Runs the shared 4x4 IDCT reconstruction check on WelsIDctT4Rec_c, with the
// reference anchor using int32_t as its intermediate type.
TEST (DecodeMbAuxTest, WelsIDctT4Rec_c) {
  const PIDctFunc pfnIdct = WelsIDctT4Rec_c;
  TestIDctT4Rec<int32_t> (pfnIdct);
}
235 #if defined(X86_ASM)
// Runs the shared 4x4 IDCT reconstruction check on WelsIDctT4Rec_mmx, with
// the reference anchor using int16_t as its intermediate type.
TEST (DecodeMbAuxTest, WelsIDctT4Rec_mmx) {
  const PIDctFunc pfnIdct = WelsIDctT4Rec_mmx;
  TestIDctT4Rec<int16_t> (pfnIdct);
}
// Runs the shared 4x4 IDCT reconstruction check on WelsIDctT4Rec_sse2, with
// the reference anchor using int16_t as its intermediate type.
TEST (DecodeMbAuxTest, WelsIDctT4Rec_sse2) {
  const PIDctFunc pfnIdct = WelsIDctT4Rec_sse2;
  TestIDctT4Rec<int16_t> (pfnIdct);
}
242 #if defined(HAVE_AVX2)
// Runs the shared 4x4 IDCT reconstruction check on WelsIDctT4Rec_avx2, but
// only when the CPU feature flags report AVX2; otherwise the test is a no-op.
TEST (DecodeMbAuxTest, WelsIDctT4Rec_avx2) {
  const bool bHasAvx2 = (WelsCPUFeatureDetect (0) & WELS_CPU_AVX2) != 0;
  if (!bHasAvx2)
    return;
  TestIDctT4Rec<int16_t> (WelsIDctT4Rec_avx2);
}
247 #endif
248 #endif
249 #if defined(HAVE_MMI)
// Runs the shared 4x4 IDCT reconstruction check on WelsIDctT4Rec_mmi, with
// the reference anchor using int16_t as its intermediate type.
TEST (DecodeMbAuxTest, WelsIDctT4Rec_mmi) {
  const PIDctFunc pfnIdct = WelsIDctT4Rec_mmi;
  TestIDctT4Rec<int16_t> (pfnIdct);
}
253 #endif
254 template<typename clip_t>
WelsIDctT8Anchor(uint8_t * p_dst,int16_t dct[4][16])255 void WelsIDctT8Anchor (uint8_t* p_dst, int16_t dct[4][16]) {
256 WelsIDctT4Anchor<clip_t> (&p_dst[0], dct[0]);
257 WelsIDctT4Anchor<clip_t> (&p_dst[4], dct[1]);
258 WelsIDctT4Anchor<clip_t> (&p_dst[4 * FDEC_STRIDE + 0], dct[2]);
259 WelsIDctT4Anchor<clip_t> (&p_dst[4 * FDEC_STRIDE + 4], dct[3]);
260 }
261 template<typename clip_t>
TestIDctFourT4Rec(PIDctFunc func)262 void TestIDctFourT4Rec (PIDctFunc func) {
263 int16_t iRefDct[4][16];
264 uint8_t iRefDst[16 * FDEC_STRIDE];
265 ENFORCE_STACK_ALIGN_1D (int16_t, iDct, 64, 16);
266 ENFORCE_STACK_ALIGN_1D (uint8_t, iPred, 16 * FDEC_STRIDE, 16);
267 ENFORCE_STACK_ALIGN_1D (uint8_t, iRec, 16 * FDEC_STRIDE, 16);
268 for (int k = 0; k < 4; k++)
269 for (int i = 0; i < 16; i++)
270 iRefDct[k][i] = iDct[k * 16 + i] = (rand() & 65535) - 32768;
271
272 for (int i = 0; i < 8; i++)
273 for (int j = 0; j < 8; j++)
274 iPred[i * FDEC_STRIDE + j] = iRefDst[i * FDEC_STRIDE + j] = rand() & 255;
275
276 WelsIDctT8Anchor<clip_t> (iRefDst, iRefDct);
277 func (iRec, FDEC_STRIDE, iPred, FDEC_STRIDE, iDct);
278 int ok = -1;
279 for (int i = 0; i < 8; i++) {
280 for (int j = 0; j < 8; j++) {
281 if (iRec[i * FDEC_STRIDE + j] != iRefDst[i * FDEC_STRIDE + j]) {
282 ok = i * 8 + j;
283 break;
284 }
285 }
286 }
287 EXPECT_EQ (ok, -1);
288 }
// Runs the shared four-block IDCT reconstruction check on
// WelsIDctFourT4Rec_c, with the reference anchor using int32_t as its
// intermediate type.
TEST (DecodeMbAuxTest, WelsIDctFourT4Rec_c) {
  const PIDctFunc pfnIdct = WelsIDctFourT4Rec_c;
  TestIDctFourT4Rec<int32_t> (pfnIdct);
}
WelsIDctRecI16x4DcAnchor(uint8_t * p_dst,int16_t dct[4])292 void WelsIDctRecI16x4DcAnchor (uint8_t* p_dst, int16_t dct[4]) {
293 for (int i = 0; i < 4; i++, p_dst += FDEC_STRIDE) {
294 p_dst[0] = WelsClip1 (p_dst[0] + ((dct[0] + 32) >> 6));
295 p_dst[1] = WelsClip1 (p_dst[1] + ((dct[0] + 32) >> 6));
296 p_dst[2] = WelsClip1 (p_dst[2] + ((dct[0] + 32) >> 6));
297 p_dst[3] = WelsClip1 (p_dst[3] + ((dct[0] + 32) >> 6));
298
299 p_dst[4] = WelsClip1 (p_dst[4] + ((dct[1] + 32) >> 6));
300 p_dst[5] = WelsClip1 (p_dst[5] + ((dct[1] + 32) >> 6));
301 p_dst[6] = WelsClip1 (p_dst[6] + ((dct[1] + 32) >> 6));
302 p_dst[7] = WelsClip1 (p_dst[7] + ((dct[1] + 32) >> 6));
303
304 p_dst[8] = WelsClip1 (p_dst[8] + ((dct[2] + 32) >> 6));
305 p_dst[9] = WelsClip1 (p_dst[9] + ((dct[2] + 32) >> 6));
306 p_dst[10] = WelsClip1 (p_dst[10] + ((dct[2] + 32) >> 6));
307 p_dst[11] = WelsClip1 (p_dst[11] + ((dct[2] + 32) >> 6));
308
309 p_dst[12] = WelsClip1 (p_dst[12] + ((dct[3] + 32) >> 6));
310 p_dst[13] = WelsClip1 (p_dst[13] + ((dct[3] + 32) >> 6));
311 p_dst[14] = WelsClip1 (p_dst[14] + ((dct[3] + 32) >> 6));
312 p_dst[15] = WelsClip1 (p_dst[15] + ((dct[3] + 32) >> 6));
313 }
314 }
WelsIDctRecI16x16DcAnchor(uint8_t * p_dst,int16_t dct[4][4])315 void WelsIDctRecI16x16DcAnchor (uint8_t* p_dst, int16_t dct[4][4]) {
316 for (int i = 0; i < 4; i++, p_dst += 4 * FDEC_STRIDE)
317 WelsIDctRecI16x4DcAnchor (&p_dst[0], dct[i]);
318 }
319
// Verifies WelsIDctRecI16x16Dc_c against WelsIDctRecI16x16DcAnchor on a
// random 16x16 prediction block and 16 random DC coefficients.
TEST (DecodeMbAuxTest, WelsIDctRecI16x16Dc_c) {
  uint8_t iRefDst[16 * FDEC_STRIDE];
  int16_t iRefDct[4][4];
  ENFORCE_STACK_ALIGN_1D (int16_t, iDct, 16, 16);
  ENFORCE_STACK_ALIGN_1D (uint8_t, iPred, 16 * FDEC_STRIDE, 16);
  ENFORCE_STACK_ALIGN_1D (uint8_t, iRec, 16 * FDEC_STRIDE, 16);
  // The reference works in place, so it starts from a copy of the prediction.
  for (int i = 0; i < 16; i++)
    for (int j = 0; j < 16; j++)
      iRefDst[i * FDEC_STRIDE + j] = iPred[i * FDEC_STRIDE + j] = rand() & 255;

  for (int i = 0; i < 4; i++)
    for (int j = 0; j < 4; j++)
      iRefDct[i][j] = iDct[i * 4 + j] = (rand() & 65535) - 32768;
  WelsIDctRecI16x16DcAnchor (iRefDst, iRefDct);
  WelsIDctRecI16x16Dc_c (iRec, FDEC_STRIDE, iPred, FDEC_STRIDE, iDct);

  // Locate the FIRST differing pixel. The outer loop also stops once a
  // mismatch is found; a bare inner "break" would let later rows overwrite
  // the index.
  int iFirstMismatch = -1;
  for (int i = 0; i < 16 && iFirstMismatch < 0; i++) {
    for (int j = 0; j < 16; j++) {
      if (iRec[i * FDEC_STRIDE + j] != iRefDst[i * FDEC_STRIDE + j]) {
        iFirstMismatch = i * 16 + j;
        break;
      }
    }
  }
  EXPECT_EQ (iFirstMismatch, -1) << "index (row * 16 + col) of first mismatching pixel";
}
346 #if defined(X86_ASM)
// Runs the shared four-block IDCT reconstruction check on
// WelsIDctFourT4Rec_sse2, with the reference anchor using int16_t as its
// intermediate type.
TEST (DecodeMbAuxTest, WelsIDctFourT4Rec_sse2) {
  const PIDctFunc pfnIdct = WelsIDctFourT4Rec_sse2;
  TestIDctFourT4Rec<int16_t> (pfnIdct);
}
350 #if defined(HAVE_AVX2)
// Runs the shared four-block IDCT reconstruction check on
// WelsIDctFourT4Rec_avx2, but only when the CPU feature flags report AVX2;
// otherwise the test is a no-op.
TEST (DecodeMbAuxTest, WelsIDctFourT4Rec_avx2) {
  const bool bHasAvx2 = (WelsCPUFeatureDetect (0) & WELS_CPU_AVX2) != 0;
  if (!bHasAvx2)
    return;
  TestIDctFourT4Rec<int16_t> (WelsIDctFourT4Rec_avx2);
}
355 #endif
// Verifies WelsIDctRecI16x16Dc_sse2 against WelsIDctRecI16x16DcAnchor on a
// random 16x16 prediction block and 16 random DC coefficients.
// The body runs only when the CPU feature flags report SSE2.
TEST (DecodeMbAuxTest, WelsIDctRecI16x16Dc_sse2) {
  int32_t iCpuCores = 0;
  uint32_t uiCpuFeatureFlag = WelsCPUFeatureDetect (&iCpuCores);

  if (uiCpuFeatureFlag & WELS_CPU_SSE2) {
    uint8_t iRefDst[16 * FDEC_STRIDE];
    int16_t iRefDct[4][4];
    ENFORCE_STACK_ALIGN_1D (int16_t, iDct, 16, 16);
    ENFORCE_STACK_ALIGN_1D (uint8_t, iPred, 16 * FDEC_STRIDE, 16);
    ENFORCE_STACK_ALIGN_1D (uint8_t, iRec, 16 * FDEC_STRIDE, 16);
    // The reference works in place, so it starts from a copy of the prediction.
    for (int i = 0; i < 16; i++)
      for (int j = 0; j < 16; j++)
        iRefDst[i * FDEC_STRIDE + j] = iPred[i * FDEC_STRIDE + j] = rand() & 255;
    // Coefficients are limited to [-2^14, 2^14 - 1]: (2^15 + 32) would
    // overflow in the SSE2 implementation.
    for (int i = 0; i < 4; i++)
      for (int j = 0; j < 4; j++)
        iRefDct[i][j] = iDct[i * 4 + j] = (rand() & ((1 << 15) - 1)) - (1 << 14);
    WelsIDctRecI16x16DcAnchor (iRefDst, iRefDct);
    WelsIDctRecI16x16Dc_sse2 (iRec, FDEC_STRIDE, iPred, FDEC_STRIDE, iDct);

    // Locate the FIRST differing pixel. The outer loop also stops once a
    // mismatch is found; a bare inner "break" would let later rows overwrite
    // the index.
    int iFirstMismatch = -1;
    for (int i = 0; i < 16 && iFirstMismatch < 0; i++) {
      for (int j = 0; j < 16; j++) {
        if (iRec[i * FDEC_STRIDE + j] != iRefDst[i * FDEC_STRIDE + j]) {
          iFirstMismatch = i * 16 + j;
          break;
        }
      }
    }
    EXPECT_EQ (iFirstMismatch, -1) << "index (row * 16 + col) of first mismatching pixel";
  }
}
387 #endif
388 #if defined(HAVE_MMI)
// Runs the shared four-block IDCT reconstruction check on
// WelsIDctFourT4Rec_mmi, with the reference anchor using int16_t as its
// intermediate type.
TEST (DecodeMbAuxTest, WelsIDctFourT4Rec_mmi) {
  const PIDctFunc pfnIdct = WelsIDctFourT4Rec_mmi;
  TestIDctFourT4Rec<int16_t> (pfnIdct);
}
// Verifies WelsIDctRecI16x16Dc_mmi against WelsIDctRecI16x16DcAnchor on a
// random 16x16 prediction block and 16 random DC coefficients.
// The body runs only when the CPU feature flags report MMI.
TEST (DecodeMbAuxTest, WelsIDctRecI16x16Dc_mmi) {
  int32_t iCpuCores = 0;
  uint32_t uiCpuFeatureFlag = WelsCPUFeatureDetect (&iCpuCores);

  if (uiCpuFeatureFlag & WELS_CPU_MMI) {
    uint8_t iRefDst[16 * FDEC_STRIDE];
    int16_t iRefDct[4][4];
    ENFORCE_STACK_ALIGN_1D (int16_t, iDct, 16, 16);
    ENFORCE_STACK_ALIGN_1D (uint8_t, iPred, 16 * FDEC_STRIDE, 16);
    ENFORCE_STACK_ALIGN_1D (uint8_t, iRec, 16 * FDEC_STRIDE, 16);
    // The reference works in place, so it starts from a copy of the prediction.
    for (int i = 0; i < 16; i++)
      for (int j = 0; j < 16; j++)
        iRefDst[i * FDEC_STRIDE + j] = iPred[i * FDEC_STRIDE + j] = rand() & 255;
    // Coefficients are limited to [-2^14, 2^14 - 1], the same range the SSE2
    // variant of this test uses to avoid (2^15 + 32) overflowing.
    // NOTE(review): presumably the MMI kernel has the same 16-bit limit - confirm.
    for (int i = 0; i < 4; i++)
      for (int j = 0; j < 4; j++)
        iRefDct[i][j] = iDct[i * 4 + j] = (rand() & ((1 << 15) - 1)) - (1 << 14);
    WelsIDctRecI16x16DcAnchor (iRefDst, iRefDct);
    WelsIDctRecI16x16Dc_mmi (iRec, FDEC_STRIDE, iPred, FDEC_STRIDE, iDct);

    // Locate the FIRST differing pixel. The outer loop also stops once a
    // mismatch is found; a bare inner "break" would let later rows overwrite
    // the index.
    int iFirstMismatch = -1;
    for (int i = 0; i < 16 && iFirstMismatch < 0; i++) {
      for (int j = 0; j < 16; j++) {
        if (iRec[i * FDEC_STRIDE + j] != iRefDst[i * FDEC_STRIDE + j]) {
          iFirstMismatch = i * 16 + j;
          break;
        }
      }
    }
    EXPECT_EQ (iFirstMismatch, -1) << "index (row * 16 + col) of first mismatching pixel";
  }
}
423 #endif
424