1 //===- subzero/unittest/AssemblerX8632/XmmArith.cpp -----------------------===//
2 //
3 // The Subzero Code Generator
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 #include "AssemblerX8632/TestUtil.h"
10
11 namespace Ice {
12 namespace X8632 {
13 namespace Test {
14 namespace {
15
// Checks the scalar arithmetic emitters (addss, subss, mulss, divss) for both
// f32 and f64 operand types, in the xmm/xmm and xmm/memory operand forms,
// rotating through all eight xmm registers.
TEST_F(AssemblerX8632Test, ArithSS) {
// xmm/xmm form: Dst is loaded from slot T0 (which holds Value0), Src from slot
// T1 (which holds Value1), then `Inst Dst, Src` is emitted. After running, Dst
// must hold `Value0 Op Value1`. TestString echoes the macro arguments so that
// a failing expansion can be identified.
#define TestArithSSXmmXmm(FloatSize, Src, Value0, Dst, Value1, Inst, Op) \
  do { \
    static_assert(FloatSize == 32 || FloatSize == 64, \
                  "Invalid fp size " #FloatSize); \
    static constexpr char TestString[] = \
        "(" #FloatSize ", " #Src ", " #Value0 ", " #Dst ", " #Value1 \
        ", " #Inst ", " #Op ")"; \
    static constexpr bool IsDouble = FloatSize == 64; \
    using Type = std::conditional<IsDouble, double, float>::type; \
    const uint32_t T0 = allocateQword(); \
    const Type V0 = Value0; \
    const uint32_t T1 = allocateQword(); \
    const Type V1 = Value1; \
    \
    __ movss(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst, \
             dwordAddress(T0)); \
    __ movss(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Src, \
             dwordAddress(T1)); \
    __ Inst(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst, \
            XmmRegister::Encoded_Reg_##Src); \
    \
    AssembledTest test = assemble(); \
    if (IsDouble) { \
      test.setQwordTo(T0, static_cast<double>(V0)); \
      test.setQwordTo(T1, static_cast<double>(V1)); \
    } else { \
      test.setDwordTo(T0, static_cast<float>(V0)); \
      test.setDwordTo(T1, static_cast<float>(V1)); \
    } \
    \
    test.run(); \
    \
    ASSERT_DOUBLE_EQ(V0 Op V1, test.Dst<Type>()) << TestString; \
    reset(); \
  } while (0)

// xmm/memory form: same as above, except that the second operand is read
// directly from slot T1 instead of from a register.
#define TestArithSSXmmAddr(FloatSize, Value0, Dst, Value1, Inst, Op) \
  do { \
    static_assert(FloatSize == 32 || FloatSize == 64, \
                  "Invalid fp size " #FloatSize); \
    static constexpr char TestString[] = \
        "(" #FloatSize ", Addr, " #Value0 ", " #Dst ", " #Value1 ", " #Inst \
        ", " #Op ")"; \
    static constexpr bool IsDouble = FloatSize == 64; \
    using Type = std::conditional<IsDouble, double, float>::type; \
    const uint32_t T0 = allocateQword(); \
    const Type V0 = Value0; \
    const uint32_t T1 = allocateQword(); \
    const Type V1 = Value1; \
    \
    __ movss(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst, \
             dwordAddress(T0)); \
    __ Inst(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst, \
            dwordAddress(T1)); \
    \
    AssembledTest test = assemble(); \
    if (IsDouble) { \
      test.setQwordTo(T0, static_cast<double>(V0)); \
      test.setQwordTo(T1, static_cast<double>(V1)); \
    } else { \
      test.setDwordTo(T0, static_cast<float>(V0)); \
      test.setDwordTo(T1, static_cast<float>(V1)); \
    } \
    \
    test.run(); \
    \
    ASSERT_DOUBLE_EQ(V0 Op V1, test.Dst<Type>()) << TestString; \
    reset(); \
  } while (0)

// Runs all four operations in both operand forms. Dst0 is the destination of
// the xmm/xmm tests, Dst1 the destination of the xmm/memory tests.
#define TestArithSS(FloatSize, Src, Dst0, Dst1) \
  do { \
    TestArithSSXmmXmm(FloatSize, Src, 1.0, Dst0, 10.0, addss, +); \
    TestArithSSXmmAddr(FloatSize, 2.0, Dst1, 20.0, addss, +); \
    TestArithSSXmmXmm(FloatSize, Src, 3.0, Dst0, 30.0, subss, -); \
    TestArithSSXmmAddr(FloatSize, 4.0, Dst1, 40.0, subss, -); \
    TestArithSSXmmXmm(FloatSize, Src, 5.0, Dst0, 50.0, mulss, *); \
    TestArithSSXmmAddr(FloatSize, 6.0, Dst1, 60.0, mulss, *); \
    TestArithSSXmmXmm(FloatSize, Src, 7.0, Dst0, 70.0, divss, /); \
    TestArithSSXmmAddr(FloatSize, 8.0, Dst1, 80.0, divss, /); \
  } while (0)

  TestArithSS(32, xmm0, xmm1, xmm2);
  TestArithSS(32, xmm1, xmm2, xmm3);
  TestArithSS(32, xmm2, xmm3, xmm4);
  TestArithSS(32, xmm3, xmm4, xmm5);
  TestArithSS(32, xmm4, xmm5, xmm6);
  TestArithSS(32, xmm5, xmm6, xmm7);
  TestArithSS(32, xmm6, xmm7, xmm0);
  TestArithSS(32, xmm7, xmm0, xmm1);

  TestArithSS(64, xmm0, xmm1, xmm2);
  TestArithSS(64, xmm1, xmm2, xmm3);
  TestArithSS(64, xmm2, xmm3, xmm4);
  TestArithSS(64, xmm3, xmm4, xmm5);
  TestArithSS(64, xmm4, xmm5, xmm6);
  TestArithSS(64, xmm5, xmm6, xmm7);
  TestArithSS(64, xmm6, xmm7, xmm0);
  TestArithSS(64, xmm7, xmm0, xmm1);

#undef TestArithSS
#undef TestArithSSXmmAddr
#undef TestArithSSXmmXmm
}
121
TEST_F(AssemblerX8632Test,PArith)122 TEST_F(AssemblerX8632Test, PArith) {
123 #define TestPArithXmmXmm(Dst, Value0, Src, Value1, Inst, Op, Type, Size) \
124 do { \
125 static constexpr char TestString[] = \
126 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op \
127 ", " #Type ", " #Size ")"; \
128 const uint32_t T0 = allocateDqword(); \
129 const Dqword V0 Value0; \
130 \
131 const uint32_t T1 = allocateDqword(); \
132 const Dqword V1 Value1; \
133 \
134 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
135 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \
136 __ Inst(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst, \
137 XmmRegister::Encoded_Reg_##Src); \
138 \
139 AssembledTest test = assemble(); \
140 test.setDqwordTo(T0, V0); \
141 test.setDqwordTo(T1, V1); \
142 test.run(); \
143 \
144 ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op V1, test.Dst<Dqword>()) \
145 << TestString; \
146 reset(); \
147 } while (0)
148
149 #define TestPArithXmmAddr(Dst, Value0, Value1, Inst, Op, Type, Size) \
150 do { \
151 static constexpr char TestString[] = \
152 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op \
153 ", " #Type ", " #Size ")"; \
154 const uint32_t T0 = allocateDqword(); \
155 const Dqword V0 Value0; \
156 \
157 const uint32_t T1 = allocateDqword(); \
158 const Dqword V1 Value1; \
159 \
160 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
161 __ Inst(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst, \
162 dwordAddress(T1)); \
163 \
164 AssembledTest test = assemble(); \
165 test.setDqwordTo(T0, V0); \
166 test.setDqwordTo(T1, V1); \
167 test.run(); \
168 \
169 ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op V1, test.Dst<Dqword>()) \
170 << TestString; \
171 reset(); \
172 } while (0)
173
174 #define TestPArithXmmImm(Dst, Value0, Imm, Inst, Op, Type, Size) \
175 do { \
176 static constexpr char TestString[] = \
177 "(" #Dst ", " #Value0 ", " #Imm ", " #Inst ", " #Op ", " #Type \
178 ", " #Size ")"; \
179 const uint32_t T0 = allocateDqword(); \
180 const Dqword V0 Value0; \
181 \
182 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
183 __ Inst(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst, Immediate(Imm)); \
184 \
185 AssembledTest test = assemble(); \
186 test.setDqwordTo(T0, V0); \
187 test.run(); \
188 \
189 ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op Imm, test.Dst<Dqword>()) \
190 << TestString; \
191 reset(); \
192 } while (0)
193
194 #define TestPAndnXmmXmm(Dst, Value0, Src, Value1, Type, Size) \
195 do { \
196 static constexpr char TestString[] = \
197 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", pandn, " #Type \
198 ", " #Size ")"; \
199 const uint32_t T0 = allocateDqword(); \
200 const Dqword V0 Value0; \
201 \
202 const uint32_t T1 = allocateDqword(); \
203 const Dqword V1 Value1; \
204 \
205 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
206 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \
207 __ pandn(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst, \
208 XmmRegister::Encoded_Reg_##Src); \
209 \
210 AssembledTest test = assemble(); \
211 test.setDqwordTo(T0, V0); \
212 test.setDqwordTo(T1, V1); \
213 test.run(); \
214 \
215 ASSERT_EQ(~(packedAs<Type##Size##_t>(V0)) & V1, test.Dst<Dqword>()) \
216 << TestString; \
217 reset(); \
218 } while (0)
219
220 #define TestPAndnXmmAddr(Dst, Value0, Value1, Type, Size) \
221 do { \
222 static constexpr char TestString[] = \
223 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", pandn, " #Type ", " #Size \
224 ")"; \
225 const uint32_t T0 = allocateDqword(); \
226 const Dqword V0 Value0; \
227 \
228 const uint32_t T1 = allocateDqword(); \
229 const Dqword V1 Value1; \
230 \
231 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
232 __ pandn(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst, \
233 dwordAddress(T1)); \
234 \
235 AssembledTest test = assemble(); \
236 test.setDqwordTo(T0, V0); \
237 test.setDqwordTo(T1, V1); \
238 test.run(); \
239 \
240 ASSERT_EQ((~packedAs<Type##Size##_t>(V0)) & V1, test.Dst<Dqword>()) \
241 << TestString; \
242 reset(); \
243 } while (0)
244
245 #define TestPArithSize(Dst, Src, Size) \
246 do { \
247 static_assert(Size == 8 || Size == 16 || Size == 32, "Invalid size."); \
248 if (Size != 8) { \
249 TestPArithXmmXmm( \
250 Dst, \
251 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
252 Src, (uint64_t(3u), uint64_t(0u)), psra, >>, int, Size); \
253 TestPArithXmmAddr(Dst, (uint64_t(0x8040201008040201ull), \
254 uint64_t(0x8080404002020101ull)), \
255 (uint64_t(3u), uint64_t(0u)), psra, >>, int, Size); \
256 TestPArithXmmImm(Dst, (uint64_t(0x8040201008040201ull), \
257 uint64_t(0x8080404002020101ull)), \
258 3u, psra, >>, int, Size); \
259 TestPArithXmmXmm( \
260 Dst, \
261 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
262 Src, (uint64_t(3u), uint64_t(0u)), psrl, >>, uint, Size); \
263 TestPArithXmmAddr(Dst, (uint64_t(0x8040201008040201ull), \
264 uint64_t(0x8080404002020101ull)), \
265 (uint64_t(3u), uint64_t(0u)), psrl, >>, uint, Size); \
266 TestPArithXmmImm(Dst, (uint64_t(0x8040201008040201ull), \
267 uint64_t(0x8080404002020101ull)), \
268 3u, psrl, >>, uint, Size); \
269 TestPArithXmmXmm( \
270 Dst, \
271 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
272 Src, (uint64_t(3u), uint64_t(0u)), psll, <<, uint, Size); \
273 TestPArithXmmAddr(Dst, (uint64_t(0x8040201008040201ull), \
274 uint64_t(0x8080404002020101ull)), \
275 (uint64_t(3u), uint64_t(0u)), psll, <<, uint, Size); \
276 TestPArithXmmImm(Dst, (uint64_t(0x8040201008040201ull), \
277 uint64_t(0x8080404002020101ull)), \
278 3u, psll, <<, uint, Size); \
279 \
280 TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \
281 uint64_t(0x8080404002020101ull)), \
282 Src, (uint64_t(0xFFFFFFFF00000000ull), \
283 uint64_t(0x0123456789ABCDEull)), \
284 pmull, *, int, Size); \
285 TestPArithXmmAddr( \
286 Dst, \
287 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
288 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
289 pmull, *, int, Size); \
290 if (Size != 16) { \
291 TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \
292 uint64_t(0x8080404002020101ull)), \
293 Src, (uint64_t(0xFFFFFFFF00000000ull), \
294 uint64_t(0x0123456789ABCDEull)), \
295 pmuludq, *, uint, Size); \
296 TestPArithXmmAddr( \
297 Dst, (uint64_t(0x8040201008040201ull), \
298 uint64_t(0x8080404002020101ull)), \
299 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
300 pmuludq, *, uint, Size); \
301 } \
302 } \
303 TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \
304 uint64_t(0x8080404002020101ull)), \
305 Src, (uint64_t(0xFFFFFFFF00000000ull), \
306 uint64_t(0x0123456789ABCDEull)), \
307 padd, +, int, Size); \
308 TestPArithXmmAddr( \
309 Dst, \
310 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
311 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
312 padd, +, int, Size); \
313 TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \
314 uint64_t(0x8080404002020101ull)), \
315 Src, (uint64_t(0xFFFFFFFF00000000ull), \
316 uint64_t(0x0123456789ABCDEull)), \
317 psub, -, int, Size); \
318 TestPArithXmmAddr( \
319 Dst, \
320 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
321 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
322 psub, -, int, Size); \
323 TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \
324 uint64_t(0x8080404002020101ull)), \
325 Src, (uint64_t(0xFFFFFFFF00000000ull), \
326 uint64_t(0x0123456789ABCDEull)), \
327 pand, &, int, Size); \
328 TestPArithXmmAddr( \
329 Dst, \
330 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
331 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
332 pand, &, int, Size); \
333 \
334 TestPAndnXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \
335 uint64_t(0x8080404002020101ull)), \
336 Src, (uint64_t(0xFFFFFFFF00000000ull), \
337 uint64_t(0x0123456789ABCDEull)), \
338 int, Size); \
339 TestPAndnXmmAddr( \
340 Dst, \
341 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
342 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
343 int, Size); \
344 \
345 TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \
346 uint64_t(0x8080404002020101ull)), \
347 Src, (uint64_t(0xFFFFFFFF00000000ull), \
348 uint64_t(0x0123456789ABCDEull)), \
349 por, |, int, Size); \
350 TestPArithXmmAddr( \
351 Dst, \
352 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
353 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
354 por, |, int, Size); \
355 TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull), \
356 uint64_t(0x8080404002020101ull)), \
357 Src, (uint64_t(0xFFFFFFFF00000000ull), \
358 uint64_t(0x0123456789ABCDEull)), \
359 pxor, ^, int, Size); \
360 TestPArithXmmAddr( \
361 Dst, \
362 (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)), \
363 (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
364 pxor, ^, int, Size); \
365 } while (0)
366
367 #define TestPArith(Src, Dst) \
368 do { \
369 TestPArithSize(Src, Dst, 8); \
370 TestPArithSize(Src, Dst, 16); \
371 TestPArithSize(Src, Dst, 32); \
372 } while (0)
373
374 TestPArith(xmm0, xmm1);
375 TestPArith(xmm1, xmm2);
376 TestPArith(xmm2, xmm3);
377 TestPArith(xmm3, xmm4);
378 TestPArith(xmm4, xmm5);
379 TestPArith(xmm5, xmm6);
380 TestPArith(xmm6, xmm7);
381 TestPArith(xmm7, xmm0);
382
383 #undef TestPArith
384 #undef TestPArithSize
385 #undef TestPAndnXmmAddr
386 #undef TestPAndnXmmXmm
387 #undef TestPArithXmmImm
388 #undef TestPArithXmmAddr
389 #undef TestPArithXmmXmm
390 }
391
TEST_F(AssemblerX8632Test,ArithPS)392 TEST_F(AssemblerX8632Test, ArithPS) {
393 #define TestArithPSXmmXmm(FloatSize, Dst, Value0, Src, Value1, Inst, Op, Type) \
394 do { \
395 static constexpr char TestString[] = \
396 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op \
397 ", " #Type ")"; \
398 const uint32_t T0 = allocateDqword(); \
399 const Dqword V0 Value0; \
400 const uint32_t T1 = allocateDqword(); \
401 const Dqword V1 Value1; \
402 \
403 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
404 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \
405 __ Inst(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst, \
406 XmmRegister::Encoded_Reg_##Src); \
407 \
408 AssembledTest test = assemble(); \
409 test.setDqwordTo(T0, V0); \
410 test.setDqwordTo(T1, V1); \
411 test.run(); \
412 \
413 ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \
414 \
415 reset(); \
416 } while (0)
417
418 #define TestArithPSXmmXmmUntyped(Dst, Value0, Src, Value1, Inst, Op, Type) \
419 do { \
420 static constexpr char TestString[] = \
421 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op \
422 ", " #Type ")"; \
423 const uint32_t T0 = allocateDqword(); \
424 const Dqword V0 Value0; \
425 const uint32_t T1 = allocateDqword(); \
426 const Dqword V1 Value1; \
427 \
428 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
429 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \
430 __ Inst(XmmRegister::Encoded_Reg_##Dst, XmmRegister::Encoded_Reg_##Src); \
431 \
432 AssembledTest test = assemble(); \
433 test.setDqwordTo(T0, V0); \
434 test.setDqwordTo(T1, V1); \
435 test.run(); \
436 \
437 ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \
438 \
439 reset(); \
440 } while (0)
441
442 #define TestArithPSXmmAddrUntyped(Dst, Value0, Value1, Inst, Op, Type) \
443 do { \
444 static constexpr char TestString[] = \
445 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op \
446 ", " #Type ")"; \
447 const uint32_t T0 = allocateDqword(); \
448 const Dqword V0 Value0; \
449 const uint32_t T1 = allocateDqword(); \
450 const Dqword V1 Value1; \
451 \
452 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
453 __ Inst(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \
454 \
455 AssembledTest test = assemble(); \
456 test.setDqwordTo(T0, V0); \
457 test.setDqwordTo(T1, V1); \
458 test.run(); \
459 \
460 ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \
461 \
462 reset(); \
463 } while (0)
464
465 #define TestMinMaxPS(FloatSize, Dst, Value0, Src, Value1, Inst, Type) \
466 do { \
467 static constexpr char TestString[] = \
468 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Type \
469 ")"; \
470 const uint32_t T0 = allocateDqword(); \
471 const Dqword V0 Value0; \
472 const uint32_t T1 = allocateDqword(); \
473 const Dqword V1 Value1; \
474 \
475 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
476 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \
477 __ Inst(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst, \
478 XmmRegister::Encoded_Reg_##Src); \
479 \
480 AssembledTest test = assemble(); \
481 test.setDqwordTo(T0, V0); \
482 test.setDqwordTo(T1, V1); \
483 test.run(); \
484 \
485 ASSERT_EQ(packedAs<Type>(V0).Inst(V1), test.Dst<Dqword>()) << TestString; \
486 \
487 reset(); \
488 } while (0)
489
490 #define TestArithPSXmmAddr(FloatSize, Dst, Value0, Value1, Inst, Op, Type) \
491 do { \
492 static constexpr char TestString[] = \
493 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op \
494 ", " #Type ")"; \
495 const uint32_t T0 = allocateDqword(); \
496 const Dqword V0 Value0; \
497 const uint32_t T1 = allocateDqword(); \
498 const Dqword V1 Value1; \
499 \
500 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
501 __ Inst(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst, \
502 dwordAddress(T1)); \
503 \
504 AssembledTest test = assemble(); \
505 test.setDqwordTo(T0, V0); \
506 test.setDqwordTo(T1, V1); \
507 test.run(); \
508 \
509 ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \
510 \
511 reset(); \
512 } while (0)
513
514 #define TestArithPS(Dst, Src) \
515 do { \
516 TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \
517 (0.55, 0.43, 0.23, 1.21), addps, +, float); \
518 TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0), \
519 (0.55, 0.43, 0.23, 1.21), addps, +, float); \
520 TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \
521 (0.55, 0.43, 0.23, 1.21), subps, -, float); \
522 TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0), \
523 (0.55, 0.43, 0.23, 1.21), subps, -, float); \
524 TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \
525 (0.55, 0.43, 0.23, 1.21), mulps, *, float); \
526 TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0), \
527 (0.55, 0.43, 0.23, 1.21), mulps, *, float); \
528 TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \
529 (0.55, 0.43, 0.23, 1.21), divps, /, float); \
530 TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0), \
531 (0.55, 0.43, 0.23, 1.21), divps, /, float); \
532 TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \
533 (0.55, 0.43, 0.23, 1.21), andps, &, float); \
534 TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0), \
535 (0.55, 0.43, 0.23, 1.21), andps, &, float); \
536 TestArithPSXmmXmm(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), andps, &, \
537 double); \
538 TestArithPSXmmAddr(64, Dst, (1.0, -1000.0), (0.55, 1.21), andps, &, \
539 double); \
540 TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \
541 (0.55, 0.43, 0.23, 1.21), orps, |, float); \
542 TestArithPSXmmXmm(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), orps, |, \
543 double); \
544 TestMinMaxPS(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \
545 (0.55, 0.43, 0.23, 1.21), minps, float); \
546 TestMinMaxPS(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \
547 (0.55, 0.43, 0.23, 1.21), maxps, float); \
548 TestMinMaxPS(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), minps, double); \
549 TestMinMaxPS(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), maxps, double); \
550 TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src, \
551 (0.55, 0.43, 0.23, 1.21), xorps, ^, float); \
552 TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0), \
553 (0.55, 0.43, 0.23, 1.21), xorps, ^, float); \
554 TestArithPSXmmXmm(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), xorps, ^, \
555 double); \
556 TestArithPSXmmAddr(64, Dst, (1.0, -1000.0), (0.55, 1.21), xorps, ^, \
557 double); \
558 } while (0)
559
560 #if 0
561
562 #endif
563
564 TestArithPS(xmm0, xmm1);
565 TestArithPS(xmm1, xmm2);
566 TestArithPS(xmm2, xmm3);
567 TestArithPS(xmm3, xmm4);
568 TestArithPS(xmm4, xmm5);
569 TestArithPS(xmm5, xmm6);
570 TestArithPS(xmm6, xmm7);
571 TestArithPS(xmm7, xmm0);
572
573 #undef TestArithPs
574 #undef TestMinMaxPS
575 #undef TestArithPSXmmXmmUntyped
576 #undef TestArithPSXmmAddr
577 #undef TestArithPSXmmXmm
578 }
579
// Checks the variable-blend emitters blendvps and pblendvb in the xmm/xmm and
// xmm/memory operand forms. The mask is always loaded into xmm0, so xmm0 is
// never used as Dst or Src.
TEST_F(AssemblerX8632Test, Blending) {
  // These aliases let the Type macro argument serve both as the IceType_
  // suffix (IceType_f32 / IceType_i8) and as the element type for packedAs<>.
  using f32 = float;
  using i8 = uint8_t;

// xmm/xmm form. Value0, Value1 and M (the mask) are parenthesized Dqword
// constructor argument lists. The mask goes to xmm0, Value0 to Dst and Value1
// to Src; after `Inst Dst, Src` the register must equal
// packedAs<Type>(Value0).blendWith(Value1, mask).
#define TestBlendingXmmXmm(Dst, Value0, Src, Value1, M /*ask*/, Inst, Type) \
  do { \
    static constexpr char TestString[] = \
        "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #M ", " #Inst \
        ", " #Type ")"; \
    const uint32_t T0 = allocateDqword(); \
    const Dqword V0 Value0; \
    const uint32_t T1 = allocateDqword(); \
    const Dqword V1 Value1; \
    const uint32_t Mask = allocateDqword(); \
    const Dqword MaskValue M; \
    \
    __ movups(XmmRegister::Encoded_Reg_xmm0, dwordAddress(Mask)); \
    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
    __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \
    __ Inst(IceType_##Type, XmmRegister::Encoded_Reg_##Dst, \
            XmmRegister::Encoded_Reg_##Src); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(T0, V0); \
    test.setDqwordTo(T1, V1); \
    test.setDqwordTo(Mask, MaskValue); \
    test.run(); \
    \
    ASSERT_EQ(packedAs<Type>(V0).blendWith(V1, MaskValue), \
              test.Dst<Dqword>()) \
        << TestString; \
    reset(); \
  } while (0)

// xmm/memory form: the second operand is read from slot T1.
#define TestBlendingXmmAddr(Dst, Value0, Value1, M /*ask*/, Inst, Type) \
  do { \
    static constexpr char TestString[] = \
        "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #M ", " #Inst ", " #Type \
        ")"; \
    const uint32_t T0 = allocateDqword(); \
    const Dqword V0 Value0; \
    const uint32_t T1 = allocateDqword(); \
    const Dqword V1 Value1; \
    const uint32_t Mask = allocateDqword(); \
    const Dqword MaskValue M; \
    \
    __ movups(XmmRegister::Encoded_Reg_xmm0, dwordAddress(Mask)); \
    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
    __ Inst(IceType_##Type, XmmRegister::Encoded_Reg_##Dst, \
            dwordAddress(T1)); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(T0, V0); \
    test.setDqwordTo(T1, V1); \
    test.setDqwordTo(Mask, MaskValue); \
    test.run(); \
    \
    ASSERT_EQ(packedAs<Type>(V0).blendWith(V1, MaskValue), \
              test.Dst<Dqword>()) \
        << TestString; \
    reset(); \
  } while (0)

// Runs both instructions in both operand forms for one register pair.
#define TestBlending(Src, Dst) \
  do { \
    TestBlendingXmmXmm( \
        Dst, (1.0, 2.0, 1.0, 2.0), Src, (-1.0, -2.0, -1.0, -2.0), \
        (uint64_t(0x8000000000000000ull), uint64_t(0x0000000080000000ull)), \
        blendvps, f32); \
    TestBlendingXmmAddr( \
        Dst, (1.0, 2.0, 1.0, 2.0), (-1.0, -2.0, -1.0, -2.0), \
        (uint64_t(0x8000000000000000ull), uint64_t(0x0000000080000000ull)), \
        blendvps, f32); \
    TestBlendingXmmXmm( \
        Dst, \
        (uint64_t(0xFFFFFFFFFFFFFFFFull), uint64_t(0xBBBBBBBBBBBBBBBBull)), \
        Src, \
        (uint64_t(0xAAAAAAAAAAAAAAAAull), uint64_t(0xEEEEEEEEEEEEEEEEull)), \
        (uint64_t(0x8000000000000080ull), uint64_t(0x8080808000000000ull)), \
        pblendvb, i8); \
    TestBlendingXmmAddr( \
        Dst, \
        (uint64_t(0xFFFFFFFFFFFFFFFFull), uint64_t(0xBBBBBBBBBBBBBBBBull)), \
        (uint64_t(0xAAAAAAAAAAAAAAAAull), uint64_t(0xEEEEEEEEEEEEEEEEull)), \
        (uint64_t(0x8000000000000080ull), uint64_t(0x8080808000000000ull)), \
        pblendvb, i8); \
  } while (0)

  /* xmm0 is taken: it is the implicit mask. */
  TestBlending(xmm1, xmm2);
  TestBlending(xmm2, xmm3);
  TestBlending(xmm3, xmm4);
  TestBlending(xmm4, xmm5);
  TestBlending(xmm5, xmm6);
  TestBlending(xmm6, xmm7);
  TestBlending(xmm7, xmm1);

#undef TestBlending
#undef TestBlendingXmmAddr
#undef TestBlendingXmmXmm
}
678
// Checks the cmpps emitter with the `eq` and `unord` predicates, for f32 and
// f64 operand types, in the xmm/xmm and xmm/memory operand forms.
TEST_F(AssemblerX8632Test, Cmpps) {
// xmm/xmm form. Value0 and Value1 are parenthesized Dqword constructor
// argument lists; C is the suffix of the Cond::Cmpps_* predicate and Op the
// C++ operator that computes the expected result on elements of type Type.
#define TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, C, Op, Type) \
  do { \
    static constexpr char TestString[] = \
        "(" #Src ", " #Dst ", " #C ", " #Op ")"; \
    const uint32_t T0 = allocateDqword(); \
    const Dqword V0 Value0; \
    const uint32_t T1 = allocateDqword(); \
    const Dqword V1 Value1; \
    \
    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
    __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \
    __ cmpps(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst, \
             XmmRegister::Encoded_Reg_##Src, Cond::Cmpps_##C); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(T0, V0); \
    test.setDqwordTo(T1, V1); \
    test.run(); \
    \
    ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// xmm/memory form: the second operand is read from slot T1.
#define TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, C, Op, Type) \
  do { \
    static constexpr char TestString[] = "(" #Dst ", Addr, " #C ", " #Op ")"; \
    const uint32_t T0 = allocateDqword(); \
    const Dqword V0 Value0; \
    const uint32_t T1 = allocateDqword(); \
    const Dqword V1 Value1; \
    \
    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
    __ cmpps(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst, \
             dwordAddress(T1), Cond::Cmpps_##C); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(T0, V0); \
    test.setDqwordTo(T1, V1); \
    test.run(); \
    \
    ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Ordered/unordered predicate, xmm/xmm form. The expectation is computed by
// calling the member function named C on the packed view of V0.
// NOTE(review): the Value0 and Value1 arguments are accepted but unused; the
// operands are always the fixed float/NaN patterns below, including when
// FloatSize is 64. Confirm whether that is intended.
#define TestCmppsOrdUnordXmmXmm(FloatSize, Dst, Value0, Src, Value1, C, Type) \
  do { \
    static constexpr char TestString[] = "(" #Src ", " #Dst ", " #C ")"; \
    const uint32_t T0 = allocateDqword(); \
    const Dqword V0(1.0, 1.0, std::numeric_limits<float>::quiet_NaN(), \
                    std::numeric_limits<float>::quiet_NaN()); \
    const uint32_t T1 = allocateDqword(); \
    const Dqword V1(1.0, std::numeric_limits<float>::quiet_NaN(), 1.0, \
                    std::numeric_limits<float>::quiet_NaN()); \
    \
    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
    __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \
    __ cmpps(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst, \
             XmmRegister::Encoded_Reg_##Src, Cond::Cmpps_##C); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(T0, V0); \
    test.setDqwordTo(T1, V1); \
    test.run(); \
    \
    ASSERT_EQ(packedAs<Type>(V0).C(V1), test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Ordered/unordered predicate, xmm/memory form. As in the xmm/xmm variant,
// Value0 and Value1 are accepted but unused.
#define TestCmppsOrdUnordXmmAddr(FloatSize, Dst, Value0, Value1, C, Type) \
  do { \
    static constexpr char TestString[] = "(" #Dst ", " #C ")"; \
    const uint32_t T0 = allocateDqword(); \
    const Dqword V0(1.0, 1.0, std::numeric_limits<float>::quiet_NaN(), \
                    std::numeric_limits<float>::quiet_NaN()); \
    const uint32_t T1 = allocateDqword(); \
    const Dqword V1(1.0, std::numeric_limits<float>::quiet_NaN(), 1.0, \
                    std::numeric_limits<float>::quiet_NaN()); \
    \
    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
    __ cmpps(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst, \
             dwordAddress(T1), Cond::Cmpps_##C); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(T0, V0); \
    test.setDqwordTo(T1, V1); \
    test.run(); \
    \
    ASSERT_EQ(packedAs<Type>(V0).C(V1), test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Runs the predicate list for one (Dst, Src) register pair.
// NOTE(review): the `eq, ==` pair is repeated six times and no other ordering
// predicate is exercised; this looks like a placeholder for further
// predicates -- confirm against the Cond::Cmpps_* enumerators before changing.
#define TestCmpps(FloatSize, Dst, Value0, Src, Value1, Type) \
  do { \
    TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type); \
    TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type); \
    TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type); \
    TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type); \
    TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type); \
    TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type); \
    TestCmppsOrdUnordXmmXmm(FloatSize, Dst, Value0, Src, Value1, unord, \
                            Type); \
    TestCmppsOrdUnordXmmAddr(FloatSize, Dst, Value0, Value1, unord, Type); \
    TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type); \
    TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type); \
    TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type); \
    TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type); \
    TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type); \
    TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type); \
    if (FloatSize == 32) { \
      TestCmppsOrdUnordXmmXmm( \
          32, Dst, \
          (1.0, 1.0, std::numeric_limits<float>::quiet_NaN(), \
           std::numeric_limits<float>::quiet_NaN()), \
          Src, \
          (1.0, std::numeric_limits<float>::quiet_NaN(), 1.0, \
           std::numeric_limits<float>::quiet_NaN()), \
          unord, Type); \
      TestCmppsOrdUnordXmmAddr( \
          32, Dst, \
          (1.0, 1.0, std::numeric_limits<float>::quiet_NaN(), \
           std::numeric_limits<float>::quiet_NaN()), \
          (1.0, std::numeric_limits<float>::quiet_NaN(), 1.0, \
           std::numeric_limits<float>::quiet_NaN()), \
          unord, Type); \
    } else { \
      TestCmppsOrdUnordXmmXmm( \
          64, Dst, (1.0, std::numeric_limits<double>::quiet_NaN()), Src, \
          (std::numeric_limits<double>::quiet_NaN(), \
           std::numeric_limits<double>::quiet_NaN()), \
          unord, Type); \
      TestCmppsOrdUnordXmmXmm( \
          64, Dst, (1.0, 1.0), Src, \
          (1.0, std::numeric_limits<double>::quiet_NaN()), unord, Type); \
      TestCmppsOrdUnordXmmAddr( \
          64, Dst, (1.0, std::numeric_limits<double>::quiet_NaN()), \
          (std::numeric_limits<double>::quiet_NaN(), \
           std::numeric_limits<double>::quiet_NaN()), \
          unord, Type); \
      TestCmppsOrdUnordXmmAddr( \
          64, Dst, (1.0, 1.0), \
          (1.0, std::numeric_limits<double>::quiet_NaN()), unord, Type); \
    } \
  } while (0)

// Runs TestCmpps for every adjacent register pair.
#define TestCmppsSize(FloatSize, Value0, Value1, Type) \
  do { \
    TestCmpps(FloatSize, xmm0, Value0, xmm1, Value1, Type); \
    TestCmpps(FloatSize, xmm1, Value0, xmm2, Value1, Type); \
    TestCmpps(FloatSize, xmm2, Value0, xmm3, Value1, Type); \
    TestCmpps(FloatSize, xmm3, Value0, xmm4, Value1, Type); \
    TestCmpps(FloatSize, xmm4, Value0, xmm5, Value1, Type); \
    TestCmpps(FloatSize, xmm5, Value0, xmm6, Value1, Type); \
    TestCmpps(FloatSize, xmm6, Value0, xmm7, Value1, Type); \
    TestCmpps(FloatSize, xmm7, Value0, xmm0, Value1, Type); \
  } while (0)

  TestCmppsSize(32, (-1.0, 1.0, 3.14, 1024.5), (-1.0, 1.0, 3.14, 1024.5),
                float);
  TestCmppsSize(64, (1.0, -1000.0), (0.55, 1.21), double);

// Every helper macro defined above is undefined here, including
// TestCmppsSize, so that none of them remains defined after this test.
#undef TestCmppsSize
#undef TestCmpps
#undef TestCmppsOrdUnordXmmAddr
#undef TestCmppsOrdUnordXmmXmm
#undef TestCmppsXmmAddr
#undef TestCmppsXmmXmm
}
846
// Checks the single-register emitters sqrtps, rsqrtps, reciprocalps and
// sqrtpd against fixed expected bit patterns, once per xmm register.
TEST_F(AssemblerX8632Test, Sqrtps_Rsqrtps_Reciprocalps_Sqrtpd) {
// Loads the fixed input (1.0, 4.0, 20.0, 3.14) into Dst, emits `Inst Dst`,
// and compares the register with Expect, a parenthesized Dqword constructor
// argument list holding the expected 128-bit result as two 64-bit halves.
#define TestImplSingle(Dst, Inst, Expect) \
  do { \
    static constexpr char TestString[] = "(" #Dst ", " #Inst ")"; \
    const uint32_t T0 = allocateDqword(); \
    const Dqword V0(1.0, 4.0, 20.0, 3.14); \
    \
    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
    __ Inst(XmmRegister::Encoded_Reg_##Dst); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(T0, V0); \
    test.run(); \
    ASSERT_EQ(Dqword Expect, test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Runs all four instructions on one register.
#define TestImpl(Dst) \
  do { \
    TestImplSingle( \
        Dst, sqrtps, \
        (uint64_t(0x400000003F800000ull), uint64_t(0x3FE2D10B408F1BBDull))); \
    TestImplSingle( \
        Dst, rsqrtps, \
        (uint64_t(0x3EFFF0003F7FF000ull), uint64_t(0x3F1078003E64F000ull))); \
    TestImplSingle( \
        Dst, reciprocalps, \
        (uint64_t(0x3E7FF0003F7FF000ull), uint64_t(0x3EA310003D4CC000ull))); \
    \
    TestImplSingle( \
        Dst, sqrtpd, \
        (uint64_t(0x4036A09E9365F5F3ull), uint64_t(0x401C42FAE40282A8ull))); \
  } while (0)

  TestImpl(xmm0);
  TestImpl(xmm1);
  TestImpl(xmm2);
  TestImpl(xmm3);
  TestImpl(xmm4);
  TestImpl(xmm5);
  TestImpl(xmm6);
  TestImpl(xmm7);

#undef TestImpl
#undef TestImplSingle
}
889
// Verifies the register-register forms of unpcklps, unpcklpd, unpckhps and
// unpckhpd for each adjacent pair of xmm registers.
TEST_F(AssemblerX8632Test, Unpck) {
  // Initial contents of the destination and source registers.
  const Dqword DstInput(uint64_t(0xAAAAAAAABBBBBBBBull),
                        uint64_t(0xCCCCCCCCDDDDDDDDull));
  const Dqword SrcInput(uint64_t(0xEEEEEEEEFFFFFFFFull),
                        uint64_t(0x9999999988888888ull));

  // Expected destination contents. Each name is <instruction>Want so that the
  // macro below can pick the right one by token pasting.
  const Dqword unpcklpsWant(uint64_t(0xFFFFFFFFBBBBBBBBull),
                            uint64_t(0xEEEEEEEEAAAAAAAAull));
  const Dqword unpcklpdWant(uint64_t(0xAAAAAAAABBBBBBBBull),
                            uint64_t(0xEEEEEEEEFFFFFFFFull));
  const Dqword unpckhpsWant(uint64_t(0x88888888DDDDDDDDull),
                            uint64_t(0x99999999CCCCCCCCull));
  const Dqword unpckhpdWant(uint64_t(0xCCCCCCCCDDDDDDDDull),
                            uint64_t(0x9999999988888888ull));

// Loads DstReg and SrcReg from memory, emits "Op DstReg, SrcReg", and checks
// DstReg against the matching <Op>Want constant.
#define TestImplSingle(DstReg, SrcReg, Op) \
  do { \
    static constexpr char TestString[] = \
        "(" #DstReg ", " #SrcReg ", " #Op ")"; \
    const uint32_t DstSlot = allocateDqword(); \
    const uint32_t SrcSlot = allocateDqword(); \
    \
    __ movups(XmmRegister::Encoded_Reg_##DstReg, dwordAddress(DstSlot)); \
    __ movups(XmmRegister::Encoded_Reg_##SrcReg, dwordAddress(SrcSlot)); \
    __ Op(XmmRegister::Encoded_Reg_##DstReg, \
          XmmRegister::Encoded_Reg_##SrcReg); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(DstSlot, DstInput); \
    test.setDqwordTo(SrcSlot, SrcInput); \
    test.run(); \
    \
    ASSERT_EQ(Op##Want, test.DstReg<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Runs every unpack variant on one (DstReg, SrcReg) pair.
#define TestImpl(DstReg, SrcReg) \
  do { \
    TestImplSingle(DstReg, SrcReg, unpcklps); \
    TestImplSingle(DstReg, SrcReg, unpcklpd); \
    TestImplSingle(DstReg, SrcReg, unpckhps); \
    TestImplSingle(DstReg, SrcReg, unpckhpd); \
  } while (0)

  TestImpl(xmm0, xmm1);
  TestImpl(xmm1, xmm2);
  TestImpl(xmm2, xmm3);
  TestImpl(xmm3, xmm4);
  TestImpl(xmm4, xmm5);
  TestImpl(xmm5, xmm6);
  TestImpl(xmm6, xmm7);
  TestImpl(xmm7, xmm0);

#undef TestImpl
#undef TestImplSingle
}
944
// Verifies pshufd and shufps with an immediate selector, using both a register
// and a memory source operand.
TEST_F(AssemblerX8632Test, Shufp) {
  // Initial contents of the destination register and of the source operand.
  const Dqword DstInput(uint64_t(0x1111111122222222ull),
                        uint64_t(0x5555555577777777ull));
  const Dqword SrcInput(uint64_t(0xAAAAAAAABBBBBBBBull),
                        uint64_t(0xCCCCCCCCDDDDDDDDull));

  // Per-instruction selector and expected result. The names follow the
  // <instruction>Control / <instruction>Want pattern so the macros below can
  // find them by token pasting.
  const uint8_t pshufdControl = 0x63;
  const Dqword pshufdWant(uint64_t(0xBBBBBBBBCCCCCCCCull),
                          uint64_t(0xAAAAAAAADDDDDDDDull));

  const uint8_t shufpsControl = 0xf9;
  const Dqword shufpsWant(uint64_t(0x7777777711111111ull),
                          uint64_t(0xCCCCCCCCCCCCCCCCull));

// Register-source form: loads both registers from memory, emits
// "Op DstReg, SrcReg, <Op>Control", and checks DstReg against <Op>Want.
#define TestImplSingleXmmXmm(DstReg, SrcReg, Op) \
  do { \
    static constexpr char TestString[] = \
        "(" #DstReg ", " #SrcReg ", " #Op ")"; \
    const uint32_t DstSlot = allocateDqword(); \
    const uint32_t SrcSlot = allocateDqword(); \
    \
    __ movups(XmmRegister::Encoded_Reg_##DstReg, dwordAddress(DstSlot)); \
    __ movups(XmmRegister::Encoded_Reg_##SrcReg, dwordAddress(SrcSlot)); \
    __ Op(IceType_f32, XmmRegister::Encoded_Reg_##DstReg, \
          XmmRegister::Encoded_Reg_##SrcReg, Immediate(Op##Control)); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(DstSlot, DstInput); \
    test.setDqwordTo(SrcSlot, SrcInput); \
    test.run(); \
    \
    ASSERT_EQ(Op##Want, test.DstReg<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Memory-source form: same check, but the source operand is read directly
// from the second dqword slot instead of from a register.
#define TestImplSingleXmmAddr(DstReg, Op) \
  do { \
    static constexpr char TestString[] = "(" #DstReg ", Addr, " #Op ")"; \
    const uint32_t DstSlot = allocateDqword(); \
    const uint32_t SrcSlot = allocateDqword(); \
    \
    __ movups(XmmRegister::Encoded_Reg_##DstReg, dwordAddress(DstSlot)); \
    __ Op(IceType_f32, XmmRegister::Encoded_Reg_##DstReg, \
          dwordAddress(SrcSlot), Immediate(Op##Control)); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(DstSlot, DstInput); \
    test.setDqwordTo(SrcSlot, SrcInput); \
    test.run(); \
    \
    ASSERT_EQ(Op##Want, test.DstReg<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Runs both instructions in both operand forms for one register pair.
#define TestImpl(DstReg, SrcReg) \
  do { \
    TestImplSingleXmmXmm(DstReg, SrcReg, pshufd); \
    TestImplSingleXmmAddr(DstReg, pshufd); \
    TestImplSingleXmmXmm(DstReg, SrcReg, shufps); \
    TestImplSingleXmmAddr(DstReg, shufps); \
  } while (0)

  TestImpl(xmm0, xmm1);
  TestImpl(xmm1, xmm2);
  TestImpl(xmm2, xmm3);
  TestImpl(xmm3, xmm4);
  TestImpl(xmm4, xmm5);
  TestImpl(xmm5, xmm6);
  TestImpl(xmm6, xmm7);
  TestImpl(xmm7, xmm0);

#undef TestImpl
#undef TestImplSingleXmmAddr
#undef TestImplSingleXmmXmm
}
1019
// Verifies punpckl for the v4i32, v8i16 and v16i8 element types, with the
// source operand supplied either in a register or in memory.
TEST_F(AssemblerX8632Test, Punpckl) {
  // For each element type: destination input, source input and expected
  // result. The type suffix is what the macros paste onto the name.
  const Dqword DstInput_v4i32(uint64_t(0x1111111122222222ull),
                              uint64_t(0x5555555577777777ull));
  const Dqword SrcInput_v4i32(uint64_t(0xAAAAAAAABBBBBBBBull),
                              uint64_t(0xCCCCCCCCDDDDDDDDull));
  const Dqword Want_v4i32(uint64_t(0xBBBBBBBB22222222ull),
                          uint64_t(0xAAAAAAAA11111111ull));

  const Dqword DstInput_v8i16(uint64_t(0x1111222233334444ull),
                              uint64_t(0x5555666677778888ull));
  const Dqword SrcInput_v8i16(uint64_t(0xAAAABBBBCCCCDDDDull),
                              uint64_t(0xEEEEFFFF00009999ull));
  const Dqword Want_v8i16(uint64_t(0xCCCC3333DDDD4444ull),
                          uint64_t(0xAAAA1111BBBB2222ull));

  const Dqword DstInput_v16i8(uint64_t(0x1122334455667788ull),
                              uint64_t(0x99AABBCCDDEEFF00ull));
  const Dqword SrcInput_v16i8(uint64_t(0xFFEEDDCCBBAA9900ull),
                              uint64_t(0xBAADF00DFEEDFACEull));
  const Dqword Want_v16i8(uint64_t(0xBB55AA6699770088ull),
                          uint64_t(0xFF11EE22DD33CC44ull));

// Register-source form: emits "Op<Elem> DstReg, SrcReg" and compares DstReg
// with Want_<Elem>.
#define TestImplXmmXmm(DstReg, SrcReg, Op, Elem) \
  do { \
    static constexpr char TestString[] = \
        "(" #DstReg ", " #SrcReg ", " #Op ", " #Elem ")"; \
    const uint32_t DstSlot = allocateDqword(); \
    const uint32_t SrcSlot = allocateDqword(); \
    \
    __ movups(XmmRegister::Encoded_Reg_##DstReg, dwordAddress(DstSlot)); \
    __ movups(XmmRegister::Encoded_Reg_##SrcReg, dwordAddress(SrcSlot)); \
    __ Op(IceType_##Elem, XmmRegister::Encoded_Reg_##DstReg, \
          XmmRegister::Encoded_Reg_##SrcReg); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(DstSlot, DstInput_##Elem); \
    test.setDqwordTo(SrcSlot, SrcInput_##Elem); \
    test.run(); \
    \
    ASSERT_EQ(Want_##Elem, test.DstReg<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Memory-source form: the source operand is read from the second slot.
#define TestImplXmmAddr(DstReg, Op, Elem) \
  do { \
    static constexpr char TestString[] = \
        "(" #DstReg ", Addr, " #Op ", " #Elem ")"; \
    const uint32_t DstSlot = allocateDqword(); \
    const uint32_t SrcSlot = allocateDqword(); \
    \
    __ movups(XmmRegister::Encoded_Reg_##DstReg, dwordAddress(DstSlot)); \
    __ Op(IceType_##Elem, XmmRegister::Encoded_Reg_##DstReg, \
          dwordAddress(SrcSlot)); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(DstSlot, DstInput_##Elem); \
    test.setDqwordTo(SrcSlot, SrcInput_##Elem); \
    test.run(); \
    \
    ASSERT_EQ(Want_##Elem, test.DstReg<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Runs every element type in both operand forms for one register pair.
#define TestImpl(DstReg, SrcReg) \
  do { \
    TestImplXmmXmm(DstReg, SrcReg, punpckl, v4i32); \
    TestImplXmmAddr(DstReg, punpckl, v4i32); \
    TestImplXmmXmm(DstReg, SrcReg, punpckl, v8i16); \
    TestImplXmmAddr(DstReg, punpckl, v8i16); \
    TestImplXmmXmm(DstReg, SrcReg, punpckl, v16i8); \
    TestImplXmmAddr(DstReg, punpckl, v16i8); \
  } while (0)

  TestImpl(xmm0, xmm1);
  TestImpl(xmm1, xmm2);
  TestImpl(xmm2, xmm3);
  TestImpl(xmm3, xmm4);
  TestImpl(xmm4, xmm5);
  TestImpl(xmm5, xmm6);
  TestImpl(xmm6, xmm7);
  TestImpl(xmm7, xmm0);

#undef TestImpl
#undef TestImplXmmAddr
#undef TestImplXmmXmm
}
1105
// Verifies packss for the v4i32 and v8i16 element types, with the source
// operand supplied either in a register or in memory. The inputs include
// values outside the narrower range, and the expected results hold the
// clamped values.
TEST_F(AssemblerX8632Test, Packss) {
  // For each element type: destination input, source input and expected
  // result. The type suffix is what the macros paste onto the name.
  const Dqword DstInput_v4i32(uint64_t(0x0001000000001234ull),
                              uint64_t(0x7FFFFFFF80000000ull));
  const Dqword SrcInput_v4i32(uint64_t(0xFFFFFFFEFFFFFFFFull),
                              uint64_t(0x0000800100007FFEull));
  const Dqword Want_v4i32(uint64_t(0x7FFF80007FFF1234ull),
                          uint64_t(0x7FFF7FFEFFFEFFFFull));

  const Dqword DstInput_v8i16(uint64_t(0x0001000000120034ull),
                              uint64_t(0xFFFEFFFF7FFF8000ull));
  const Dqword SrcInput_v8i16(uint64_t(0x00057FF80081007Eull),
                              uint64_t(0x0088007700660055ull));
  const Dqword Want_v8i16(uint64_t(0xFEFF7F8001001234ull),
                          uint64_t(0x7F776655057F7F7Eull));

// Register-source form: emits "Op<Elem> DstReg, SrcReg" and compares DstReg
// with Want_<Elem>.
#define TestImplXmmXmm(DstReg, SrcReg, Op, Elem) \
  do { \
    static constexpr char TestString[] = \
        "(" #DstReg ", " #SrcReg ", " #Op ", " #Elem ")"; \
    const uint32_t DstSlot = allocateDqword(); \
    const uint32_t SrcSlot = allocateDqword(); \
    \
    __ movups(XmmRegister::Encoded_Reg_##DstReg, dwordAddress(DstSlot)); \
    __ movups(XmmRegister::Encoded_Reg_##SrcReg, dwordAddress(SrcSlot)); \
    __ Op(IceType_##Elem, XmmRegister::Encoded_Reg_##DstReg, \
          XmmRegister::Encoded_Reg_##SrcReg); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(DstSlot, DstInput_##Elem); \
    test.setDqwordTo(SrcSlot, SrcInput_##Elem); \
    test.run(); \
    \
    ASSERT_EQ(Want_##Elem, test.DstReg<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Memory-source form: the source operand is read from the second slot.
#define TestImplXmmAddr(DstReg, Op, Elem) \
  do { \
    static constexpr char TestString[] = \
        "(" #DstReg ", Addr, " #Op ", " #Elem ")"; \
    const uint32_t DstSlot = allocateDqword(); \
    const uint32_t SrcSlot = allocateDqword(); \
    \
    __ movups(XmmRegister::Encoded_Reg_##DstReg, dwordAddress(DstSlot)); \
    __ Op(IceType_##Elem, XmmRegister::Encoded_Reg_##DstReg, \
          dwordAddress(SrcSlot)); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(DstSlot, DstInput_##Elem); \
    test.setDqwordTo(SrcSlot, SrcInput_##Elem); \
    test.run(); \
    \
    ASSERT_EQ(Want_##Elem, test.DstReg<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Runs both element types in both operand forms for one register pair.
#define TestImpl(DstReg, SrcReg) \
  do { \
    TestImplXmmXmm(DstReg, SrcReg, packss, v4i32); \
    TestImplXmmAddr(DstReg, packss, v4i32); \
    TestImplXmmXmm(DstReg, SrcReg, packss, v8i16); \
    TestImplXmmAddr(DstReg, packss, v8i16); \
  } while (0)

  TestImpl(xmm0, xmm1);
  TestImpl(xmm1, xmm2);
  TestImpl(xmm2, xmm3);
  TestImpl(xmm3, xmm4);
  TestImpl(xmm4, xmm5);
  TestImpl(xmm5, xmm6);
  TestImpl(xmm6, xmm7);
  TestImpl(xmm7, xmm0);

#undef TestImpl
#undef TestImplXmmAddr
#undef TestImplXmmXmm
}
1182
// Verifies packus for the v4i32 and v8i16 element types, with the source
// operand supplied either in a register or in memory.
TEST_F(AssemblerX8632Test, Packus) {
  // For each element type: destination input, source input and expected
  // result. The type suffix is what the macros paste onto the name.
  const Dqword DstInput_v4i32(uint64_t(0x0001000000001234ull),
                              uint64_t(0x7FFFFFFF80000000ull));
  const Dqword SrcInput_v4i32(uint64_t(0xFFFFFFFEFFFFFFFFull),
                              uint64_t(0x0000800100007FFEull));
  const Dqword Want_v4i32(uint64_t(0xFFFF0000FFFF1234ull),
                          uint64_t(0x80017FFE00000000ull));

  const Dqword DstInput_v8i16(uint64_t(0x0001000000120034ull),
                              uint64_t(0xFFFEFFFF7FFF8000ull));
  const Dqword SrcInput_v8i16(uint64_t(0x00057FF80081007Eull),
                              uint64_t(0x0088007700660055ull));
  const Dqword Want_v8i16(uint64_t(0x0000FF0001001234ull),
                          uint64_t(0x8877665505FF817Eull));

// Register-source form: emits "Op<Elem> DstReg, SrcReg" and compares DstReg
// with Want_<Elem>.
#define TestImplXmmXmm(DstReg, SrcReg, Op, Elem) \
  do { \
    static constexpr char TestString[] = \
        "(" #DstReg ", " #SrcReg ", " #Op ", " #Elem ")"; \
    const uint32_t DstSlot = allocateDqword(); \
    const uint32_t SrcSlot = allocateDqword(); \
    \
    __ movups(XmmRegister::Encoded_Reg_##DstReg, dwordAddress(DstSlot)); \
    __ movups(XmmRegister::Encoded_Reg_##SrcReg, dwordAddress(SrcSlot)); \
    __ Op(IceType_##Elem, XmmRegister::Encoded_Reg_##DstReg, \
          XmmRegister::Encoded_Reg_##SrcReg); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(DstSlot, DstInput_##Elem); \
    test.setDqwordTo(SrcSlot, SrcInput_##Elem); \
    test.run(); \
    \
    ASSERT_EQ(Want_##Elem, test.DstReg<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Memory-source form: the source operand is read from the second slot.
#define TestImplXmmAddr(DstReg, Op, Elem) \
  do { \
    static constexpr char TestString[] = \
        "(" #DstReg ", Addr, " #Op ", " #Elem ")"; \
    const uint32_t DstSlot = allocateDqword(); \
    const uint32_t SrcSlot = allocateDqword(); \
    \
    __ movups(XmmRegister::Encoded_Reg_##DstReg, dwordAddress(DstSlot)); \
    __ Op(IceType_##Elem, XmmRegister::Encoded_Reg_##DstReg, \
          dwordAddress(SrcSlot)); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(DstSlot, DstInput_##Elem); \
    test.setDqwordTo(SrcSlot, SrcInput_##Elem); \
    test.run(); \
    \
    ASSERT_EQ(Want_##Elem, test.DstReg<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Runs both element types in both operand forms for one register pair.
#define TestImpl(DstReg, SrcReg) \
  do { \
    TestImplXmmXmm(DstReg, SrcReg, packus, v4i32); \
    TestImplXmmAddr(DstReg, packus, v4i32); \
    TestImplXmmXmm(DstReg, SrcReg, packus, v8i16); \
    TestImplXmmAddr(DstReg, packus, v8i16); \
  } while (0)

  TestImpl(xmm0, xmm1);
  TestImpl(xmm1, xmm2);
  TestImpl(xmm2, xmm3);
  TestImpl(xmm3, xmm4);
  TestImpl(xmm4, xmm5);
  TestImpl(xmm5, xmm6);
  TestImpl(xmm6, xmm7);
  TestImpl(xmm7, xmm0);

#undef TestImpl
#undef TestImplXmmAddr
#undef TestImplXmmXmm
}
1259
// Verifies pshufb with the second operand supplied either in a register or
// in memory.
TEST_F(AssemblerX8632Test, Pshufb) {
  // Destination input, second-operand input, and the expected destination.
  const Dqword DstInput(uint64_t(0x1122334455667788ull),
                        uint64_t(0x99aabbccddeeff32ull));
  const Dqword SrcInput(uint64_t(0x0204050380060708ull),
                        uint64_t(0x010306080a8b0c0dull));

  const Dqword Want(uint64_t(0x6644335500221132ull),
                    uint64_t(0x77552232ee00ccbbull));

// Register-source form: loads both registers, emits "Op DstReg, SrcReg" and
// compares DstReg with Want.
#define TestImplXmmXmm(DstReg, SrcReg, Op) \
  do { \
    static constexpr char TestString[] = \
        "(" #DstReg ", " #SrcReg ", " #Op ")"; \
    const uint32_t DstSlot = allocateDqword(); \
    const uint32_t SrcSlot = allocateDqword(); \
    \
    __ movups(XmmRegister::Encoded_Reg_##DstReg, dwordAddress(DstSlot)); \
    __ movups(XmmRegister::Encoded_Reg_##SrcReg, dwordAddress(SrcSlot)); \
    __ Op(IceType_void, XmmRegister::Encoded_Reg_##DstReg, \
          XmmRegister::Encoded_Reg_##SrcReg); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(DstSlot, DstInput); \
    test.setDqwordTo(SrcSlot, SrcInput); \
    test.run(); \
    \
    ASSERT_EQ(Want, test.DstReg<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Memory-source form: the second operand is read from the second slot.
#define TestImplXmmAddr(DstReg, Op) \
  do { \
    static constexpr char TestString[] = "(" #DstReg ", Addr, " #Op ")"; \
    const uint32_t DstSlot = allocateDqword(); \
    const uint32_t SrcSlot = allocateDqword(); \
    \
    __ movups(XmmRegister::Encoded_Reg_##DstReg, dwordAddress(DstSlot)); \
    __ Op(IceType_void, XmmRegister::Encoded_Reg_##DstReg, \
          dwordAddress(SrcSlot)); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(DstSlot, DstInput); \
    test.setDqwordTo(SrcSlot, SrcInput); \
    test.run(); \
    \
    ASSERT_EQ(Want, test.DstReg<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Runs both operand forms for one register pair.
#define TestImpl(DstReg, SrcReg) \
  do { \
    TestImplXmmXmm(DstReg, SrcReg, pshufb); \
    TestImplXmmAddr(DstReg, pshufb); \
  } while (0)

  TestImpl(xmm0, xmm1);
  TestImpl(xmm1, xmm2);
  TestImpl(xmm2, xmm3);
  TestImpl(xmm3, xmm4);
  TestImpl(xmm4, xmm5);
  TestImpl(xmm5, xmm6);
  TestImpl(xmm6, xmm7);
  TestImpl(xmm7, xmm0);

#undef TestImpl
#undef TestImplXmmAddr
#undef TestImplXmmXmm
}
1326
TEST_F(AssemblerX8632Test,Cvt)1327 TEST_F(AssemblerX8632Test, Cvt) {
1328 const Dqword dq2ps32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
1329 const Dqword dq2ps32SrcValue(-5, 3, 100, 200);
1330 const Dqword dq2ps32Expected(-5.0f, 3.0f, 100.0, 200.0);
1331
1332 const Dqword dq2ps64DstValue(0.0f, 0.0f, -1.0f, -1.0f);
1333 const Dqword dq2ps64SrcValue(-5, 3, 100, 200);
1334 const Dqword dq2ps64Expected(-5.0f, 3.0f, 100.0, 200.0);
1335
1336 const Dqword tps2dq32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
1337 const Dqword tps2dq32SrcValue(-5.0f, 3.0f, 100.0, 200.0);
1338 const Dqword tps2dq32Expected(-5, 3, 100, 200);
1339
1340 const Dqword tps2dq64DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
1341 const Dqword tps2dq64SrcValue(-5.0f, 3.0f, 100.0, 200.0);
1342 const Dqword tps2dq64Expected(-5, 3, 100, 200);
1343
1344 const Dqword si2ss32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
1345 const int32_t si2ss32SrcValue = 5;
1346 const Dqword si2ss32Expected(5.0f, -1.0f, -1.0f, -1.0f);
1347
1348 const Dqword si2ss64DstValue(-1.0, -1.0);
1349 const int32_t si2ss64SrcValue = 5;
1350 const Dqword si2ss64Expected(5.0, -1.0);
1351
1352 const int32_t tss2si32DstValue = 0xF00F0FF0;
1353 const Dqword tss2si32SrcValue(-5.0f, -1.0f, -1.0f, -1.0f);
1354 const int32_t tss2si32Expected = -5;
1355
1356 const int32_t tss2si64DstValue = 0xF00F0FF0;
1357 const Dqword tss2si64SrcValue(-5.0, -1.0);
1358 const int32_t tss2si64Expected = -5;
1359
1360 const Dqword float2float32DstValue(-1.0, -1.0);
1361 const Dqword float2float32SrcValue(-5.0, 3, 100, 200);
1362 const Dqword float2float32Expected(-5.0, -1.0);
1363
1364 const Dqword float2float64DstValue(-1.0, -1.0, -1.0, -1.0);
1365 const Dqword float2float64SrcValue(-5.0, 3.0);
1366 const Dqword float2float64Expected(-5.0, -1.0, -1.0, -1.0);
1367
1368 #define TestImplPXmmXmm(Dst, Src, Inst, Size) \
1369 do { \
1370 static constexpr char TestString[] = \
1371 "(" #Dst ", " #Src ", cvt" #Inst ", f" #Size ")"; \
1372 const uint32_t T0 = allocateDqword(); \
1373 const uint32_t T1 = allocateDqword(); \
1374 \
1375 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
1376 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \
1377 __ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, \
1378 XmmRegister::Encoded_Reg_##Src); \
1379 \
1380 AssembledTest test = assemble(); \
1381 test.setDqwordTo(T0, Inst##Size##DstValue); \
1382 test.setDqwordTo(T1, Inst##Size##SrcValue); \
1383 test.run(); \
1384 \
1385 ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString; \
1386 reset(); \
1387 } while (0)
1388
1389 #define TestImplSXmmReg(Dst, GPR, Inst, Size) \
1390 do { \
1391 static constexpr char TestString[] = \
1392 "(" #Dst ", " #GPR ", cvt" #Inst ", f" #Size ")"; \
1393 const uint32_t T0 = allocateDqword(); \
1394 \
1395 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
1396 __ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR, \
1397 Immediate(Inst##Size##SrcValue)); \
1398 __ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, IceType_i32, \
1399 GPRRegister::Encoded_Reg_##GPR); \
1400 \
1401 AssembledTest test = assemble(); \
1402 test.setDqwordTo(T0, Inst##Size##DstValue); \
1403 test.run(); \
1404 \
1405 ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString; \
1406 reset(); \
1407 } while (0)
1408
1409 #define TestImplSRegXmm(GPR, Src, Inst, Size) \
1410 do { \
1411 static constexpr char TestString[] = \
1412 "(" #GPR ", " #Src ", cvt" #Inst ", f" #Size ")"; \
1413 const uint32_t T0 = allocateDqword(); \
1414 \
1415 __ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR, \
1416 Immediate(Inst##Size##DstValue)); \
1417 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T0)); \
1418 __ cvt##Inst(IceType_i32, GPRRegister::Encoded_Reg_##GPR, IceType_f##Size, \
1419 XmmRegister::Encoded_Reg_##Src); \
1420 \
1421 AssembledTest test = assemble(); \
1422 test.setDqwordTo(T0, Inst##Size##SrcValue); \
1423 test.run(); \
1424 \
1425 ASSERT_EQ(static_cast<uint32_t>(Inst##Size##Expected), test.GPR()) \
1426 << TestString; \
1427 reset(); \
1428 } while (0)
1429
1430 #define TestImplPXmmAddr(Dst, Inst, Size) \
1431 do { \
1432 static constexpr char TestString[] = \
1433 "(" #Dst ", Addr, cvt" #Inst ", f" #Size ")"; \
1434 const uint32_t T0 = allocateDqword(); \
1435 const uint32_t T1 = allocateDqword(); \
1436 \
1437 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
1438 __ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, \
1439 dwordAddress(T1)); \
1440 \
1441 AssembledTest test = assemble(); \
1442 test.setDqwordTo(T0, Inst##Size##DstValue); \
1443 test.setDqwordTo(T1, Inst##Size##SrcValue); \
1444 test.run(); \
1445 \
1446 ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString; \
1447 reset(); \
1448 } while (0)
1449
1450 #define TestImplSXmmAddr(Dst, Inst, Size) \
1451 do { \
1452 static constexpr char TestString[] = \
1453 "(" #Dst ", Addr, cvt" #Inst ", f" #Size ")"; \
1454 const uint32_t T0 = allocateDqword(); \
1455 const uint32_t T1 = allocateDword(); \
1456 \
1457 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
1458 __ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, IceType_i32, \
1459 dwordAddress(T1)); \
1460 \
1461 AssembledTest test = assemble(); \
1462 test.setDqwordTo(T0, Inst##Size##DstValue); \
1463 test.setDwordTo(T1, Inst##Size##SrcValue); \
1464 test.run(); \
1465 \
1466 ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString; \
1467 reset(); \
1468 } while (0)
1469
1470 #define TestImplSRegAddr(GPR, Inst, Size) \
1471 do { \
1472 static constexpr char TestString[] = \
1473 "(" #GPR ", Addr, cvt" #Inst ", f" #Size ")"; \
1474 const uint32_t T0 = allocateDqword(); \
1475 \
1476 __ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR, \
1477 Immediate(Inst##Size##DstValue)); \
1478 __ cvt##Inst(IceType_i32, GPRRegister::Encoded_Reg_##GPR, IceType_f##Size, \
1479 dwordAddress(T0)); \
1480 \
1481 AssembledTest test = assemble(); \
1482 test.setDqwordTo(T0, Inst##Size##SrcValue); \
1483 test.run(); \
1484 \
1485 ASSERT_EQ(static_cast<uint32_t>(Inst##Size##Expected), test.GPR()) \
1486 << TestString; \
1487 reset(); \
1488 } while (0)
1489
1490 #define TestImplSize(Dst, Src, GPR, Size) \
1491 do { \
1492 TestImplPXmmXmm(Dst, Src, dq2ps, Size); \
1493 TestImplPXmmAddr(Src, dq2ps, Size); \
1494 TestImplPXmmXmm(Dst, Src, tps2dq, Size); \
1495 TestImplPXmmAddr(Src, tps2dq, Size); \
1496 TestImplSXmmReg(Dst, GPR, si2ss, Size); \
1497 TestImplSXmmAddr(Dst, si2ss, Size); \
1498 TestImplSRegXmm(GPR, Src, tss2si, Size); \
1499 TestImplSRegAddr(GPR, tss2si, Size); \
1500 TestImplPXmmXmm(Dst, Src, float2float, Size); \
1501 TestImplPXmmAddr(Src, float2float, Size); \
1502 } while (0)
1503
1504 #define TestImpl(Dst, Src, GPR) \
1505 do { \
1506 TestImplSize(Dst, Src, GPR, 32); \
1507 TestImplSize(Dst, Src, GPR, 64); \
1508 } while (0)
1509
1510 TestImpl(xmm0, xmm1, eax);
1511 TestImpl(xmm1, xmm2, ebx);
1512 TestImpl(xmm2, xmm3, ecx);
1513 TestImpl(xmm3, xmm4, edx);
1514 TestImpl(xmm4, xmm5, esi);
1515 TestImpl(xmm5, xmm6, edi);
1516 TestImpl(xmm6, xmm7, eax);
1517 TestImpl(xmm7, xmm0, ebx);
1518
1519 #undef TestImpl
1520 #undef TestImplSize
1521 #undef TestImplSRegAddr
1522 #undef TestImplSXmmAddr
1523 #undef TestImplPXmmAddr
1524 #undef TestImplSRegXmm
1525 #undef TestImplSXmmReg
1526 #undef TestImplPXmmXmm
1527 }
1528
TEST_F(AssemblerX8632Test,Ucomiss)1529 TEST_F(AssemblerX8632Test, Ucomiss) {
1530 static constexpr float qnan32 = std::numeric_limits<float>::quiet_NaN();
1531 static constexpr double qnan64 = std::numeric_limits<float>::quiet_NaN();
1532
1533 Dqword test32DstValue(0.0, qnan32, qnan32, qnan32);
1534 Dqword test32SrcValue(0.0, qnan32, qnan32, qnan32);
1535
1536 Dqword test64DstValue(0.0, qnan64);
1537 Dqword test64SrcValue(0.0, qnan64);
1538
1539 #define TestImplXmmXmm(Dst, Value0, Src, Value1, Size, CompType, BParity, \
1540 BOther) \
1541 do { \
1542 static constexpr char NearBranch = AssemblerX8632::kNearJump; \
1543 static constexpr char TestString[] = \
1544 "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Size ", " #CompType \
1545 ", " #BParity ", " #BOther ")"; \
1546 const uint32_t T0 = allocateDqword(); \
1547 test##Size##DstValue.F##Size[0] = Value0; \
1548 const uint32_t T1 = allocateDqword(); \
1549 test##Size##SrcValue.F##Size[0] = Value1; \
1550 const uint32_t ImmIfTrue = 0xBEEF; \
1551 const uint32_t ImmIfFalse = 0xC0FFE; \
1552 \
1553 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
1554 __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \
1555 __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfFalse)); \
1556 __ ucomiss(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, \
1557 XmmRegister::Encoded_Reg_##Src); \
1558 Label Done; \
1559 __ j(Cond::Br_##BParity, &Done, NearBranch); \
1560 __ j(Cond::Br_##BOther, &Done, NearBranch); \
1561 __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfTrue)); \
1562 __ bind(&Done); \
1563 \
1564 AssembledTest test = assemble(); \
1565 test.setDqwordTo(T0, test##Size##DstValue); \
1566 test.setDqwordTo(T1, test##Size##SrcValue); \
1567 test.run(); \
1568 \
1569 ASSERT_EQ(ImmIfTrue, test.eax()) << TestString; \
1570 reset(); \
1571 } while (0)
1572
1573 #define TestImplXmmAddr(Dst, Value0, Value1, Size, CompType, BParity, BOther) \
1574 do { \
1575 static constexpr char NearBranch = AssemblerX8632::kNearJump; \
1576 static constexpr char TestString[] = \
1577 "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Size ", " #CompType \
1578 ", " #BParity ", " #BOther ")"; \
1579 const uint32_t T0 = allocateDqword(); \
1580 test##Size##DstValue.F##Size[0] = Value0; \
1581 const uint32_t T1 = allocateDqword(); \
1582 test##Size##SrcValue.F##Size[0] = Value1; \
1583 const uint32_t ImmIfTrue = 0xBEEF; \
1584 const uint32_t ImmIfFalse = 0xC0FFE; \
1585 \
1586 __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
1587 __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfFalse)); \
1588 __ ucomiss(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, \
1589 dwordAddress(T1)); \
1590 Label Done; \
1591 __ j(Cond::Br_##BParity, &Done, NearBranch); \
1592 __ j(Cond::Br_##BOther, &Done, NearBranch); \
1593 __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfTrue)); \
1594 __ bind(&Done); \
1595 \
1596 AssembledTest test = assemble(); \
1597 test.setDqwordTo(T0, test##Size##DstValue); \
1598 test.setDqwordTo(T1, test##Size##SrcValue); \
1599 test.run(); \
1600 \
1601 ASSERT_EQ(ImmIfTrue, test.eax()) << TestString; \
1602 reset(); \
1603 } while (0)
1604
1605 #define TestImplCond(Dst, Value0, Src, Value1, Size, CompType, BParity, \
1606 BOther) \
1607 do { \
1608 TestImplXmmXmm(Dst, Value0, Src, Value1, Size, CompType, BParity, BOther); \
1609 TestImplXmmAddr(Dst, Value0, Value1, Size, CompType, BParity, BOther); \
1610 } while (0)
1611
1612 #define TestImplSize(Dst, Src, Size) \
1613 do { \
1614 TestImplCond(Dst, 1.0, Src, 1.0, Size, isEq, p, ne); \
1615 TestImplCond(Dst, 1.0, Src, 2.0, Size, isNe, p, e); \
1616 TestImplCond(Dst, 1.0, Src, 2.0, Size, isLe, p, a); \
1617 TestImplCond(Dst, 1.0, Src, 1.0, Size, isLe, p, a); \
1618 TestImplCond(Dst, 1.0, Src, 2.0, Size, isLt, p, ae); \
1619 TestImplCond(Dst, 2.0, Src, 1.0, Size, isGe, p, b); \
1620 TestImplCond(Dst, 1.0, Src, 1.0, Size, isGe, p, b); \
1621 TestImplCond(Dst, 2.0, Src, 1.0, Size, isGt, p, be); \
1622 TestImplCond(Dst, qnan##Size, Src, 1.0, Size, isUnord, np, o); \
1623 TestImplCond(Dst, 1.0, Src, qnan##Size, Size, isUnord, np, s); \
1624 TestImplCond(Dst, qnan##Size, Src, qnan##Size, Size, isUnord, np, s); \
1625 } while (0)
1626
1627 #define TestImpl(Dst, Src) \
1628 do { \
1629 TestImplSize(Dst, Src, 32); \
1630 TestImplSize(Dst, Src, 64); \
1631 } while (0)
1632
1633 TestImpl(xmm0, xmm1);
1634 TestImpl(xmm1, xmm2);
1635 TestImpl(xmm2, xmm3);
1636 TestImpl(xmm3, xmm4);
1637 TestImpl(xmm4, xmm5);
1638 TestImpl(xmm5, xmm6);
1639 TestImpl(xmm6, xmm7);
1640 TestImpl(xmm7, xmm0);
1641
1642 #undef TestImpl
1643 #undef TestImplSize
1644 #undef TestImplCond
1645 #undef TestImplXmmAddr
1646 #undef TestImplXmmXmm
1647 }
1648
// Verifies the scalar sqrt instruction for f32 and f64: only element 0 of the
// destination may change, and it must hold the square root of element 0 of
// the source operand.
TEST_F(AssemblerX8632Test, Sqrtss) {
  // Source images; element 0 is overwritten with the radicand of each case.
  Dqword Radicand32(-100.0, -100.0, -100.0, -100.0);
  Dqword Radicand64(-100.0, -100.0);

  // Destination images; these are never modified and serve as the baseline
  // for the expected result.
  Dqword DstFill32(-1.0, -1.0, -1.0, -1.0);
  Dqword DstFill64(-1.0, -1.0);

// Register-source form: SrcReg holds the radicand image, DstReg the baseline.
#define TestSqrtssXmmXmm(DstReg, SrcReg, Input, Root, Bits) \
  do { \
    static constexpr char TestString[] = \
        "(" #DstReg ", " #SrcReg ", " #Input ", " #Root ", " #Bits ")"; \
    const uint32_t RadicandSlot = allocateDqword(); \
    const uint32_t DstSlot = allocateDqword(); \
    Radicand##Bits.F##Bits[0] = Input; \
    \
    __ movups(XmmRegister::Encoded_Reg_##SrcReg, dwordAddress(RadicandSlot)); \
    __ movups(XmmRegister::Encoded_Reg_##DstReg, dwordAddress(DstSlot)); \
    __ sqrt(IceType_f##Bits, XmmRegister::Encoded_Reg_##DstReg, \
            XmmRegister::Encoded_Reg_##SrcReg); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(RadicandSlot, Radicand##Bits); \
    test.setDqwordTo(DstSlot, DstFill##Bits); \
    test.run(); \
    \
    Dqword Want = DstFill##Bits; \
    Want.F##Bits[0] = Root; \
    ASSERT_EQ(Want, test.DstReg<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Memory-source form: the radicand is read straight from its slot.
#define TestSqrtssXmmAddr(DstReg, Input, Root, Bits) \
  do { \
    static constexpr char TestString[] = \
        "(" #DstReg ", Addr, " #Input ", " #Root ", " #Bits ")"; \
    const uint32_t RadicandSlot = allocateDqword(); \
    const uint32_t DstSlot = allocateDqword(); \
    Radicand##Bits.F##Bits[0] = Input; \
    \
    __ movups(XmmRegister::Encoded_Reg_##DstReg, dwordAddress(DstSlot)); \
    __ sqrt(IceType_f##Bits, XmmRegister::Encoded_Reg_##DstReg, \
            dwordAddress(RadicandSlot)); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(RadicandSlot, Radicand##Bits); \
    test.setDqwordTo(DstSlot, DstFill##Bits); \
    test.run(); \
    \
    Dqword Want = DstFill##Bits; \
    Want.F##Bits[0] = Root; \
    ASSERT_EQ(Want, test.DstReg<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Three perfect squares, each in both operand forms, for one operand size.
#define TestSqrtssSize(DstReg, SrcReg, Bits) \
  do { \
    TestSqrtssXmmXmm(DstReg, SrcReg, 4.0, 2.0, Bits); \
    TestSqrtssXmmAddr(DstReg, 4.0, 2.0, Bits); \
    TestSqrtssXmmXmm(DstReg, SrcReg, 9.0, 3.0, Bits); \
    TestSqrtssXmmAddr(DstReg, 9.0, 3.0, Bits); \
    TestSqrtssXmmXmm(DstReg, SrcReg, 100.0, 10.0, Bits); \
    TestSqrtssXmmAddr(DstReg, 100.0, 10.0, Bits); \
  } while (0)

// Runs the f32 and then the f64 cases for one register pair.
#define TestSqrtss(DstReg, SrcReg) \
  do { \
    TestSqrtssSize(DstReg, SrcReg, 32); \
    TestSqrtssSize(DstReg, SrcReg, 64); \
  } while (0)

  TestSqrtss(xmm0, xmm1);
  TestSqrtss(xmm1, xmm2);
  TestSqrtss(xmm2, xmm3);
  TestSqrtss(xmm3, xmm4);
  TestSqrtss(xmm4, xmm5);
  TestSqrtss(xmm5, xmm6);
  TestSqrtss(xmm6, xmm7);
  TestSqrtss(xmm7, xmm0);

#undef TestSqrtss
#undef TestSqrtssSize
#undef TestSqrtssXmmAddr
#undef TestSqrtssXmmXmm
}
1733
// Verifies insertps with an immediate control byte, with the source operand
// supplied either in a register or in memory.
TEST_F(AssemblerX8632Test, Insertps) {
// Register-source form. Value0, Value1 and Expected are parenthesised Dqword
// constructor argument lists: the initial destination, the source, and the
// destination expected after "insertps Dst, Src, Imm".
#define TestInsertpsXmmXmmImm(Dst, Value0, Src, Value1, Imm, Expected)         \
  do {                                                                         \
    static constexpr char TestString[] =                                       \
        "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Imm ", " #Expected  \
        ")";                                                                   \
    const uint32_t T0 = allocateDqword();                                      \
    const Dqword V0 Value0;                                                    \
    const uint32_t T1 = allocateDqword();                                      \
    const Dqword V1 Value1;                                                    \
                                                                               \
    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
    __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
    __ insertps(IceType_v4f32, XmmRegister::Encoded_Reg_##Dst,                 \
                XmmRegister::Encoded_Reg_##Src, Immediate(Imm));               \
                                                                               \
    AssembledTest test = assemble();                                           \
    test.setDqwordTo(T0, V0);                                                  \
    test.setDqwordTo(T1, V1);                                                  \
    test.run();                                                                \
                                                                               \
    ASSERT_EQ(Dqword Expected, test.Dst<Dqword>()) << TestString;              \
    reset();                                                                   \
  } while (0)

// Memory-source form: same check with the source read from slot T1.
#define TestInsertpsXmmAddrImm(Dst, Value0, Value1, Imm, Expected)             \
  do {                                                                         \
    static constexpr char TestString[] =                                       \
        "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Imm ", " #Expected ")"; \
    const uint32_t T0 = allocateDqword();                                      \
    const Dqword V0 Value0;                                                    \
    const uint32_t T1 = allocateDqword();                                      \
    const Dqword V1 Value1;                                                    \
                                                                               \
    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
    __ insertps(IceType_v4f32, XmmRegister::Encoded_Reg_##Dst,                 \
                dwordAddress(T1), Immediate(Imm));                             \
                                                                               \
    AssembledTest test = assemble();                                           \
    test.setDqwordTo(T0, V0);                                                  \
    test.setDqwordTo(T1, V1);                                                  \
    test.run();                                                                \
                                                                               \
    ASSERT_EQ(Dqword Expected, test.Dst<Dqword>()) << TestString;              \
    reset();                                                                   \
  } while (0)

// Two control bytes (0x99 and 0x9D), each in both operand forms. The
// destination starts as all ones. Note the expected inserted element differs
// between the forms: 0xDDDDDDDD for a register source, 0xBBBBBBBB (the lowest
// dword of the source image) for a memory source.
#define TestInsertps(Dst, Src)                                                 \
  do {                                                                         \
    TestInsertpsXmmXmmImm(                                                     \
        Dst, (uint64_t(-1), uint64_t(-1)), Src,                                \
        (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)),    \
        0x99,                                                                  \
        (uint64_t(0xDDDDDDDD00000000ull), uint64_t(0x00000000FFFFFFFFull)));   \
    TestInsertpsXmmAddrImm(                                                    \
        Dst, (uint64_t(-1), uint64_t(-1)),                                     \
        (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)),    \
        0x99,                                                                  \
        (uint64_t(0xBBBBBBBB00000000ull), uint64_t(0x00000000FFFFFFFFull)));   \
    TestInsertpsXmmXmmImm(                                                     \
        Dst, (uint64_t(-1), uint64_t(-1)), Src,                                \
        (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)),    \
        0x9D,                                                                  \
        (uint64_t(0xDDDDDDDD00000000ull), uint64_t(0x0000000000000000ull)));   \
    TestInsertpsXmmAddrImm(                                                    \
        Dst, (uint64_t(-1), uint64_t(-1)),                                     \
        (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)),    \
        0x9D,                                                                  \
        (uint64_t(0xBBBBBBBB00000000ull), uint64_t(0x0000000000000000ull)));   \
  } while (0)

  TestInsertps(xmm0, xmm1);
  TestInsertps(xmm1, xmm2);
  TestInsertps(xmm2, xmm3);
  TestInsertps(xmm3, xmm4);
  TestInsertps(xmm4, xmm5);
  TestInsertps(xmm5, xmm6);
  TestInsertps(xmm6, xmm7);
  TestInsertps(xmm7, xmm0);

  // Undefine exactly the macros defined above so none of them leaks into the
  // tests that follow.
#undef TestInsertps
#undef TestInsertpsXmmAddrImm
#undef TestInsertpsXmmXmmImm
}
1818
// Checks pinsr for 8-, 16- and 32-bit elements. One element of an XMM
// register, chosen by the immediate, is overwritten with a value taken either
// from a general-purpose register or from memory; every other element must
// keep its initial contents.
TEST_F(AssemblerX8632Test, Pinsr) {
  // Element-index masks, selected by element width through Mask##Size.
  static constexpr uint8_t Mask8 = 0x0F;
  static constexpr uint8_t Mask16 = 0x07;
  static constexpr uint8_t Mask32 = 0x03;

// GPR-source form. Value0 is a parenthesised (uint64_t, uint64_t) pair used as
// Dqword constructor arguments; Value1 is first moved into GPR.
#define TestPinsrXmmGPRImm(Dst, Value0, GPR, Value1, Imm, Size) \
  do { \
    static constexpr char TestString[] = \
        "(" #Dst ", " #Value0 ", " #GPR ", " #Value1 ", " #Imm ", " #Size ")"; \
    const uint32_t InitialAddr = allocateDqword(); \
    const Dqword Initial Value0; \
    \
    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(InitialAddr)); \
    __ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR, Immediate(Value1)); \
    __ pinsr(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst, \
             GPRRegister::Encoded_Reg_##GPR, Immediate(Imm)); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(InitialAddr, Initial); \
    test.run(); \
    \
    /* Model the instruction: patch only the selected element. */ \
    constexpr uint8_t Lane = (Imm) & Mask##Size; \
    Dqword Want = Initial; \
    Want.U##Size[Lane] = Value1; \
    ASSERT_EQ(Want, test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Memory-source form. Value1 is stored in a scratch dword that pinsr reads.
#define TestPinsrXmmAddrImm(Dst, Value0, Value1, Imm, Size) \
  do { \
    static constexpr char TestString[] = \
        "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Imm ", " #Size ")"; \
    const uint32_t InitialAddr = allocateDqword(); \
    const Dqword Initial Value0; \
    const uint32_t InsertedAddr = allocateDword(); \
    const uint32_t Inserted = Value1; \
    \
    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(InitialAddr)); \
    __ pinsr(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst, \
             dwordAddress(InsertedAddr), Immediate(Imm)); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(InitialAddr, Initial); \
    test.setDwordTo(InsertedAddr, Inserted); \
    test.run(); \
    \
    /* Model the instruction: patch only the selected element. */ \
    constexpr uint8_t Lane = (Imm) & Mask##Size; \
    Dqword Want = Initial; \
    Want.U##Size[Lane] = Inserted; \
    ASSERT_EQ(Want, test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Runs both operand forms for one element width, from a fixed initial vector.
#define TestPinsrSize(Dst, GPR, Value1, Imm, Size) \
  do { \
    TestPinsrXmmGPRImm( \
        Dst, \
        (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xFFFFFFFFDDDDDDDDull)), \
        GPR, Value1, Imm, Size); \
    TestPinsrXmmAddrImm( \
        Dst, \
        (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xFFFFFFFFDDDDDDDDull)), \
        Value1, Imm, Size); \
  } while (0)

// Covers every element width for one (XMM destination, GPR source) pair.
#define TestPinsr(Dst, GPR) \
  do { \
    TestPinsrSize(Dst, GPR, 0xEE, 0x03, 8); \
    TestPinsrSize(Dst, GPR, 0xFFEE, 0x03, 16); \
    TestPinsrSize(Dst, GPR, 0xC0FFEE, 0x03, 32); \
  } while (0)

  TestPinsr(xmm0, eax);
  TestPinsr(xmm1, ebx);
  TestPinsr(xmm2, ecx);
  TestPinsr(xmm3, edx);
  TestPinsr(xmm4, esi);
  TestPinsr(xmm5, edi);
  TestPinsr(xmm6, eax);
  TestPinsr(xmm7, ebx);

#undef TestPinsr
#undef TestPinsrSize
#undef TestPinsrXmmAddrImm
#undef TestPinsrXmmGPRImm
}
1903
// Checks pextr for 8-, 16- and 32-bit elements: the element of an XMM register
// selected by the immediate is extracted into a general-purpose register, and
// the GPR must then equal that element of the initial vector.
TEST_F(AssemblerX8632Test, Pextr) {
  // Element-index masks, selected by element width through Mask##Size.
  static constexpr uint8_t Mask32 = 0x03;
  static constexpr uint8_t Mask16 = 0x07;
  static constexpr uint8_t Mask8 = 0x0F;

// Value1 is a parenthesised (uint64_t, uint64_t) pair used as the Dqword
// constructor arguments for the source vector.
#define TestPextrGPRXmmImm(GPR, Src, Value1, Imm, Size) \
  do { \
    static constexpr char TestString[] = \
        "(" #GPR ", " #Src ", " #Value1 ", " #Imm ", " #Size ")"; \
    const uint32_t T0 = allocateDqword(); \
    const Dqword V0 Value1; \
    \
    __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T0)); \
    __ pextr(IceType_i##Size, GPRRegister::Encoded_Reg_##GPR, \
             XmmRegister::Encoded_Reg_##Src, Immediate(Imm)); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(T0, V0); \
    test.run(); \
    \
    constexpr uint8_t sel = (Imm) & Mask##Size; \
    ASSERT_EQ(V0.U##Size[sel], test.GPR()) << TestString; \
    reset(); \
  } while (0)

// Runs one element width against a fixed source vector. Extraction needs no
// scalar input value, so only the selector and the width are parameters.
#define TestPextrSize(GPR, Src, Imm, Size) \
  do { \
    TestPextrGPRXmmImm( \
        GPR, Src, \
        (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xFFFFFFFFDDDDDDDDull)), \
        Imm, Size); \
  } while (0)

// Covers every element width for one (GPR destination, XMM source) pair.
#define TestPextr(GPR, Src) \
  do { \
    TestPextrSize(GPR, Src, 0x03, 8); \
    TestPextrSize(GPR, Src, 0x03, 16); \
    TestPextrSize(GPR, Src, 0x03, 32); \
  } while (0)

  TestPextr(eax, xmm0);
  TestPextr(ebx, xmm1);
  TestPextr(ecx, xmm2);
  TestPextr(edx, xmm3);
  TestPextr(esi, xmm4);
  TestPextr(edi, xmm5);
  TestPextr(eax, xmm6);
  TestPextr(ebx, xmm7);

// Every helper macro defined above must be removed here so that none of them
// leaks into the tests that follow.
#undef TestPextr
#undef TestPextrSize
#undef TestPextrGPRXmmImm
}
1956
// Verifies the packed integer comparisons pcmpeq and pcmpgt for 8-, 16- and
// 32-bit elements, with the second operand in a register or in memory.
//
// The expected vector is computed element by element: an element is all ones
// (-1) when `Src Op Dst` holds and zero otherwise. Op is `==` for pcmpeq and
// `<` for pcmpgt (Src < Dst, i.e. Dst > Src), evaluated on the signed
// I##Size view of the operands.
TEST_F(AssemblerX8632Test, Pcmpeq_Pcmpgt) {
// Register-source form: Inst Dst, Src.
#define TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, Inst, Op) \
  do { \
    static constexpr char TestString[] = \
        "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Size ", " #Op ")"; \
    const uint32_t T0 = allocateDqword(); \
    const Dqword V0 Value0; \
    const uint32_t T1 = allocateDqword(); \
    const Dqword V1 Value1; \
    \
    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
    __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1)); \
    __ Inst(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst, \
            XmmRegister::Encoded_Reg_##Src); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(T0, V0); \
    test.setDqwordTo(T1, V1); \
    test.run(); \
    \
    Dqword Expected(uint64_t(0), uint64_t(0)); \
    static constexpr uint8_t ArraySize = \
        sizeof(Dqword) / sizeof(uint##Size##_t); \
    for (uint8_t i = 0; i < ArraySize; ++i) { \
      Expected.I##Size[i] = (V1.I##Size[i] Op V0.I##Size[i]) ? -1 : 0; \
    } \
    ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Memory-source form: Inst Dst, [T1].
#define TestPcmpXmmAddr(Dst, Value0, Value1, Size, Inst, Op) \
  do { \
    static constexpr char TestString[] = \
        "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Size ", " #Op ")"; \
    const uint32_t T0 = allocateDqword(); \
    const Dqword V0 Value0; \
    const uint32_t T1 = allocateDqword(); \
    const Dqword V1 Value1; \
    \
    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0)); \
    __ Inst(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst, \
            dwordAddress(T1)); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(T0, V0); \
    test.setDqwordTo(T1, V1); \
    test.run(); \
    \
    Dqword Expected(uint64_t(0), uint64_t(0)); \
    static constexpr uint8_t ArraySize = \
        sizeof(Dqword) / sizeof(uint##Size##_t); \
    for (uint8_t i = 0; i < ArraySize; ++i) { \
      Expected.I##Size[i] = (V1.I##Size[i] Op V0.I##Size[i]) ? -1 : 0; \
    } \
    ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Runs both instructions, in both operand forms, on one pair of input vectors.
#define TestPcmpValues(Dst, Value0, Src, Value1, Size) \
  do { \
    TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, pcmpeq, ==); \
    TestPcmpXmmAddr(Dst, Value0, Value1, Size, pcmpeq, ==); \
    TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, pcmpgt, <); \
    TestPcmpXmmAddr(Dst, Value0, Value1, Size, pcmpgt, <); \
  } while (0)

// Runs two fixed input-vector pairs for one element width.
#define TestPcmpSize(Dst, Src, Size) \
  do { \
    TestPcmpValues( \
        Dst, \
        (uint64_t(0x8888888888888888ull), uint64_t(0x0000000000000000ull)), \
        Src, \
        (uint64_t(0x0000008800008800ull), uint64_t(0xFFFFFFFFFFFFFFFFull)), \
        Size); \
    TestPcmpValues( \
        Dst, \
        (uint64_t(0x123567ABAB55DE01ull), uint64_t(0x12345abcde12345Aull)), \
        Src, \
        (uint64_t(0x0000008800008800ull), uint64_t(0xAABBCCDD1234321Aull)), \
        Size); \
  } while (0)

// Covers every element width for one register pair. Dst and Src are forwarded
// so that each invocation below exercises its own register encoding.
#define TestPcmp(Dst, Src) \
  do { \
    TestPcmpSize(Dst, Src, 8); \
    TestPcmpSize(Dst, Src, 16); \
    TestPcmpSize(Dst, Src, 32); \
  } while (0)

  TestPcmp(xmm0, xmm1);
  TestPcmp(xmm1, xmm2);
  TestPcmp(xmm2, xmm3);
  TestPcmp(xmm3, xmm4);
  TestPcmp(xmm4, xmm5);
  TestPcmp(xmm5, xmm6);
  TestPcmp(xmm6, xmm7);
  TestPcmp(xmm7, xmm0);

#undef TestPcmp
#undef TestPcmpSize
#undef TestPcmpValues
#undef TestPcmpXmmAddr
#undef TestPcmpXmmXmm
}
2059
// Checks the f64 form of round under each rounding mode. The first double of
// Dst must become the rounded first double of Src, while the second double of
// Dst keeps its initial value (-3.0) and the second double of Src (-123.4) is
// ignored.
TEST_F(AssemblerX8632Test, Roundsd) {
// Mode names an AssemblerX8632::k<Mode> constant; RN is the expected result of
// rounding Input under that mode.
#define TestRoundsdXmmXmm(Dst, Src, Mode, Input, RN) \
  do { \
    static constexpr char TestString[] = \
        "(" #Dst ", " #Src ", " #Mode ", " #Input ", " #RN ")"; \
    const uint32_t DstAddr = allocateDqword(); \
    const uint32_t SrcAddr = allocateDqword(); \
    const Dqword DstInit(-3.0, -3.0); \
    const Dqword SrcInit(static_cast<double>(Input), -123.4); \
    const Dqword Want(static_cast<double>(RN), -3.0); \
    \
    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(DstAddr)); \
    __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(SrcAddr)); \
    __ round(IceType_f64, XmmRegister::Encoded_Reg_##Dst, \
             XmmRegister::Encoded_Reg_##Src, \
             Immediate(AssemblerX8632::k##Mode)); \
    \
    AssembledTest test = assemble(); \
    test.setDqwordTo(DstAddr, DstInit); \
    test.setDqwordTo(SrcAddr, SrcInit); \
    test.run(); \
    \
    EXPECT_EQ(Want, test.Dst<Dqword>()) << TestString; \
    reset(); \
  } while (0)

// Exercises all four rounding modes for one register pair.
#define TestRoundsd(Dst, Src) \
  do { \
    TestRoundsdXmmXmm(Dst, Src, RoundToNearest, 5.51, 6); \
    TestRoundsdXmmXmm(Dst, Src, RoundToNearest, 5.49, 5); \
    TestRoundsdXmmXmm(Dst, Src, RoundDown, 5.51, 5); \
    TestRoundsdXmmXmm(Dst, Src, RoundUp, 5.49, 6); \
    TestRoundsdXmmXmm(Dst, Src, RoundToZero, 5.49, 5); \
    TestRoundsdXmmXmm(Dst, Src, RoundToZero, 5.51, 5); \
  } while (0)

  TestRoundsd(xmm0, xmm1);
  TestRoundsd(xmm1, xmm2);
  TestRoundsd(xmm2, xmm3);
  TestRoundsd(xmm3, xmm4);
  TestRoundsd(xmm4, xmm5);
  TestRoundsd(xmm5, xmm6);
  TestRoundsd(xmm6, xmm7);
  TestRoundsd(xmm7, xmm0);

#undef TestRoundsd
#undef TestRoundsdXmmXmm
}
2108
// Checks set1ps: after the emitted sequence runs, the XMM register must hold
// the immediate replicated in each of its four 32-bit lanes.
TEST_F(AssemblerX8632Test, Set1ps) {
// Src is the GPR handed to set1ps alongside the immediate (presumably used as
// a scratch register by the emitted sequence; confirm against the assembler).
#define TestImpl(Xmm, Src, Imm) \
  do { \
    __ set1ps(XmmRegister::Encoded_Reg_##Xmm, GPRRegister::Encoded_Reg_##Src, \
              Immediate(Imm)); \
    \
    AssembledTest test = assemble(); \
    test.run(); \
    \
    /* Two copies of Imm packed into one 64-bit half of the vector. */ \
    const uint64_t Half = (uint64_t(Imm) << 32) | uint32_t(Imm); \
    const Dqword Want(Half, Half); \
    ASSERT_EQ(Want, test.Xmm<Dqword>()) << "(" #Xmm ", " #Src ", " #Imm ")"; \
    reset(); \
  } while (0)

  TestImpl(xmm0, ebx, 1);
  TestImpl(xmm1, ecx, 2);
  TestImpl(xmm2, edx, 3);
  TestImpl(xmm3, esi, 4);
  TestImpl(xmm4, edi, 5);
  TestImpl(xmm5, eax, 6);
  TestImpl(xmm6, ebx, 7);
  TestImpl(xmm7, ecx, 8);

#undef TestImpl
}
2136
2137 } // end of anonymous namespace
2138 } // end of namespace Test
2139 } // end of namespace X8632
2140 } // end of namespace Ice
2141