• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //===- subzero/unittest/AssemblerX8632/XmmArith.cpp -----------------------===//
2 //
3 //                        The Subzero Code Generator
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 #include "AssemblerX8632/TestUtil.h"
10 
11 namespace Ice {
12 namespace X8632 {
13 namespace Test {
14 namespace {
15 
// Exercises the scalar floating-point instructions addss, subss, mulss and
// divss for both f32 and f64, with the second operand coming either from an
// xmm register or from memory. Each case assembles a tiny program, runs it,
// and compares the value read back through test.<Dst><Type>() against the same
// operation evaluated in C++ (V0 Op V1).
TEST_F(AssemblerX8632Test,ArithSS)16 TEST_F(AssemblerX8632Test, ArithSS) {
// Register/register form.
//   FloatSize : 32 or 64 (enforced by the static_assert); selects float/double
//               and is pasted into IceType_f##FloatSize.
//   Src, Dst  : xmm register names, pasted into XmmRegister::Encoded_Reg_##...
//   Value0    : stored at T0 and loaded into Dst (note: Dst, not Src, despite
//               the parameter order).
//   Value1    : stored at T1 and loaded into Src.
//   Inst, Op  : assembler method under test and the C++ operator it should
//               match.
// Emitted sequence: movss Dst, [T0]; movss Src, [T1]; Inst Dst, Src.
// The memory slots are filled in after assemble() and before run(), as a
// qword for f64 and a dword for f32. TestString is the stringified argument
// list and is streamed into the assertion so a failure names the exact case.
// NOTE(review): ASSERT_DOUBLE_EQ is used for FloatSize == 32 as well -- confirm
// that is intended rather than ASSERT_FLOAT_EQ.
// reset() ends every expansion; presumably it clears the assembler state for
// the next case -- TODO confirm against the test fixture.
17 #define TestArithSSXmmXmm(FloatSize, Src, Value0, Dst, Value1, Inst, Op)       \
18   do {                                                                         \
19     static_assert(FloatSize == 32 || FloatSize == 64,                          \
20                   "Invalid fp size " #FloatSize);                              \
21     static constexpr char TestString[] =                                       \
22         "(" #FloatSize ", " #Src ", " #Value0 ", " #Dst ", " #Value1           \
23         ", " #Inst ", " #Op ")";                                               \
24     static constexpr bool IsDouble = FloatSize == 64;                          \
25     using Type = std::conditional<IsDouble, double, float>::type;              \
26     const uint32_t T0 = allocateQword();                                       \
27     const Type V0 = Value0;                                                    \
28     const uint32_t T1 = allocateQword();                                       \
29     const Type V1 = Value1;                                                    \
30                                                                                \
31     __ movss(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,             \
32              dwordAddress(T0));                                                \
33     __ movss(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Src,             \
34              dwordAddress(T1));                                                \
35     __ Inst(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,              \
36             XmmRegister::Encoded_Reg_##Src);                                   \
37                                                                                \
38     AssembledTest test = assemble();                                           \
39     if (IsDouble) {                                                            \
40       test.setQwordTo(T0, static_cast<double>(V0));                            \
41       test.setQwordTo(T1, static_cast<double>(V1));                            \
42     } else {                                                                   \
43       test.setDwordTo(T0, static_cast<float>(V0));                             \
44       test.setDwordTo(T1, static_cast<float>(V1));                             \
45     }                                                                          \
46                                                                                \
47     test.run();                                                                \
48                                                                                \
49     ASSERT_DOUBLE_EQ(V0 Op V1, test.Dst<Type>()) << TestString;                \
50     reset();                                                                   \
51   } while (0)
52 
// Register/memory form. Same flow as the register/register macro, except that
// no Src register is involved: Dst is loaded from [T0] (Value0) and Inst takes
// dwordAddress(T1) (Value1) directly as its second operand. The literal word
// "Addr" stands in for the source operand in TestString.
53 #define TestArithSSXmmAddr(FloatSize, Value0, Dst, Value1, Inst, Op)           \
54   do {                                                                         \
55     static_assert(FloatSize == 32 || FloatSize == 64,                          \
56                   "Invalid fp size " #FloatSize);                              \
57     static constexpr char TestString[] =                                       \
58         "(" #FloatSize ", Addr, " #Value0 ", " #Dst ", " #Value1 ", " #Inst    \
59         ", " #Op ")";                                                          \
60     static constexpr bool IsDouble = FloatSize == 64;                          \
61     using Type = std::conditional<IsDouble, double, float>::type;              \
62     const uint32_t T0 = allocateQword();                                       \
63     const Type V0 = Value0;                                                    \
64     const uint32_t T1 = allocateQword();                                       \
65     const Type V1 = Value1;                                                    \
66                                                                                \
67     __ movss(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,             \
68              dwordAddress(T0));                                                \
69     __ Inst(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,              \
70             dwordAddress(T1));                                                 \
71                                                                                \
72     AssembledTest test = assemble();                                           \
73     if (IsDouble) {                                                            \
74       test.setQwordTo(T0, static_cast<double>(V0));                            \
75       test.setQwordTo(T1, static_cast<double>(V1));                            \
76     } else {                                                                   \
77       test.setDwordTo(T0, static_cast<float>(V0));                             \
78       test.setDwordTo(T1, static_cast<float>(V1));                             \
79     }                                                                          \
80                                                                                \
81     test.run();                                                                \
82                                                                                \
83     ASSERT_DOUBLE_EQ(V0 Op V1, test.Dst<Type>()) << TestString;                \
84     reset();                                                                   \
85   } while (0)
86 
// Runs add/sub/mul/div once in each form for one register assignment.
// Dst0 is the destination of the register/register cases (paired with Src);
// Dst1 is the destination of the register/memory cases. Each case uses its own
// pair of operand values so a failure message identifies it unambiguously.
87 #define TestArithSS(FloatSize, Src, Dst0, Dst1)                                \
88   do {                                                                         \
89     TestArithSSXmmXmm(FloatSize, Src, 1.0, Dst0, 10.0, addss, +);              \
90     TestArithSSXmmAddr(FloatSize, 2.0, Dst1, 20.0, addss, +);                  \
91     TestArithSSXmmXmm(FloatSize, Src, 3.0, Dst0, 30.0, subss, -);              \
92     TestArithSSXmmAddr(FloatSize, 4.0, Dst1, 40.0, subss, -);                  \
93     TestArithSSXmmXmm(FloatSize, Src, 5.0, Dst0, 50.0, mulss, *);              \
94     TestArithSSXmmAddr(FloatSize, 6.0, Dst1, 60.0, mulss, *);                  \
95     TestArithSSXmmXmm(FloatSize, Src, 7.0, Dst0, 70.0, divss, / );             \
96     TestArithSSXmmAddr(FloatSize, 8.0, Dst1, 80.0, divss, / );                 \
97   } while (0)
98 
  // f32: each of xmm0..xmm7 is used once as Src, with Dst0 and Dst1 being the
  // next two registers (wrapping around after xmm7).
99   TestArithSS(32, xmm0, xmm1, xmm2);
100   TestArithSS(32, xmm1, xmm2, xmm3);
101   TestArithSS(32, xmm2, xmm3, xmm4);
102   TestArithSS(32, xmm3, xmm4, xmm5);
103   TestArithSS(32, xmm4, xmm5, xmm6);
104   TestArithSS(32, xmm5, xmm6, xmm7);
105   TestArithSS(32, xmm6, xmm7, xmm0);
106   TestArithSS(32, xmm7, xmm0, xmm1);
107 
  // f64: same register rotation as the f32 runs.
108   TestArithSS(64, xmm0, xmm1, xmm2);
109   TestArithSS(64, xmm1, xmm2, xmm3);
110   TestArithSS(64, xmm2, xmm3, xmm4);
111   TestArithSS(64, xmm3, xmm4, xmm5);
112   TestArithSS(64, xmm4, xmm5, xmm6);
113   TestArithSS(64, xmm5, xmm6, xmm7);
114   TestArithSS(64, xmm6, xmm7, xmm0);
115   TestArithSS(64, xmm7, xmm0, xmm1);
116 
// The helper macros are only meant for this test body; drop them again.
117 #undef TestArithSS
118 #undef TestArithSSXmmAddr
119 #undef TestArithSSXmmXmm
120 }
121 
// Exercises the packed-integer instructions psra, psrl, psll, pmull, pmuludq,
// padd, psub, pand, pandn, por and pxor on xmm registers for element sizes of
// 8, 16 and 32 bits. Each case loads 128-bit operands, executes one
// instruction, and compares the destination register (read back through
// test.<Dst><Dqword>()) with a reference value built from packedAs<>().
TEST_F(AssemblerX8632Test,PArith)122 TEST_F(AssemblerX8632Test, PArith) {
// Register/register form.
//   Dst, Src       : xmm register names, pasted into
//                    XmmRegister::Encoded_Reg_##...
//   Value0, Value1 : parenthesized constructor argument lists, e.g.
//                    (uint64_t(a), uint64_t(b)); `const Dqword V0 Value0;`
//                    therefore expands to a direct-initialization of V0.
//   Inst, Op       : assembler method under test and the C++ operator used to
//                    build the expected value.
//   Type, Size     : pasted together as Type##Size##_t (e.g. int16_t,
//                    uint32_t) to select the element type given to
//                    packedAs<>; Size is also pasted into IceType_i##Size.
// Emitted sequence: movups Dst, [T0]; movups Src, [T1]; Inst Dst, Src.
// Expected value: packedAs<Type##Size##_t>(V0) Op V1 -- presumably Op applied
// element-wise on lanes of that type; packedAs is defined outside this file.
123 #define TestPArithXmmXmm(Dst, Value0, Src, Value1, Inst, Op, Type, Size)       \
124   do {                                                                         \
125     static constexpr char TestString[] =                                       \
126         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op       \
127         ", " #Type ", " #Size ")";                                             \
128     const uint32_t T0 = allocateDqword();                                      \
129     const Dqword V0 Value0;                                                    \
130                                                                                \
131     const uint32_t T1 = allocateDqword();                                      \
132     const Dqword V1 Value1;                                                    \
133                                                                                \
134     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
135     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
136     __ Inst(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst,                   \
137             XmmRegister::Encoded_Reg_##Src);                                   \
138                                                                                \
139     AssembledTest test = assemble();                                           \
140     test.setDqwordTo(T0, V0);                                                  \
141     test.setDqwordTo(T1, V1);                                                  \
142     test.run();                                                                \
143                                                                                \
144     ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op V1, test.Dst<Dqword>())          \
145         << TestString;                                                         \
146     reset();                                                                   \
147   } while (0)
148 
// Register/memory form: Dst is loaded from [T0] and Inst takes
// dwordAddress(T1) as its second operand, so no Src register is needed. The
// literal word "Addr" stands in for the source operand in TestString.
149 #define TestPArithXmmAddr(Dst, Value0, Value1, Inst, Op, Type, Size)           \
150   do {                                                                         \
151     static constexpr char TestString[] =                                       \
152         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op           \
153         ", " #Type ", " #Size ")";                                             \
154     const uint32_t T0 = allocateDqword();                                      \
155     const Dqword V0 Value0;                                                    \
156                                                                                \
157     const uint32_t T1 = allocateDqword();                                      \
158     const Dqword V1 Value1;                                                    \
159                                                                                \
160     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
161     __ Inst(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst,                   \
162             dwordAddress(T1));                                                 \
163                                                                                \
164     AssembledTest test = assemble();                                           \
165     test.setDqwordTo(T0, V0);                                                  \
166     test.setDqwordTo(T1, V1);                                                  \
167     test.run();                                                                \
168                                                                                \
169     ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op V1, test.Dst<Dqword>())          \
170         << TestString;                                                         \
171     reset();                                                                   \
172   } while (0)
173 
// Register/immediate form: the second operand is Immediate(Imm) and the
// expected value is packedAs<Type##Size##_t>(V0) Op Imm. Only one memory slot
// (T0, for the destination's initial value) is allocated.
174 #define TestPArithXmmImm(Dst, Value0, Imm, Inst, Op, Type, Size)               \
175   do {                                                                         \
176     static constexpr char TestString[] =                                       \
177         "(" #Dst ", " #Value0 ", " #Imm ", " #Inst ", " #Op ", " #Type         \
178         ", " #Size ")";                                                        \
179     const uint32_t T0 = allocateDqword();                                      \
180     const Dqword V0 Value0;                                                    \
181                                                                                \
182     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
183     __ Inst(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst, Immediate(Imm));  \
184                                                                                \
185     AssembledTest test = assemble();                                           \
186     test.setDqwordTo(T0, V0);                                                  \
187     test.run();                                                                \
188                                                                                \
189     ASSERT_EQ(packedAs<Type##Size##_t>(V0) Op Imm, test.Dst<Dqword>())         \
190         << TestString;                                                         \
191     reset();                                                                   \
192   } while (0)
193 
// pandn, register/register form. pandn gets its own macro because the
// reference value is not a single binary operator: it is
// ~packedAs<Type##Size##_t>(V0) & V1, i.e. the destination's original value is
// complemented before being and-ed with the source.
194 #define TestPAndnXmmXmm(Dst, Value0, Src, Value1, Type, Size)                  \
195   do {                                                                         \
196     static constexpr char TestString[] =                                       \
197         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", pandn, " #Type         \
198         ", " #Size ")";                                                        \
199     const uint32_t T0 = allocateDqword();                                      \
200     const Dqword V0 Value0;                                                    \
201                                                                                \
202     const uint32_t T1 = allocateDqword();                                      \
203     const Dqword V1 Value1;                                                    \
204                                                                                \
205     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
206     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
207     __ pandn(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst,                  \
208              XmmRegister::Encoded_Reg_##Src);                                  \
209                                                                                \
210     AssembledTest test = assemble();                                           \
211     test.setDqwordTo(T0, V0);                                                  \
212     test.setDqwordTo(T1, V1);                                                  \
213     test.run();                                                                \
214                                                                                \
215     ASSERT_EQ(~(packedAs<Type##Size##_t>(V0)) & V1, test.Dst<Dqword>())        \
216         << TestString;                                                         \
217     reset();                                                                   \
218   } while (0)
219 
// pandn, register/memory form: same reference value as the register/register
// pandn macro, with dwordAddress(T1) as the second operand.
220 #define TestPAndnXmmAddr(Dst, Value0, Value1, Type, Size)                      \
221   do {                                                                         \
222     static constexpr char TestString[] =                                       \
223         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", pandn, " #Type ", " #Size  \
224         ")";                                                                   \
225     const uint32_t T0 = allocateDqword();                                      \
226     const Dqword V0 Value0;                                                    \
227                                                                                \
228     const uint32_t T1 = allocateDqword();                                      \
229     const Dqword V1 Value1;                                                    \
230                                                                                \
231     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
232     __ pandn(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst,                  \
233              dwordAddress(T1));                                                \
234                                                                                \
235     AssembledTest test = assemble();                                           \
236     test.setDqwordTo(T0, V0);                                                  \
237     test.setDqwordTo(T1, V1);                                                  \
238     test.run();                                                                \
239                                                                                \
240     ASSERT_EQ((~packedAs<Type##Size##_t>(V0)) & V1, test.Dst<Dqword>())        \
241         << TestString;                                                         \
242     reset();                                                                   \
243   } while (0)
244 
// Runs every instruction that applies to one element Size (8, 16 or 32, as
// enforced by the static_assert) for one Dst/Src register pair:
//   - Size != 8        : psra (signed), psrl and psll (unsigned) with a shift
//                        count of 3 in register, memory and immediate forms,
//                        plus pmull.
//   - Size is 32 only  : additionally pmuludq (guarded by Size != 16 inside
//                        the Size != 8 branch).
//   - every Size       : padd, psub, pand, pandn, por, pxor.
// The guards are ordinary runtime `if` statements, so the guarded expansions
// are still compiled for every Size even when they are never executed.
// The destination always starts as (0x8040201008040201, 0x8080404002020101).
// NOTE(review): the second-operand constant 0x0123456789ABCDEull has only 15
// hex digits; it may have been meant to be 0x0123456789ABCDEFull -- confirm.
245 #define TestPArithSize(Dst, Src, Size)                                         \
246   do {                                                                         \
247     static_assert(Size == 8 || Size == 16 || Size == 32, "Invalid size.");     \
248     if (Size != 8) {                                                           \
249       TestPArithXmmXmm(                                                        \
250           Dst,                                                                 \
251           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
252           Src, (uint64_t(3u), uint64_t(0u)), psra, >>, int, Size);             \
253       TestPArithXmmAddr(Dst, (uint64_t(0x8040201008040201ull),                 \
254                               uint64_t(0x8080404002020101ull)),                \
255                         (uint64_t(3u), uint64_t(0u)), psra, >>, int, Size);    \
256       TestPArithXmmImm(Dst, (uint64_t(0x8040201008040201ull),                  \
257                              uint64_t(0x8080404002020101ull)),                 \
258                        3u, psra, >>, int, Size);                               \
259       TestPArithXmmXmm(                                                        \
260           Dst,                                                                 \
261           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
262           Src, (uint64_t(3u), uint64_t(0u)), psrl, >>, uint, Size);            \
263       TestPArithXmmAddr(Dst, (uint64_t(0x8040201008040201ull),                 \
264                               uint64_t(0x8080404002020101ull)),                \
265                         (uint64_t(3u), uint64_t(0u)), psrl, >>, uint, Size);   \
266       TestPArithXmmImm(Dst, (uint64_t(0x8040201008040201ull),                  \
267                              uint64_t(0x8080404002020101ull)),                 \
268                        3u, psrl, >>, uint, Size);                              \
269       TestPArithXmmXmm(                                                        \
270           Dst,                                                                 \
271           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
272           Src, (uint64_t(3u), uint64_t(0u)), psll, <<, uint, Size);            \
273       TestPArithXmmAddr(Dst, (uint64_t(0x8040201008040201ull),                 \
274                               uint64_t(0x8080404002020101ull)),                \
275                         (uint64_t(3u), uint64_t(0u)), psll, <<, uint, Size);   \
276       TestPArithXmmImm(Dst, (uint64_t(0x8040201008040201ull),                  \
277                              uint64_t(0x8080404002020101ull)),                 \
278                        3u, psll, <<, uint, Size);                              \
279                                                                                \
280       TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                  \
281                              uint64_t(0x8080404002020101ull)),                 \
282                        Src, (uint64_t(0xFFFFFFFF00000000ull),                  \
283                              uint64_t(0x0123456789ABCDEull)),                  \
284                        pmull, *, int, Size);                                   \
285       TestPArithXmmAddr(                                                       \
286           Dst,                                                                 \
287           (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),  \
288           (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),   \
289           pmull, *, int, Size);                                                \
290       if (Size != 16) {                                                        \
291         TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                \
292                                uint64_t(0x8080404002020101ull)),               \
293                          Src, (uint64_t(0xFFFFFFFF00000000ull),                \
294                                uint64_t(0x0123456789ABCDEull)),                \
295                          pmuludq, *, uint, Size);                              \
296         TestPArithXmmAddr(                                                     \
297             Dst, (uint64_t(0x8040201008040201ull),                             \
298                   uint64_t(0x8080404002020101ull)),                            \
299             (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)), \
300             pmuludq, *, uint, Size);                                           \
301       }                                                                        \
302     }                                                                          \
303     TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                    \
304                            uint64_t(0x8080404002020101ull)),                   \
305                      Src, (uint64_t(0xFFFFFFFF00000000ull),                    \
306                            uint64_t(0x0123456789ABCDEull)),                    \
307                      padd, +, int, Size);                                      \
308     TestPArithXmmAddr(                                                         \
309         Dst,                                                                   \
310         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
311         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
312         padd, +, int, Size);                                                   \
313     TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                    \
314                            uint64_t(0x8080404002020101ull)),                   \
315                      Src, (uint64_t(0xFFFFFFFF00000000ull),                    \
316                            uint64_t(0x0123456789ABCDEull)),                    \
317                      psub, -, int, Size);                                      \
318     TestPArithXmmAddr(                                                         \
319         Dst,                                                                   \
320         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
321         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
322         psub, -, int, Size);                                                   \
323     TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                    \
324                            uint64_t(0x8080404002020101ull)),                   \
325                      Src, (uint64_t(0xFFFFFFFF00000000ull),                    \
326                            uint64_t(0x0123456789ABCDEull)),                    \
327                      pand, &, int, Size);                                      \
328     TestPArithXmmAddr(                                                         \
329         Dst,                                                                   \
330         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
331         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
332         pand, &, int, Size);                                                   \
333                                                                                \
334     TestPAndnXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                     \
335                           uint64_t(0x8080404002020101ull)),                    \
336                     Src, (uint64_t(0xFFFFFFFF00000000ull),                     \
337                           uint64_t(0x0123456789ABCDEull)),                     \
338                     int, Size);                                                \
339     TestPAndnXmmAddr(                                                          \
340         Dst,                                                                   \
341         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
342         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
343         int, Size);                                                            \
344                                                                                \
345     TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                    \
346                            uint64_t(0x8080404002020101ull)),                   \
347                      Src, (uint64_t(0xFFFFFFFF00000000ull),                    \
348                            uint64_t(0x0123456789ABCDEull)),                    \
349                      por, |, int, Size);                                       \
350     TestPArithXmmAddr(                                                         \
351         Dst,                                                                   \
352         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
353         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
354         por, |, int, Size);                                                    \
355     TestPArithXmmXmm(Dst, (uint64_t(0x8040201008040201ull),                    \
356                            uint64_t(0x8080404002020101ull)),                   \
357                      Src, (uint64_t(0xFFFFFFFF00000000ull),                    \
358                            uint64_t(0x0123456789ABCDEull)),                    \
359                      pxor, ^, int, Size);                                      \
360     TestPArithXmmAddr(                                                         \
361         Dst,                                                                   \
362         (uint64_t(0x8040201008040201ull), uint64_t(0x8080404002020101ull)),    \
363         (uint64_t(0xFFFFFFFF00000000ull), uint64_t(0x0123456789ABCDEull)),     \
364         pxor, ^, int, Size);                                                   \
365   } while (0)
366 
// Runs TestPArithSize for all three element sizes.
// Note the parameter names are swapped relative to TestPArithSize(Dst, Src,
// Size): the first argument given here lands in TestPArithSize's Dst slot, so
// in TestPArith(xmm0, xmm1) it is xmm0 that acts as the destination register.
367 #define TestPArith(Src, Dst)                                                   \
368   do {                                                                         \
369     TestPArithSize(Src, Dst, 8);                                               \
370     TestPArithSize(Src, Dst, 16);                                              \
371     TestPArithSize(Src, Dst, 32);                                              \
372   } while (0)
373 
  // Each of xmm0..xmm7 is used once as the first argument, paired with the
  // next register (wrapping around after xmm7).
374   TestPArith(xmm0, xmm1);
375   TestPArith(xmm1, xmm2);
376   TestPArith(xmm2, xmm3);
377   TestPArith(xmm3, xmm4);
378   TestPArith(xmm4, xmm5);
379   TestPArith(xmm5, xmm6);
380   TestPArith(xmm6, xmm7);
381   TestPArith(xmm7, xmm0);
382 
// The helper macros are only meant for this test body; drop them again.
383 #undef TestPArith
384 #undef TestPArithSize
385 #undef TestPAndnXmmAddr
386 #undef TestPAndnXmmXmm
387 #undef TestPArithXmmImm
388 #undef TestPArithXmmAddr
389 #undef TestPArithXmmXmm
390 }
391 
TEST_F(AssemblerX8632Test,ArithPS)392 TEST_F(AssemblerX8632Test, ArithPS) {
// Packed floating-point test, register/register form, for instructions whose
// assembler method takes an element type as its first argument.
//   FloatSize      : pasted into IceType_f##FloatSize; it is not part of
//                    TestString.
//   Dst, Src       : xmm register names, pasted into
//                    XmmRegister::Encoded_Reg_##...
//   Value0, Value1 : spliced directly after `const Dqword V0` / `V1`, so they
//                    must be written as parenthesized constructor argument
//                    lists.
//   Inst, Op       : assembler method under test and the C++ operator used to
//                    build the expected value.
//   Type           : element type handed to packedAs<Type>.
// Emitted sequence: movups Dst, [T0]; movups Src, [T1]; Inst Dst, Src.
// The result read back through test.<Dst><Dqword>() must equal
// packedAs<Type>(V0) Op V1; TestString is streamed into the assertion so a
// failure names the exact case.
393 #define TestArithPSXmmXmm(FloatSize, Dst, Value0, Src, Value1, Inst, Op, Type) \
394   do {                                                                         \
395     static constexpr char TestString[] =                                       \
396         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op       \
397         ", " #Type ")";                                                        \
398     const uint32_t T0 = allocateDqword();                                      \
399     const Dqword V0 Value0;                                                    \
400     const uint32_t T1 = allocateDqword();                                      \
401     const Dqword V1 Value1;                                                    \
402                                                                                \
403     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
404     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
405     __ Inst(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,              \
406             XmmRegister::Encoded_Reg_##Src);                                   \
407                                                                                \
408     AssembledTest test = assemble();                                           \
409     test.setDqwordTo(T0, V0);                                                  \
410     test.setDqwordTo(T1, V1);                                                  \
411     test.run();                                                                \
412                                                                                \
413     ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
414                                                                                \
415     reset();                                                                   \
416   } while (0)
417 
// Packed test, register/register form, for instructions whose assembler
// method takes no element-type argument: Inst is called with just the two
// register operands.
//   Dst, Src       : xmm register names, pasted into
//                    XmmRegister::Encoded_Reg_##...
//   Value0, Value1 : spliced directly after `const Dqword V0` / `V1`, so they
//                    must be written as parenthesized constructor argument
//                    lists.
//   Inst, Op       : assembler method under test and the C++ operator used to
//                    build the expected value.
//   Type           : element type handed to packedAs<Type>.
// Emitted sequence: movups Dst, [T0]; movups Src, [T1]; Inst Dst, Src.
// The result read back through test.<Dst><Dqword>() must equal
// packedAs<Type>(V0) Op V1.
418 #define TestArithPSXmmXmmUntyped(Dst, Value0, Src, Value1, Inst, Op, Type)     \
419   do {                                                                         \
420     static constexpr char TestString[] =                                       \
421         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Op       \
422         ", " #Type ")";                                                        \
423     const uint32_t T0 = allocateDqword();                                      \
424     const Dqword V0 Value0;                                                    \
425     const uint32_t T1 = allocateDqword();                                      \
426     const Dqword V1 Value1;                                                    \
427                                                                                \
428     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
429     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
430     __ Inst(XmmRegister::Encoded_Reg_##Dst, XmmRegister::Encoded_Reg_##Src);   \
431                                                                                \
432     AssembledTest test = assemble();                                           \
433     test.setDqwordTo(T0, V0);                                                  \
434     test.setDqwordTo(T1, V1);                                                  \
435     test.run();                                                                \
436                                                                                \
437     ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
438                                                                                \
439     reset();                                                                   \
440   } while (0)
441 
// Register/memory packed test for instructions whose assembler method takes
// no element-type argument: loads Value0 into Dst, emits `Inst Dst, [mem]`
// where the memory operand holds Value1, runs the code and expects Dst to
// equal `packedAs<Type>(V0) Op V1`.
//   Dst            - xmm register name (pasted onto Encoded_Reg_).
//   Value0, Value1 - parenthesized Dqword constructor arguments.
//   Inst           - assembler method under test.
//   Op             - C++ operator used to compute the expected value.
//   Type           - element type handed to packedAs<>.
#define TestArithPSXmmAddrUntyped(Dst, Value0, Value1, Inst, Op, Type)         \
  do {                                                                         \
    static constexpr char TestString[] =                                       \
        "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op           \
        ", " #Type ")";                                                        \
    /* DstSlot seeds the register; MemSlot is the memory operand itself. */    \
    const uint32_t DstSlot = allocateDqword();                                 \
    const uint32_t MemSlot = allocateDqword();                                 \
    const Dqword V0 Value0;                                                    \
    const Dqword V1 Value1;                                                    \
                                                                               \
    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(DstSlot));          \
    __ Inst(XmmRegister::Encoded_Reg_##Dst, dwordAddress(MemSlot));            \
                                                                               \
    AssembledTest test = assemble();                                           \
    test.setDqwordTo(DstSlot, V0);                                             \
    test.setDqwordTo(MemSlot, V1);                                             \
    test.run();                                                                \
                                                                               \
    ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
                                                                               \
    /* Leave the fixture clean for the next expansion. */                      \
    reset();                                                                   \
  } while (0)
464 
// Register/register test for packed min/max style instructions: loads Value0
// into Dst and Value1 into Src, emits `Inst(IceType_f<FloatSize>, Dst, Src)`,
// runs the code and expects Dst to equal `packedAs<Type>(V0).Inst(V1)`, i.e.
// the reference value comes from the packedAs<> member named like the
// instruction.
//   FloatSize      - pasted onto IceType_f to select the element type.
//   Dst, Src       - xmm register names (pasted onto Encoded_Reg_).
//   Value0, Value1 - parenthesized Dqword constructor arguments.
//   Inst           - assembler method and packedAs<> member name.
//   Type           - element type handed to packedAs<>.
#define TestMinMaxPS(FloatSize, Dst, Value0, Src, Value1, Inst, Type)          \
  do {                                                                         \
    static constexpr char TestString[] =                                       \
        "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Inst ", " #Type     \
        ")";                                                                   \
    /* Memory slots the operands are loaded from, then the operands. */        \
    const uint32_t DstSlot = allocateDqword();                                 \
    const uint32_t SrcSlot = allocateDqword();                                 \
    const Dqword V0 Value0;                                                    \
    const Dqword V1 Value1;                                                    \
                                                                               \
    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(DstSlot));          \
    __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(SrcSlot));          \
    __ Inst(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,              \
            XmmRegister::Encoded_Reg_##Src);                                   \
                                                                               \
    AssembledTest test = assemble();                                           \
    test.setDqwordTo(DstSlot, V0);                                             \
    test.setDqwordTo(SrcSlot, V1);                                             \
    test.run();                                                                \
                                                                               \
    ASSERT_EQ(packedAs<Type>(V0).Inst(V1), test.Dst<Dqword>()) << TestString;  \
                                                                               \
    /* Leave the fixture clean for the next expansion. */                      \
    reset();                                                                   \
  } while (0)
489 
// Register/memory packed arithmetic test: loads Value0 into Dst, emits
// `Inst(IceType_f<FloatSize>, Dst, [mem])` where the memory operand holds
// Value1, runs the code and expects Dst to equal `packedAs<Type>(V0) Op V1`.
//   FloatSize      - pasted onto IceType_f to select the element type.
//   Dst            - xmm register name (pasted onto Encoded_Reg_).
//   Value0, Value1 - parenthesized Dqword constructor arguments.
//   Inst           - assembler method under test.
//   Op             - C++ operator used to compute the expected value.
//   Type           - element type handed to packedAs<>.
#define TestArithPSXmmAddr(FloatSize, Dst, Value0, Value1, Inst, Op, Type)     \
  do {                                                                         \
    static constexpr char TestString[] =                                       \
        "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Inst ", " #Op           \
        ", " #Type ")";                                                        \
    /* DstSlot seeds the register; MemSlot is the memory operand itself. */    \
    const uint32_t DstSlot = allocateDqword();                                 \
    const uint32_t MemSlot = allocateDqword();                                 \
    const Dqword V0 Value0;                                                    \
    const Dqword V1 Value1;                                                    \
                                                                               \
    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(DstSlot));          \
    __ Inst(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,              \
            dwordAddress(MemSlot));                                            \
                                                                               \
    AssembledTest test = assemble();                                           \
    test.setDqwordTo(DstSlot, V0);                                             \
    test.setDqwordTo(MemSlot, V1);                                             \
    test.run();                                                                \
                                                                               \
    ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
                                                                               \
    /* Leave the fixture clean for the next expansion. */                      \
    reset();                                                                   \
  } while (0)
513 
514 #define TestArithPS(Dst, Src)                                                  \
515   do {                                                                         \
516     TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
517                       (0.55, 0.43, 0.23, 1.21), addps, +, float);              \
518     TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
519                        (0.55, 0.43, 0.23, 1.21), addps, +, float);             \
520     TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
521                       (0.55, 0.43, 0.23, 1.21), subps, -, float);              \
522     TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
523                        (0.55, 0.43, 0.23, 1.21), subps, -, float);             \
524     TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
525                       (0.55, 0.43, 0.23, 1.21), mulps, *, float);              \
526     TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
527                        (0.55, 0.43, 0.23, 1.21), mulps, *, float);             \
528     TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
529                       (0.55, 0.43, 0.23, 1.21), divps, /, float);              \
530     TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
531                        (0.55, 0.43, 0.23, 1.21), divps, /, float);             \
532     TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
533                       (0.55, 0.43, 0.23, 1.21), andps, &, float);              \
534     TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
535                        (0.55, 0.43, 0.23, 1.21), andps, &, float);             \
536     TestArithPSXmmXmm(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), andps, &,    \
537                       double);                                                 \
538     TestArithPSXmmAddr(64, Dst, (1.0, -1000.0), (0.55, 1.21), andps, &,        \
539                        double);                                                \
540     TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
541                       (0.55, 0.43, 0.23, 1.21), orps, |, float);               \
542     TestArithPSXmmXmm(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), orps, |,     \
543                       double);                                                 \
544     TestMinMaxPS(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,                    \
545                  (0.55, 0.43, 0.23, 1.21), minps, float);                      \
546     TestMinMaxPS(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,                    \
547                  (0.55, 0.43, 0.23, 1.21), maxps, float);                      \
548     TestMinMaxPS(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), minps, double);   \
549     TestMinMaxPS(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), maxps, double);   \
550     TestArithPSXmmXmm(32, Dst, (1.0, 100.0, -1000.0, 20.0), Src,               \
551                       (0.55, 0.43, 0.23, 1.21), xorps, ^, float);              \
552     TestArithPSXmmAddr(32, Dst, (1.0, 100.0, -1000.0, 20.0),                   \
553                        (0.55, 0.43, 0.23, 1.21), xorps, ^, float);             \
554     TestArithPSXmmXmm(64, Dst, (1.0, -1000.0), Src, (0.55, 1.21), xorps, ^,    \
555                       double);                                                 \
556     TestArithPSXmmAddr(64, Dst, (1.0, -1000.0), (0.55, 1.21), xorps, ^,        \
557                        double);                                                \
558   } while (0)
559 
560 #if 0
561 
562 #endif
563 
564   TestArithPS(xmm0, xmm1);
565   TestArithPS(xmm1, xmm2);
566   TestArithPS(xmm2, xmm3);
567   TestArithPS(xmm3, xmm4);
568   TestArithPS(xmm4, xmm5);
569   TestArithPS(xmm5, xmm6);
570   TestArithPS(xmm6, xmm7);
571   TestArithPS(xmm7, xmm0);
572 
573 #undef TestArithPs
574 #undef TestMinMaxPS
575 #undef TestArithPSXmmXmmUntyped
576 #undef TestArithPSXmmAddr
577 #undef TestArithPSXmmXmm
578 }
579 
TEST_F(AssemblerX8632Test,Blending)580 TEST_F(AssemblerX8632Test, Blending) {
581   using f32 = float;
582   using i8 = uint8_t;
583 
584 #define TestBlendingXmmXmm(Dst, Value0, Src, Value1, M /*ask*/, Inst, Type)    \
585   do {                                                                         \
586     static constexpr char TestString[] =                                       \
587         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #M ", " #Inst        \
588         ", " #Type ")";                                                        \
589     const uint32_t T0 = allocateDqword();                                      \
590     const Dqword V0 Value0;                                                    \
591     const uint32_t T1 = allocateDqword();                                      \
592     const Dqword V1 Value1;                                                    \
593     const uint32_t Mask = allocateDqword();                                    \
594     const Dqword MaskValue M;                                                  \
595                                                                                \
596     __ movups(XmmRegister::Encoded_Reg_xmm0, dwordAddress(Mask));              \
597     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
598     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
599     __ Inst(IceType_##Type, XmmRegister::Encoded_Reg_##Dst,                    \
600             XmmRegister::Encoded_Reg_##Src);                                   \
601                                                                                \
602     AssembledTest test = assemble();                                           \
603     test.setDqwordTo(T0, V0);                                                  \
604     test.setDqwordTo(T1, V1);                                                  \
605     test.setDqwordTo(Mask, MaskValue);                                         \
606     test.run();                                                                \
607                                                                                \
608     ASSERT_EQ(packedAs<Type>(V0).blendWith(V1, MaskValue), test.Dst<Dqword>()) \
609         << TestString;                                                         \
610     reset();                                                                   \
611   } while (0)
612 
613 #define TestBlendingXmmAddr(Dst, Value0, Value1, M /*ask*/, Inst, Type)        \
614   do {                                                                         \
615     static constexpr char TestString[] =                                       \
616         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #M ", " #Inst ", " #Type \
617         ")";                                                                   \
618     const uint32_t T0 = allocateDqword();                                      \
619     const Dqword V0 Value0;                                                    \
620     const uint32_t T1 = allocateDqword();                                      \
621     const Dqword V1 Value1;                                                    \
622     const uint32_t Mask = allocateDqword();                                    \
623     const Dqword MaskValue M;                                                  \
624                                                                                \
625     __ movups(XmmRegister::Encoded_Reg_xmm0, dwordAddress(Mask));              \
626     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
627     __ Inst(IceType_##Type, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1)); \
628                                                                                \
629     AssembledTest test = assemble();                                           \
630     test.setDqwordTo(T0, V0);                                                  \
631     test.setDqwordTo(T1, V1);                                                  \
632     test.setDqwordTo(Mask, MaskValue);                                         \
633     test.run();                                                                \
634                                                                                \
635     ASSERT_EQ(packedAs<Type>(V0).blendWith(V1, MaskValue), test.Dst<Dqword>()) \
636         << TestString;                                                         \
637     reset();                                                                   \
638   } while (0)
639 
640 #define TestBlending(Src, Dst)                                                 \
641   do {                                                                         \
642     TestBlendingXmmXmm(                                                        \
643         Dst, (1.0, 2.0, 1.0, 2.0), Src, (-1.0, -2.0, -1.0, -2.0),              \
644         (uint64_t(0x8000000000000000ull), uint64_t(0x0000000080000000ull)),    \
645         blendvps, f32);                                                        \
646     TestBlendingXmmAddr(                                                       \
647         Dst, (1.0, 2.0, 1.0, 2.0), (-1.0, -2.0, -1.0, -2.0),                   \
648         (uint64_t(0x8000000000000000ull), uint64_t(0x0000000080000000ull)),    \
649         blendvps, f32);                                                        \
650     TestBlendingXmmXmm(                                                        \
651         Dst,                                                                   \
652         (uint64_t(0xFFFFFFFFFFFFFFFFull), uint64_t(0xBBBBBBBBBBBBBBBBull)),    \
653         Src,                                                                   \
654         (uint64_t(0xAAAAAAAAAAAAAAAAull), uint64_t(0xEEEEEEEEEEEEEEEEull)),    \
655         (uint64_t(0x8000000000000080ull), uint64_t(0x8080808000000000ull)),    \
656         pblendvb, i8);                                                         \
657     TestBlendingXmmAddr(                                                       \
658         Dst,                                                                   \
659         (uint64_t(0xFFFFFFFFFFFFFFFFull), uint64_t(0xBBBBBBBBBBBBBBBBull)),    \
660         (uint64_t(0xAAAAAAAAAAAAAAAAull), uint64_t(0xEEEEEEEEEEEEEEEEull)),    \
661         (uint64_t(0x8000000000000080ull), uint64_t(0x8080808000000000ull)),    \
662         pblendvb, i8);                                                         \
663   } while (0)
664 
665   /* xmm0 is taken. It is the implicit mask . */
666   TestBlending(xmm1, xmm2);
667   TestBlending(xmm2, xmm3);
668   TestBlending(xmm3, xmm4);
669   TestBlending(xmm4, xmm5);
670   TestBlending(xmm5, xmm6);
671   TestBlending(xmm6, xmm7);
672   TestBlending(xmm7, xmm1);
673 
674 #undef TestBlending
675 #undef TestBlendingXmmAddr
676 #undef TestBlendingXmmXmm
677 }
678 
TEST_F(AssemblerX8632Test,Cmpps)679 TEST_F(AssemblerX8632Test, Cmpps) {
680 #define TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, C, Op, Type)      \
681   do {                                                                         \
682     static constexpr char TestString[] =                                       \
683         "(" #Src ", " #Dst ", " #C ", " #Op ")";                               \
684     const uint32_t T0 = allocateDqword();                                      \
685     const Dqword V0 Value0;                                                    \
686     const uint32_t T1 = allocateDqword();                                      \
687     const Dqword V1 Value1;                                                    \
688                                                                                \
689     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
690     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
691     __ cmpps(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,             \
692              XmmRegister::Encoded_Reg_##Src, Cond::Cmpps_##C);                 \
693                                                                                \
694     AssembledTest test = assemble();                                           \
695     test.setDqwordTo(T0, V0);                                                  \
696     test.setDqwordTo(T1, V1);                                                  \
697     test.run();                                                                \
698                                                                                \
699     ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
700     ;                                                                          \
701     reset();                                                                   \
702   } while (0)
703 
704 #define TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, C, Op, Type)          \
705   do {                                                                         \
706     static constexpr char TestString[] = "(" #Dst ", Addr, " #C ", " #Op ")";  \
707     const uint32_t T0 = allocateDqword();                                      \
708     const Dqword V0 Value0;                                                    \
709     const uint32_t T1 = allocateDqword();                                      \
710     const Dqword V1 Value1;                                                    \
711                                                                                \
712     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
713     __ cmpps(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,             \
714              dwordAddress(T1), Cond::Cmpps_##C);                               \
715                                                                                \
716     AssembledTest test = assemble();                                           \
717     test.setDqwordTo(T0, V0);                                                  \
718     test.setDqwordTo(T1, V1);                                                  \
719     test.run();                                                                \
720                                                                                \
721     ASSERT_EQ(packedAs<Type>(V0) Op V1, test.Dst<Dqword>()) << TestString;     \
722     ;                                                                          \
723     reset();                                                                   \
724   } while (0)
725 
726 #define TestCmppsOrdUnordXmmXmm(FloatSize, Dst, Value0, Src, Value1, C, Type)  \
727   do {                                                                         \
728     static constexpr char TestString[] = "(" #Src ", " #Dst ", " #C ")";       \
729     const uint32_t T0 = allocateDqword();                                      \
730     const Dqword V0(1.0, 1.0, std::numeric_limits<float>::quiet_NaN(),         \
731                     std::numeric_limits<float>::quiet_NaN());                  \
732     const uint32_t T1 = allocateDqword();                                      \
733     const Dqword V1(1.0, std::numeric_limits<float>::quiet_NaN(), 1.0,         \
734                     std::numeric_limits<float>::quiet_NaN());                  \
735                                                                                \
736     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
737     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
738     __ cmpps(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,             \
739              XmmRegister::Encoded_Reg_##Src, Cond::Cmpps_##C);                 \
740                                                                                \
741     AssembledTest test = assemble();                                           \
742     test.setDqwordTo(T0, V0);                                                  \
743     test.setDqwordTo(T1, V1);                                                  \
744     test.run();                                                                \
745                                                                                \
746     ASSERT_EQ(packedAs<Type>(V0).C(V1), test.Dst<Dqword>()) << TestString;     \
747     ;                                                                          \
748     reset();                                                                   \
749   } while (0)
750 
751 #define TestCmppsOrdUnordXmmAddr(FloatSize, Dst, Value0, Value1, C, Type)      \
752   do {                                                                         \
753     static constexpr char TestString[] = "(" #Dst ", " #C ")";                 \
754     const uint32_t T0 = allocateDqword();                                      \
755     const Dqword V0(1.0, 1.0, std::numeric_limits<float>::quiet_NaN(),         \
756                     std::numeric_limits<float>::quiet_NaN());                  \
757     const uint32_t T1 = allocateDqword();                                      \
758     const Dqword V1(1.0, std::numeric_limits<float>::quiet_NaN(), 1.0,         \
759                     std::numeric_limits<float>::quiet_NaN());                  \
760                                                                                \
761     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
762     __ cmpps(IceType_f##FloatSize, XmmRegister::Encoded_Reg_##Dst,             \
763              dwordAddress(T1), Cond::Cmpps_##C);                               \
764                                                                                \
765     AssembledTest test = assemble();                                           \
766     test.setDqwordTo(T0, V0);                                                  \
767     test.setDqwordTo(T1, V1);                                                  \
768     test.run();                                                                \
769                                                                                \
770     ASSERT_EQ(packedAs<Type>(V0).C(V1), test.Dst<Dqword>()) << TestString;     \
771     ;                                                                          \
772     reset();                                                                   \
773   } while (0)
774 
775 #define TestCmpps(FloatSize, Dst, Value0, Src, Value1, Type)                   \
776   do {                                                                         \
777     TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
778     TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
779     TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
780     TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
781     TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
782     TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
783     TestCmppsOrdUnordXmmXmm(FloatSize, Dst, Value0, Src, Value1, unord, Type); \
784     TestCmppsOrdUnordXmmAddr(FloatSize, Dst, Value0, Value1, unord, Type);     \
785     TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
786     TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
787     TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
788     TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
789     TestCmppsXmmXmm(FloatSize, Dst, Value0, Src, Value1, eq, ==, Type);        \
790     TestCmppsXmmAddr(FloatSize, Dst, Value0, Value1, eq, ==, Type);            \
791     if (FloatSize == 32) {                                                     \
792       TestCmppsOrdUnordXmmXmm(                                                 \
793           32, Dst, (1.0, 1.0, std::numeric_limits<float>::quiet_NaN(),         \
794                     std::numeric_limits<float>::quiet_NaN()),                  \
795           Src, (1.0, std::numeric_limits<float>::quiet_NaN(), 1.0,             \
796                 std::numeric_limits<float>::quiet_NaN()),                      \
797           unord, Type);                                                        \
798       TestCmppsOrdUnordXmmAddr(                                                \
799           32, Dst, (1.0, 1.0, std::numeric_limits<float>::quiet_NaN(),         \
800                     std::numeric_limits<float>::quiet_NaN()),                  \
801           (1.0, std::numeric_limits<float>::quiet_NaN(), 1.0,                  \
802            std::numeric_limits<float>::quiet_NaN()),                           \
803           unord, Type);                                                        \
804     } else {                                                                   \
805       TestCmppsOrdUnordXmmXmm(64, Dst,                                         \
806                               (1.0, std::numeric_limits<double>::quiet_NaN()), \
807                               Src, (std::numeric_limits<double>::quiet_NaN(),  \
808                                     std::numeric_limits<double>::quiet_NaN()), \
809                               unord, Type);                                    \
810       TestCmppsOrdUnordXmmXmm(64, Dst, (1.0, 1.0), Src,                        \
811                               (1.0, std::numeric_limits<double>::quiet_NaN()), \
812                               unord, Type);                                    \
813       TestCmppsOrdUnordXmmAddr(                                                \
814           64, Dst, (1.0, std::numeric_limits<double>::quiet_NaN()),            \
815           (std::numeric_limits<double>::quiet_NaN(),                           \
816            std::numeric_limits<double>::quiet_NaN()),                          \
817           unord, Type);                                                        \
818       TestCmppsOrdUnordXmmAddr(                                                \
819           64, Dst, (1.0, 1.0),                                                 \
820           (1.0, std::numeric_limits<double>::quiet_NaN()), unord, Type);       \
821     }                                                                          \
822   } while (0)
823 
824 #define TestCmppsSize(FloatSize, Value0, Value1, Type)                         \
825   do {                                                                         \
826     TestCmpps(FloatSize, xmm0, Value0, xmm1, Value1, Type);                    \
827     TestCmpps(FloatSize, xmm1, Value0, xmm2, Value1, Type);                    \
828     TestCmpps(FloatSize, xmm2, Value0, xmm3, Value1, Type);                    \
829     TestCmpps(FloatSize, xmm3, Value0, xmm4, Value1, Type);                    \
830     TestCmpps(FloatSize, xmm4, Value0, xmm5, Value1, Type);                    \
831     TestCmpps(FloatSize, xmm5, Value0, xmm6, Value1, Type);                    \
832     TestCmpps(FloatSize, xmm6, Value0, xmm7, Value1, Type);                    \
833     TestCmpps(FloatSize, xmm7, Value0, xmm0, Value1, Type);                    \
834   } while (0)
835 
836   TestCmppsSize(32, (-1.0, 1.0, 3.14, 1024.5), (-1.0, 1.0, 3.14, 1024.5),
837                 float);
838   TestCmppsSize(64, (1.0, -1000.0), (0.55, 1.21), double);
839 
840 #undef TestCmpps
841 #undef TestCmppsOrdUnordXmmAddr
842 #undef TestCmppsOrdUnordXmmXmm
843 #undef TestCmppsXmmAddr
844 #undef TestCmppsXmmXmm
845 }
846 
// Exercises the single-operand packed instructions sqrtps, rsqrtps,
// reciprocalps and sqrtpd on every xmm register, comparing the result against
// literal 128-bit patterns.
TEST_F(AssemblerX8632Test, Sqrtps_Rsqrtps_Reciprocalps_Sqrtpd) {
// Loads a fixed input into Dst, applies `Inst Dst` in place, runs the code and
// expects Dst to equal `Dqword Expect`.
//   Dst    - xmm register name (pasted onto Encoded_Reg_).
//   Inst   - assembler method under test.
//   Expect - parenthesized Dqword constructor arguments for the result.
#define TestImplSingle(Dst, Inst, Expect)                                      \
  do {                                                                         \
    static constexpr char TestString[] = "(" #Dst ", " #Inst ")";              \
    /* The same input is used for every instruction, sqrtpd included. */       \
    const Dqword V0(1.0, 4.0, 20.0, 3.14);                                     \
    const uint32_t InputSlot = allocateDqword();                               \
                                                                               \
    __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(InputSlot));        \
    __ Inst(XmmRegister::Encoded_Reg_##Dst);                                   \
                                                                               \
    AssembledTest test = assemble();                                           \
    test.setDqwordTo(InputSlot, V0);                                           \
    test.run();                                                                \
    ASSERT_EQ(Dqword Expect, test.Dst<Dqword>()) << TestString;                \
    reset();                                                                   \
  } while (0)

// Runs all four instructions on one register. The expected values are given
// as two 64-bit halves of the result register.
#define TestImpl(Dst)                                                          \
  do {                                                                         \
    TestImplSingle(Dst, sqrtps, (uint64_t(0x400000003F800000ull),              \
                                 uint64_t(0x3FE2D10B408F1BBDull)));            \
    TestImplSingle(Dst, rsqrtps, (uint64_t(0x3EFFF0003F7FF000ull),             \
                                  uint64_t(0x3F1078003E64F000ull)));           \
    TestImplSingle(Dst, reciprocalps, (uint64_t(0x3E7FF0003F7FF000ull),        \
                                       uint64_t(0x3EA310003D4CC000ull)));      \
                                                                               \
    TestImplSingle(Dst, sqrtpd, (uint64_t(0x4036A09E9365F5F3ull),              \
                                 uint64_t(0x401C42FAE40282A8ull)));            \
  } while (0)

  TestImpl(xmm0);
  TestImpl(xmm1);
  TestImpl(xmm2);
  TestImpl(xmm3);
  TestImpl(xmm4);
  TestImpl(xmm5);
  TestImpl(xmm6);
  TestImpl(xmm7);

#undef TestImpl
#undef TestImplSingle
}
889 
// Checks the register-register forms of unpcklps, unpcklpd, unpckhps and
// unpckhpd for eight (Dst, Src) register pairs.
TEST_F(AssemblerX8632Test,Unpck)890 TEST_F(AssemblerX8632Test, Unpck) {
  // Inputs: V0 is loaded into the destination register and V1 into the source
  // register (see TestImplSingle below).
891   const Dqword V0(uint64_t(0xAAAAAAAABBBBBBBBull),
892                   uint64_t(0xCCCCCCCCDDDDDDDDull));
893   const Dqword V1(uint64_t(0xEEEEEEEEFFFFFFFFull),
894                   uint64_t(0x9999999988888888ull));
895 
  // One expected value per instruction. The names must be <Inst>Expected
  // because TestImplSingle selects them with Inst##Expected. Read as 32-bit
  // lanes, the *ps expectations interleave the lanes of V0's and V1's first
  // (unpcklps) or second (unpckhps) 64-bit constructor argument; the *pd
  // expectations pair the whole first (unpcklpd) or second (unpckhpd)
  // arguments of V0 and V1.
896   const Dqword unpcklpsExpected(uint64_t(0xFFFFFFFFBBBBBBBBull),
897                                 uint64_t(0xEEEEEEEEAAAAAAAAull));
898   const Dqword unpcklpdExpected(uint64_t(0xAAAAAAAABBBBBBBBull),
899                                 uint64_t(0xEEEEEEEEFFFFFFFFull));
900   const Dqword unpckhpsExpected(uint64_t(0x88888888DDDDDDDDull),
901                                 uint64_t(0x99999999CCCCCCCCull));
902   const Dqword unpckhpdExpected(uint64_t(0xCCCCCCCCDDDDDDDDull),
903                                 uint64_t(0x9999999988888888ull));
904 
  // TestImplSingle(Dst, Src, Inst): loads Dst from slot T0 and Src from slot
  // T1 with movups, emits `Inst Dst, Src`, assembles, fills T0/T1 with V0/V1,
  // runs, and asserts that Dst equals <Inst>Expected. TestString is appended
  // to the assertion to identify the failing combination; reset() follows
  // every case.
905 #define TestImplSingle(Dst, Src, Inst)                                         \
906   do {                                                                         \
907     static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")";    \
908     const uint32_t T0 = allocateDqword();                                      \
909     const uint32_t T1 = allocateDqword();                                      \
910                                                                                \
911     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
912     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
913     __ Inst(XmmRegister::Encoded_Reg_##Dst, XmmRegister::Encoded_Reg_##Src);   \
914                                                                                \
915     AssembledTest test = assemble();                                           \
916     test.setDqwordTo(T0, V0);                                                  \
917     test.setDqwordTo(T1, V1);                                                  \
918     test.run();                                                                \
919                                                                                \
920     ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString;               \
921     reset();                                                                   \
922   } while (0)
923 
  // TestImpl(Dst, Src): runs all four unpack instructions for one pair.
924 #define TestImpl(Dst, Src)                                                     \
925   do {                                                                         \
926     TestImplSingle(Dst, Src, unpcklps);                                        \
927     TestImplSingle(Dst, Src, unpcklpd);                                        \
928     TestImplSingle(Dst, Src, unpckhps);                                        \
929     TestImplSingle(Dst, Src, unpckhpd);                                        \
930   } while (0)
931 
  // Rotating pairs: every xmm register appears once as Dst and once as Src.
932   TestImpl(xmm0, xmm1);
933   TestImpl(xmm1, xmm2);
934   TestImpl(xmm2, xmm3);
935   TestImpl(xmm3, xmm4);
936   TestImpl(xmm4, xmm5);
937   TestImpl(xmm5, xmm6);
938   TestImpl(xmm6, xmm7);
939   TestImpl(xmm7, xmm0);
940 
941 #undef TestImpl
942 #undef TestImplSingle
943 }
944 
// Checks pshufd and shufps with an immediate selector, in both the
// register-register and register-memory forms, for eight register pairs.
TEST_F(AssemblerX8632Test,Shufp)945 TEST_F(AssemblerX8632Test, Shufp) {
  // Inputs: V0 goes into the destination register; V1 is the source operand
  // (loaded into Src, or read from memory in the Addr form).
946   const Dqword V0(uint64_t(0x1111111122222222ull),
947                   uint64_t(0x5555555577777777ull));
948   const Dqword V1(uint64_t(0xAAAAAAAABBBBBBBBull),
949                   uint64_t(0xCCCCCCCCDDDDDDDDull));
950 
  // Per-instruction immediate and expected result. The macros below look
  // these up as <Inst>Imm and <Inst>Expected, so the names are load-bearing.
  // pshufdExpected is made up solely of 32-bit lanes taken from V1.
951   const uint8_t pshufdImm = 0x63;
952   const Dqword pshufdExpected(uint64_t(0xBBBBBBBBCCCCCCCCull),
953                               uint64_t(0xAAAAAAAADDDDDDDDull));
954 
  // shufpsExpected takes its first 64-bit half from lanes of V0 and its
  // second half from a lane of V1 (0xCCCCCCCC, repeated).
955   const uint8_t shufpsImm = 0xf9;
956   const Dqword shufpsExpected(uint64_t(0x7777777711111111ull),
957                               uint64_t(0xCCCCCCCCCCCCCCCCull));
958 
  // TestImplSingleXmmXmm(Dst, Src, Inst): loads Dst/Src from slots T0/T1,
  // emits `Inst Dst, Src, <Inst>Imm` with IceType_f32 as the type argument,
  // assembles, fills T0/T1 with V0/V1, runs, and asserts that Dst equals
  // <Inst>Expected. reset() follows every case.
959 #define TestImplSingleXmmXmm(Dst, Src, Inst)                                   \
960   do {                                                                         \
961     static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")";    \
962     const uint32_t T0 = allocateDqword();                                      \
963     const uint32_t T1 = allocateDqword();                                      \
964                                                                                \
965     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
966     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
967     __ Inst(IceType_f32, XmmRegister::Encoded_Reg_##Dst,                       \
968             XmmRegister::Encoded_Reg_##Src, Immediate(Inst##Imm));             \
969                                                                                \
970     AssembledTest test = assemble();                                           \
971     test.setDqwordTo(T0, V0);                                                  \
972     test.setDqwordTo(T1, V1);                                                  \
973     test.run();                                                                \
974                                                                                \
975     ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString;               \
976     reset();                                                                   \
977   } while (0)
978 
  // TestImplSingleXmmAddr(Dst, Inst): same check, but the source operand is
  // the memory slot T1 itself instead of a second register; only Dst is
  // preloaded with movups.
979 #define TestImplSingleXmmAddr(Dst, Inst)                                       \
980   do {                                                                         \
981     static constexpr char TestString[] = "(" #Dst ", Addr, " #Inst ")";        \
982     const uint32_t T0 = allocateDqword();                                      \
983     const uint32_t T1 = allocateDqword();                                      \
984                                                                                \
985     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
986     __ Inst(IceType_f32, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1),     \
987             Immediate(Inst##Imm));                                             \
988                                                                                \
989     AssembledTest test = assemble();                                           \
990     test.setDqwordTo(T0, V0);                                                  \
991     test.setDqwordTo(T1, V1);                                                  \
992     test.run();                                                                \
993                                                                                \
994     ASSERT_EQ(Inst##Expected, test.Dst<Dqword>()) << TestString;               \
995     reset();                                                                   \
996   } while (0)
997 
  // TestImpl(Dst, Src): both operand forms of both instructions for one pair.
998 #define TestImpl(Dst, Src)                                                     \
999   do {                                                                         \
1000     TestImplSingleXmmXmm(Dst, Src, pshufd);                                    \
1001     TestImplSingleXmmAddr(Dst, pshufd);                                        \
1002     TestImplSingleXmmXmm(Dst, Src, shufps);                                    \
1003     TestImplSingleXmmAddr(Dst, shufps);                                        \
1004   } while (0)
1005 
  // Rotating pairs: every xmm register appears once as Dst and once as Src.
1006   TestImpl(xmm0, xmm1);
1007   TestImpl(xmm1, xmm2);
1008   TestImpl(xmm2, xmm3);
1009   TestImpl(xmm3, xmm4);
1010   TestImpl(xmm4, xmm5);
1011   TestImpl(xmm5, xmm6);
1012   TestImpl(xmm6, xmm7);
1013   TestImpl(xmm7, xmm0);
1014 
1015 #undef TestImpl
1016 #undef TestImplSingleXmmAddr
1017 #undef TestImplSingleXmmXmm
1018 }
1019 
// Checks punpckl for the v4i32, v8i16 and v16i8 element types, in both the
// register-register and register-memory forms, for eight register pairs.
TEST_F(AssemblerX8632Test,Punpckl)1020 TEST_F(AssemblerX8632Test, Punpckl) {
  // One (V0, V1, Expected) triple per element type. The _<Ty> suffix is
  // load-bearing: the macros select the triple via V0_##Ty, V1_##Ty and
  // Expected_##Ty. In each triple the expected value alternates elements of
  // V0's and V1's first 64-bit constructor argument (V0's element first); the
  // second arguments of the inputs do not appear in the expected result.
1021   const Dqword V0_v4i32(uint64_t(0x1111111122222222ull),
1022                         uint64_t(0x5555555577777777ull));
1023   const Dqword V1_v4i32(uint64_t(0xAAAAAAAABBBBBBBBull),
1024                         uint64_t(0xCCCCCCCCDDDDDDDDull));
1025   const Dqword Expected_v4i32(uint64_t(0xBBBBBBBB22222222ull),
1026                               uint64_t(0xAAAAAAAA11111111ull));
1027 
1028   const Dqword V0_v8i16(uint64_t(0x1111222233334444ull),
1029                         uint64_t(0x5555666677778888ull));
1030   const Dqword V1_v8i16(uint64_t(0xAAAABBBBCCCCDDDDull),
1031                         uint64_t(0xEEEEFFFF00009999ull));
1032   const Dqword Expected_v8i16(uint64_t(0xCCCC3333DDDD4444ull),
1033                               uint64_t(0xAAAA1111BBBB2222ull));
1034 
1035   const Dqword V0_v16i8(uint64_t(0x1122334455667788ull),
1036                         uint64_t(0x99AABBCCDDEEFF00ull));
1037   const Dqword V1_v16i8(uint64_t(0xFFEEDDCCBBAA9900ull),
1038                         uint64_t(0xBAADF00DFEEDFACEull));
1039   const Dqword Expected_v16i8(uint64_t(0xBB55AA6699770088ull),
1040                               uint64_t(0xFF11EE22DD33CC44ull));
1041 
  // TestImplXmmXmm(Dst, Src, Inst, Ty): loads Dst/Src from slots T0/T1, emits
  // `Inst Dst, Src` with IceType_<Ty> as the type argument, assembles, fills
  // T0/T1 with V0_<Ty>/V1_<Ty>, runs, and asserts that Dst equals
  // Expected_<Ty>. reset() follows every case.
1042 #define TestImplXmmXmm(Dst, Src, Inst, Ty)                                     \
1043   do {                                                                         \
1044     static constexpr char TestString[] =                                       \
1045         "(" #Dst ", " #Src ", " #Inst ", " #Ty ")";                            \
1046     const uint32_t T0 = allocateDqword();                                      \
1047     const uint32_t T1 = allocateDqword();                                      \
1048                                                                                \
1049     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1050     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
1051     __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst,                      \
1052             XmmRegister::Encoded_Reg_##Src);                                   \
1053                                                                                \
1054     AssembledTest test = assemble();                                           \
1055     test.setDqwordTo(T0, V0_##Ty);                                             \
1056     test.setDqwordTo(T1, V1_##Ty);                                             \
1057     test.run();                                                                \
1058                                                                                \
1059     ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
1060     reset();                                                                   \
1061   } while (0)
1062 
  // TestImplXmmAddr(Dst, Inst, Ty): same check with the memory slot T1 as the
  // source operand instead of a second register.
1063 #define TestImplXmmAddr(Dst, Inst, Ty)                                         \
1064   do {                                                                         \
1065     static constexpr char TestString[] =                                       \
1066         "(" #Dst ", Addr, " #Inst ", " #Ty ")";                                \
1067     const uint32_t T0 = allocateDqword();                                      \
1068     const uint32_t T1 = allocateDqword();                                      \
1069                                                                                \
1070     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1071     __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));   \
1072                                                                                \
1073     AssembledTest test = assemble();                                           \
1074     test.setDqwordTo(T0, V0_##Ty);                                             \
1075     test.setDqwordTo(T1, V1_##Ty);                                             \
1076     test.run();                                                                \
1077                                                                                \
1078     ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
1079     reset();                                                                   \
1080   } while (0)
1081 
  // TestImpl(Dst, Src): both operand forms for each of the three types.
1082 #define TestImpl(Dst, Src)                                                     \
1083   do {                                                                         \
1084     TestImplXmmXmm(Dst, Src, punpckl, v4i32);                                  \
1085     TestImplXmmAddr(Dst, punpckl, v4i32);                                      \
1086     TestImplXmmXmm(Dst, Src, punpckl, v8i16);                                  \
1087     TestImplXmmAddr(Dst, punpckl, v8i16);                                      \
1088     TestImplXmmXmm(Dst, Src, punpckl, v16i8);                                  \
1089     TestImplXmmAddr(Dst, punpckl, v16i8);                                      \
1090   } while (0)
1091 
  // Rotating pairs: every xmm register appears once as Dst and once as Src.
1092   TestImpl(xmm0, xmm1);
1093   TestImpl(xmm1, xmm2);
1094   TestImpl(xmm2, xmm3);
1095   TestImpl(xmm3, xmm4);
1096   TestImpl(xmm4, xmm5);
1097   TestImpl(xmm5, xmm6);
1098   TestImpl(xmm6, xmm7);
1099   TestImpl(xmm7, xmm0);
1100 
1101 #undef TestImpl
1102 #undef TestImplXmmAddr
1103 #undef TestImplXmmXmm
1104 }
1105 
// Checks packss (pack with signed saturation) for the v4i32 and v8i16 source
// element types, in both the register-register and register-memory forms,
// for eight register pairs.
TEST_F(AssemblerX8632Test,Packss)1106 TEST_F(AssemblerX8632Test, Packss) {
  // One (V0, V1, Expected) triple per element type, selected by the macros
  // through the _<Ty> suffix. The inputs mix values that fit the narrower
  // signed type with values that do not, so the expectations contain both
  // pass-through and clamped elements (e.g. 0x7FFFFFFF -> 0x7FFF and
  // 0x80000000 -> 0x8000 for v4i32). The first 64-bit half of each expected
  // value is derived from V0 and the second half from V1.
1107   const Dqword V0_v4i32(uint64_t(0x0001000000001234ull),
1108                         uint64_t(0x7FFFFFFF80000000ull));
1109   const Dqword V1_v4i32(uint64_t(0xFFFFFFFEFFFFFFFFull),
1110                         uint64_t(0x0000800100007FFEull));
1111   const Dqword Expected_v4i32(uint64_t(0x7FFF80007FFF1234ull),
1112                               uint64_t(0x7FFF7FFEFFFEFFFFull));
1113 
1114   const Dqword V0_v8i16(uint64_t(0x0001000000120034ull),
1115                         uint64_t(0xFFFEFFFF7FFF8000ull));
1116   const Dqword V1_v8i16(uint64_t(0x00057FF80081007Eull),
1117                         uint64_t(0x0088007700660055ull));
1118   const Dqword Expected_v8i16(uint64_t(0xFEFF7F8001001234ull),
1119                               uint64_t(0x7F776655057F7F7Eull));
1120 
  // TestImplXmmXmm(Dst, Src, Inst, Ty): loads Dst/Src from slots T0/T1, emits
  // `Inst Dst, Src` with IceType_<Ty> as the type argument, assembles, fills
  // T0/T1 with V0_<Ty>/V1_<Ty>, runs, and asserts that Dst equals
  // Expected_<Ty>. reset() follows every case.
1121 #define TestImplXmmXmm(Dst, Src, Inst, Ty)                                     \
1122   do {                                                                         \
1123     static constexpr char TestString[] =                                       \
1124         "(" #Dst ", " #Src ", " #Inst ", " #Ty ")";                            \
1125     const uint32_t T0 = allocateDqword();                                      \
1126     const uint32_t T1 = allocateDqword();                                      \
1127                                                                                \
1128     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1129     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
1130     __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst,                      \
1131             XmmRegister::Encoded_Reg_##Src);                                   \
1132                                                                                \
1133     AssembledTest test = assemble();                                           \
1134     test.setDqwordTo(T0, V0_##Ty);                                             \
1135     test.setDqwordTo(T1, V1_##Ty);                                             \
1136     test.run();                                                                \
1137                                                                                \
1138     ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
1139     reset();                                                                   \
1140   } while (0)
1141 
  // TestImplXmmAddr(Dst, Inst, Ty): same check with the memory slot T1 as the
  // source operand instead of a second register.
1142 #define TestImplXmmAddr(Dst, Inst, Ty)                                         \
1143   do {                                                                         \
1144     static constexpr char TestString[] =                                       \
1145         "(" #Dst ", Addr, " #Inst ", " #Ty ")";                                \
1146     const uint32_t T0 = allocateDqword();                                      \
1147     const uint32_t T1 = allocateDqword();                                      \
1148                                                                                \
1149     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1150     __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));   \
1151                                                                                \
1152     AssembledTest test = assemble();                                           \
1153     test.setDqwordTo(T0, V0_##Ty);                                             \
1154     test.setDqwordTo(T1, V1_##Ty);                                             \
1155     test.run();                                                                \
1156                                                                                \
1157     ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
1158     reset();                                                                   \
1159   } while (0)
1160 
  // TestImpl(Dst, Src): both operand forms for both element types.
1161 #define TestImpl(Dst, Src)                                                     \
1162   do {                                                                         \
1163     TestImplXmmXmm(Dst, Src, packss, v4i32);                                   \
1164     TestImplXmmAddr(Dst, packss, v4i32);                                       \
1165     TestImplXmmXmm(Dst, Src, packss, v8i16);                                   \
1166     TestImplXmmAddr(Dst, packss, v8i16);                                       \
1167   } while (0)
1168 
  // Rotating pairs: every xmm register appears once as Dst and once as Src.
1169   TestImpl(xmm0, xmm1);
1170   TestImpl(xmm1, xmm2);
1171   TestImpl(xmm2, xmm3);
1172   TestImpl(xmm3, xmm4);
1173   TestImpl(xmm4, xmm5);
1174   TestImpl(xmm5, xmm6);
1175   TestImpl(xmm6, xmm7);
1176   TestImpl(xmm7, xmm0);
1177 
1178 #undef TestImpl
1179 #undef TestImplXmmAddr
1180 #undef TestImplXmmXmm
1181 }
1182 
// Checks packus (pack with unsigned saturation) for the v4i32 and v8i16
// source element types, in both the register-register and register-memory
// forms, for eight register pairs.
TEST_F(AssemblerX8632Test,Packus)1183 TEST_F(AssemblerX8632Test, Packus) {
  // One (V0, V1, Expected) triple per element type, selected by the macros
  // through the _<Ty> suffix. The expectations show both clamping directions:
  // for v4i32, 0x00010000 and 0x7FFFFFFF become 0xFFFF while 0x80000000 and
  // 0xFFFFFFFF become 0x0000. The first 64-bit half of each expected value is
  // derived from V0 and the second half from V1.
1184   const Dqword V0_v4i32(uint64_t(0x0001000000001234ull),
1185                         uint64_t(0x7FFFFFFF80000000ull));
1186   const Dqword V1_v4i32(uint64_t(0xFFFFFFFEFFFFFFFFull),
1187                         uint64_t(0x0000800100007FFEull));
1188   const Dqword Expected_v4i32(uint64_t(0xFFFF0000FFFF1234ull),
1189                               uint64_t(0x80017FFE00000000ull));
1190 
1191   const Dqword V0_v8i16(uint64_t(0x0001000000120034ull),
1192                         uint64_t(0xFFFEFFFF7FFF8000ull));
1193   const Dqword V1_v8i16(uint64_t(0x00057FF80081007Eull),
1194                         uint64_t(0x0088007700660055ull));
1195   const Dqword Expected_v8i16(uint64_t(0x0000FF0001001234ull),
1196                               uint64_t(0x8877665505FF817Eull));
1197 
  // TestImplXmmXmm(Dst, Src, Inst, Ty): loads Dst/Src from slots T0/T1, emits
  // `Inst Dst, Src` with IceType_<Ty> as the type argument, assembles, fills
  // T0/T1 with V0_<Ty>/V1_<Ty>, runs, and asserts that Dst equals
  // Expected_<Ty>. reset() follows every case.
1198 #define TestImplXmmXmm(Dst, Src, Inst, Ty)                                     \
1199   do {                                                                         \
1200     static constexpr char TestString[] =                                       \
1201         "(" #Dst ", " #Src ", " #Inst ", " #Ty ")";                            \
1202     const uint32_t T0 = allocateDqword();                                      \
1203     const uint32_t T1 = allocateDqword();                                      \
1204                                                                                \
1205     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1206     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
1207     __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst,                      \
1208             XmmRegister::Encoded_Reg_##Src);                                   \
1209                                                                                \
1210     AssembledTest test = assemble();                                           \
1211     test.setDqwordTo(T0, V0_##Ty);                                             \
1212     test.setDqwordTo(T1, V1_##Ty);                                             \
1213     test.run();                                                                \
1214                                                                                \
1215     ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
1216     reset();                                                                   \
1217   } while (0)
1218 
  // TestImplXmmAddr(Dst, Inst, Ty): same check with the memory slot T1 as the
  // source operand instead of a second register.
1219 #define TestImplXmmAddr(Dst, Inst, Ty)                                         \
1220   do {                                                                         \
1221     static constexpr char TestString[] =                                       \
1222         "(" #Dst ", Addr, " #Inst ", " #Ty ")";                                \
1223     const uint32_t T0 = allocateDqword();                                      \
1224     const uint32_t T1 = allocateDqword();                                      \
1225                                                                                \
1226     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1227     __ Inst(IceType_##Ty, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));   \
1228                                                                                \
1229     AssembledTest test = assemble();                                           \
1230     test.setDqwordTo(T0, V0_##Ty);                                             \
1231     test.setDqwordTo(T1, V1_##Ty);                                             \
1232     test.run();                                                                \
1233                                                                                \
1234     ASSERT_EQ(Expected_##Ty, test.Dst<Dqword>()) << TestString;                \
1235     reset();                                                                   \
1236   } while (0)
1237 
  // TestImpl(Dst, Src): both operand forms for both element types.
1238 #define TestImpl(Dst, Src)                                                     \
1239   do {                                                                         \
1240     TestImplXmmXmm(Dst, Src, packus, v4i32);                                   \
1241     TestImplXmmAddr(Dst, packus, v4i32);                                       \
1242     TestImplXmmXmm(Dst, Src, packus, v8i16);                                   \
1243     TestImplXmmAddr(Dst, packus, v8i16);                                       \
1244   } while (0)
1245 
  // Rotating pairs: every xmm register appears once as Dst and once as Src.
1246   TestImpl(xmm0, xmm1);
1247   TestImpl(xmm1, xmm2);
1248   TestImpl(xmm2, xmm3);
1249   TestImpl(xmm3, xmm4);
1250   TestImpl(xmm4, xmm5);
1251   TestImpl(xmm5, xmm6);
1252   TestImpl(xmm6, xmm7);
1253   TestImpl(xmm7, xmm0);
1254 
1255 #undef TestImpl
1256 #undef TestImplXmmAddr
1257 #undef TestImplXmmXmm
1258 }
1259 
// Checks pshufb in both the register-register and register-memory forms for
// eight register pairs.
TEST_F(AssemblerX8632Test,Pshufb)1260 TEST_F(AssemblerX8632Test, Pshufb) {
  // V0 is the data loaded into the destination register; V1 is the source
  // operand. V1 contains two bytes with the top bit set (0x80 and 0x8b); the
  // corresponding bytes of Expected are 0x00, while the remaining bytes of
  // Expected are bytes of V0.
1261   const Dqword V0(uint64_t(0x1122334455667788ull),
1262                   uint64_t(0x99aabbccddeeff32ull));
1263   const Dqword V1(uint64_t(0x0204050380060708ull),
1264                   uint64_t(0x010306080a8b0c0dull));
1265 
1266   const Dqword Expected(uint64_t(0x6644335500221132ull),
1267                         uint64_t(0x77552232ee00ccbbull));
1268 
  // TestImplXmmXmm(Dst, Src, Inst): loads Dst/Src from slots T0/T1, emits
  // `Inst Dst, Src` (IceType_void is passed as the type argument), assembles,
  // fills T0/T1 with V0/V1, runs, and asserts that Dst equals Expected.
  // reset() follows every case.
1269 #define TestImplXmmXmm(Dst, Src, Inst)                                         \
1270   do {                                                                         \
1271     static constexpr char TestString[] = "(" #Dst ", " #Src ", " #Inst ")";    \
1272     const uint32_t T0 = allocateDqword();                                      \
1273     const uint32_t T1 = allocateDqword();                                      \
1274                                                                                \
1275     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1276     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
1277     __ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst,                      \
1278             XmmRegister::Encoded_Reg_##Src);                                   \
1279                                                                                \
1280     AssembledTest test = assemble();                                           \
1281     test.setDqwordTo(T0, V0);                                                  \
1282     test.setDqwordTo(T1, V1);                                                  \
1283     test.run();                                                                \
1284                                                                                \
1285     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
1286     reset();                                                                   \
1287   } while (0)
1288 
  // TestImplXmmAddr(Dst, Inst): same check with the memory slot T1 as the
  // source operand instead of a second register.
1289 #define TestImplXmmAddr(Dst, Inst)                                             \
1290   do {                                                                         \
1291     static constexpr char TestString[] = "(" #Dst ", Addr, " #Inst ")";        \
1292     const uint32_t T0 = allocateDqword();                                      \
1293     const uint32_t T1 = allocateDqword();                                      \
1294                                                                                \
1295     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1296     __ Inst(IceType_void, XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));   \
1297                                                                                \
1298     AssembledTest test = assemble();                                           \
1299     test.setDqwordTo(T0, V0);                                                  \
1300     test.setDqwordTo(T1, V1);                                                  \
1301     test.run();                                                                \
1302                                                                                \
1303     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
1304     reset();                                                                   \
1305   } while (0)
1306 
  // TestImpl(Dst, Src): both operand forms for one pair.
1307 #define TestImpl(Dst, Src)                                                     \
1308   do {                                                                         \
1309     TestImplXmmXmm(Dst, Src, pshufb);                                          \
1310     TestImplXmmAddr(Dst, pshufb);                                              \
1311   } while (0)
1312 
  // Rotating pairs: every xmm register appears once as Dst and once as Src.
1313   TestImpl(xmm0, xmm1);
1314   TestImpl(xmm1, xmm2);
1315   TestImpl(xmm2, xmm3);
1316   TestImpl(xmm3, xmm4);
1317   TestImpl(xmm4, xmm5);
1318   TestImpl(xmm5, xmm6);
1319   TestImpl(xmm6, xmm7);
1320   TestImpl(xmm7, xmm0);
1321 
1322 #undef TestImpl
1323 #undef TestImplXmmAddr
1324 #undef TestImplXmmXmm
1325 }
1326 
TEST_F(AssemblerX8632Test,Cvt)1327 TEST_F(AssemblerX8632Test, Cvt) {
1328   const Dqword dq2ps32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
1329   const Dqword dq2ps32SrcValue(-5, 3, 100, 200);
1330   const Dqword dq2ps32Expected(-5.0f, 3.0f, 100.0, 200.0);
1331 
1332   const Dqword dq2ps64DstValue(0.0f, 0.0f, -1.0f, -1.0f);
1333   const Dqword dq2ps64SrcValue(-5, 3, 100, 200);
1334   const Dqword dq2ps64Expected(-5.0f, 3.0f, 100.0, 200.0);
1335 
1336   const Dqword tps2dq32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
1337   const Dqword tps2dq32SrcValue(-5.0f, 3.0f, 100.0, 200.0);
1338   const Dqword tps2dq32Expected(-5, 3, 100, 200);
1339 
1340   const Dqword tps2dq64DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
1341   const Dqword tps2dq64SrcValue(-5.0f, 3.0f, 100.0, 200.0);
1342   const Dqword tps2dq64Expected(-5, 3, 100, 200);
1343 
1344   const Dqword si2ss32DstValue(-1.0f, -1.0f, -1.0f, -1.0f);
1345   const int32_t si2ss32SrcValue = 5;
1346   const Dqword si2ss32Expected(5.0f, -1.0f, -1.0f, -1.0f);
1347 
1348   const Dqword si2ss64DstValue(-1.0, -1.0);
1349   const int32_t si2ss64SrcValue = 5;
1350   const Dqword si2ss64Expected(5.0, -1.0);
1351 
1352   const int32_t tss2si32DstValue = 0xF00F0FF0;
1353   const Dqword tss2si32SrcValue(-5.0f, -1.0f, -1.0f, -1.0f);
1354   const int32_t tss2si32Expected = -5;
1355 
1356   const int32_t tss2si64DstValue = 0xF00F0FF0;
1357   const Dqword tss2si64SrcValue(-5.0, -1.0);
1358   const int32_t tss2si64Expected = -5;
1359 
1360   const Dqword float2float32DstValue(-1.0, -1.0);
1361   const Dqword float2float32SrcValue(-5.0, 3, 100, 200);
1362   const Dqword float2float32Expected(-5.0, -1.0);
1363 
1364   const Dqword float2float64DstValue(-1.0, -1.0, -1.0, -1.0);
1365   const Dqword float2float64SrcValue(-5.0, 3.0);
1366   const Dqword float2float64Expected(-5.0, -1.0, -1.0, -1.0);
1367 
1368 #define TestImplPXmmXmm(Dst, Src, Inst, Size)                                  \
1369   do {                                                                         \
1370     static constexpr char TestString[] =                                       \
1371         "(" #Dst ", " #Src ", cvt" #Inst ", f" #Size ")";                      \
1372     const uint32_t T0 = allocateDqword();                                      \
1373     const uint32_t T1 = allocateDqword();                                      \
1374                                                                                \
1375     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1376     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
1377     __ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst,              \
1378                  XmmRegister::Encoded_Reg_##Src);                              \
1379                                                                                \
1380     AssembledTest test = assemble();                                           \
1381     test.setDqwordTo(T0, Inst##Size##DstValue);                                \
1382     test.setDqwordTo(T1, Inst##Size##SrcValue);                                \
1383     test.run();                                                                \
1384                                                                                \
1385     ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString;         \
1386     reset();                                                                   \
1387   } while (0)
1388 
1389 #define TestImplSXmmReg(Dst, GPR, Inst, Size)                                  \
1390   do {                                                                         \
1391     static constexpr char TestString[] =                                       \
1392         "(" #Dst ", " #GPR ", cvt" #Inst ", f" #Size ")";                      \
1393     const uint32_t T0 = allocateDqword();                                      \
1394                                                                                \
1395     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1396     __ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR,                        \
1397            Immediate(Inst##Size##SrcValue));                                   \
1398     __ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, IceType_i32, \
1399                  GPRRegister::Encoded_Reg_##GPR);                              \
1400                                                                                \
1401     AssembledTest test = assemble();                                           \
1402     test.setDqwordTo(T0, Inst##Size##DstValue);                                \
1403     test.run();                                                                \
1404                                                                                \
1405     ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString;         \
1406     reset();                                                                   \
1407   } while (0)
1408 
1409 #define TestImplSRegXmm(GPR, Src, Inst, Size)                                  \
1410   do {                                                                         \
1411     static constexpr char TestString[] =                                       \
1412         "(" #GPR ", " #Src ", cvt" #Inst ", f" #Size ")";                      \
1413     const uint32_t T0 = allocateDqword();                                      \
1414                                                                                \
1415     __ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR,                        \
1416            Immediate(Inst##Size##DstValue));                                   \
1417     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T0));               \
1418     __ cvt##Inst(IceType_i32, GPRRegister::Encoded_Reg_##GPR, IceType_f##Size, \
1419                  XmmRegister::Encoded_Reg_##Src);                              \
1420                                                                                \
1421     AssembledTest test = assemble();                                           \
1422     test.setDqwordTo(T0, Inst##Size##SrcValue);                                \
1423     test.run();                                                                \
1424                                                                                \
1425     ASSERT_EQ(static_cast<uint32_t>(Inst##Size##Expected), test.GPR())         \
1426         << TestString;                                                         \
1427     reset();                                                                   \
1428   } while (0)
1429 
1430 #define TestImplPXmmAddr(Dst, Inst, Size)                                      \
1431   do {                                                                         \
1432     static constexpr char TestString[] =                                       \
1433         "(" #Dst ", Addr, cvt" #Inst ", f" #Size ")";                          \
1434     const uint32_t T0 = allocateDqword();                                      \
1435     const uint32_t T1 = allocateDqword();                                      \
1436                                                                                \
1437     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1438     __ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst,              \
1439                  dwordAddress(T1));                                            \
1440                                                                                \
1441     AssembledTest test = assemble();                                           \
1442     test.setDqwordTo(T0, Inst##Size##DstValue);                                \
1443     test.setDqwordTo(T1, Inst##Size##SrcValue);                                \
1444     test.run();                                                                \
1445                                                                                \
1446     ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString;         \
1447     reset();                                                                   \
1448   } while (0)
1449 
1450 #define TestImplSXmmAddr(Dst, Inst, Size)                                      \
1451   do {                                                                         \
1452     static constexpr char TestString[] =                                       \
1453         "(" #Dst ", Addr, cvt" #Inst ", f" #Size ")";                          \
1454     const uint32_t T0 = allocateDqword();                                      \
1455     const uint32_t T1 = allocateDword();                                       \
1456                                                                                \
1457     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1458     __ cvt##Inst(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst, IceType_i32, \
1459                  dwordAddress(T1));                                            \
1460                                                                                \
1461     AssembledTest test = assemble();                                           \
1462     test.setDqwordTo(T0, Inst##Size##DstValue);                                \
1463     test.setDwordTo(T1, Inst##Size##SrcValue);                                 \
1464     test.run();                                                                \
1465                                                                                \
1466     ASSERT_EQ(Inst##Size##Expected, test.Dst<Dqword>()) << TestString;         \
1467     reset();                                                                   \
1468   } while (0)
1469 
1470 #define TestImplSRegAddr(GPR, Inst, Size)                                      \
1471   do {                                                                         \
1472     static constexpr char TestString[] =                                       \
1473         "(" #GPR ", Addr, cvt" #Inst ", f" #Size ")";                          \
1474     const uint32_t T0 = allocateDqword();                                      \
1475                                                                                \
1476     __ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR,                        \
1477            Immediate(Inst##Size##DstValue));                                   \
1478     __ cvt##Inst(IceType_i32, GPRRegister::Encoded_Reg_##GPR, IceType_f##Size, \
1479                  dwordAddress(T0));                                            \
1480                                                                                \
1481     AssembledTest test = assemble();                                           \
1482     test.setDqwordTo(T0, Inst##Size##SrcValue);                                \
1483     test.run();                                                                \
1484                                                                                \
1485     ASSERT_EQ(static_cast<uint32_t>(Inst##Size##Expected), test.GPR())         \
1486         << TestString;                                                         \
1487     reset();                                                                   \
1488   } while (0)
1489 
1490 #define TestImplSize(Dst, Src, GPR, Size)                                      \
1491   do {                                                                         \
1492     TestImplPXmmXmm(Dst, Src, dq2ps, Size);                                    \
1493     TestImplPXmmAddr(Src, dq2ps, Size);                                        \
1494     TestImplPXmmXmm(Dst, Src, tps2dq, Size);                                   \
1495     TestImplPXmmAddr(Src, tps2dq, Size);                                       \
1496     TestImplSXmmReg(Dst, GPR, si2ss, Size);                                    \
1497     TestImplSXmmAddr(Dst, si2ss, Size);                                        \
1498     TestImplSRegXmm(GPR, Src, tss2si, Size);                                   \
1499     TestImplSRegAddr(GPR, tss2si, Size);                                       \
1500     TestImplPXmmXmm(Dst, Src, float2float, Size);                              \
1501     TestImplPXmmAddr(Src, float2float, Size);                                  \
1502   } while (0)
1503 
1504 #define TestImpl(Dst, Src, GPR)                                                \
1505   do {                                                                         \
1506     TestImplSize(Dst, Src, GPR, 32);                                           \
1507     TestImplSize(Dst, Src, GPR, 64);                                           \
1508   } while (0)
1509 
1510   TestImpl(xmm0, xmm1, eax);
1511   TestImpl(xmm1, xmm2, ebx);
1512   TestImpl(xmm2, xmm3, ecx);
1513   TestImpl(xmm3, xmm4, edx);
1514   TestImpl(xmm4, xmm5, esi);
1515   TestImpl(xmm5, xmm6, edi);
1516   TestImpl(xmm6, xmm7, eax);
1517   TestImpl(xmm7, xmm0, ebx);
1518 
1519 #undef TestImpl
1520 #undef TestImplSize
1521 #undef TestImplSRegAddr
1522 #undef TestImplSXmmAddr
1523 #undef TestImplPXmmAddr
1524 #undef TestImplSRegXmm
1525 #undef TestImplSXmmReg
1526 #undef TestImplPXmmXmm
1527 }
1528 
TEST_F(AssemblerX8632Test,Ucomiss)1529 TEST_F(AssemblerX8632Test, Ucomiss) {
1530   static constexpr float qnan32 = std::numeric_limits<float>::quiet_NaN();
1531   static constexpr double qnan64 = std::numeric_limits<float>::quiet_NaN();
1532 
1533   Dqword test32DstValue(0.0, qnan32, qnan32, qnan32);
1534   Dqword test32SrcValue(0.0, qnan32, qnan32, qnan32);
1535 
1536   Dqword test64DstValue(0.0, qnan64);
1537   Dqword test64SrcValue(0.0, qnan64);
1538 
1539 #define TestImplXmmXmm(Dst, Value0, Src, Value1, Size, CompType, BParity,      \
1540                        BOther)                                                 \
1541   do {                                                                         \
1542     static constexpr char NearBranch = AssemblerX8632::kNearJump;              \
1543     static constexpr char TestString[] =                                       \
1544         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Size ", " #CompType \
1545         ", " #BParity ", " #BOther ")";                                        \
1546     const uint32_t T0 = allocateDqword();                                      \
1547     test##Size##DstValue.F##Size[0] = Value0;                                  \
1548     const uint32_t T1 = allocateDqword();                                      \
1549     test##Size##SrcValue.F##Size[0] = Value1;                                  \
1550     const uint32_t ImmIfTrue = 0xBEEF;                                         \
1551     const uint32_t ImmIfFalse = 0xC0FFE;                                       \
1552                                                                                \
1553     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1554     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
1555     __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfFalse));  \
1556     __ ucomiss(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst,                \
1557                XmmRegister::Encoded_Reg_##Src);                                \
1558     Label Done;                                                                \
1559     __ j(Cond::Br_##BParity, &Done, NearBranch);                               \
1560     __ j(Cond::Br_##BOther, &Done, NearBranch);                                \
1561     __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfTrue));   \
1562     __ bind(&Done);                                                            \
1563                                                                                \
1564     AssembledTest test = assemble();                                           \
1565     test.setDqwordTo(T0, test##Size##DstValue);                                \
1566     test.setDqwordTo(T1, test##Size##SrcValue);                                \
1567     test.run();                                                                \
1568                                                                                \
1569     ASSERT_EQ(ImmIfTrue, test.eax()) << TestString;                            \
1570     reset();                                                                   \
1571   } while (0)
1572 
1573 #define TestImplXmmAddr(Dst, Value0, Value1, Size, CompType, BParity, BOther)  \
1574   do {                                                                         \
1575     static constexpr char NearBranch = AssemblerX8632::kNearJump;              \
1576     static constexpr char TestString[] =                                       \
1577         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Size ", " #CompType     \
1578         ", " #BParity ", " #BOther ")";                                        \
1579     const uint32_t T0 = allocateDqword();                                      \
1580     test##Size##DstValue.F##Size[0] = Value0;                                  \
1581     const uint32_t T1 = allocateDqword();                                      \
1582     test##Size##SrcValue.F##Size[0] = Value1;                                  \
1583     const uint32_t ImmIfTrue = 0xBEEF;                                         \
1584     const uint32_t ImmIfFalse = 0xC0FFE;                                       \
1585                                                                                \
1586     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1587     __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfFalse));  \
1588     __ ucomiss(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst,                \
1589                dwordAddress(T1));                                              \
1590     Label Done;                                                                \
1591     __ j(Cond::Br_##BParity, &Done, NearBranch);                               \
1592     __ j(Cond::Br_##BOther, &Done, NearBranch);                                \
1593     __ mov(IceType_i32, GPRRegister::Encoded_Reg_eax, Immediate(ImmIfTrue));   \
1594     __ bind(&Done);                                                            \
1595                                                                                \
1596     AssembledTest test = assemble();                                           \
1597     test.setDqwordTo(T0, test##Size##DstValue);                                \
1598     test.setDqwordTo(T1, test##Size##SrcValue);                                \
1599     test.run();                                                                \
1600                                                                                \
1601     ASSERT_EQ(ImmIfTrue, test.eax()) << TestString;                            \
1602     reset();                                                                   \
1603   } while (0)
1604 
1605 #define TestImplCond(Dst, Value0, Src, Value1, Size, CompType, BParity,        \
1606                      BOther)                                                   \
1607   do {                                                                         \
1608     TestImplXmmXmm(Dst, Value0, Src, Value1, Size, CompType, BParity, BOther); \
1609     TestImplXmmAddr(Dst, Value0, Value1, Size, CompType, BParity, BOther);     \
1610   } while (0)
1611 
1612 #define TestImplSize(Dst, Src, Size)                                           \
1613   do {                                                                         \
1614     TestImplCond(Dst, 1.0, Src, 1.0, Size, isEq, p, ne);                       \
1615     TestImplCond(Dst, 1.0, Src, 2.0, Size, isNe, p, e);                        \
1616     TestImplCond(Dst, 1.0, Src, 2.0, Size, isLe, p, a);                        \
1617     TestImplCond(Dst, 1.0, Src, 1.0, Size, isLe, p, a);                        \
1618     TestImplCond(Dst, 1.0, Src, 2.0, Size, isLt, p, ae);                       \
1619     TestImplCond(Dst, 2.0, Src, 1.0, Size, isGe, p, b);                        \
1620     TestImplCond(Dst, 1.0, Src, 1.0, Size, isGe, p, b);                        \
1621     TestImplCond(Dst, 2.0, Src, 1.0, Size, isGt, p, be);                       \
1622     TestImplCond(Dst, qnan##Size, Src, 1.0, Size, isUnord, np, o);             \
1623     TestImplCond(Dst, 1.0, Src, qnan##Size, Size, isUnord, np, s);             \
1624     TestImplCond(Dst, qnan##Size, Src, qnan##Size, Size, isUnord, np, s);      \
1625   } while (0)
1626 
1627 #define TestImpl(Dst, Src)                                                     \
1628   do {                                                                         \
1629     TestImplSize(Dst, Src, 32);                                                \
1630     TestImplSize(Dst, Src, 64);                                                \
1631   } while (0)
1632 
1633   TestImpl(xmm0, xmm1);
1634   TestImpl(xmm1, xmm2);
1635   TestImpl(xmm2, xmm3);
1636   TestImpl(xmm3, xmm4);
1637   TestImpl(xmm4, xmm5);
1638   TestImpl(xmm5, xmm6);
1639   TestImpl(xmm6, xmm7);
1640   TestImpl(xmm7, xmm0);
1641 
1642 #undef TestImpl
1643 #undef TestImplSize
1644 #undef TestImplCond
1645 #undef TestImplXmmAddr
1646 #undef TestImplXmmXmm
1647 }
1648 
TEST_F(AssemblerX8632Test,Sqrtss)1649 TEST_F(AssemblerX8632Test, Sqrtss) {
1650   Dqword test32SrcValue(-100.0, -100.0, -100.0, -100.0);
1651   Dqword test32DstValue(-1.0, -1.0, -1.0, -1.0);
1652 
1653   Dqword test64SrcValue(-100.0, -100.0);
1654   Dqword test64DstValue(-1.0, -1.0);
1655 
1656 #define TestSqrtssXmmXmm(Dst, Src, Value1, Result, Size)                       \
1657   do {                                                                         \
1658     static constexpr char TestString[] =                                       \
1659         "(" #Dst ", " #Src ", " #Value1 ", " #Result ", " #Size ")";           \
1660     const uint32_t T0 = allocateDqword();                                      \
1661     test##Size##SrcValue.F##Size[0] = Value1;                                  \
1662     const uint32_t T1 = allocateDqword();                                      \
1663                                                                                \
1664     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T0));               \
1665     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));               \
1666     __ sqrt(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst,                   \
1667             XmmRegister::Encoded_Reg_##Src);                                   \
1668                                                                                \
1669     AssembledTest test = assemble();                                           \
1670     test.setDqwordTo(T0, test##Size##SrcValue);                                \
1671     test.setDqwordTo(T1, test##Size##DstValue);                                \
1672     test.run();                                                                \
1673                                                                                \
1674     Dqword Expected = test##Size##DstValue;                                    \
1675     Expected.F##Size[0] = Result;                                              \
1676     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
1677     reset();                                                                   \
1678   } while (0)
1679 
1680 #define TestSqrtssXmmAddr(Dst, Value1, Result, Size)                           \
1681   do {                                                                         \
1682     static constexpr char TestString[] =                                       \
1683         "(" #Dst ", Addr, " #Value1 ", " #Result ", " #Size ")";               \
1684     const uint32_t T0 = allocateDqword();                                      \
1685     test##Size##SrcValue.F##Size[0] = Value1;                                  \
1686     const uint32_t T1 = allocateDqword();                                      \
1687                                                                                \
1688     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T1));               \
1689     __ sqrt(IceType_f##Size, XmmRegister::Encoded_Reg_##Dst,                   \
1690             dwordAddress(T0));                                                 \
1691                                                                                \
1692     AssembledTest test = assemble();                                           \
1693     test.setDqwordTo(T0, test##Size##SrcValue);                                \
1694     test.setDqwordTo(T1, test##Size##DstValue);                                \
1695     test.run();                                                                \
1696                                                                                \
1697     Dqword Expected = test##Size##DstValue;                                    \
1698     Expected.F##Size[0] = Result;                                              \
1699     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
1700     reset();                                                                   \
1701   } while (0)
1702 
1703 #define TestSqrtssSize(Dst, Src, Size)                                         \
1704   do {                                                                         \
1705     TestSqrtssXmmXmm(Dst, Src, 4.0, 2.0, Size);                                \
1706     TestSqrtssXmmAddr(Dst, 4.0, 2.0, Size);                                    \
1707     TestSqrtssXmmXmm(Dst, Src, 9.0, 3.0, Size);                                \
1708     TestSqrtssXmmAddr(Dst, 9.0, 3.0, Size);                                    \
1709     TestSqrtssXmmXmm(Dst, Src, 100.0, 10.0, Size);                             \
1710     TestSqrtssXmmAddr(Dst, 100.0, 10.0, Size);                                 \
1711   } while (0)
1712 
1713 #define TestSqrtss(Dst, Src)                                                   \
1714   do {                                                                         \
1715     TestSqrtssSize(Dst, Src, 32);                                              \
1716     TestSqrtssSize(Dst, Src, 64);                                              \
1717   } while (0)
1718 
1719   TestSqrtss(xmm0, xmm1);
1720   TestSqrtss(xmm1, xmm2);
1721   TestSqrtss(xmm2, xmm3);
1722   TestSqrtss(xmm3, xmm4);
1723   TestSqrtss(xmm4, xmm5);
1724   TestSqrtss(xmm5, xmm6);
1725   TestSqrtss(xmm6, xmm7);
1726   TestSqrtss(xmm7, xmm0);
1727 
1728 #undef TestSqrtss
1729 #undef TestSqrtssSize
1730 #undef TestSqrtssXmmAddr
1731 #undef TestSqrtssXmmXmm
1732 }
1733 
TEST_F(AssemblerX8632Test,Insertps)1734 TEST_F(AssemblerX8632Test, Insertps) {
1735 #define TestInsertpsXmmXmmImm(Dst, Value0, Src, Value1, Imm, Expected)         \
1736   do {                                                                         \
1737     static constexpr char TestString[] =                                       \
1738         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Imm ", " #Expected  \
1739         ")";                                                                   \
1740     const uint32_t T0 = allocateDqword();                                      \
1741     const Dqword V0 Value0;                                                    \
1742     const uint32_t T1 = allocateDqword();                                      \
1743     const Dqword V1 Value1;                                                    \
1744                                                                                \
1745     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1746     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
1747     __ insertps(IceType_v4f32, XmmRegister::Encoded_Reg_##Dst,                 \
1748                 XmmRegister::Encoded_Reg_##Src, Immediate(Imm));               \
1749                                                                                \
1750     AssembledTest test = assemble();                                           \
1751     test.setDqwordTo(T0, V0);                                                  \
1752     test.setDqwordTo(T1, V1);                                                  \
1753     test.run();                                                                \
1754                                                                                \
1755     ASSERT_EQ(Dqword Expected, test.Dst<Dqword>()) << TestString;              \
1756     reset();                                                                   \
1757   } while (0)
1758 
1759 #define TestInsertpsXmmAddrImm(Dst, Value0, Value1, Imm, Expected)             \
1760   do {                                                                         \
1761     static constexpr char TestString[] =                                       \
1762         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Imm ", " #Expected ")"; \
1763     const uint32_t T0 = allocateDqword();                                      \
1764     const Dqword V0 Value0;                                                    \
1765     const uint32_t T1 = allocateDqword();                                      \
1766     const Dqword V1 Value1;                                                    \
1767                                                                                \
1768     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1769     __ insertps(IceType_v4f32, XmmRegister::Encoded_Reg_##Dst,                 \
1770                 dwordAddress(T1), Immediate(Imm));                             \
1771                                                                                \
1772     AssembledTest test = assemble();                                           \
1773     test.setDqwordTo(T0, V0);                                                  \
1774     test.setDqwordTo(T1, V1);                                                  \
1775     test.run();                                                                \
1776                                                                                \
1777     ASSERT_EQ(Dqword Expected, test.Dst<Dqword>()) << TestString;              \
1778     reset();                                                                   \
1779   } while (0)
1780 
1781 #define TestInsertps(Dst, Src)                                                 \
1782   do {                                                                         \
1783     TestInsertpsXmmXmmImm(                                                     \
1784         Dst, (uint64_t(-1), uint64_t(-1)), Src,                                \
1785         (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)),    \
1786         0x99,                                                                  \
1787         (uint64_t(0xDDDDDDDD00000000ull), uint64_t(0x00000000FFFFFFFFull)));   \
1788     TestInsertpsXmmAddrImm(                                                    \
1789         Dst, (uint64_t(-1), uint64_t(-1)),                                     \
1790         (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)),    \
1791         0x99,                                                                  \
1792         (uint64_t(0xBBBBBBBB00000000ull), uint64_t(0x00000000FFFFFFFFull)));   \
1793     TestInsertpsXmmXmmImm(                                                     \
1794         Dst, (uint64_t(-1), uint64_t(-1)), Src,                                \
1795         (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)),    \
1796         0x9D,                                                                  \
1797         (uint64_t(0xDDDDDDDD00000000ull), uint64_t(0x0000000000000000ull)));   \
1798     TestInsertpsXmmAddrImm(                                                    \
1799         Dst, (uint64_t(-1), uint64_t(-1)),                                     \
1800         (uint64_t(0xAAAAAAAABBBBBBBBull), uint64_t(0xCCCCCCCCDDDDDDDDull)),    \
1801         0x9D,                                                                  \
1802         (uint64_t(0xBBBBBBBB00000000ull), uint64_t(0x0000000000000000ull)));   \
1803   } while (0)
1804 
1805   TestInsertps(xmm0, xmm1);
1806   TestInsertps(xmm1, xmm2);
1807   TestInsertps(xmm2, xmm3);
1808   TestInsertps(xmm3, xmm4);
1809   TestInsertps(xmm4, xmm5);
1810   TestInsertps(xmm5, xmm6);
1811   TestInsertps(xmm6, xmm7);
1812   TestInsertps(xmm7, xmm0);
1813 
1814 #undef TestInsertps
1815 #undef TestInsertpsXmmXmmAddr
1816 #undef TestInsertpsXmmXmmImm
1817 }
1818 
TEST_F(AssemblerX8632Test,Pinsr)1819 TEST_F(AssemblerX8632Test, Pinsr) {
1820   static constexpr uint8_t Mask32 = 0x03;
1821   static constexpr uint8_t Mask16 = 0x07;
1822   static constexpr uint8_t Mask8 = 0x0F;
1823 
1824 #define TestPinsrXmmGPRImm(Dst, Value0, GPR, Value1, Imm, Size)                \
1825   do {                                                                         \
1826     static constexpr char TestString[] =                                       \
1827         "(" #Dst ", " #Value0 ", " #GPR ", " #Value1 ", " #Imm ", " #Size ")"; \
1828     const uint32_t T0 = allocateDqword();                                      \
1829     const Dqword V0 Value0;                                                    \
1830                                                                                \
1831     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1832     __ mov(IceType_i32, GPRRegister::Encoded_Reg_##GPR, Immediate(Value1));    \
1833     __ pinsr(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst,                  \
1834              GPRRegister::Encoded_Reg_##GPR, Immediate(Imm));                  \
1835                                                                                \
1836     AssembledTest test = assemble();                                           \
1837     test.setDqwordTo(T0, V0);                                                  \
1838     test.run();                                                                \
1839                                                                                \
1840     constexpr uint8_t sel = (Imm)&Mask##Size;                                  \
1841     Dqword Expected = V0;                                                      \
1842     Expected.U##Size[sel] = Value1;                                            \
1843     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
1844     reset();                                                                   \
1845   } while (0)
1846 
1847 #define TestPinsrXmmAddrImm(Dst, Value0, Value1, Imm, Size)                    \
1848   do {                                                                         \
1849     static constexpr char TestString[] =                                       \
1850         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Imm ", " #Size ")";     \
1851     const uint32_t T0 = allocateDqword();                                      \
1852     const Dqword V0 Value0;                                                    \
1853     const uint32_t T1 = allocateDword();                                       \
1854     const uint32_t V1 = Value1;                                                \
1855                                                                                \
1856     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1857     __ pinsr(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst,                  \
1858              dwordAddress(T1), Immediate(Imm));                                \
1859                                                                                \
1860     AssembledTest test = assemble();                                           \
1861     test.setDqwordTo(T0, V0);                                                  \
1862     test.setDwordTo(T1, V1);                                                   \
1863     test.run();                                                                \
1864                                                                                \
1865     constexpr uint8_t sel = (Imm)&Mask##Size;                                  \
1866     Dqword Expected = V0;                                                      \
1867     Expected.U##Size[sel] = Value1;                                            \
1868     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
1869     reset();                                                                   \
1870   } while (0)
1871 
1872 #define TestPinsrSize(Dst, GPR, Value1, Imm, Size)                             \
1873   do {                                                                         \
1874     TestPinsrXmmGPRImm(Dst, (uint64_t(0xAAAAAAAABBBBBBBBull),                  \
1875                              uint64_t(0xFFFFFFFFDDDDDDDDull)),                 \
1876                        GPR, Value1, Imm, Size);                                \
1877     TestPinsrXmmAddrImm(Dst, (uint64_t(0xAAAAAAAABBBBBBBBull),                 \
1878                               uint64_t(0xFFFFFFFFDDDDDDDDull)),                \
1879                         Value1, Imm, Size);                                    \
1880   } while (0)
1881 
1882 #define TestPinsr(Src, Dst)                                                    \
1883   do {                                                                         \
1884     TestPinsrSize(Src, Dst, 0xEE, 0x03, 8);                                    \
1885     TestPinsrSize(Src, Dst, 0xFFEE, 0x03, 16);                                 \
1886     TestPinsrSize(Src, Dst, 0xC0FFEE, 0x03, 32);                               \
1887   } while (0)
1888 
1889   TestPinsr(xmm0, eax);
1890   TestPinsr(xmm1, ebx);
1891   TestPinsr(xmm2, ecx);
1892   TestPinsr(xmm3, edx);
1893   TestPinsr(xmm4, esi);
1894   TestPinsr(xmm5, edi);
1895   TestPinsr(xmm6, eax);
1896   TestPinsr(xmm7, ebx);
1897 
1898 #undef TestPinsr
1899 #undef TestPinsrSize
1900 #undef TestPinsrXmmAddrImm
1901 #undef TestPinsrXmmGPRImm
1902 }
1903 
TEST_F(AssemblerX8632Test,Pextr)1904 TEST_F(AssemblerX8632Test, Pextr) {
1905   static constexpr uint8_t Mask32 = 0x03;
1906   static constexpr uint8_t Mask16 = 0x07;
1907   static constexpr uint8_t Mask8 = 0x0F;
1908 
1909 #define TestPextrGPRXmmImm(GPR, Src, Value1, Imm, Size)                        \
1910   do {                                                                         \
1911     static constexpr char TestString[] =                                       \
1912         "(" #GPR ", " #Src ", " #Value1 ", " #Imm ", " #Size ")";              \
1913     const uint32_t T0 = allocateDqword();                                      \
1914     const Dqword V0 Value1;                                                    \
1915                                                                                \
1916     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T0));               \
1917     __ pextr(IceType_i##Size, GPRRegister::Encoded_Reg_##GPR,                  \
1918              XmmRegister::Encoded_Reg_##Src, Immediate(Imm));                  \
1919                                                                                \
1920     AssembledTest test = assemble();                                           \
1921     test.setDqwordTo(T0, V0);                                                  \
1922     test.run();                                                                \
1923                                                                                \
1924     constexpr uint8_t sel = (Imm)&Mask##Size;                                  \
1925     ASSERT_EQ(V0.U##Size[sel], test.GPR()) << TestString;                      \
1926     reset();                                                                   \
1927   } while (0)
1928 
1929 #define TestPextrSize(GPR, Src, Value1, Imm, Size)                             \
1930   do {                                                                         \
1931     TestPextrGPRXmmImm(GPR, Src, (uint64_t(0xAAAAAAAABBBBBBBBull),             \
1932                                   uint64_t(0xFFFFFFFFDDDDDDDDull)),            \
1933                        Imm, Size);                                             \
1934   } while (0)
1935 
1936 #define TestPextr(Src, Dst)                                                    \
1937   do {                                                                         \
1938     TestPextrSize(Src, Dst, 0xEE, 0x03, 8);                                    \
1939     TestPextrSize(Src, Dst, 0xFFEE, 0x03, 16);                                 \
1940     TestPextrSize(Src, Dst, 0xC0FFEE, 0x03, 32);                               \
1941   } while (0)
1942 
1943   TestPextr(eax, xmm0);
1944   TestPextr(ebx, xmm1);
1945   TestPextr(ecx, xmm2);
1946   TestPextr(edx, xmm3);
1947   TestPextr(esi, xmm4);
1948   TestPextr(edi, xmm5);
1949   TestPextr(eax, xmm6);
1950   TestPextr(ebx, xmm7);
1951 
1952 #undef TestPextr
1953 #undef TestPextrSize
1954 #undef TestPextrXmmGPRImm
1955 }
1956 
TEST_F(AssemblerX8632Test,Pcmpeq_Pcmpgt)1957 TEST_F(AssemblerX8632Test, Pcmpeq_Pcmpgt) {
1958 #define TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, Inst, Op)               \
1959   do {                                                                         \
1960     static constexpr char TestString[] =                                       \
1961         "(" #Dst ", " #Value0 ", " #Src ", " #Value1 ", " #Size ", " #Op ")";  \
1962     const uint32_t T0 = allocateDqword();                                      \
1963     const Dqword V0 Value0;                                                    \
1964     const uint32_t T1 = allocateDqword();                                      \
1965     const Dqword V1 Value1;                                                    \
1966                                                                                \
1967     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1968     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
1969     __ Inst(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst,                   \
1970             XmmRegister::Encoded_Reg_##Src);                                   \
1971                                                                                \
1972     AssembledTest test = assemble();                                           \
1973     test.setDqwordTo(T0, V0);                                                  \
1974     test.setDqwordTo(T1, V1);                                                  \
1975     test.run();                                                                \
1976                                                                                \
1977     Dqword Expected(uint64_t(0), uint64_t(0));                                 \
1978     static constexpr uint8_t ArraySize =                                       \
1979         sizeof(Dqword) / sizeof(uint##Size##_t);                               \
1980     for (uint8_t i = 0; i < ArraySize; ++i) {                                  \
1981       Expected.I##Size[i] = (V1.I##Size[i] Op V0.I##Size[i]) ? -1 : 0;         \
1982     }                                                                          \
1983     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
1984     reset();                                                                   \
1985   } while (0)
1986 
1987 #define TestPcmpXmmAddr(Dst, Value0, Value1, Size, Inst, Op)                   \
1988   do {                                                                         \
1989     static constexpr char TestString[] =                                       \
1990         "(" #Dst ", " #Value0 ", Addr, " #Value1 ", " #Size ", " #Op ")";      \
1991     const uint32_t T0 = allocateDqword();                                      \
1992     const Dqword V0 Value0;                                                    \
1993     const uint32_t T1 = allocateDqword();                                      \
1994     const Dqword V1 Value1;                                                    \
1995                                                                                \
1996     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
1997     __ Inst(IceType_i##Size, XmmRegister::Encoded_Reg_##Dst,                   \
1998             dwordAddress(T1));                                                 \
1999                                                                                \
2000     AssembledTest test = assemble();                                           \
2001     test.setDqwordTo(T0, V0);                                                  \
2002     test.setDqwordTo(T1, V1);                                                  \
2003     test.run();                                                                \
2004                                                                                \
2005     Dqword Expected(uint64_t(0), uint64_t(0));                                 \
2006     static constexpr uint8_t ArraySize =                                       \
2007         sizeof(Dqword) / sizeof(uint##Size##_t);                               \
2008     for (uint8_t i = 0; i < ArraySize; ++i) {                                  \
2009       Expected.I##Size[i] = (V1.I##Size[i] Op V0.I##Size[i]) ? -1 : 0;         \
2010     }                                                                          \
2011     ASSERT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
2012     reset();                                                                   \
2013   } while (0)
2014 
2015 #define TestPcmpValues(Dst, Value0, Src, Value1, Size)                         \
2016   do {                                                                         \
2017     TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, pcmpeq, == );               \
2018     TestPcmpXmmAddr(Dst, Value0, Value1, Size, pcmpeq, == );                   \
2019     TestPcmpXmmXmm(Dst, Value0, Src, Value1, Size, pcmpgt, < );                \
2020     TestPcmpXmmAddr(Dst, Value0, Value1, Size, pcmpgt, < );                    \
2021   } while (0)
2022 
2023 #define TestPcmpSize(Dst, Src, Size)                                           \
2024   do {                                                                         \
2025     TestPcmpValues(Dst, (uint64_t(0x8888888888888888ull),                      \
2026                          uint64_t(0x0000000000000000ull)),                     \
2027                    Src, (uint64_t(0x0000008800008800ull),                      \
2028                          uint64_t(0xFFFFFFFFFFFFFFFFull)),                     \
2029                    Size);                                                      \
2030     TestPcmpValues(Dst, (uint64_t(0x123567ABAB55DE01ull),                      \
2031                          uint64_t(0x12345abcde12345Aull)),                     \
2032                    Src, (uint64_t(0x0000008800008800ull),                      \
2033                          uint64_t(0xAABBCCDD1234321Aull)),                     \
2034                    Size);                                                      \
2035   } while (0)
2036 
2037 #define TestPcmp(Dst, Src)                                                     \
2038   do {                                                                         \
2039     TestPcmpSize(xmm0, xmm1, 8);                                               \
2040     TestPcmpSize(xmm0, xmm1, 16);                                              \
2041     TestPcmpSize(xmm0, xmm1, 32);                                              \
2042   } while (0)
2043 
2044   TestPcmp(xmm0, xmm1);
2045   TestPcmp(xmm1, xmm2);
2046   TestPcmp(xmm2, xmm3);
2047   TestPcmp(xmm3, xmm4);
2048   TestPcmp(xmm4, xmm5);
2049   TestPcmp(xmm5, xmm6);
2050   TestPcmp(xmm6, xmm7);
2051   TestPcmp(xmm7, xmm0);
2052 
2053 #undef TestPcmp
2054 #undef TestPcmpSize
2055 #undef TestPcmpValues
2056 #undef TestPcmpXmmAddr
2057 #undef TestPcmpXmmXmm
2058 }
2059 
TEST_F(AssemblerX8632Test,Roundsd)2060 TEST_F(AssemblerX8632Test, Roundsd) {
2061 #define TestRoundsdXmmXmm(Dst, Src, Mode, Input, RN)                           \
2062   do {                                                                         \
2063     static constexpr char TestString[] =                                       \
2064         "(" #Dst ", " #Src ", " #Mode ", " #Input ", " #RN ")";                \
2065     const uint32_t T0 = allocateDqword();                                      \
2066     const Dqword V0(-3.0, -3.0);                                               \
2067     const uint32_t T1 = allocateDqword();                                      \
2068     const Dqword V1(double(Input), -123.4);                                    \
2069                                                                                \
2070     __ movups(XmmRegister::Encoded_Reg_##Dst, dwordAddress(T0));               \
2071     __ movups(XmmRegister::Encoded_Reg_##Src, dwordAddress(T1));               \
2072     __ round(IceType_f64, XmmRegister::Encoded_Reg_##Dst,                      \
2073              XmmRegister::Encoded_Reg_##Src,                                   \
2074              Immediate(AssemblerX8632::k##Mode));                              \
2075                                                                                \
2076     AssembledTest test = assemble();                                           \
2077     test.setDqwordTo(T0, V0);                                                  \
2078     test.setDqwordTo(T1, V1);                                                  \
2079     test.run();                                                                \
2080                                                                                \
2081     const Dqword Expected(double(RN), -3.0);                                   \
2082     EXPECT_EQ(Expected, test.Dst<Dqword>()) << TestString;                     \
2083     reset();                                                                   \
2084   } while (0)
2085 
2086 #define TestRoundsd(Dst, Src)                                                  \
2087   do {                                                                         \
2088     TestRoundsdXmmXmm(Dst, Src, RoundToNearest, 5.51, 6);                      \
2089     TestRoundsdXmmXmm(Dst, Src, RoundToNearest, 5.49, 5);                      \
2090     TestRoundsdXmmXmm(Dst, Src, RoundDown, 5.51, 5);                           \
2091     TestRoundsdXmmXmm(Dst, Src, RoundUp, 5.49, 6);                             \
2092     TestRoundsdXmmXmm(Dst, Src, RoundToZero, 5.49, 5);                         \
2093     TestRoundsdXmmXmm(Dst, Src, RoundToZero, 5.51, 5);                         \
2094   } while (0)
2095 
2096   TestRoundsd(xmm0, xmm1);
2097   TestRoundsd(xmm1, xmm2);
2098   TestRoundsd(xmm2, xmm3);
2099   TestRoundsd(xmm3, xmm4);
2100   TestRoundsd(xmm4, xmm5);
2101   TestRoundsd(xmm5, xmm6);
2102   TestRoundsd(xmm6, xmm7);
2103   TestRoundsd(xmm7, xmm0);
2104 
2105 #undef TestRoundsd
2106 #undef TestRoundsdXmmXmm
2107 }
2108 
// Exercises set1ps: after the emitted code runs, every 32-bit lane of the
// target XMM register is expected to hold Imm.
TEST_F(AssemblerX8632Test, Set1ps) {
// Xmm is the register under test, Src is the GPR handed to set1ps, and Imm is
// the 32-bit value expected in all four lanes.
#define TestImpl(Xmm, Src, Imm)                                                \
  do {                                                                         \
    static constexpr char TestString[] = "(" #Xmm ", " #Src ", " #Imm ")";     \
                                                                               \
    __ set1ps(XmmRegister::Encoded_Reg_##Xmm, GPRRegister::Encoded_Reg_##Src,  \
              Immediate(Imm));                                                 \
                                                                               \
    AssembledTest test = assemble();                                           \
    test.run();                                                                \
                                                                               \
    const uint64_t TwoLanes = (uint64_t(Imm) << 32) | uint32_t(Imm);           \
    const Dqword Expected(uint64_t(TwoLanes), uint64_t(TwoLanes));             \
    ASSERT_EQ(Expected, test.Xmm<Dqword>()) << TestString;                     \
    reset();                                                                   \
  } while (0)

  TestImpl(xmm0, ebx, 1);
  TestImpl(xmm1, ecx, 2);
  TestImpl(xmm2, edx, 3);
  TestImpl(xmm3, esi, 4);
  TestImpl(xmm4, edi, 5);
  TestImpl(xmm5, eax, 6);
  TestImpl(xmm6, ebx, 7);
  TestImpl(xmm7, ecx, 8);

#undef TestImpl
}
2136 
2137 } // end of anonymous namespace
2138 } // end of namespace Test
2139 } // end of namespace X8632
2140 } // end of namespace Ice
2141