1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <assert.h>
4 #include <math.h>
5 #include "tests/malloc.h"
6
/* Short aliases for the basic integer and floating-point types. */
typedef unsigned char      UChar;
typedef unsigned int       UInt;
typedef unsigned long      UWord;   /* used below to hold a pointer value */
typedef unsigned long long ULong;
typedef double             Double;
typedef float              Float;

/* Non-zero iff the five low bits of _ptr are clear, i.e. it is a
   multiple of 32. */
#define IS_32_ALIGNED(_ptr) (((UWord)(_ptr) & 0x1F) == 0)

/* 16 bytes viewable as raw bytes, 32-bit words, floats or doubles. */
typedef union {
   UChar  u8[16];
   UInt   u32[4];
   Float  f32[4];
   Double f64[2];
} XMM;

/* 32 bytes viewable as raw bytes, 32-bit words or two XMM halves. */
typedef union {
   UChar u8[32];
   UInt  u32[8];
   XMM   xmm[2];
} YMM;

/* Working set of one test: four vector values (r1..r4) followed by a
   fifth vector (m). */
typedef struct {
   YMM r1;
   YMM r2;
   YMM r3;
   YMM r4;
   YMM m;
} Block;
19
/* Print the 32-bit lane `idx` of *vec to stdout.
   A normal value is shown as its four raw bytes in hex, highest-addressed
   byte first; zero, infinity, NaN and everything else (reported as
   subnormal) are shown as a bracketed tag instead.  Every tag except
   the NaN one carries the sign. */
void showFloat ( XMM* vec, int idx )
{
   const Float value = vec->f32[idx];
   const char  sign  = signbit (value) != 0 ? '-' : ' ';
   const int   kind  = fpclassify (value);

   if (kind == FP_NORMAL) {
      const UChar* bytes = &vec->u8[idx * 4];
      for (int k = 3; k >= 0; k--)
         printf("%02x", (UInt)bytes[k]);
   } else if (kind == FP_INFINITE) {
      printf ("[ %cINF ]", sign);
   } else if (kind == FP_ZERO) {
      printf ("[%cZERO ]", sign);
   } else if (kind == FP_NAN) {
      printf ("[ NAN ]");
   } else {
      /* Whatever class is left is printed as subnormal. */
      printf ("[%cSUBNR]", sign);
   }
}
49
/* Print the 64-bit lane `idx` of *vec to stdout.
   A normal value is shown as its eight raw bytes in hex, highest-addressed
   byte first; zero, infinity, NaN and everything else (reported as
   subnormal) are shown as a bracketed tag instead.  Every tag except
   the NaN one carries the sign. */
void showDouble ( XMM* vec, int idx )
{
   const Double value = vec->f64[idx];
   const char   sign  = signbit (value) != 0 ? '-' : ' ';
   const int    kind  = fpclassify (value);

   if (kind == FP_NORMAL) {
      const UChar* bytes = &vec->u8[idx * 8];
      for (int k = 7; k >= 0; k--)
         printf("%02x", (UInt)bytes[k]);
   } else if (kind == FP_INFINITE) {
      printf ("[ %cINF ]", sign);
   } else if (kind == FP_ZERO) {
      printf ("[ %cZERO ]", sign);
   } else if (kind == FP_NAN) {
      printf ("[ NAN ]");
   } else {
      /* Whatever class is left is printed as subnormal. */
      printf ("[ %cSUBNORMAL ]", sign);
   }
}
79
/* Print all lanes of *vec, highest index first, separated by '.'.
   The vector is treated as two doubles when isDouble is non-zero and
   as four floats otherwise. */
void showXMM ( XMM* vec, int isDouble )
{
   const int lanes = isDouble ? 2 : 4;

   for (int lane = lanes - 1; lane >= 0; lane--) {
      if (isDouble)
         showDouble ( vec, lane );
      else
         showFloat ( vec, lane );
      /* Separator between lanes only, not after the last one. */
      if (lane > 0)
         printf (".");
   }
}
96
/* Print *vec as its two XMM halves, xmm[1] first, separated by '.'.
   Aborts via assert if vec is not 32-byte aligned. */
void showYMM ( YMM* vec, int isDouble )
{
   assert(IS_32_ALIGNED(vec));

   for (int half = 1; half >= 0; half--) {
      showXMM ( &vec->xmm[half], isDouble );
      if (half == 1)
         printf(".");
   }
}
104
/* Print msg on its own line, then one labelled line per member of
   *block in the order r1, r2, r3, r4, m.  isDouble is passed through
   to showYMM to select the lane format. */
void showBlock ( char* msg, Block* block, int isDouble )
{
   const char* const labels[5] = { "r1: ", "r2: ", "r3: ", "r4: ", " m: " };
   YMM* const members[5] = {
      &block->r1, &block->r2, &block->r3, &block->r4, &block->m
   };

   printf(" %s\n", msg);
   for (int k = 0; k < 5; k++) {
      printf("%s", labels[k]);
      showYMM(members[k], isDouble);
      printf("\n");
   }
}
114
115 static Double special_values[10];
116
117 static __attribute__((noinline))
negate(Double d)118 Double negate ( Double d ) { return -d; }
119 static __attribute__((noinline))
divf64(Double x,Double y)120 Double divf64 ( Double x, Double y ) { return x/y; }
121
122 static __attribute__((noinline))
plusZero(void)123 Double plusZero ( void ) { return 0.0; }
124 static __attribute__((noinline))
minusZero(void)125 Double minusZero ( void ) { return negate(plusZero()); }
126
127 static __attribute__((noinline))
plusOne(void)128 Double plusOne ( void ) { return 1.0; }
129 static __attribute__((noinline))
minusOne(void)130 Double minusOne ( void ) { return negate(plusOne()); }
131
132 static __attribute__((noinline))
plusInf(void)133 Double plusInf ( void ) { return 1.0 / 0.0; }
134 static __attribute__((noinline))
minusInf(void)135 Double minusInf ( void ) { return negate(plusInf()); }
136
137 static __attribute__((noinline))
plusNaN(void)138 Double plusNaN ( void ) { return divf64(plusInf(),plusInf()); }
139 static __attribute__((noinline))
minusNaN(void)140 Double minusNaN ( void ) { return negate(plusNaN()); }
141
142 static __attribute__((noinline))
plusDenorm(void)143 Double plusDenorm ( void ) { return 1.23e-315 / 1e3; }
144 static __attribute__((noinline))
minusDenorm(void)145 Double minusDenorm ( void ) { return negate(plusDenorm()); }
146
init_special_values(void)147 static void init_special_values ( void )
148 {
149 special_values[0] = plusZero();
150 special_values[1] = minusZero();
151 special_values[2] = plusOne();
152 special_values[3] = minusOne();
153 special_values[4] = plusInf();
154 special_values[5] = minusInf();
155 special_values[6] = plusNaN();
156 special_values[7] = minusNaN();
157 special_values[8] = plusDenorm();
158 special_values[9] = minusDenorm();
159 }
160
/* Overwrite *b, viewed as an array of Float, with the entries of
   special_values converted to Float, cycling through the ten entries. */
void specialFBlock ( Block* b )
{
   Float* lanes = (Float*)b;
   const size_t nLanes = sizeof(Block) / sizeof(Float);

   for (size_t k = 0; k < nLanes; k++)
      lanes[k] = (Float) special_values[k % 10];
}
168
/* Overwrite *b, viewed as an array of Double, with the entries of
   special_values, cycling through the ten entries. */
void specialDBlock ( Block* b )
{
   Double* lanes = (Double*)b;
   const size_t nLanes = sizeof(Block) / sizeof(Double);

   for (size_t k = 0; k < nLanes; k++)
      lanes[k] = special_values[k % 10];
}
176
randUChar(void)177 UChar randUChar ( void )
178 {
179 static UInt seed = 80021;
180 seed = 1103515245 * seed + 12345;
181 return (seed >> 17) & 0xFF;
182 }
183
/* Fill every byte of *b, in address order, with successive values
   from randUChar(). */
void randBlock ( Block* b )
{
   UChar* cursor = (UChar*)b;
   UChar* const end = cursor + sizeof(Block);

   while (cursor < end)
      *cursor++ = randUChar();
}
191
/* Set every byte of *b to 1. */
void oneBlock ( Block* b )
{
   UChar* bytes = (UChar*)b;

   for (size_t k = 0; k < sizeof(Block); k++)
      bytes[k] = 1;
}
199
/* Define the function test_<_name>(n, b).

   The generated function prints "<_name> <n>", dumps *b ("before"),
   then in a single asm statement:
     - loads the 32-byte vectors at b+0, b+32, b+64 and b+96 (the r1,
       r2, r3 and r4 members of Block) into ymm7, ymm8, ymm6 and ymm9;
     - points r14 at b+128 (the m member);
     - executes _instr, the instruction under test;
     - stores ymm7, ymm8, ymm6 and ymm9 back to the same four slots.
   Finally it dumps *b again ("after").  _isD is forwarded to showBlock
   to choose the double (non-zero) or float (zero) lane format.

   b is expected to be 32-byte aligned: the loads/stores use vmovdqa
   and showYMM asserts the alignment.  _instr may use only the
   registers named in the clobber list plus the memory at (%r14). */
#define GEN_test(_name, _instr, _isD) \
   __attribute__ ((noinline)) void \
   test_##_name ( const char *n, Block* b) \
   { \
      printf("%s %s\n", #_name, n); \
      showBlock("before", b, _isD); \
      __asm__ __volatile__( \
         "vmovdqa 0(%0),%%ymm7" "\n\t" \
         "vmovdqa 32(%0),%%ymm8" "\n\t" \
         "vmovdqa 64(%0),%%ymm6" "\n\t" \
         "vmovdqa 96(%0),%%ymm9" "\n\t" \
         "leaq 128(%0),%%r14" "\n\t" \
         _instr "\n\t" \
         "vmovdqa %%ymm7, 0(%0)" "\n\t" \
         "vmovdqa %%ymm8, 32(%0)" "\n\t" \
         "vmovdqa %%ymm6, 64(%0)" "\n\t" \
         "vmovdqa %%ymm9, 96(%0)" "\n\t" \
         : /*OUT*/ \
         : /*IN*/"r"(b) \
         : /*TRASH*/"xmm7","xmm8","xmm6","xmm9","r14","memory","cc" \
      ); \
      showBlock("after", b, _isD); \
      printf("\n"); \
   }
224
225 /* All these defines do the same thing (and someone with stronger
226 preprocessor foo could probably express things much smaller).
227 They generate 4 different functions to test 4 variants of an
228 fma4 instruction. One with as input 4 registers, one where
229 the output register is also one of the input registers and
230 two versions where different inputs are a memory location.
231 The xmm variants create 128 versions, the ymm variants 256. */
232
233 #define GEN_test_VFMADDPD_xmm(_name) \
234 GEN_test(_name##_xmm, \
235 "vfmaddpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
236 GEN_test(_name##_xmm_src_dst, \
237 "vfmaddpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
238 GEN_test(_name##_xmm_mem1, \
239 "vfmaddpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
240 GEN_test(_name##_xmm_mem2, \
241 "vfmaddpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
242 GEN_test_VFMADDPD_xmm(VFMADDPD)
243
244 #define GEN_test_VFMADDPD_ymm(_name) \
245 GEN_test(_name##_ymm, \
246 "vfmaddpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
247 GEN_test(_name##_ymm_src_dst, \
248 "vfmaddpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
249 GEN_test(_name##_ymm_mem1, \
250 "vfmaddpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
251 GEN_test(_name##_ymm_mem2, \
252 "vfmaddpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
GEN_test_VFMADDPD_ymm(VFMADDPD)253 GEN_test_VFMADDPD_ymm(VFMADDPD)
254
255 #define GEN_test_VFMADDPS_xmm(_name) \
256 GEN_test(_name##_xmm, \
257 "vfmaddps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
258 GEN_test(_name##_xmm_src_dst, \
259 "vfmaddps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
260 GEN_test(_name##_xmm_mem1, \
261 "vfmaddps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
262 GEN_test(_name##_xmm_mem2, \
263 "vfmaddps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
264 GEN_test_VFMADDPS_xmm(VFMADDPS)
265
266 #define GEN_test_VFMADDPS_ymm(_name) \
267 GEN_test(_name##_ymm, \
268 "vfmaddps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
269 GEN_test(_name##_ymm_src_dst, \
270 "vfmaddps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
271 GEN_test(_name##_ymm_mem1, \
272 "vfmaddps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
273 GEN_test(_name##_ymm_mem2, \
274 "vfmaddps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
275 GEN_test_VFMADDPS_ymm(VFMADDPS)
276
277 #define GEN_test_VFMADDSD_xmm(_name) \
278 GEN_test(_name##_xmm, \
279 "vfmaddsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
280 GEN_test(_name##_xmm_src_dst, \
281 "vfmaddsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
282 GEN_test(_name##_xmm_mem1, \
283 "vfmaddsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
284 GEN_test(_name##_xmm_mem2, \
285 "vfmaddsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
286 GEN_test_VFMADDSD_xmm(VFMADDSD)
287
288 #define GEN_test_VFMADDSS_xmm(_name) \
289 GEN_test(_name##_xmm, \
290 "vfmaddss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
291 GEN_test(_name##_xmm_src_dst, \
292 "vfmaddss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
293 GEN_test(_name##_xmm_mem1, \
294 "vfmaddss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
295 GEN_test(_name##_xmm_mem2, \
296 "vfmaddss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
297 GEN_test_VFMADDSS_xmm(VFMADDSS)
298
299 #define GEN_test_VFMADDSUBPD_xmm(_name) \
300 GEN_test(_name##_xmm, \
301 "vfmaddsubpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
302 GEN_test(_name##_xmm_src_dst, \
303 "vfmaddsubpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
304 GEN_test(_name##_xmm_mem1, \
305 "vfmaddsubpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
306 GEN_test(_name##_xmm_mem2, \
307 "vfmaddsubpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
308 GEN_test_VFMADDSUBPD_xmm(VFMADDSUBPD)
309
310 #define GEN_test_VFMADDSUBPD_ymm(_name) \
311 GEN_test(_name##_ymm, \
312 "vfmaddsubpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
313 GEN_test(_name##_ymm_src_dst, \
314 "vfmaddsubpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
315 GEN_test(_name##_ymm_mem1, \
316 "vfmaddsubpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
317 GEN_test(_name##_ymm_mem2, \
318 "vfmaddsubpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
319 GEN_test_VFMADDSUBPD_ymm(VFMADDSUBPD)
320
321 #define GEN_test_VFMADDSUBPS_xmm(_name) \
322 GEN_test(_name##_xmm, \
323 "vfmaddsubps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
324 GEN_test(_name##_xmm_src_dst, \
325 "vfmaddsubps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
326 GEN_test(_name##_xmm_mem1, \
327 "vfmaddsubps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
328 GEN_test(_name##_xmm_mem2, \
329 "vfmaddsubps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
330 GEN_test_VFMADDSUBPS_xmm(VFMADDSUBPS)
331
332 #define GEN_test_VFMADDSUBPS_ymm(_name) \
333 GEN_test(_name##_ymm, \
334 "vfmaddsubps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
335 GEN_test(_name##_ymm_src_dst, \
336 "vfmaddsubps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
337 GEN_test(_name##_ymm_mem1, \
338 "vfmaddsubps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
339 GEN_test(_name##_ymm_mem2, \
340 "vfmaddsubps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
341 GEN_test_VFMADDSUBPS_ymm(VFMADDSUBPS)
342
343 #define GEN_test_VFMSUBADDPD_xmm(_name) \
344 GEN_test(_name##_xmm, \
345 "vfmsubaddpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
346 GEN_test(_name##_xmm_src_dst, \
347 "vfmsubaddpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
348 GEN_test(_name##_xmm_mem1, \
349 "vfmsubaddpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
350 GEN_test(_name##_xmm_mem2, \
351 "vfmsubaddpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
352 GEN_test_VFMSUBADDPD_xmm(VFMSUBADDPD)
353
354 #define GEN_test_VFMSUBADDPD_ymm(_name) \
355 GEN_test(_name##_ymm, \
356 "vfmsubaddpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
357 GEN_test(_name##_ymm_src_dst, \
358 "vfmsubaddpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
359 GEN_test(_name##_ymm_mem1, \
360 "vfmsubaddpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
361 GEN_test(_name##_ymm_mem2, \
362 "vfmsubaddpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
363 GEN_test_VFMSUBADDPD_ymm(VFMSUBADDPD)
364
365 #define GEN_test_VFMSUBADDPS_xmm(_name) \
366 GEN_test(_name##_xmm, \
367 "vfmsubaddps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
368 GEN_test(_name##_xmm_src_dst, \
369 "vfmsubaddps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
370 GEN_test(_name##_xmm_mem1, \
371 "vfmsubaddps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
372 GEN_test(_name##_xmm_mem2, \
373 "vfmsubaddps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
374 GEN_test_VFMSUBADDPS_xmm(VFMSUBADDPS)
375
376 #define GEN_test_VFMSUBADDPS_ymm(_name) \
377 GEN_test(_name##_ymm, \
378 "vfmsubaddps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
379 GEN_test(_name##_ymm_src_dst, \
380 "vfmsubaddps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
381 GEN_test(_name##_ymm_mem1, \
382 "vfmsubaddps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
383 GEN_test(_name##_ymm_mem2, \
384 "vfmsubaddps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
385 GEN_test_VFMSUBADDPS_ymm(VFMSUBADDPS)
386
387 #define GEN_test_VFMSUBPD_xmm(_name) \
388 GEN_test(_name##_xmm, \
389 "vfmsubpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
390 GEN_test(_name##_xmm_src_dst, \
391 "vfmsubpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
392 GEN_test(_name##_xmm_mem1, \
393 "vfmsubpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
394 GEN_test(_name##_xmm_mem2, \
395 "vfmsubpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
396 GEN_test_VFMSUBPD_xmm(VFMSUBPD)
397
398 #define GEN_test_VFMSUBPD_ymm(_name) \
399 GEN_test(_name##_ymm, \
400 "vfmsubpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
401 GEN_test(_name##_ymm_src_dst, \
402 "vfmsubpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
403 GEN_test(_name##_ymm_mem1, \
404 "vfmsubpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
405 GEN_test(_name##_ymm_mem2, \
406 "vfmsubpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
407 GEN_test_VFMSUBPD_ymm(VFMSUBPD)
408
409 #define GEN_test_VFMSUBPS_xmm(_name) \
410 GEN_test(_name##_xmm, \
411 "vfmsubps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
412 GEN_test(_name##_xmm_src_dst, \
413 "vfmsubps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
414 GEN_test(_name##_xmm_mem1, \
415 "vfmsubps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
416 GEN_test(_name##_xmm_mem2, \
417 "vfmsubps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
418 GEN_test_VFMSUBPS_xmm(VFMSUBPS)
419
420 #define GEN_test_VFMSUBPS_ymm(_name) \
421 GEN_test(_name##_ymm, \
422 "vfmsubps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
423 GEN_test(_name##_ymm_src_dst, \
424 "vfmsubps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
425 GEN_test(_name##_ymm_mem1, \
426 "vfmsubps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
427 GEN_test(_name##_ymm_mem2, \
428 "vfmsubps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
429 GEN_test_VFMSUBPS_ymm(VFMSUBPS)
430
431 #define GEN_test_VFMSUBSD_xmm(_name) \
432 GEN_test(_name##_xmm, \
433 "vfmsubsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
434 GEN_test(_name##_xmm_src_dst, \
435 "vfmsubsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
436 GEN_test(_name##_xmm_mem1, \
437 "vfmsubsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
438 GEN_test(_name##_xmm_mem2, \
439 "vfmsubsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
440 GEN_test_VFMSUBSD_xmm(VFMSUBSD)
441
442 #define GEN_test_VFMSUBSS_xmm(_name) \
443 GEN_test(_name##_xmm, \
444 "vfmsubss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
445 GEN_test(_name##_xmm_src_dst, \
446 "vfmsubss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
447 GEN_test(_name##_xmm_mem1, \
448 "vfmsubss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
449 GEN_test(_name##_xmm_mem2, \
450 "vfmsubss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
451 GEN_test_VFMSUBSS_xmm(VFMSUBSS)
452
453 #define GEN_test_VFNMADDPD_xmm(_name) \
454 GEN_test(_name##_xmm, \
455 "vfnmaddpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
456 GEN_test(_name##_xmm_src_dst, \
457 "vfnmaddpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
458 GEN_test(_name##_xmm_mem1, \
459 "vfnmaddpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
460 GEN_test(_name##_xmm_mem2, \
461 "vfnmaddpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
462 GEN_test_VFNMADDPD_xmm(VFNMADDPD)
463
464 #define GEN_test_VFNMADDPD_ymm(_name) \
465 GEN_test(_name##_ymm, \
466 "vfnmaddpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
467 GEN_test(_name##_ymm_src_dst, \
468 "vfnmaddpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
469 GEN_test(_name##_ymm_mem1, \
470 "vfnmaddpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
471 GEN_test(_name##_ymm_mem2, \
472 "vfnmaddpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
473 GEN_test_VFNMADDPD_ymm(VFNMADDPD)
474
475 #define GEN_test_VFNMADDPS_xmm(_name) \
476 GEN_test(_name##_xmm, \
477 "vfnmaddps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
478 GEN_test(_name##_xmm_src_dst, \
479 "vfnmaddps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
480 GEN_test(_name##_xmm_mem1, \
481 "vfnmaddps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
482 GEN_test(_name##_xmm_mem2, \
483 "vfnmaddps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
484 GEN_test_VFNMADDPS_xmm(VFNMADDPS)
485
486 #define GEN_test_VFNMADDPS_ymm(_name) \
487 GEN_test(_name##_ymm, \
488 "vfnmaddps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
489 GEN_test(_name##_ymm_src_dst, \
490 "vfnmaddps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
491 GEN_test(_name##_ymm_mem1, \
492 "vfnmaddps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
493 GEN_test(_name##_ymm_mem2, \
494 "vfnmaddps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
495 GEN_test_VFNMADDPS_ymm(VFNMADDPS)
496
497 #define GEN_test_VFNMADDSD_xmm(_name) \
498 GEN_test(_name##_xmm, \
499 "vfnmaddsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
500 GEN_test(_name##_xmm_src_dst, \
501 "vfnmaddsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
502 GEN_test(_name##_xmm_mem1, \
503 "vfnmaddsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
504 GEN_test(_name##_xmm_mem2, \
505 "vfnmaddsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
506 GEN_test_VFNMADDSD_xmm(VFNMADDSD)
507
508 #define GEN_test_VFNMADDSS_xmm(_name) \
509 GEN_test(_name##_xmm, \
510 "vfnmaddss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
511 GEN_test(_name##_xmm_src_dst, \
512 "vfnmaddss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
513 GEN_test(_name##_xmm_mem1, \
514 "vfnmaddss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
515 GEN_test(_name##_xmm_mem2, \
516 "vfnmaddss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
517 GEN_test_VFNMADDSS_xmm(VFNMADDSS)
518
519 #define GEN_test_VFNMSUBPD_xmm(_name) \
520 GEN_test(_name##_xmm, \
521 "vfnmsubpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
522 GEN_test(_name##_xmm_src_dst, \
523 "vfnmsubpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
524 GEN_test(_name##_xmm_mem1, \
525 "vfnmsubpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
526 GEN_test(_name##_xmm_mem2, \
527 "vfnmsubpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
528 GEN_test_VFNMSUBPD_xmm(VFNMSUBPD)
529
530 #define GEN_test_VFNMSUBPD_ymm(_name) \
531 GEN_test(_name##_ymm, \
532 "vfnmsubpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
533 GEN_test(_name##_ymm_src_dst, \
534 "vfnmsubpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
535 GEN_test(_name##_ymm_mem1, \
536 "vfnmsubpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
537 GEN_test(_name##_ymm_mem2, \
538 "vfnmsubpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
539 GEN_test_VFNMSUBPD_ymm(VFNMSUBPD)
540
541 #define GEN_test_VFNMSUBPS_xmm(_name) \
542 GEN_test(_name##_xmm, \
543 "vfnmsubps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
544 GEN_test(_name##_xmm_src_dst, \
545 "vfnmsubps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
546 GEN_test(_name##_xmm_mem1, \
547 "vfnmsubps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
548 GEN_test(_name##_xmm_mem2, \
549 "vfnmsubps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
550 GEN_test_VFNMSUBPS_xmm(VFNMSUBPS)
551
552 #define GEN_test_VFNMSUBPS_ymm(_name) \
553 GEN_test(_name##_ymm, \
554 "vfnmsubps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
555 GEN_test(_name##_ymm_src_dst, \
556 "vfnmsubps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
557 GEN_test(_name##_ymm_mem1, \
558 "vfnmsubps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
559 GEN_test(_name##_ymm_mem2, \
560 "vfnmsubps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
561 GEN_test_VFNMSUBPS_ymm(VFNMSUBPS)
562
563 #define GEN_test_VFNMSUBSD_xmm(_name) \
564 GEN_test(_name##_xmm, \
565 "vfnmsubsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
566 GEN_test(_name##_xmm_src_dst, \
567 "vfnmsubsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
568 GEN_test(_name##_xmm_mem1, \
569 "vfnmsubsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
570 GEN_test(_name##_xmm_mem2, \
571 "vfnmsubsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
572 GEN_test_VFNMSUBSD_xmm(VFNMSUBSD)
573
574 #define GEN_test_VFNMSUBSS_xmm(_name) \
575 GEN_test(_name##_xmm, \
576 "vfnmsubss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
577 GEN_test(_name##_xmm_src_dst, \
578 "vfnmsubss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
579 GEN_test(_name##_xmm_mem1, \
580 "vfnmsubss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
581 GEN_test(_name##_xmm_mem2, \
582 "vfnmsubss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
583 GEN_test_VFNMSUBSS_xmm(VFNMSUBSS)
584
/* Call the generated function test_<_name>_<_sub> on _block, passing
   _bname as the label it prints. */
#define DO_test_block(_name, _sub, _bname, _block) \
   test_##_name##_##_sub((_bname), (_block));

/* Run test_<_name>_<_sub> three times on one freshly allocated Block:
   first filled with 1-bytes ("ones"), then with the special values
   ("specialD" if _isD is non-zero, "specialF" otherwise), then with
   pseudo-random bytes ("rand").  The block is freed afterwards.  Each
   run starts from a refilled block, so results do not carry over. */
#define DO_test(_name, _sub, _isD) { \
      Block* blk = memalign32(sizeof *blk); \
      oneBlock(blk); \
      DO_test_block(_name, _sub, "ones", blk); \
      if (_isD) \
         specialDBlock(blk); \
      else \
         specialFBlock(blk); \
      DO_test_block(_name, _sub, (_isD) ? "specialD" : "specialF", blk); \
      randBlock(blk); \
      DO_test_block(_name, _sub, "rand", blk); \
      free(blk); \
   }
603
/* Run DO_test on the four operand forms of one instruction for the
   register prefix _reg: plain, _src_dst, _mem1 and _mem2, in that order. */
#define DO_tests_for(_name, _reg, _isD) \
   DO_test(_name, _reg, _isD); \
   DO_test(_name, _reg##_src_dst, _isD); \
   DO_test(_name, _reg##_mem1, _isD); \
   DO_test(_name, _reg##_mem2, _isD);

/* All four xmm forms of instruction _name. */
#define DO_tests_xmm(_name,_isD) DO_tests_for(_name, xmm, _isD)

/* All four ymm forms of instruction _name. */
#define DO_tests_ymm(_name,_isD) DO_tests_for(_name, ymm, _isD)
615
/* Test driver: fills the special-value table, then runs every xmm
   test group.  The second argument of each DO_tests_* call is the
   _isD flag: 1 for the double-precision (PD/SD) instructions, 0 for
   the single-precision (PS/SS) ones.  Always returns 0; results are
   whatever the tests print to stdout. */
int main ( void )
{
   init_special_values();

   // 128-bit (xmm) forms
   DO_tests_xmm(VFMADDPD, 1);
   DO_tests_xmm(VFMADDPS, 0);
   DO_tests_xmm(VFMADDSD, 1);
   DO_tests_xmm(VFMADDSS, 0);
   DO_tests_xmm(VFMADDSUBPD, 1);
   DO_tests_xmm(VFMADDSUBPS, 0);
   DO_tests_xmm(VFMSUBADDPD, 1);
   DO_tests_xmm(VFMSUBADDPS, 0);
   DO_tests_xmm(VFMSUBPD, 1);
   DO_tests_xmm(VFMSUBPS, 0);
   DO_tests_xmm(VFMSUBSD, 1);
   DO_tests_xmm(VFMSUBSS, 0);
   DO_tests_xmm(VFNMADDPD, 1);
   DO_tests_xmm(VFNMADDPS, 0);
   DO_tests_xmm(VFNMADDSD, 1);
   DO_tests_xmm(VFNMADDSS, 0);
   DO_tests_xmm(VFNMSUBPD, 1);
   DO_tests_xmm(VFNMSUBPS, 0);
   DO_tests_xmm(VFNMSUBSD, 1);
   DO_tests_xmm(VFNMSUBSS, 0);

   // 256-bit (ymm) forms: the test functions are generated above, but
   // the calls below are disabled.
   // NOTE(review): the reason is not recorded in this file -- confirm
   // before re-enabling.
   /*
   DO_tests_ymm(VFMADDPD, 1);
   DO_tests_ymm(VFMADDPS, 0);
   DO_tests_ymm(VFMADDSUBPD, 1);
   DO_tests_ymm(VFMADDSUBPS, 0);
   DO_tests_ymm(VFMSUBADDPD, 1);
   DO_tests_ymm(VFMSUBADDPS, 0);
   DO_tests_ymm(VFMSUBPD, 1);
   DO_tests_ymm(VFMSUBPS, 0);
   DO_tests_ymm(VFNMADDPD, 1);
   DO_tests_ymm(VFNMADDPS, 0);
   DO_tests_ymm(VFNMSUBPD, 1);
   DO_tests_ymm(VFNMSUBPS, 0);
   */

   return 0;
}
660