• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <assert.h>
4 #include <math.h>
5 #include "tests/malloc.h"
6 
/* Short aliases for the scalar types used throughout this test. */
typedef unsigned char          UChar;
typedef unsigned int           UInt;
typedef unsigned long int      UWord;
typedef unsigned long long int ULong;
typedef double                 Double;
typedef float                  Float;

/* Non-zero when the low five bits of the address are clear, i.e. the
   pointer is a multiple of 32. */
#define IS_32_ALIGNED(_ptr) (((UWord)(_ptr) & 0x1F) == 0)

/* One 128-bit vector register image, viewable as bytes, 32-bit words,
   four floats or two doubles. */
typedef union {
   UChar  u8[16];
   UInt   u32[4];
   Float  f32[4];
   Double f64[2];
} XMM;

/* One 256-bit vector register image: two XMM halves, xmm[0] at the
   lower address. */
typedef union {
   UChar u8[32];
   UInt  u32[8];
   XMM   xmm[2];
} YMM;

/* The working set handed to every test: four register images followed
   by one image that the tests address as a memory operand. */
typedef struct {
   YMM r1;
   YMM r2;
   YMM r3;
   YMM r4;
   YMM m;
} Block;
19 
/* Print single-precision lane `idx` of `vec` in exactly eight characters.
   Normal numbers are shown as the lane's four bytes in hex, highest
   address first; every other class is shown as a bracketed tag, with a
   '-' or ' ' sign marker for all tags except NaN. */
void showFloat ( XMM* vec, int idx )
{
   const Float value = vec->f32[idx];
   const char  sign  = signbit(value) ? '-' : ' ';
   const int   cls   = fpclassify(value);
   const int   first = idx * 4;   /* index of the lane's lowest byte */
   int k;

   if (cls == FP_NORMAL) {
      for (k = first + 3; k >= first; k--)
         printf("%02x", (UInt)vec->u8[k]);
      return;
   }

   if (cls == FP_INFINITE)
      printf ("[ %cINF ]", sign);
   else if (cls == FP_ZERO)
      printf ("[%cZERO ]", sign);
   else if (cls == FP_NAN)
      printf ("[  NAN ]");
   else
      /* subnormals, plus any other class the platform may report */
      printf ("[%cSUBNR]", sign);
}
49 
/* Print double-precision lane `idx` of `vec` in exactly sixteen
   characters.  Normal numbers are shown as the lane's eight bytes in hex,
   highest address first; every other class is shown as a bracketed tag,
   with a '-' or ' ' sign marker for all tags except NaN. */
void showDouble ( XMM* vec, int idx )
{
   const Double value = vec->f64[idx];
   const char   sign  = signbit(value) ? '-' : ' ';
   const int    cls   = fpclassify(value);
   const int    first = idx * 8;   /* index of the lane's lowest byte */
   int k;

   if (cls == FP_NORMAL) {
      for (k = first + 7; k >= first; k--)
         printf("%02x", (UInt)vec->u8[k]);
      return;
   }

   if (cls == FP_INFINITE)
      printf ("[     %cINF     ]", sign);
   else if (cls == FP_ZERO)
      printf ("[    %cZERO     ]", sign);
   else if (cls == FP_NAN)
      printf ("[      NAN     ]");
   else
      /* subnormals, plus any other class the platform may report */
      printf ("[  %cSUBNORMAL  ]", sign);
}
79 
/* Print all lanes of a 128-bit value, highest lane first, separated by
   '.'.  The value is treated as two doubles when isDouble is non-zero
   and as four floats otherwise. */
void showXMM ( XMM* vec, int isDouble )
{
   const int nLanes = isDouble ? 2 : 4;
   int lane;

   for (lane = nLanes - 1; lane >= 0; lane--) {
      if (isDouble)
         showDouble ( vec, lane );
      else
         showFloat ( vec, lane );
      /* separator between lanes, none after the last one */
      if (lane > 0)
         printf (".");
   }
}
96 
/* Print a 256-bit value as its upper 128-bit half, a '.', then its lower
   half.  The pointer must be 32-byte aligned; this is asserted. */
void showYMM ( YMM* vec, int isDouble )
{
   XMM* upper;
   XMM* lower;

   assert(IS_32_ALIGNED(vec));

   upper = &vec->xmm[1];
   lower = &vec->xmm[0];

   showXMM ( upper, isDouble );
   printf(".");
   showXMM ( lower, isDouble );
}
104 
/* Dump a whole Block: a heading line containing `msg`, then one labelled
   line for each of r1..r4 and m, formatted by showYMM. */
void showBlock ( char* msg, Block* block, int isDouble )
{
   static const char* const labels[5] = {
      "r1: ", "r2: ", "r3: ", "r4: ", " m: "
   };
   YMM* const regs[5] = {
      &block->r1, &block->r2, &block->r3, &block->r4, &block->m
   };
   int k;

   printf("  %s\n", msg);
   for (k = 0; k < 5; k++) {
      printf("%s", labels[k]);
      showYMM(regs[k], isDouble);
      printf("\n");
   }
}
114 
115 static Double special_values[10];
116 
117 static __attribute__((noinline))
negate(Double d)118 Double negate ( Double d ) { return -d; }
119 static __attribute__((noinline))
divf64(Double x,Double y)120 Double divf64 ( Double x, Double y ) { return x/y; }
121 
122 static __attribute__((noinline))
plusZero(void)123 Double plusZero  ( void ) { return 0.0; }
124 static __attribute__((noinline))
minusZero(void)125 Double minusZero ( void ) { return negate(plusZero()); }
126 
127 static __attribute__((noinline))
plusOne(void)128 Double plusOne  ( void ) { return 1.0; }
129 static __attribute__((noinline))
minusOne(void)130 Double minusOne ( void ) { return negate(plusOne()); }
131 
132 static __attribute__((noinline))
plusInf(void)133 Double plusInf   ( void ) { return 1.0 / 0.0; }
134 static __attribute__((noinline))
minusInf(void)135 Double minusInf  ( void ) { return negate(plusInf()); }
136 
137 static __attribute__((noinline))
plusNaN(void)138 Double plusNaN  ( void ) { return divf64(plusInf(),plusInf()); }
139 static __attribute__((noinline))
minusNaN(void)140 Double minusNaN ( void ) { return negate(plusNaN()); }
141 
142 static __attribute__((noinline))
plusDenorm(void)143 Double plusDenorm  ( void ) { return 1.23e-315 / 1e3; }
144 static __attribute__((noinline))
minusDenorm(void)145 Double minusDenorm ( void ) { return negate(plusDenorm()); }
146 
init_special_values(void)147 static void init_special_values ( void )
148 {
149    special_values[0] = plusZero();
150    special_values[1] = minusZero();
151    special_values[2] = plusOne();
152    special_values[3] = minusOne();
153    special_values[4] = plusInf();
154    special_values[5] = minusInf();
155    special_values[6] = plusNaN();
156    special_values[7] = minusNaN();
157    special_values[8] = plusDenorm();
158    special_values[9] = minusDenorm();
159 }
160 
/* Overwrite the whole Block, viewed as an array of Float, with the
   special-values table converted to Float and repeated cyclically.
   NOTE(review): the table's double denormals are far below Float's range
   and presumably narrow to signed zero here -- confirm if that matters. */
void specialFBlock ( Block* b )
{
   Float* const out   = (Float*)b;
   const size_t count = sizeof(Block) / sizeof(Float);
   size_t k;

   for (k = 0; k < count; k++)
      out[k] = (Float) special_values[k % 10];
}
168 
/* Overwrite the whole Block, viewed as an array of Double, with the
   special-values table repeated cyclically. */
void specialDBlock ( Block* b )
{
   Double* const out  = (Double*)b;
   const size_t count = sizeof(Block) / sizeof(Double);
   size_t k;

   for (k = 0; k < count; k++)
      out[k] = special_values[k % 10];
}
176 
randUChar(void)177 UChar randUChar ( void )
178 {
179    static UInt seed = 80021;
180    seed = 1103515245 * seed + 12345;
181    return (seed >> 17) & 0xFF;
182 }
183 
/* Fill every byte of the Block with successive randUChar() values,
   lowest address first. */
void randBlock ( Block* b )
{
   UChar*       cur = (UChar*)b;
   UChar* const end = cur + sizeof(Block);

   while (cur < end)
      *cur++ = randUChar();
}
191 
/* Set every byte of the Block to the value 1. */
void oneBlock ( Block* b )
{
   UChar*       cur = (UChar*)b;
   UChar* const end = cur + sizeof(Block);

   while (cur < end)
      *cur++ = 1;
}
199 
/* GEN_test(_name, _instr, _isD)
   Defines the function  void test_<_name>(const char *n, Block *b).
   The function prints "<_name> <n>", dumps the block, then in one asm
   statement:
     - loads b->r1, r2, r3, r4 into ymm7, ymm8, ymm6, ymm9 respectively,
     - points r14 at b->m (byte offset 128),
     - executes the instruction text _instr,
     - stores ymm7, ymm8, ymm6, ymm9 back to b->r1..r4,
   and finally dumps the block again followed by a blank line.
   _isD is passed to showBlock to choose double or float formatting. */
#define GEN_test(_name, _instr, _isD) \
   __attribute__ ((noinline)) void \
   test_##_name ( const char *n, Block* b) \
   { \
      printf("%s %s\n", #_name, n); \
      showBlock("before", b, _isD); \
      __asm__ __volatile__( \
          "vmovdqa   0(%[blk]),%%ymm7\n\t" \
          "vmovdqa  32(%[blk]),%%ymm8\n\t" \
          "vmovdqa  64(%[blk]),%%ymm6\n\t" \
          "vmovdqa  96(%[blk]),%%ymm9\n\t" \
          "leaq    128(%[blk]),%%r14\n\t" \
          _instr "\n\t" \
          "vmovdqa %%ymm7,  0(%[blk])\n\t" \
          "vmovdqa %%ymm8, 32(%[blk])\n\t" \
          "vmovdqa %%ymm6, 64(%[blk])\n\t" \
          "vmovdqa %%ymm9, 96(%[blk])\n\t" \
          : /* no outputs: results are written through memory */ \
          : [blk] "r"(b) \
          : "xmm7","xmm8","xmm6","xmm9","r14","memory","cc" \
       ); \
       showBlock("after", b, _isD); \
       printf("\n"); \
    }
224 
225 /* All these defines do the same thing (and someone with stronger
226    preprocessor foo could probably express things much smaller).
227    They generate 4 different functions to test 4 variants of an
228    fma4 instruction. One with as input 4 registers, one where
229    the output register is also one of the input registers and
230    two versions where different inputs are a memory location.
231    The xmm variants create 128 versions, the ymm variants 256. */
232 
233 #define GEN_test_VFMADDPD_xmm(_name) \
234    GEN_test(_name##_xmm, \
235             "vfmaddpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
236    GEN_test(_name##_xmm_src_dst, \
237             "vfmaddpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
238    GEN_test(_name##_xmm_mem1, \
239             "vfmaddpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
240    GEN_test(_name##_xmm_mem2, \
241             "vfmaddpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
242 GEN_test_VFMADDPD_xmm(VFMADDPD)
243 
244 #define GEN_test_VFMADDPD_ymm(_name) \
245    GEN_test(_name##_ymm, \
246             "vfmaddpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
247    GEN_test(_name##_ymm_src_dst, \
248             "vfmaddpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
249    GEN_test(_name##_ymm_mem1, \
250             "vfmaddpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
251    GEN_test(_name##_ymm_mem2, \
252             "vfmaddpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
GEN_test_VFMADDPD_ymm(VFMADDPD)253 GEN_test_VFMADDPD_ymm(VFMADDPD)
254 
255 #define GEN_test_VFMADDPS_xmm(_name) \
256    GEN_test(_name##_xmm, \
257             "vfmaddps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
258    GEN_test(_name##_xmm_src_dst, \
259             "vfmaddps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
260    GEN_test(_name##_xmm_mem1, \
261             "vfmaddps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
262    GEN_test(_name##_xmm_mem2, \
263             "vfmaddps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
264 GEN_test_VFMADDPS_xmm(VFMADDPS)
265 
266 #define GEN_test_VFMADDPS_ymm(_name) \
267    GEN_test(_name##_ymm, \
268             "vfmaddps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
269    GEN_test(_name##_ymm_src_dst, \
270             "vfmaddps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
271    GEN_test(_name##_ymm_mem1, \
272             "vfmaddps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
273    GEN_test(_name##_ymm_mem2, \
274             "vfmaddps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
275 GEN_test_VFMADDPS_ymm(VFMADDPS)
276 
277 #define GEN_test_VFMADDSD_xmm(_name) \
278    GEN_test(_name##_xmm, \
279             "vfmaddsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
280    GEN_test(_name##_xmm_src_dst, \
281             "vfmaddsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
282    GEN_test(_name##_xmm_mem1, \
283             "vfmaddsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
284    GEN_test(_name##_xmm_mem2, \
285             "vfmaddsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
286 GEN_test_VFMADDSD_xmm(VFMADDSD)
287 
288 #define GEN_test_VFMADDSS_xmm(_name) \
289    GEN_test(_name##_xmm, \
290             "vfmaddss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
291    GEN_test(_name##_xmm_src_dst, \
292             "vfmaddss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
293    GEN_test(_name##_xmm_mem1, \
294             "vfmaddss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
295    GEN_test(_name##_xmm_mem2, \
296             "vfmaddss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
297 GEN_test_VFMADDSS_xmm(VFMADDSS)
298 
299 #define GEN_test_VFMADDSUBPD_xmm(_name) \
300    GEN_test(_name##_xmm, \
301             "vfmaddsubpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
302    GEN_test(_name##_xmm_src_dst, \
303             "vfmaddsubpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
304    GEN_test(_name##_xmm_mem1, \
305             "vfmaddsubpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
306    GEN_test(_name##_xmm_mem2, \
307             "vfmaddsubpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
308 GEN_test_VFMADDSUBPD_xmm(VFMADDSUBPD)
309 
310 #define GEN_test_VFMADDSUBPD_ymm(_name) \
311    GEN_test(_name##_ymm, \
312             "vfmaddsubpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
313    GEN_test(_name##_ymm_src_dst, \
314             "vfmaddsubpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
315    GEN_test(_name##_ymm_mem1, \
316             "vfmaddsubpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
317    GEN_test(_name##_ymm_mem2, \
318             "vfmaddsubpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
319 GEN_test_VFMADDSUBPD_ymm(VFMADDSUBPD)
320 
321 #define GEN_test_VFMADDSUBPS_xmm(_name) \
322    GEN_test(_name##_xmm, \
323             "vfmaddsubps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
324    GEN_test(_name##_xmm_src_dst, \
325             "vfmaddsubps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
326    GEN_test(_name##_xmm_mem1, \
327             "vfmaddsubps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
328    GEN_test(_name##_xmm_mem2, \
329             "vfmaddsubps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
330 GEN_test_VFMADDSUBPS_xmm(VFMADDSUBPS)
331 
332 #define GEN_test_VFMADDSUBPS_ymm(_name) \
333    GEN_test(_name##_ymm, \
334             "vfmaddsubps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
335    GEN_test(_name##_ymm_src_dst, \
336             "vfmaddsubps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
337    GEN_test(_name##_ymm_mem1, \
338             "vfmaddsubps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
339    GEN_test(_name##_ymm_mem2, \
340             "vfmaddsubps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
341 GEN_test_VFMADDSUBPS_ymm(VFMADDSUBPS)
342 
343 #define GEN_test_VFMSUBADDPD_xmm(_name) \
344    GEN_test(_name##_xmm, \
345             "vfmsubaddpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
346    GEN_test(_name##_xmm_src_dst, \
347             "vfmsubaddpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
348    GEN_test(_name##_xmm_mem1, \
349             "vfmsubaddpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
350    GEN_test(_name##_xmm_mem2, \
351             "vfmsubaddpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
352 GEN_test_VFMSUBADDPD_xmm(VFMSUBADDPD)
353 
354 #define GEN_test_VFMSUBADDPD_ymm(_name) \
355    GEN_test(_name##_ymm, \
356             "vfmsubaddpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
357    GEN_test(_name##_ymm_src_dst, \
358             "vfmsubaddpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
359    GEN_test(_name##_ymm_mem1, \
360             "vfmsubaddpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
361    GEN_test(_name##_ymm_mem2, \
362             "vfmsubaddpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
363 GEN_test_VFMSUBADDPD_ymm(VFMSUBADDPD)
364 
365 #define GEN_test_VFMSUBADDPS_xmm(_name) \
366    GEN_test(_name##_xmm, \
367             "vfmsubaddps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
368    GEN_test(_name##_xmm_src_dst, \
369             "vfmsubaddps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
370    GEN_test(_name##_xmm_mem1, \
371             "vfmsubaddps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
372    GEN_test(_name##_xmm_mem2, \
373             "vfmsubaddps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
374 GEN_test_VFMSUBADDPS_xmm(VFMSUBADDPS)
375 
376 #define GEN_test_VFMSUBADDPS_ymm(_name) \
377    GEN_test(_name##_ymm, \
378             "vfmsubaddps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
379    GEN_test(_name##_ymm_src_dst, \
380             "vfmsubaddps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
381    GEN_test(_name##_ymm_mem1, \
382             "vfmsubaddps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
383    GEN_test(_name##_ymm_mem2, \
384             "vfmsubaddps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
385 GEN_test_VFMSUBADDPS_ymm(VFMSUBADDPS)
386 
387 #define GEN_test_VFMSUBPD_xmm(_name) \
388    GEN_test(_name##_xmm, \
389             "vfmsubpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
390    GEN_test(_name##_xmm_src_dst, \
391             "vfmsubpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
392    GEN_test(_name##_xmm_mem1, \
393             "vfmsubpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
394    GEN_test(_name##_xmm_mem2, \
395             "vfmsubpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
396 GEN_test_VFMSUBPD_xmm(VFMSUBPD)
397 
398 #define GEN_test_VFMSUBPD_ymm(_name) \
399    GEN_test(_name##_ymm, \
400             "vfmsubpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
401    GEN_test(_name##_ymm_src_dst, \
402             "vfmsubpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
403    GEN_test(_name##_ymm_mem1, \
404             "vfmsubpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
405    GEN_test(_name##_ymm_mem2, \
406             "vfmsubpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
407 GEN_test_VFMSUBPD_ymm(VFMSUBPD)
408 
409 #define GEN_test_VFMSUBPS_xmm(_name) \
410    GEN_test(_name##_xmm, \
411             "vfmsubps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
412    GEN_test(_name##_xmm_src_dst, \
413             "vfmsubps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
414    GEN_test(_name##_xmm_mem1, \
415             "vfmsubps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
416    GEN_test(_name##_xmm_mem2, \
417             "vfmsubps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
418 GEN_test_VFMSUBPS_xmm(VFMSUBPS)
419 
420 #define GEN_test_VFMSUBPS_ymm(_name) \
421    GEN_test(_name##_ymm, \
422             "vfmsubps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
423    GEN_test(_name##_ymm_src_dst, \
424             "vfmsubps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
425    GEN_test(_name##_ymm_mem1, \
426             "vfmsubps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
427    GEN_test(_name##_ymm_mem2, \
428             "vfmsubps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
429 GEN_test_VFMSUBPS_ymm(VFMSUBPS)
430 
431 #define GEN_test_VFMSUBSD_xmm(_name) \
432    GEN_test(_name##_xmm, \
433             "vfmsubsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
434    GEN_test(_name##_xmm_src_dst, \
435             "vfmsubsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
436    GEN_test(_name##_xmm_mem1, \
437             "vfmsubsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
438    GEN_test(_name##_xmm_mem2, \
439             "vfmsubsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
440 GEN_test_VFMSUBSD_xmm(VFMSUBSD)
441 
442 #define GEN_test_VFMSUBSS_xmm(_name) \
443    GEN_test(_name##_xmm, \
444             "vfmsubss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
445    GEN_test(_name##_xmm_src_dst, \
446             "vfmsubss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
447    GEN_test(_name##_xmm_mem1, \
448             "vfmsubss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
449    GEN_test(_name##_xmm_mem2, \
450             "vfmsubss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
451 GEN_test_VFMSUBSS_xmm(VFMSUBSS)
452 
453 #define GEN_test_VFNMADDPD_xmm(_name) \
454    GEN_test(_name##_xmm, \
455             "vfnmaddpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
456    GEN_test(_name##_xmm_src_dst, \
457             "vfnmaddpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
458    GEN_test(_name##_xmm_mem1, \
459             "vfnmaddpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
460    GEN_test(_name##_xmm_mem2, \
461             "vfnmaddpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
462 GEN_test_VFNMADDPD_xmm(VFNMADDPD)
463 
464 #define GEN_test_VFNMADDPD_ymm(_name) \
465    GEN_test(_name##_ymm, \
466             "vfnmaddpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
467    GEN_test(_name##_ymm_src_dst, \
468             "vfnmaddpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
469    GEN_test(_name##_ymm_mem1, \
470             "vfnmaddpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
471    GEN_test(_name##_ymm_mem2, \
472             "vfnmaddpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
473 GEN_test_VFNMADDPD_ymm(VFNMADDPD)
474 
475 #define GEN_test_VFNMADDPS_xmm(_name) \
476    GEN_test(_name##_xmm, \
477             "vfnmaddps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
478    GEN_test(_name##_xmm_src_dst, \
479             "vfnmaddps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
480    GEN_test(_name##_xmm_mem1, \
481             "vfnmaddps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
482    GEN_test(_name##_xmm_mem2, \
483             "vfnmaddps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
484 GEN_test_VFNMADDPS_xmm(VFNMADDPS)
485 
486 #define GEN_test_VFNMADDPS_ymm(_name) \
487    GEN_test(_name##_ymm, \
488             "vfnmaddps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
489    GEN_test(_name##_ymm_src_dst, \
490             "vfnmaddps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
491    GEN_test(_name##_ymm_mem1, \
492             "vfnmaddps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
493    GEN_test(_name##_ymm_mem2, \
494             "vfnmaddps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
495 GEN_test_VFNMADDPS_ymm(VFNMADDPS)
496 
497 #define GEN_test_VFNMADDSD_xmm(_name) \
498    GEN_test(_name##_xmm, \
499             "vfnmaddsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
500    GEN_test(_name##_xmm_src_dst, \
501             "vfnmaddsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
502    GEN_test(_name##_xmm_mem1, \
503             "vfnmaddsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
504    GEN_test(_name##_xmm_mem2, \
505             "vfnmaddsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
506 GEN_test_VFNMADDSD_xmm(VFNMADDSD)
507 
508 #define GEN_test_VFNMADDSS_xmm(_name) \
509    GEN_test(_name##_xmm, \
510             "vfnmaddss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
511    GEN_test(_name##_xmm_src_dst, \
512             "vfnmaddss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
513    GEN_test(_name##_xmm_mem1, \
514             "vfnmaddss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
515    GEN_test(_name##_xmm_mem2, \
516             "vfnmaddss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
517 GEN_test_VFNMADDSS_xmm(VFNMADDSS)
518 
519 #define GEN_test_VFNMSUBPD_xmm(_name) \
520    GEN_test(_name##_xmm, \
521             "vfnmsubpd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
522    GEN_test(_name##_xmm_src_dst, \
523             "vfnmsubpd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
524    GEN_test(_name##_xmm_mem1, \
525             "vfnmsubpd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
526    GEN_test(_name##_xmm_mem2, \
527             "vfnmsubpd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
528 GEN_test_VFNMSUBPD_xmm(VFNMSUBPD)
529 
530 #define GEN_test_VFNMSUBPD_ymm(_name) \
531    GEN_test(_name##_ymm, \
532             "vfnmsubpd %%ymm7,%%ymm8,%%ymm6,%%ymm9", 1); \
533    GEN_test(_name##_ymm_src_dst, \
534             "vfnmsubpd %%ymm7,%%ymm8,%%ymm9,%%ymm9", 1); \
535    GEN_test(_name##_ymm_mem1, \
536             "vfnmsubpd (%%r14),%%ymm8,%%ymm6,%%ymm9", 1); \
537    GEN_test(_name##_ymm_mem2, \
538             "vfnmsubpd %%ymm8,(%%r14),%%ymm6,%%ymm9", 1);
539 GEN_test_VFNMSUBPD_ymm(VFNMSUBPD)
540 
541 #define GEN_test_VFNMSUBPS_xmm(_name) \
542    GEN_test(_name##_xmm, \
543             "vfnmsubps %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
544    GEN_test(_name##_xmm_src_dst, \
545             "vfnmsubps %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
546    GEN_test(_name##_xmm_mem1, \
547             "vfnmsubps (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
548    GEN_test(_name##_xmm_mem2, \
549             "vfnmsubps %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
550 GEN_test_VFNMSUBPS_xmm(VFNMSUBPS)
551 
552 #define GEN_test_VFNMSUBPS_ymm(_name) \
553    GEN_test(_name##_ymm, \
554             "vfnmsubps %%ymm7,%%ymm8,%%ymm6,%%ymm9", 0); \
555    GEN_test(_name##_ymm_src_dst, \
556             "vfnmsubps %%ymm7,%%ymm8,%%ymm9,%%ymm9", 0); \
557    GEN_test(_name##_ymm_mem1, \
558             "vfnmsubps (%%r14),%%ymm8,%%ymm6,%%ymm9", 0); \
559    GEN_test(_name##_ymm_mem2, \
560             "vfnmsubps %%ymm8,(%%r14),%%ymm6,%%ymm9", 0);
561 GEN_test_VFNMSUBPS_ymm(VFNMSUBPS)
562 
563 #define GEN_test_VFNMSUBSD_xmm(_name) \
564    GEN_test(_name##_xmm, \
565             "vfnmsubsd %%xmm7,%%xmm8,%%xmm6,%%xmm9", 1); \
566    GEN_test(_name##_xmm_src_dst, \
567             "vfnmsubsd %%xmm7,%%xmm8,%%xmm9,%%xmm9", 1); \
568    GEN_test(_name##_xmm_mem1, \
569             "vfnmsubsd (%%r14),%%xmm8,%%xmm6,%%xmm9", 1); \
570    GEN_test(_name##_xmm_mem2, \
571             "vfnmsubsd %%xmm8,(%%r14),%%xmm6,%%xmm9", 1);
572 GEN_test_VFNMSUBSD_xmm(VFNMSUBSD)
573 
574 #define GEN_test_VFNMSUBSS_xmm(_name) \
575    GEN_test(_name##_xmm, \
576             "vfnmsubss %%xmm7,%%xmm8,%%xmm6,%%xmm9", 0); \
577    GEN_test(_name##_xmm_src_dst, \
578             "vfnmsubss %%xmm7,%%xmm8,%%xmm9,%%xmm9", 0); \
579    GEN_test(_name##_xmm_mem1, \
580             "vfnmsubss (%%r14),%%xmm8,%%xmm6,%%xmm9", 0); \
581    GEN_test(_name##_xmm_mem2, \
582             "vfnmsubss %%xmm8,(%%r14),%%xmm6,%%xmm9", 0);
583 GEN_test_VFNMSUBSS_xmm(VFNMSUBSS)
584 
/* DO_test_block(_name, _sub, _bname, _block)
   Expands to the call  test_<_name>_<_sub>(_bname, _block).
   It is an expression with no trailing ';' -- the caller supplies the
   semicolon, so no empty statement is left behind. */
#define DO_test_block(_name, _sub, _bname, _block) \
   test_##_name##_##_sub((_bname), (_block))
587 
/* DO_test(_name, _sub, _isD)
   Runs test_<_name>_<_sub> three times on one freshly allocated Block:
     "ones"                 every byte set to 1
     "specialD"/"specialF"  the special-values table as doubles when _isD
                            is non-zero, as floats otherwise
     "rand"                 pseudo-random bytes
   The Block is freed afterwards.

   The body is wrapped in do { } while (0) so that the macro followed by
   ';' is exactly one statement, which keeps it safe inside an unbraced
   if/else.  memalign32 comes from tests/malloc.h, which is not visible
   here; its result is asserted non-NULL before first use. */
#define DO_test(_name, _sub, _isD) do { \
   Block* b = memalign32(sizeof(Block)); \
   assert(b != NULL); \
   oneBlock(b); \
   DO_test_block(_name, _sub, "ones", b); \
   if (_isD) { \
      specialDBlock(b); \
      DO_test_block(_name, _sub, "specialD", b); \
   } else { \
      specialFBlock(b); \
      DO_test_block(_name, _sub, "specialF", b); \
   } \
   randBlock(b); \
   DO_test_block(_name, _sub, "rand", b); \
   free(b); \
} while (0)
603 
/* DO_tests_xmm(_name, _isD)
   Runs the four 128-bit variants of one instruction, in this order:
   xmm, xmm_src_dst, xmm_mem1, xmm_mem2.  Wrapped in do { } while (0),
   with no trailing ';', so the macro behaves as a single statement. */
#define DO_tests_xmm(_name,_isD) do { \
   DO_test(_name, xmm, _isD); \
   DO_test(_name, xmm_src_dst, _isD); \
   DO_test(_name, xmm_mem1, _isD); \
   DO_test(_name, xmm_mem2, _isD); \
} while (0)
609 
/* DO_tests_ymm(_name, _isD)
   Runs the four 256-bit variants of one instruction, in this order:
   ymm, ymm_src_dst, ymm_mem1, ymm_mem2.  Wrapped in do { } while (0),
   with no trailing ';', so the macro behaves as a single statement. */
#define DO_tests_ymm(_name,_isD) do { \
   DO_test(_name, ymm, _isD); \
   DO_test(_name, ymm_src_dst, _isD); \
   DO_test(_name, ymm_mem1, _isD); \
   DO_test(_name, ymm_mem2, _isD); \
} while (0)
615 
616 int main ( void )
617 {
618   init_special_values();
619 
620   // 128
621   DO_tests_xmm(VFMADDPD, 1);
622   DO_tests_xmm(VFMADDPS, 0);
623   DO_tests_xmm(VFMADDSD, 1);
624   DO_tests_xmm(VFMADDSS, 0);
625   DO_tests_xmm(VFMADDSUBPD, 1);
626   DO_tests_xmm(VFMADDSUBPS, 0);
627   DO_tests_xmm(VFMSUBADDPD, 1);
628   DO_tests_xmm(VFMSUBADDPS, 0);
629   DO_tests_xmm(VFMSUBPD, 1);
630   DO_tests_xmm(VFMSUBPS, 0);
631   DO_tests_xmm(VFMSUBSD, 1);
632   DO_tests_xmm(VFMSUBSS, 0);
633   DO_tests_xmm(VFNMADDPD, 1);
634   DO_tests_xmm(VFNMADDPS, 0);
635   DO_tests_xmm(VFNMADDSD, 1);
636   DO_tests_xmm(VFNMADDSS, 0);
637   DO_tests_xmm(VFNMSUBPD, 1);
638   DO_tests_xmm(VFNMSUBPS, 0);
639   DO_tests_xmm(VFNMSUBSD, 1);
640   DO_tests_xmm(VFNMSUBSS, 0);
641 
642   // 256
643   /*
644   DO_tests_ymm(VFMADDPD, 1);
645   DO_tests_ymm(VFMADDPS, 0);
646   DO_tests_ymm(VFMADDSUBPD, 1);
647   DO_tests_ymm(VFMADDSUBPS, 0);
648   DO_tests_ymm(VFMSUBADDPD, 1);
649   DO_tests_ymm(VFMSUBADDPS, 0);
650   DO_tests_ymm(VFMSUBPD, 1);
651   DO_tests_ymm(VFMSUBPS, 0);
652   DO_tests_ymm(VFNMADDPD, 1);
653   DO_tests_ymm(VFNMADDPS, 0);
654   DO_tests_ymm(VFNMSUBPD, 1);
655   DO_tests_ymm(VFNMSUBPS, 0);
656   */
657 
658   return 0;
659 }
660