1
2 #include <stdio.h>
3 #include <stdlib.h>
4 #include <assert.h>
5 #include "tests/malloc.h"
6
/* Short names for the unsigned integer types used throughout. */
typedef unsigned char UChar;
typedef unsigned int UInt;
typedef unsigned long int UWord;
typedef unsigned long long int ULong;

/* Byte array that the gather tests read from.  The asm refers to it by
   the symbol name _randArray.
   NOTE(review): the Darwin variant drops the leading underscore from
   the C name, presumably because that platform prepends one to C
   symbols itself -- confirm. */
#if defined(VGO_darwin)
UChar randArray[1027] __attribute__((used));
#else
UChar _randArray[1027] __attribute__((used));
#endif

/* True iff the low five bits of the pointer value are zero. */
#define IS_32_ALIGNED(_ptr) (0 == (0x1F & (UWord)(_ptr)))

/* One 256-bit vector, viewable as 32 bytes or as 8 UInts. */
typedef union { UChar u8[32]; UInt u32[8]; } YMM;

/* Register image used by every test: four vectors plus one 64-bit
   integer.  The asm in GEN_test_RandM addresses these fields at byte
   offsets 0, 32, 64, 96 and 128 from the start of the block. */
typedef struct { YMM a1; YMM a2; YMM a3; YMM a4; ULong u64; } Block;
23
/* Print the 32 bytes of *vec as two-digit hex, starting with u8[31] and
   ending with u8[0], with a '.' between each group of eight bytes.  No
   trailing newline is printed.  vec must be 32-byte aligned (asserted). */
void showYMM ( YMM* vec )
{
   int byteIx = 32;
   assert(IS_32_ALIGNED(vec));
   while (byteIx-- > 0) {
      printf("%02x", (UInt)vec->u8[byteIx]);
      /* Separator goes after bytes 24, 16 and 8, never after byte 0. */
      if (byteIx != 0 && (byteIx % 8) == 0)
         printf(".");
   }
}
33
/* Dump a whole Block: the caption msg on its own line, then the four
   vectors a1..a4 (one per line, via showYMM), then u64 as 16 hex digits. */
void showBlock ( char* msg, Block* block )
{
   YMM* vecs[4];
   int  k;

   vecs[0] = &block->a1;
   vecs[1] = &block->a2;
   vecs[2] = &block->a3;
   vecs[3] = &block->a4;

   printf(" %s\n", msg);
   for (k = 0; k < 4; k++) {
      printf(" ");
      showYMM(vecs[k]);
      printf("\n");
   }
   printf(" %016llx\n", block->u64);
}
43
randUChar(void)44 UChar randUChar ( void )
45 {
46 static UInt seed = 80021;
47 seed = 1103515245 * seed + 12345;
48 return (seed >> 17) & 0xFF;
49 }
50
/* Overwrite every byte of *b -- all sizeof(Block) of them, so any
   padding after the last field as well -- with successive values from
   randUChar(), lowest address first.

   The index is a size_t so that the comparison against sizeof(Block)
   is between two unsigned values of the same type, instead of relying
   on an implicit int -> size_t conversion. */
void randBlock ( Block* b )
{
   size_t i;
   UChar* p = (UChar*)b;
   for (i = 0; i < sizeof(Block); i++)
      p[i] = randUChar();
}
58
59
/* GEN_test_RandM(NAME, REG_FORM, MEM_FORM) defines a static function
   test_NAME that exercises one instruction twice: first with the asm
   text REG_FORM (register operands), then with the asm text MEM_FORM
   (memory operand).  Each time a freshly randomised Block is printed,
   loaded into registers, operated on, stored back and printed again.

   Register form: ymm7, ymm8, ymm6, ymm9 and r14 are loaded from the
   fields a1, a2, a3, a4 and u64 respectively and written back to the
   same fields afterwards.  REG_FORM may mention, as operands, only
   %ymm6, %ymm7, %ymm8, %ymm9 and %r14.

   Memory form: rax points at the start of the block, so (%rax) is a1;
   ymm8, ymm7, ymm9 and r14 are loaded from a2, a3, a4 and u64 and
   written back afterwards.  MEM_FORM may mention, as operands, only
   (%rax), %ymm7, %ymm8, %ymm9 and %r14.

   It is OK for the insn to clobber ymm0 (needed for testing PCMPxSTRx)
   and ymm6 (needed for testing MOVMASK variants); both appear in the
   clobber list of each asm statement. */

#define GEN_test_RandM(_name, _reg_form, _mem_form) \
   \
   __attribute__ ((noinline)) static void test_##_name ( void ) \
   { \
      Block* blk = memalign32(sizeof(Block)); \
      \
      /* ---- register-operand form ---- */ \
      randBlock(blk); \
      printf("%s(reg)\n", #_name); \
      showBlock("before", blk); \
      __asm__ __volatile__( \
         "vmovdqa 0(%0),%%ymm7\n\t" \
         "vmovdqa 32(%0),%%ymm8\n\t" \
         "vmovdqa 64(%0),%%ymm6\n\t" \
         "vmovdqa 96(%0),%%ymm9\n\t" \
         "movq 128(%0),%%r14\n\t" \
         _reg_form "\n\t" \
         "vmovdqa %%ymm7, 0(%0)\n\t" \
         "vmovdqa %%ymm8, 32(%0)\n\t" \
         "vmovdqa %%ymm6, 64(%0)\n\t" \
         "vmovdqa %%ymm9, 96(%0)\n\t" \
         "movq %%r14, 128(%0)\n\t" \
         : /* no outputs */ \
         : /* input: block address */ "r"(blk) \
         : /* clobbers */ \
           "xmm0","xmm7","xmm8","xmm6","xmm9","r14","memory","cc" \
      ); \
      showBlock("after", blk); \
      \
      /* ---- memory-operand form ---- */ \
      randBlock(blk); \
      printf("%s(mem)\n", #_name); \
      showBlock("before", blk); \
      __asm__ __volatile__( \
         "leaq 0(%0),%%rax\n\t" \
         "vmovdqa 32(%0),%%ymm8\n\t" \
         "vmovdqa 64(%0),%%ymm7\n\t" \
         "vmovdqa 96(%0),%%ymm9\n\t" \
         "movq 128(%0),%%r14\n\t" \
         _mem_form "\n\t" \
         "vmovdqa %%ymm8, 32(%0)\n\t" \
         "vmovdqa %%ymm7, 64(%0)\n\t" \
         "vmovdqa %%ymm9, 96(%0)\n\t" \
         "movq %%r14, 128(%0)\n\t" \
         : /* no outputs */ \
         : /* input: block address */ "r"(blk) \
         : /* clobbers */ \
           "xmm6", \
           "xmm0","xmm8","xmm7","xmm9","r14","rax","memory","cc" \
      ); \
      showBlock("after", blk); \
      printf("\n"); \
      free(blk); \
   }
116
/* Single-form conveniences.  The form that is not supplied is passed to
   GEN_test_RandM as an empty asm string, so the generated test_NAME
   still runs and prints both the (reg) and the (mem) half; the unused
   half simply executes no instruction between loading the registers
   and storing them back. */
#define GEN_test_Ronly(_name, _reg_form) \
   GEN_test_RandM(_name, _reg_form, "")
#define GEN_test_Monly(_name, _mem_form) \
   GEN_test_RandM(_name, "", _mem_form)
121
/* Vector integers promoted from 128-bit in AVX to 256-bit in AVX2. */
123
/* Each entry below instantiates test_<NAME>.  The first string is the
   register-operand form, the second the form whose memory operand is
   (%rax), i.e. field a1 of the block.  Most entries write ymm7; the
   zero-extending moves and VPABSD take one source and write ymm8. */
GEN_test_RandM(VPOR_256,
               "vpor %%ymm6, %%ymm8, %%ymm7",
               "vpor (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPXOR_256,
               "vpxor %%ymm6, %%ymm8, %%ymm7",
               "vpxor (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBB_256,
               "vpsubb %%ymm6, %%ymm8, %%ymm7",
               "vpsubb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBD_256,
               "vpsubd %%ymm6, %%ymm8, %%ymm7",
               "vpsubd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDD_256,
               "vpaddd %%ymm6, %%ymm8, %%ymm7",
               "vpaddd (%%rax), %%ymm8, %%ymm7")

/* Widening moves: the register form reads only the xmm (low) half of
   register 6. */
GEN_test_RandM(VPMOVZXWD_256,
               "vpmovzxwd %%xmm6, %%ymm8",
               "vpmovzxwd (%%rax), %%ymm8")

GEN_test_RandM(VPMOVZXBW_256,
               "vpmovzxbw %%xmm6, %%ymm8",
               "vpmovzxbw (%%rax), %%ymm8")

/* Four-operand blend: ymm9 is the first operand in both forms. */
GEN_test_RandM(VPBLENDVB_256,
               "vpblendvb %%ymm9, %%ymm6, %%ymm8, %%ymm7",
               "vpblendvb %%ymm9, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINSD_256,
               "vpminsd %%ymm6, %%ymm8, %%ymm7",
               "vpminsd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMAXSD_256,
               "vpmaxsd %%ymm6, %%ymm8, %%ymm7",
               "vpmaxsd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSHUFB_256,
               "vpshufb %%ymm6, %%ymm8, %%ymm7",
               "vpshufb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKLBW_256,
               "vpunpcklbw %%ymm6, %%ymm8, %%ymm7",
               "vpunpcklbw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKHBW_256,
               "vpunpckhbw %%ymm6, %%ymm8, %%ymm7",
               "vpunpckhbw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPABSD_256,
               "vpabsd %%ymm6, %%ymm8",
               "vpabsd (%%rax), %%ymm8")

GEN_test_RandM(VPACKUSWB_256,
               "vpackuswb %%ymm9, %%ymm8, %%ymm7",
               "vpackuswb (%%rax), %%ymm8, %%ymm7")
183
/* Register-only: the result goes to r14, which the generated function
   stores back into the u64 field. */
GEN_test_Ronly(VPMOVMSKB_256,
               "vpmovmskb %%ymm8, %%r14")

GEN_test_RandM(VPAND_256,
               "vpand %%ymm9, %%ymm8, %%ymm7",
               "vpand (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPEQB_256,
               "vpcmpeqb %%ymm9, %%ymm8, %%ymm7",
               "vpcmpeqb (%%rax), %%ymm8, %%ymm7")

/* For the two word shuffles the 0x39 in the test name is the immediate
   of the register form only; the memory form uses 0xC6 and writes ymm8
   rather than ymm7. */
GEN_test_RandM(VPSHUFLW_0x39_256,
               "vpshuflw $0x39, %%ymm9, %%ymm7",
               "vpshuflw $0xC6, (%%rax), %%ymm8")

GEN_test_RandM(VPSHUFHW_0x39_256,
               "vpshufhw $0x39, %%ymm9, %%ymm7",
               "vpshufhw $0xC6, (%%rax), %%ymm8")

GEN_test_RandM(VPMULLW_256,
               "vpmullw %%ymm9, %%ymm8, %%ymm7",
               "vpmullw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDUSW_256,
               "vpaddusw %%ymm9, %%ymm8, %%ymm7",
               "vpaddusw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMULHUW_256,
               "vpmulhuw %%ymm9, %%ymm8, %%ymm7",
               "vpmulhuw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDUSB_256,
               "vpaddusb %%ymm9, %%ymm8, %%ymm7",
               "vpaddusb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKLWD_256,
               "vpunpcklwd %%ymm6, %%ymm8, %%ymm7",
               "vpunpcklwd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKHWD_256,
               "vpunpckhwd %%ymm6, %%ymm8, %%ymm7",
               "vpunpckhwd (%%rax), %%ymm8, %%ymm7")

/* Shifts by the immediate 5: register form only. */
GEN_test_Ronly(VPSLLD_0x05_256,
               "vpslld $0x5, %%ymm9, %%ymm7")

GEN_test_Ronly(VPSRLD_0x05_256,
               "vpsrld $0x5, %%ymm9, %%ymm7")

GEN_test_Ronly(VPSRAD_0x05_256,
               "vpsrad $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPSUBUSB_256,
               "vpsubusb %%ymm9, %%ymm8, %%ymm7",
               "vpsubusb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBSB_256,
               "vpsubsb %%ymm9, %%ymm8, %%ymm7",
               "vpsubsb (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSRLDQ_0x05_256,
               "vpsrldq $0x5, %%ymm9, %%ymm7")

GEN_test_Ronly(VPSLLDQ_0x05_256,
               "vpslldq $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPANDN_256,
               "vpandn %%ymm9, %%ymm8, %%ymm7",
               "vpandn (%%rax), %%ymm8, %%ymm7")
253
/* More two-source 256-bit integer ops (first string: register form,
   second string: memory form through (%rax)), interleaved with
   register-only shifts by the immediate 5. */
GEN_test_RandM(VPUNPCKLQDQ_256,
               "vpunpcklqdq %%ymm6, %%ymm8, %%ymm7",
               "vpunpcklqdq (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSRLW_0x05_256,
               "vpsrlw $0x5, %%ymm9, %%ymm7")

GEN_test_Ronly(VPSLLW_0x05_256,
               "vpsllw $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPADDW_256,
               "vpaddw %%ymm6, %%ymm8, %%ymm7",
               "vpaddw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPACKSSDW_256,
               "vpackssdw %%ymm9, %%ymm8, %%ymm7",
               "vpackssdw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKLDQ_256,
               "vpunpckldq %%ymm6, %%ymm8, %%ymm7",
               "vpunpckldq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPEQD_256,
               "vpcmpeqd %%ymm6, %%ymm8, %%ymm7",
               "vpcmpeqd (%%rax), %%ymm8, %%ymm7")

/* The 0x39 in the name is the register-form immediate; the memory form
   uses 0xC6.  The two forms also write different registers (ymm8 and
   ymm7 respectively). */
GEN_test_RandM(VPSHUFD_0x39_256,
               "vpshufd $0x39, %%ymm9, %%ymm8",
               "vpshufd $0xC6, (%%rax), %%ymm7")

GEN_test_RandM(VPADDQ_256,
               "vpaddq %%ymm6, %%ymm8, %%ymm7",
               "vpaddq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBQ_256,
               "vpsubq %%ymm6, %%ymm8, %%ymm7",
               "vpsubq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBW_256,
               "vpsubw %%ymm6, %%ymm8, %%ymm7",
               "vpsubw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPEQQ_256,
               "vpcmpeqq %%ymm6, %%ymm8, %%ymm7",
               "vpcmpeqq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPGTQ_256,
               "vpcmpgtq %%ymm6, %%ymm8, %%ymm7",
               "vpcmpgtq (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSRLQ_0x05_256,
               "vpsrlq $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPMULUDQ_256,
               "vpmuludq %%ymm6, %%ymm8, %%ymm7",
               "vpmuludq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMULDQ_256,
               "vpmuldq %%ymm6, %%ymm8, %%ymm7",
               "vpmuldq (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSLLQ_0x05_256,
               "vpsllq $0x5, %%ymm9, %%ymm7")

/* Element-wise min/max over dword, word and byte lanes, signed and
   unsigned variants, plus VPMULLD. */
GEN_test_RandM(VPMAXUD_256,
               "vpmaxud %%ymm6, %%ymm8, %%ymm7",
               "vpmaxud (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINUD_256,
               "vpminud %%ymm6, %%ymm8, %%ymm7",
               "vpminud (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMULLD_256,
               "vpmulld %%ymm6, %%ymm8, %%ymm7",
               "vpmulld (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMAXUW_256,
               "vpmaxuw %%ymm6, %%ymm8, %%ymm7",
               "vpmaxuw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINUW_256,
               "vpminuw %%ymm6, %%ymm8, %%ymm7",
               "vpminuw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMAXSW_256,
               "vpmaxsw %%ymm6, %%ymm8, %%ymm7",
               "vpmaxsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINSW_256,
               "vpminsw %%ymm6, %%ymm8, %%ymm7",
               "vpminsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMAXUB_256,
               "vpmaxub %%ymm6, %%ymm8, %%ymm7",
               "vpmaxub (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINUB_256,
               "vpminub %%ymm6, %%ymm8, %%ymm7",
               "vpminub (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMAXSB_256,
               "vpmaxsb %%ymm6, %%ymm8, %%ymm7",
               "vpmaxsb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMINSB_256,
               "vpminsb %%ymm6, %%ymm8, %%ymm7",
               "vpminsb (%%rax), %%ymm8, %%ymm7")
361
/* Sign/zero-extending moves (register form reads xmm6, result in ymm8),
   mixed with further two-source ops that write ymm7.  First string is
   the register form, second the memory form through (%rax). */
GEN_test_RandM(VPMOVSXBW_256,
               "vpmovsxbw %%xmm6, %%ymm8",
               "vpmovsxbw (%%rax), %%ymm8")

GEN_test_RandM(VPSUBUSW_256,
               "vpsubusw %%ymm9, %%ymm8, %%ymm7",
               "vpsubusw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSUBSW_256,
               "vpsubsw %%ymm9, %%ymm8, %%ymm7",
               "vpsubsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPEQW_256,
               "vpcmpeqw %%ymm6, %%ymm8, %%ymm7",
               "vpcmpeqw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDB_256,
               "vpaddb %%ymm6, %%ymm8, %%ymm7",
               "vpaddb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKHDQ_256,
               "vpunpckhdq %%ymm6, %%ymm8, %%ymm7",
               "vpunpckhdq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMOVSXDQ_256,
               "vpmovsxdq %%xmm6, %%ymm8",
               "vpmovsxdq (%%rax), %%ymm8")

GEN_test_RandM(VPMOVSXWD_256,
               "vpmovsxwd %%xmm6, %%ymm8",
               "vpmovsxwd (%%rax), %%ymm8")

GEN_test_RandM(VPMULHW_256,
               "vpmulhw %%ymm9, %%ymm8, %%ymm7",
               "vpmulhw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPUNPCKHQDQ_256,
               "vpunpckhqdq %%ymm6, %%ymm8, %%ymm7",
               "vpunpckhqdq (%%rax), %%ymm8, %%ymm7")

GEN_test_Ronly(VPSRAW_0x05_256,
               "vpsraw $0x5, %%ymm9, %%ymm7")

GEN_test_RandM(VPCMPGTB_256,
               "vpcmpgtb %%ymm6, %%ymm8, %%ymm7",
               "vpcmpgtb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPGTW_256,
               "vpcmpgtw %%ymm6, %%ymm8, %%ymm7",
               "vpcmpgtw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPCMPGTD_256,
               "vpcmpgtd %%ymm6, %%ymm8, %%ymm7",
               "vpcmpgtd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMOVZXBD_256,
               "vpmovzxbd %%xmm6, %%ymm8",
               "vpmovzxbd (%%rax), %%ymm8")

GEN_test_RandM(VPMOVSXBD_256,
               "vpmovsxbd %%xmm6, %%ymm8",
               "vpmovsxbd (%%rax), %%ymm8")
424
/* VPALIGNR with six different byte offsets: the register forms use
   0, 6 and 12, the memory forms 3, 9 and 15. */
GEN_test_RandM(VPALIGNR_256_1of3,
               "vpalignr $0, %%ymm6, %%ymm8, %%ymm7",
               "vpalignr $3, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPALIGNR_256_2of3,
               "vpalignr $6, %%ymm6, %%ymm8, %%ymm7",
               "vpalignr $9, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPALIGNR_256_3of3,
               "vpalignr $12, %%ymm6, %%ymm8, %%ymm7",
               "vpalignr $15, (%%rax), %%ymm8, %%ymm7")

/* VPBLENDW with assorted selector immediates.  The hex value in each
   test name is the immediate of the register form; the memory form of
   the same test uses a different immediate. */
GEN_test_RandM(VPBLENDW_256_0x00,
               "vpblendw $0x00, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0x01, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDW_256_0xFE,
               "vpblendw $0xFE, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0xFF, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDW_256_0x30,
               "vpblendw $0x30, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0x03, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDW_256_0x21,
               "vpblendw $0x21, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0x12, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDW_256_0xD7,
               "vpblendw $0xD7, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0x6C, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDW_256_0xB5,
               "vpblendw $0xB5, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0x4A, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDW_256_0x85,
               "vpblendw $0x85, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0xDC, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDW_256_0x29,
               "vpblendw $0x29, %%ymm6, %%ymm8, %%ymm7",
               "vpblendw $0x92, (%%rax), %%ymm8, %%ymm7")
459
/* Shifts whose count is a single value held in an xmm register or in
   memory.  All eight tests shift ymm8 and write ymm9.

   Register form: r14 (loaded from the u64 field) is masked with andl,
   its low 32 bits are moved into xmm6 with vmovd, and xmm6 supplies
   the count.

   Memory form: the u64 field itself, at 128(%rax), is masked in place
   with andq and the count operand is read from that address.

   The masks are 15 for word shifts, 31 for dword shifts and 63 for
   qword shifts, with the one exception noted at VPSRAW_256. */
GEN_test_RandM(VPSLLW_256,
               "andl $15, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsllw %%xmm6, %%ymm8, %%ymm9",
               "andq $15, 128(%%rax);"
               "vpsllw 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSRLW_256,
               "andl $15, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsrlw %%xmm6, %%ymm8, %%ymm9",
               "andq $15, 128(%%rax);"
               "vpsrlw 128(%%rax), %%ymm8, %%ymm9")

/* NOTE(review): the register form masks the count with 31 while the
   memory form (and both other word shifts) use 15, so the register
   form can see counts of 16..31, beyond the 16-bit element width.
   Whether that is deliberate cannot be told from here; changing it
   would change the values this test prints. */
GEN_test_RandM(VPSRAW_256,
               "andl $31, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsraw %%xmm6, %%ymm8, %%ymm9",
               "andq $15, 128(%%rax);"
               "vpsraw 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSLLD_256,
               "andl $31, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpslld %%xmm6, %%ymm8, %%ymm9",
               "andq $31, 128(%%rax);"
               "vpslld 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSRLD_256,
               "andl $31, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsrld %%xmm6, %%ymm8, %%ymm9",
               "andq $31, 128(%%rax);"
               "vpsrld 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSRAD_256,
               "andl $31, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsrad %%xmm6, %%ymm8, %%ymm9",
               "andq $31, 128(%%rax);"
               "vpsrad 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSLLQ_256,
               "andl $63, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsllq %%xmm6, %%ymm8, %%ymm9",
               "andq $63, 128(%%rax);"
               "vpsllq 128(%%rax), %%ymm8, %%ymm9")

GEN_test_RandM(VPSRLQ_256,
               "andl $63, %%r14d;"
               "vmovd %%r14d, %%xmm6;"
               "vpsrlq %%xmm6, %%ymm8, %%ymm9",
               "andq $63, 128(%%rax);"
               "vpsrlq 128(%%rax), %%ymm8, %%ymm9")
515
/* Further 256-bit integer ops.  First string: register form; second
   string: memory form through (%rax).  Unless noted, the result is
   written to ymm7. */
GEN_test_RandM(VPMADDWD_256,
               "vpmaddwd %%ymm6, %%ymm8, %%ymm7",
               "vpmaddwd (%%rax), %%ymm8, %%ymm7")

/* Memory-only: loads from (%rax) into ymm9. */
GEN_test_Monly(VMOVNTDQA_256,
               "vmovntdqa (%%rax), %%ymm9")

GEN_test_RandM(VPACKSSWB_256,
               "vpacksswb %%ymm6, %%ymm8, %%ymm7",
               "vpacksswb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPAVGB_256,
               "vpavgb %%ymm6, %%ymm8, %%ymm7",
               "vpavgb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPAVGW_256,
               "vpavgw %%ymm6, %%ymm8, %%ymm7",
               "vpavgw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDSB_256,
               "vpaddsb %%ymm6, %%ymm8, %%ymm7",
               "vpaddsb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPADDSW_256,
               "vpaddsw %%ymm6, %%ymm8, %%ymm7",
               "vpaddsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHADDW_256,
               "vphaddw %%ymm6, %%ymm8, %%ymm7",
               "vphaddw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHADDD_256,
               "vphaddd %%ymm6, %%ymm8, %%ymm7",
               "vphaddd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHADDSW_256,
               "vphaddsw %%ymm6, %%ymm8, %%ymm7",
               "vphaddsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMADDUBSW_256,
               "vpmaddubsw %%ymm6, %%ymm8, %%ymm7",
               "vpmaddubsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHSUBW_256,
               "vphsubw %%ymm6, %%ymm8, %%ymm7",
               "vphsubw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHSUBD_256,
               "vphsubd %%ymm6, %%ymm8, %%ymm7",
               "vphsubd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPHSUBSW_256,
               "vphsubsw %%ymm6, %%ymm8, %%ymm7",
               "vphsubsw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPABSB_256,
               "vpabsb %%ymm6, %%ymm7",
               "vpabsb (%%rax), %%ymm7")

GEN_test_RandM(VPABSW_256,
               "vpabsw %%ymm6, %%ymm7",
               "vpabsw (%%rax), %%ymm7")

/* Widening moves: register form reads xmm6, result in ymm8. */
GEN_test_RandM(VPMOVSXBQ_256,
               "vpmovsxbq %%xmm6, %%ymm8",
               "vpmovsxbq (%%rax), %%ymm8")

GEN_test_RandM(VPMOVSXWQ_256,
               "vpmovsxwq %%xmm6, %%ymm8",
               "vpmovsxwq (%%rax), %%ymm8")

GEN_test_RandM(VPACKUSDW_256,
               "vpackusdw %%ymm6, %%ymm8, %%ymm7",
               "vpackusdw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMOVZXBQ_256,
               "vpmovzxbq %%xmm6, %%ymm8",
               "vpmovzxbq (%%rax), %%ymm8")

GEN_test_RandM(VPMOVZXWQ_256,
               "vpmovzxwq %%xmm6, %%ymm8",
               "vpmovzxwq (%%rax), %%ymm8")

GEN_test_RandM(VPMOVZXDQ_256,
               "vpmovzxdq %%xmm6, %%ymm8",
               "vpmovzxdq (%%rax), %%ymm8")

/* VMPSADBW with eight different immediates; here the register and
   memory forms of each test use the same immediate, matching the name. */
GEN_test_RandM(VMPSADBW_256_0x0,
               "vmpsadbw $0, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x39,
               "vmpsadbw $0x39, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x39, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x32,
               "vmpsadbw $0x32, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x32, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x2b,
               "vmpsadbw $0x2b, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x2b, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x24,
               "vmpsadbw $0x24, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x24, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x1d,
               "vmpsadbw $0x1d, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x1d, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x16,
               "vmpsadbw $0x16, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x16, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VMPSADBW_256_0x0f,
               "vmpsadbw $0x0f, %%ymm6, %%ymm8, %%ymm7",
               "vmpsadbw $0x0f, (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSADBW_256,
               "vpsadbw %%ymm6, %%ymm8, %%ymm7",
               "vpsadbw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSIGNB_256,
               "vpsignb %%ymm6, %%ymm8, %%ymm7",
               "vpsignb (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSIGNW_256,
               "vpsignw %%ymm6, %%ymm8, %%ymm7",
               "vpsignw (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSIGND_256,
               "vpsignd %%ymm6, %%ymm8, %%ymm7",
               "vpsignd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPMULHRSW_256,
               "vpmulhrsw %%ymm6, %%ymm8, %%ymm7",
               "vpmulhrsw (%%rax), %%ymm8, %%ymm7")
647
648 /* Instructions new in AVX2. */
649
/* 128-bit lane broadcast from memory: memory form only. */
GEN_test_Monly(VBROADCASTI128,
               "vbroadcasti128 (%%rax), %%ymm9")

/* Lane extract.  The memory form writes its 16-byte result to (%rax),
   i.e. into field a1 of the block; the generated function does not
   store any register over a1 afterwards in the memory half. */
GEN_test_RandM(VEXTRACTI128_0x0,
               "vextracti128 $0x0, %%ymm7, %%xmm9",
               "vextracti128 $0x0, %%ymm7, (%%rax)")

GEN_test_RandM(VEXTRACTI128_0x1,
               "vextracti128 $0x1, %%ymm7, %%xmm9",
               "vextracti128 $0x1, %%ymm7, (%%rax)")

/* Lane insert into a copy of ymm7, result in ymm8. */
GEN_test_RandM(VINSERTI128_0x0,
               "vinserti128 $0x0, %%xmm9, %%ymm7, %%ymm8",
               "vinserti128 $0x0, (%%rax), %%ymm7, %%ymm8")

GEN_test_RandM(VINSERTI128_0x1,
               "vinserti128 $0x1, %%xmm9, %%ymm7, %%ymm8",
               "vinserti128 $0x1, (%%rax), %%ymm7, %%ymm8")

/* VPERM2I128 with eight control immediates; both forms of each test
   use the immediate given in the test name. */
GEN_test_RandM(VPERM2I128_0x00,
               "vperm2i128 $0x00, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x00, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2I128_0xFF,
               "vperm2i128 $0xFF, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0xFF, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2I128_0x30,
               "vperm2i128 $0x30, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x30, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2I128_0x21,
               "vperm2i128 $0x21, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x21, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2I128_0x12,
               "vperm2i128 $0x12, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x12, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2I128_0x03,
               "vperm2i128 $0x03, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x03, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2I128_0x85,
               "vperm2i128 $0x85, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x85, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPERM2I128_0x5A,
               "vperm2i128 $0x5A, %%ymm6, %%ymm8, %%ymm7",
               "vperm2i128 $0x5A, (%%rax), %%ymm8, %%ymm7")

/* Floating-point broadcasts with a register source (xmm9): register
   form only. */
GEN_test_Ronly(VBROADCASTSS_128,
               "vbroadcastss %%xmm9, %%xmm7")

GEN_test_Ronly(VBROADCASTSS_256,
               "vbroadcastss %%xmm9, %%ymm7")

GEN_test_Ronly(VBROADCASTSD_256,
               "vbroadcastsd %%xmm9, %%ymm7")
702
/* Full-width permutes.  VPERMD and VPERMPS take ymm7 as one source and
   write ymm9.  VPERMQ and VPERMPD are controlled by an immediate: the
   hex value in each test name is the register-form immediate, and the
   memory form of the same test uses a different one. */
GEN_test_RandM(VPERMD,
               "vpermd %%ymm6, %%ymm7, %%ymm9",
               "vpermd (%%rax), %%ymm7, %%ymm9")

GEN_test_RandM(VPERMQ_0x00,
               "vpermq $0x00, %%ymm6, %%ymm7",
               "vpermq $0x01, (%%rax), %%ymm7")
GEN_test_RandM(VPERMQ_0xFE,
               "vpermq $0xFE, %%ymm6, %%ymm7",
               "vpermq $0xFF, (%%rax), %%ymm7")
GEN_test_RandM(VPERMQ_0x30,
               "vpermq $0x30, %%ymm6, %%ymm7",
               "vpermq $0x03, (%%rax), %%ymm7")
GEN_test_RandM(VPERMQ_0x21,
               "vpermq $0x21, %%ymm6, %%ymm7",
               "vpermq $0x12, (%%rax), %%ymm7")
GEN_test_RandM(VPERMQ_0xD7,
               "vpermq $0xD7, %%ymm6, %%ymm7",
               "vpermq $0x6C, (%%rax), %%ymm7")
GEN_test_RandM(VPERMQ_0xB5,
               "vpermq $0xB5, %%ymm6, %%ymm7",
               "vpermq $0x4A, (%%rax), %%ymm7")
GEN_test_RandM(VPERMQ_0x85,
               "vpermq $0x85, %%ymm6, %%ymm7",
               "vpermq $0xDC, (%%rax), %%ymm7")
GEN_test_RandM(VPERMQ_0x29,
               "vpermq $0x29, %%ymm6, %%ymm7",
               "vpermq $0x92, (%%rax), %%ymm7")

GEN_test_RandM(VPERMPS,
               "vpermps %%ymm6, %%ymm7, %%ymm9",
               "vpermps (%%rax), %%ymm7, %%ymm9")

GEN_test_RandM(VPERMPD_0x00,
               "vpermpd $0x00, %%ymm6, %%ymm7",
               "vpermpd $0x01, (%%rax), %%ymm7")
GEN_test_RandM(VPERMPD_0xFE,
               "vpermpd $0xFE, %%ymm6, %%ymm7",
               "vpermpd $0xFF, (%%rax), %%ymm7")
GEN_test_RandM(VPERMPD_0x30,
               "vpermpd $0x30, %%ymm6, %%ymm7",
               "vpermpd $0x03, (%%rax), %%ymm7")
GEN_test_RandM(VPERMPD_0x21,
               "vpermpd $0x21, %%ymm6, %%ymm7",
               "vpermpd $0x12, (%%rax), %%ymm7")
GEN_test_RandM(VPERMPD_0xD7,
               "vpermpd $0xD7, %%ymm6, %%ymm7",
               "vpermpd $0x6C, (%%rax), %%ymm7")
GEN_test_RandM(VPERMPD_0xB5,
               "vpermpd $0xB5, %%ymm6, %%ymm7",
               "vpermpd $0x4A, (%%rax), %%ymm7")
GEN_test_RandM(VPERMPD_0x85,
               "vpermpd $0x85, %%ymm6, %%ymm7",
               "vpermpd $0xDC, (%%rax), %%ymm7")
GEN_test_RandM(VPERMPD_0x29,
               "vpermpd $0x29, %%ymm6, %%ymm7",
               "vpermpd $0x92, (%%rax), %%ymm7")
760
/* VPBLENDD, 128-bit.  The name carries the register-form immediate
   (even values 0x00..0x0E); the memory form uses that value plus one,
   so together the sixteen immediates 0x00..0x0F are covered. */
GEN_test_RandM(VPBLENDD_128_0x00,
               "vpblendd $0x00, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x01, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPBLENDD_128_0x02,
               "vpblendd $0x02, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x03, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPBLENDD_128_0x04,
               "vpblendd $0x04, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x05, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPBLENDD_128_0x06,
               "vpblendd $0x06, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x07, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPBLENDD_128_0x08,
               "vpblendd $0x08, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x09, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPBLENDD_128_0x0A,
               "vpblendd $0x0A, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x0B, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPBLENDD_128_0x0C,
               "vpblendd $0x0C, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x0D, (%%rax), %%xmm8, %%xmm7")
GEN_test_RandM(VPBLENDD_128_0x0E,
               "vpblendd $0x0E, %%xmm6, %%xmm8, %%xmm7",
               "vpblendd $0x0F, (%%rax), %%xmm8, %%xmm7")

/* VPBLENDD, 256-bit.  Again the name carries the register-form
   immediate only; the memory form uses a different value. */
GEN_test_RandM(VPBLENDD_256_0x00,
               "vpblendd $0x00, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0x01, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDD_256_0xFE,
               "vpblendd $0xFE, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0xFF, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDD_256_0x30,
               "vpblendd $0x30, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0x03, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDD_256_0x21,
               "vpblendd $0x21, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0x12, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDD_256_0xD7,
               "vpblendd $0xD7, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0x6C, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDD_256_0xB5,
               "vpblendd $0xB5, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0x4A, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDD_256_0x85,
               "vpblendd $0x85, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0xDC, (%%rax), %%ymm8, %%ymm7")
GEN_test_RandM(VPBLENDD_256_0x29,
               "vpblendd $0x29, %%ymm6, %%ymm8, %%ymm7",
               "vpblendd $0x92, (%%rax), %%ymm8, %%ymm7")
810
/* Per-lane variable shifts: every lane of ymm8 is shifted by the count
   in the corresponding lane of the other source; result in ymm7.

   Register form: the counts in xmm6/ymm6 are first reduced lane by
   lane with a left shift followed by a logical right shift of the same
   amount -- by 27 for dword lanes (keeps the low 5 bits) and by 58 for
   qword lanes (keeps the low 6 bits).

   Memory form: the counts are read from (%rax) after some of them have
   been masked in place with andl.  As written:
     - dword tests mask the lanes at byte offsets 0, 4, 8 (and 16, 20,
       24 for the 256-bit tests); the lanes at offsets 12 and 28 are
       left as they are;
     - qword tests apply a 32-bit andl to offsets 0 (and 8, 16 for the
       256-bit tests), which reduces only the low dword of those
       qwords; their upper dwords, and the last qword, are left as
       they are.
   NOTE(review): the unmasked lanes presumably exist to exercise counts
   larger than the element width -- confirm.  For the qword tests this
   means the memory form almost never sees an in-range count; changing
   andl to andq would alter the values this test prints. */
GEN_test_RandM(VPSLLVD_128,
               "vpslld $27, %%xmm6, %%xmm6;"
               "vpsrld $27, %%xmm6, %%xmm6;"
               "vpsllvd %%xmm6, %%xmm8, %%xmm7",
               "andl $31, (%%rax);"
               "andl $31, 4(%%rax);"
               "andl $31, 8(%%rax);"
               "vpsllvd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPSLLVD_256,
               "vpslld $27, %%ymm6, %%ymm6;"
               "vpsrld $27, %%ymm6, %%ymm6;"
               "vpsllvd %%ymm6, %%ymm8, %%ymm7",
               "andl $31, (%%rax);"
               "andl $31, 4(%%rax);"
               "andl $31, 8(%%rax);"
               "andl $31, 16(%%rax);"
               "andl $31, 20(%%rax);"
               "andl $31, 24(%%rax);"
               "vpsllvd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSLLVQ_128,
               "vpsllq $58, %%xmm6, %%xmm6;"
               "vpsrlq $58, %%xmm6, %%xmm6;"
               "vpsllvq %%xmm6, %%xmm8, %%xmm7",
               "andl $63, (%%rax);"
               "vpsllvq (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPSLLVQ_256,
               "vpsllq $58, %%ymm6, %%ymm6;"
               "vpsrlq $58, %%ymm6, %%ymm6;"
               "vpsllvq %%ymm6, %%ymm8, %%ymm7",
               "andl $63, (%%rax);"
               "andl $63, 8(%%rax);"
               "andl $63, 16(%%rax);"
               "vpsllvq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSRLVD_128,
               "vpslld $27, %%xmm6, %%xmm6;"
               "vpsrld $27, %%xmm6, %%xmm6;"
               "vpsrlvd %%xmm6, %%xmm8, %%xmm7",
               "andl $31, (%%rax);"
               "andl $31, 4(%%rax);"
               "andl $31, 8(%%rax);"
               "vpsrlvd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPSRLVD_256,
               "vpslld $27, %%ymm6, %%ymm6;"
               "vpsrld $27, %%ymm6, %%ymm6;"
               "vpsrlvd %%ymm6, %%ymm8, %%ymm7",
               "andl $31, (%%rax);"
               "andl $31, 4(%%rax);"
               "andl $31, 8(%%rax);"
               "andl $31, 16(%%rax);"
               "andl $31, 20(%%rax);"
               "andl $31, 24(%%rax);"
               "vpsrlvd (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSRLVQ_128,
               "vpsllq $58, %%xmm6, %%xmm6;"
               "vpsrlq $58, %%xmm6, %%xmm6;"
               "vpsrlvq %%xmm6, %%xmm8, %%xmm7",
               "andl $63, (%%rax);"
               "vpsrlvq (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPSRLVQ_256,
               "vpsllq $58, %%ymm6, %%ymm6;"
               "vpsrlq $58, %%ymm6, %%ymm6;"
               "vpsrlvq %%ymm6, %%ymm8, %%ymm7",
               "andl $63, (%%rax);"
               "andl $63, 8(%%rax);"
               "andl $63, 16(%%rax);"
               "vpsrlvq (%%rax), %%ymm8, %%ymm7")

GEN_test_RandM(VPSRAVD_128,
               "vpslld $27, %%xmm6, %%xmm6;"
               "vpsrld $27, %%xmm6, %%xmm6;"
               "vpsravd %%xmm6, %%xmm8, %%xmm7",
               "andl $31, (%%rax);"
               "andl $31, 4(%%rax);"
               "andl $31, 8(%%rax);"
               "vpsravd (%%rax), %%xmm8, %%xmm7")

GEN_test_RandM(VPSRAVD_256,
               "vpslld $27, %%ymm6, %%ymm6;"
               "vpsrld $27, %%ymm6, %%ymm6;"
               "vpsravd %%ymm6, %%ymm8, %%ymm7",
               "andl $31, (%%rax);"
               "andl $31, 4(%%rax);"
               "andl $31, 8(%%rax);"
               "andl $31, 16(%%rax);"
               "andl $31, 20(%%rax);"
               "andl $31, 24(%%rax);"
               "vpsravd (%%rax), %%ymm8, %%ymm7")
905
/* Integer broadcasts of a byte, word, dword or qword element to a
   128-bit or 256-bit destination (register 7).  The register form
   takes the element from xmm9, the memory form from (%rax). */
GEN_test_RandM(VPBROADCASTB_128,
               "vpbroadcastb %%xmm9, %%xmm7",
               "vpbroadcastb (%%rax), %%xmm7")

GEN_test_RandM(VPBROADCASTB_256,
               "vpbroadcastb %%xmm9, %%ymm7",
               "vpbroadcastb (%%rax), %%ymm7")

GEN_test_RandM(VPBROADCASTW_128,
               "vpbroadcastw %%xmm9, %%xmm7",
               "vpbroadcastw (%%rax), %%xmm7")

GEN_test_RandM(VPBROADCASTW_256,
               "vpbroadcastw %%xmm9, %%ymm7",
               "vpbroadcastw (%%rax), %%ymm7")

GEN_test_RandM(VPBROADCASTD_128,
               "vpbroadcastd %%xmm9, %%xmm7",
               "vpbroadcastd (%%rax), %%xmm7")

GEN_test_RandM(VPBROADCASTD_256,
               "vpbroadcastd %%xmm9, %%ymm7",
               "vpbroadcastd (%%rax), %%ymm7")

GEN_test_RandM(VPBROADCASTQ_128,
               "vpbroadcastq %%xmm9, %%xmm7",
               "vpbroadcastq (%%rax), %%xmm7")

GEN_test_RandM(VPBROADCASTQ_256,
               "vpbroadcastq %%xmm9, %%ymm7",
               "vpbroadcastq (%%rax), %%ymm7")
937
/* Masked loads and stores (memory form only).  Every test issues the
   instruction twice:

     1. on (%rax), i.e. field a1, with a randomised mask -- ymm8 in the
        load form (destination ymm7), ymm7 in the store form (data
        ymm8);
     2. after zeroing register 6 by xor-ing it with itself, with that
        all-zero register as the mask and the address (%rax,%rax,4),
        i.e. five times the block address.

   NOTE(review): the second access presumably checks that a fully
   masked-off load/store touches no memory, since its address is not
   inside the block -- confirm. */
GEN_test_Monly(VPMASKMOVD_128_LoadForm,
               "vpmaskmovd (%%rax), %%xmm8, %%xmm7;"
               "vxorps %%xmm6, %%xmm6, %%xmm6;"
               "vpmaskmovd (%%rax,%%rax,4), %%xmm6, %%xmm9")

GEN_test_Monly(VPMASKMOVD_256_LoadForm,
               "vpmaskmovd (%%rax), %%ymm8, %%ymm7;"
               "vxorps %%ymm6, %%ymm6, %%ymm6;"
               "vpmaskmovd (%%rax,%%rax,4), %%ymm6, %%ymm9")

GEN_test_Monly(VPMASKMOVQ_128_LoadForm,
               "vpmaskmovq (%%rax), %%xmm8, %%xmm7;"
               "vxorpd %%xmm6, %%xmm6, %%xmm6;"
               "vpmaskmovq (%%rax,%%rax,4), %%xmm6, %%xmm9")

GEN_test_Monly(VPMASKMOVQ_256_LoadForm,
               "vpmaskmovq (%%rax), %%ymm8, %%ymm7;"
               "vxorpd %%ymm6, %%ymm6, %%ymm6;"
               "vpmaskmovq (%%rax,%%rax,4), %%ymm6, %%ymm9")

GEN_test_Monly(VPMASKMOVD_128_StoreForm,
               "vpmaskmovd %%xmm8, %%xmm7, (%%rax);"
               "vxorps %%xmm6, %%xmm6, %%xmm6;"
               "vpmaskmovd %%xmm9, %%xmm6, (%%rax,%%rax,4)")

GEN_test_Monly(VPMASKMOVD_256_StoreForm,
               "vpmaskmovd %%ymm8, %%ymm7, (%%rax);"
               "vxorps %%ymm6, %%ymm6, %%ymm6;"
               "vpmaskmovd %%ymm9, %%ymm6, (%%rax,%%rax,4)")

GEN_test_Monly(VPMASKMOVQ_128_StoreForm,
               "vpmaskmovq %%xmm8, %%xmm7, (%%rax);"
               "vxorpd %%xmm6, %%xmm6, %%xmm6;"
               "vpmaskmovq %%xmm9, %%xmm6, (%%rax,%%rax,4)")

GEN_test_Monly(VPMASKMOVQ_256_StoreForm,
               "vpmaskmovq %%ymm8, %%ymm7, (%%rax);"
               "vxorpd %%ymm6, %%ymm6, %%ymm6;"
               "vpmaskmovq %%ymm9, %%ymm6, (%%rax,%%rax,4)")
977
978 GEN_test_Ronly(VGATHERDPS_128,
979 "vpslld $25, %%xmm7, %%xmm8;"
980 "vpsrld $25, %%xmm8, %%xmm8;"
981 "vblendvps %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
982 "leaq _randArray(%%rip), %%r14;"
983 "vgatherdps %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
984 "xorl %%r14d, %%r14d")
985
986 GEN_test_Ronly(VGATHERDPS_256,
987 "vpslld $25, %%ymm7, %%ymm8;"
988 "vpsrld $25, %%ymm8, %%ymm8;"
989 "vblendvps %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
990 "leaq _randArray(%%rip), %%r14;"
991 "vgatherdps %%ymm6, 3(%%r14,%%ymm8,4), %%ymm9;"
992 "xorl %%r14d, %%r14d")
993
994 GEN_test_Ronly(VGATHERQPS_128_1,
995 "vpsllq $57, %%xmm7, %%xmm8;"
996 "vpsrlq $57, %%xmm8, %%xmm8;"
997 "vpmovsxdq %%xmm6, %%xmm9;"
998 "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
999 "vmovdqa 96(%0), %%ymm9;"
1000 "leaq _randArray(%%rip), %%r14;"
1001 "vgatherqps %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
1002 "xorl %%r14d, %%r14d")
1003
1004 GEN_test_Ronly(VGATHERQPS_256_1,
1005 "vpsllq $57, %%ymm7, %%ymm8;"
1006 "vpsrlq $57, %%ymm8, %%ymm8;"
1007 "vpmovsxdq %%xmm6, %%ymm9;"
1008 "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
1009 "vmovdqa 96(%0), %%ymm9;"
1010 "leaq _randArray(%%rip), %%r14;"
1011 "vgatherqps %%xmm6, 3(%%r14,%%ymm8,4), %%xmm9;"
1012 "xorl %%r14d, %%r14d")
1013
1014 GEN_test_Ronly(VGATHERQPS_128_2,
1015 "vpsllq $57, %%xmm7, %%xmm8;"
1016 "vpsrlq $57, %%xmm8, %%xmm8;"
1017 "vpmovsxdq %%xmm6, %%xmm9;"
1018 "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
1019 "vmovdqa 96(%0), %%ymm9;"
1020 "leaq _randArray(%%rip), %%r14;"
1021 "vmovq %%r14, %%xmm7;"
1022 "vpsllq $2, %%xmm8, %%xmm8;"
1023 "vpbroadcastq %%xmm7, %%xmm7;"
1024 "vpaddq %%xmm7, %%xmm8, %%xmm8;"
1025 "vgatherqps %%xmm6, 1(,%%xmm8,1), %%xmm9;"
1026 "vpsubq %%xmm7, %%xmm8, %%xmm8;"
1027 "vmovdqa 0(%0), %%ymm7;"
1028 "xorl %%r14d, %%r14d")
1029
/* VGATHERQPS with a ymm index register and no base register: the
   four qword index lanes hold complete addresses.
   - ymm8 gets 7-bit indices in lanes whose xmm6 mask dword (widened
     to a qword by vpmovsxdq) has its sign bit set, and raw ymm7
     qwords elsewhere; ymm9 is reloaded from offset 96 of operand %0
     after serving as scratch.
   - The array address is moved from r14 to xmm7 and broadcast to all
     four qwords of ymm7; indices are scaled by 4 (vpsllq $2) and the
     address added, so the gather uses displacement 1, scale 1 and no
     base.
   - Afterwards the address is subtracted from ymm8, ymm7 is reloaded
     from offset 0 of operand %0 and r14 is zeroed.
     NOTE(review): presumably to keep addresses out of the printed
     registers; %0 is presumably the test Block -- confirm. */
1030 GEN_test_Ronly(VGATHERQPS_256_2,
1031 "vpsllq $57, %%ymm7, %%ymm8;"
1032 "vpsrlq $57, %%ymm8, %%ymm8;"
1033 "vpmovsxdq %%xmm6, %%ymm9;"
1034 "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
1035 "vmovdqa 96(%0), %%ymm9;"
1036 "leaq _randArray(%%rip), %%r14;"
1037 "vmovq %%r14, %%xmm7;"
1038 "vpsllq $2, %%ymm8, %%ymm8;"
1039 "vpbroadcastq %%xmm7, %%ymm7;"
1040 "vpaddq %%ymm7, %%ymm8, %%ymm8;"
1041 "vgatherqps %%xmm6, 1(,%%ymm8,1), %%xmm9;"
1042 "vpsubq %%ymm7, %%ymm8, %%ymm8;"
1043 "vmovdqa 0(%0), %%ymm7;"
1044 "xorl %%r14d, %%r14d")
1045
/* VGATHERDPD, 128-bit: gather 64-bit doubles from _randArray using
   dword indices in xmm8, under the qword mask in xmm6, into xmm9.
   - vpslld/vpsrld $26 keep the low 6 bits (0..63) of each dword of
     xmm7 in xmm8; with scale 8 and displacement 3 that stays inside
     the 1027-byte array.
   - vshufps $13 copies dwords 1 and 3 of xmm6 (the upper halves, and
     so the sign bits, of its two qword lanes) into dwords 0 and 1 of
     xmm9, lining the qword mask up with the dword indices for
     vblendvps; lanes whose bit is clear get the raw xmm7 dword.
   - ymm9 is then reloaded from byte offset 96 of asm operand %0.
     NOTE(review): %0 is presumably the test Block (offset 96 would be
     field a4) -- confirm against GEN_test_Ronly.
   - r14 holds the array address and is zeroed after the gather. */
1046 GEN_test_Ronly(VGATHERDPD_128,
1047 "vpslld $26, %%xmm7, %%xmm8;"
1048 "vpsrld $26, %%xmm8, %%xmm8;"
1049 "vshufps $13, %%xmm6, %%xmm6, %%xmm9;"
1050 "vblendvps %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
1051 "vmovdqa 96(%0), %%ymm9;"
1052 "leaq _randArray(%%rip), %%r14;"
1053 "vgatherdpd %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
1054 "xorl %%r14d, %%r14d")
1055
/* VGATHERDPD, 256-bit: gather four 64-bit doubles from _randArray
   using the dword indices in xmm8, under the qword mask in ymm6,
   into ymm9.
   - vpslld/vpsrld $26 keep the low 6 bits (0..63) of each dword of
     ymm7 in ymm8.
   - vextracti128 puts the upper half of ymm6 in xmm9; vshufps $221
     then collects the odd dwords of both halves of ymm6 (the sign-bit
     halves of its four qword lanes) into the low four dwords of ymm9,
     so vblendvps can apply the qword mask to the dword indices.
     Lanes whose bit is clear get the raw ymm7 dword.
   - ymm9 is then reloaded from byte offset 96 of asm operand %0.
     NOTE(review): %0 is presumably the test Block -- confirm against
     GEN_test_Ronly.
   - r14 holds the array address and is zeroed after the gather. */
1056 GEN_test_Ronly(VGATHERDPD_256,
1057 "vpslld $26, %%ymm7, %%ymm8;"
1058 "vpsrld $26, %%ymm8, %%ymm8;"
1059 "vextracti128 $1, %%ymm6, %%xmm9;"
1060 "vshufps $221, %%ymm9, %%ymm6, %%ymm9;"
1061 "vblendvps %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
1062 "vmovdqa 96(%0), %%ymm9;"
1063 "leaq _randArray(%%rip), %%r14;"
1064 "vgatherdpd %%ymm6, 3(%%r14,%%xmm8,8), %%ymm9;"
1065 "xorl %%r14d, %%r14d")
1066
/* VGATHERQPD, 128-bit, base+index addressing: gather two 64-bit
   doubles from _randArray using the qword indices in xmm8, under the
   qword mask in xmm6, into xmm9.
   - vpsllq/vpsrlq $58 keep the low 6 bits (0..63) of each qword of
     xmm7 in xmm8; with scale 8 and displacement 3 that stays inside
     the 1027-byte array.
   - vblendvpd keeps the bounded index only where the xmm6 sign bit is
     set; other lanes receive the raw xmm7 qword.
     NOTE(review): presumably checks that masked-off lanes are never
     dereferenced -- confirm.
   - r14 holds the array address and is zeroed after the gather. */
1067 GEN_test_Ronly(VGATHERQPD_128_1,
1068 "vpsllq $58, %%xmm7, %%xmm8;"
1069 "vpsrlq $58, %%xmm8, %%xmm8;"
1070 "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
1071 "leaq _randArray(%%rip), %%r14;"
1072 "vgatherqpd %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
1073 "xorl %%r14d, %%r14d")
1074
/* VGATHERQPD, 256-bit, base+index addressing: gather four 64-bit
   doubles from _randArray using the qword indices in ymm8, under the
   qword mask in ymm6, into ymm9.
   - vpsllq/vpsrlq $58 keep the low 6 bits (0..63) of each qword of
     ymm7 in ymm8; with scale 8 and displacement 3 that stays inside
     the 1027-byte array.
   - vblendvpd keeps the bounded index only where the ymm6 sign bit is
     set; other lanes receive the raw ymm7 qword.
     NOTE(review): presumably checks that masked-off lanes are never
     dereferenced -- confirm.
   - r14 holds the array address and is zeroed after the gather. */
1075 GEN_test_Ronly(VGATHERQPD_256_1,
1076 "vpsllq $58, %%ymm7, %%ymm8;"
1077 "vpsrlq $58, %%ymm8, %%ymm8;"
1078 "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
1079 "leaq _randArray(%%rip), %%r14;"
1080 "vgatherqpd %%ymm6, 3(%%r14,%%ymm8,8), %%ymm9;"
1081 "xorl %%r14d, %%r14d")
1082
/* VGATHERQPD, 128-bit, with no base register: the two qword index
   lanes hold complete addresses.
   - xmm8 gets 6-bit indices (vpsllq/vpsrlq $58) in lanes whose xmm6
     sign bit is set and raw xmm7 qwords elsewhere (vblendvpd).
   - The array address is copied from r14 into xmm7 and broadcast to
     both qwords; indices are multiplied by 4 (vpsllq $2) and the
     address is added, so the gather uses displacement 1, scale 1 and
     no base.
   - After the gather the address is subtracted from xmm8 again, ymm7
     is reloaded from offset 0 of asm operand %0 and r14 is zeroed.
     NOTE(review): presumably this keeps run-dependent addresses out
     of the printed registers, and %0 is presumably the test Block
     (offset 0 would be field a1) -- confirm. */
1083 GEN_test_Ronly(VGATHERQPD_128_2,
1084 "vpsllq $58, %%xmm7, %%xmm8;"
1085 "vpsrlq $58, %%xmm8, %%xmm8;"
1086 "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
1087 "leaq _randArray(%%rip), %%r14;"
1088 "vmovq %%r14, %%xmm7;"
1089 "vpsllq $2, %%xmm8, %%xmm8;"
1090 "vpbroadcastq %%xmm7, %%xmm7;"
1091 "vpaddq %%xmm7, %%xmm8, %%xmm8;"
1092 "vgatherqpd %%xmm6, 1(,%%xmm8,1), %%xmm9;"
1093 "vpsubq %%xmm7, %%xmm8, %%xmm8;"
1094 "vmovdqa 0(%0), %%ymm7;"
1095 "xorl %%r14d, %%r14d")
1096
/* VGATHERQPD, 256-bit, with no base register: the four qword index
   lanes hold complete addresses.
   - ymm8 gets 6-bit indices (vpsllq/vpsrlq $58) in lanes whose ymm6
     sign bit is set and raw ymm7 qwords elsewhere (vblendvpd).
   - The array address is copied from r14 into xmm7 and broadcast to
     all four qwords of ymm7; indices are multiplied by 4 (vpsllq $2)
     and the address is added, so the gather uses displacement 1,
     scale 1 and no base.
   - After the gather the address is subtracted from ymm8 again, ymm7
     is reloaded from offset 0 of asm operand %0 and r14 is zeroed.
     NOTE(review): presumably this keeps run-dependent addresses out
     of the printed registers, and %0 is presumably the test Block
     (offset 0 would be field a1) -- confirm. */
1097 GEN_test_Ronly(VGATHERQPD_256_2,
1098 "vpsllq $58, %%ymm7, %%ymm8;"
1099 "vpsrlq $58, %%ymm8, %%ymm8;"
1100 "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
1101 "leaq _randArray(%%rip), %%r14;"
1102 "vmovq %%r14, %%xmm7;"
1103 "vpsllq $2, %%ymm8, %%ymm8;"
1104 "vpbroadcastq %%xmm7, %%ymm7;"
1105 "vpaddq %%ymm7, %%ymm8, %%ymm8;"
1106 "vgatherqpd %%ymm6, 1(,%%ymm8,1), %%ymm9;"
1107 "vpsubq %%ymm7, %%ymm8, %%ymm8;"
1108 "vmovdqa 0(%0), %%ymm7;"
1109 "xorl %%r14d, %%r14d")
1110
/* VPGATHERDD, 128-bit: gather 32-bit integers from _randArray using
   the dword indices in xmm8, under the mask in xmm6, into xmm9.
   - vpslld/vpsrld $25 leave only the low 7 bits (0..127) of each
     dword of xmm7 in xmm8; with scale 4 and displacement 3 every such
     access stays inside the 1027-byte array.
   - vblendvps keeps the bounded index only in lanes whose xmm6 sign
     bit is set; the remaining lanes receive the raw xmm7 dword.
     NOTE(review): presumably this checks that masked-off lanes are
     never dereferenced -- confirm.
   - r14 carries the array address and is zeroed afterwards, presumably
     so that the address cannot show up in the printed results. */
1111 GEN_test_Ronly(VPGATHERDD_128,
1112 "vpslld $25, %%xmm7, %%xmm8;"
1113 "vpsrld $25, %%xmm8, %%xmm8;"
1114 "vblendvps %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
1115 "leaq _randArray(%%rip), %%r14;"
1116 "vpgatherdd %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
1117 "xorl %%r14d, %%r14d")
1118
/* VPGATHERDD, 256-bit: gather eight 32-bit integers from _randArray
   using the dword indices in ymm8, under the mask in ymm6, into ymm9.
   - vpslld/vpsrld $25 reduce each dword of ymm7 to its low 7 bits
     (0..127) in ymm8; with scale 4 and displacement 3 that stays
     inside the 1027-byte array.
   - vblendvps retains the reduced index only where the ymm6 sign bit
     is set; elsewhere the unreduced ymm7 dword is used.
     NOTE(review): presumably verifies that masked-off lanes are not
     accessed -- confirm.
   - r14 holds the array address for the gather and is cleared
     afterwards, presumably to keep the address out of the output. */
1119 GEN_test_Ronly(VPGATHERDD_256,
1120 "vpslld $25, %%ymm7, %%ymm8;"
1121 "vpsrld $25, %%ymm8, %%ymm8;"
1122 "vblendvps %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
1123 "leaq _randArray(%%rip), %%r14;"
1124 "vpgatherdd %%ymm6, 3(%%r14,%%ymm8,4), %%ymm9;"
1125 "xorl %%r14d, %%r14d")
1126
/* VPGATHERQD with an xmm index register, base+index addressing:
   gather 32-bit integers from _randArray using the qword indices in
   xmm8, under the dword mask in xmm6, into xmm9.
   - vpsllq/vpsrlq $57 keep the low 7 bits (0..127) of each qword of
     xmm7 in xmm8.
   - vpmovsxdq sign-extends the low two dwords of xmm6 into the two
     qwords of xmm9, so the dword-wide mask can drive vblendvpd on the
     qword index lanes; lanes with the sign bit clear get the raw xmm7
     qword instead of the bounded index.
   - ymm9 was used as scratch for that, so it is reloaded from byte
     offset 96 of asm operand %0.  NOTE(review): %0 is presumably the
     test Block (offset 96 would be field a4) -- confirm against
     GEN_test_Ronly.
   - r14 holds the array address and is zeroed after the gather. */
1127 GEN_test_Ronly(VPGATHERQD_128_1,
1128 "vpsllq $57, %%xmm7, %%xmm8;"
1129 "vpsrlq $57, %%xmm8, %%xmm8;"
1130 "vpmovsxdq %%xmm6, %%xmm9;"
1131 "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
1132 "vmovdqa 96(%0), %%ymm9;"
1133 "leaq _randArray(%%rip), %%r14;"
1134 "vpgatherqd %%xmm6, 3(%%r14,%%xmm8,4), %%xmm9;"
1135 "xorl %%r14d, %%r14d")
1136
/* VPGATHERQD with a ymm index register, base+index addressing:
   gather four 32-bit integers from _randArray using the qword indices
   in ymm8, under the dword mask in xmm6, into xmm9.
   - vpsllq/vpsrlq $57 keep the low 7 bits (0..127) of each qword of
     ymm7 in ymm8.
   - vpmovsxdq sign-extends the four dwords of xmm6 into the four
     qwords of ymm9 so that vblendvpd can apply the dword mask to the
     qword index lanes; lanes with the sign bit clear get the raw ymm7
     qword rather than the bounded index.
   - ymm9 is then reloaded from byte offset 96 of asm operand %0.
     NOTE(review): %0 is presumably the test Block (offset 96 would be
     field a4) -- confirm against GEN_test_Ronly.
   - r14 holds the array address and is zeroed after the gather. */
1137 GEN_test_Ronly(VPGATHERQD_256_1,
1138 "vpsllq $57, %%ymm7, %%ymm8;"
1139 "vpsrlq $57, %%ymm8, %%ymm8;"
1140 "vpmovsxdq %%xmm6, %%ymm9;"
1141 "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
1142 "vmovdqa 96(%0), %%ymm9;"
1143 "leaq _randArray(%%rip), %%r14;"
1144 "vpgatherqd %%xmm6, 3(%%r14,%%ymm8,4), %%xmm9;"
1145 "xorl %%r14d, %%r14d")
1146
/* VPGATHERQD with an xmm index register and no base register: the
   vector index lanes hold complete addresses.
   - xmm8 gets 7-bit indices in lanes whose (sign-extended) xmm6 mask
     is set and raw xmm7 qwords elsewhere; ymm9 is reloaded from
     offset 96 of asm operand %0 after being used as scratch.
   - The array address is copied from r14 into xmm7 and broadcast to
     both qwords; the indices are multiplied by 4 (vpsllq $2) and the
     address is added, so the gather uses displacement 1, scale 1 and
     no base.
   - After the gather the address is subtracted from xmm8 again, ymm7
     is reloaded from offset 0 of operand %0 and r14 is zeroed.
     NOTE(review): presumably this keeps run-dependent addresses out
     of the printed registers, and %0 is presumably the test Block
     (offsets 0 and 96 would be fields a1 and a4) -- confirm. */
1147 GEN_test_Ronly(VPGATHERQD_128_2,
1148 "vpsllq $57, %%xmm7, %%xmm8;"
1149 "vpsrlq $57, %%xmm8, %%xmm8;"
1150 "vpmovsxdq %%xmm6, %%xmm9;"
1151 "vblendvpd %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
1152 "vmovdqa 96(%0), %%ymm9;"
1153 "leaq _randArray(%%rip), %%r14;"
1154 "vmovq %%r14, %%xmm7;"
1155 "vpsllq $2, %%xmm8, %%xmm8;"
1156 "vpbroadcastq %%xmm7, %%xmm7;"
1157 "vpaddq %%xmm7, %%xmm8, %%xmm8;"
1158 "vpgatherqd %%xmm6, 1(,%%xmm8,1), %%xmm9;"
1159 "vpsubq %%xmm7, %%xmm8, %%xmm8;"
1160 "vmovdqa 0(%0), %%ymm7;"
1161 "xorl %%r14d, %%r14d")
1162
/* VPGATHERQD with a ymm index register and no base register: the
   four qword index lanes hold complete addresses.
   - ymm8 gets 7-bit indices in lanes whose xmm6 mask dword (widened
     to a qword by vpmovsxdq) has its sign bit set, and raw ymm7
     qwords elsewhere; ymm9 is reloaded from offset 96 of asm operand
     %0 after serving as scratch.
   - The array address is moved from r14 to xmm7 and broadcast to all
     four qwords of ymm7; indices are scaled by 4 (vpsllq $2) and the
     address added, so the gather uses displacement 1, scale 1 and no
     base.
   - Afterwards the address is subtracted from ymm8, ymm7 is reloaded
     from offset 0 of operand %0 and r14 is zeroed.
     NOTE(review): presumably to keep addresses out of the printed
     registers; %0 is presumably the test Block -- confirm. */
1163 GEN_test_Ronly(VPGATHERQD_256_2,
1164 "vpsllq $57, %%ymm7, %%ymm8;"
1165 "vpsrlq $57, %%ymm8, %%ymm8;"
1166 "vpmovsxdq %%xmm6, %%ymm9;"
1167 "vblendvpd %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
1168 "vmovdqa 96(%0), %%ymm9;"
1169 "leaq _randArray(%%rip), %%r14;"
1170 "vmovq %%r14, %%xmm7;"
1171 "vpsllq $2, %%ymm8, %%ymm8;"
1172 "vpbroadcastq %%xmm7, %%ymm7;"
1173 "vpaddq %%ymm7, %%ymm8, %%ymm8;"
1174 "vpgatherqd %%xmm6, 1(,%%ymm8,1), %%xmm9;"
1175 "vpsubq %%ymm7, %%ymm8, %%ymm8;"
1176 "vmovdqa 0(%0), %%ymm7;"
1177 "xorl %%r14d, %%r14d")
1178
/* VPGATHERDQ, 128-bit: gather 64-bit integers from _randArray using
   dword indices in xmm8, under the qword mask in xmm6, into xmm9.
   - vpslld/vpsrld $26 keep the low 6 bits (0..63) of each dword of
     xmm7 in xmm8; with scale 8 and displacement 3 that stays inside
     the 1027-byte array.
   - vshufps $13 copies dwords 1 and 3 of xmm6 (the upper halves, and
     so the sign bits, of its two qword lanes) into dwords 0 and 1 of
     xmm9, lining the qword mask up with the dword indices for
     vblendvps; lanes whose bit is clear get the raw xmm7 dword.
   - ymm9 is then reloaded from byte offset 96 of asm operand %0.
     NOTE(review): %0 is presumably the test Block (offset 96 would be
     field a4) -- confirm against GEN_test_Ronly.
   - r14 holds the array address and is zeroed after the gather. */
1179 GEN_test_Ronly(VPGATHERDQ_128,
1180 "vpslld $26, %%xmm7, %%xmm8;"
1181 "vpsrld $26, %%xmm8, %%xmm8;"
1182 "vshufps $13, %%xmm6, %%xmm6, %%xmm9;"
1183 "vblendvps %%xmm9, %%xmm8, %%xmm7, %%xmm8;"
1184 "vmovdqa 96(%0), %%ymm9;"
1185 "leaq _randArray(%%rip), %%r14;"
1186 "vpgatherdq %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
1187 "xorl %%r14d, %%r14d")
1188
/* VPGATHERDQ, 256-bit: gather four 64-bit integers from _randArray
   using the dword indices in xmm8, under the qword mask in ymm6,
   into ymm9.
   - vpslld/vpsrld $26 keep the low 6 bits (0..63) of each dword of
     ymm7 in ymm8.
   - vextracti128 puts the upper half of ymm6 in xmm9; vshufps $221
     then collects the odd dwords of both halves of ymm6 (the sign-bit
     halves of its four qword lanes) into the low four dwords of ymm9,
     so vblendvps can apply the qword mask to the dword indices.
     Lanes whose bit is clear get the raw ymm7 dword.
   - ymm9 is then reloaded from byte offset 96 of asm operand %0.
     NOTE(review): %0 is presumably the test Block -- confirm against
     GEN_test_Ronly.
   - r14 holds the array address and is zeroed after the gather. */
1189 GEN_test_Ronly(VPGATHERDQ_256,
1190 "vpslld $26, %%ymm7, %%ymm8;"
1191 "vpsrld $26, %%ymm8, %%ymm8;"
1192 "vextracti128 $1, %%ymm6, %%xmm9;"
1193 "vshufps $221, %%ymm9, %%ymm6, %%ymm9;"
1194 "vblendvps %%ymm9, %%ymm8, %%ymm7, %%ymm8;"
1195 "vmovdqa 96(%0), %%ymm9;"
1196 "leaq _randArray(%%rip), %%r14;"
1197 "vpgatherdq %%ymm6, 3(%%r14,%%xmm8,8), %%ymm9;"
1198 "xorl %%r14d, %%r14d")
1199
/* VPGATHERQQ, 128-bit, base+index addressing: gather two 64-bit
   integers from _randArray using the qword indices in xmm8, under the
   qword mask in xmm6, into xmm9.
   - vpsllq/vpsrlq $58 keep the low 6 bits (0..63) of each qword of
     xmm7 in xmm8; with scale 8 and displacement 3 that stays inside
     the 1027-byte array.
   - vblendvpd keeps the bounded index only where the xmm6 sign bit is
     set; other lanes receive the raw xmm7 qword.
     NOTE(review): presumably checks that masked-off lanes are never
     dereferenced -- confirm.
   - r14 holds the array address and is zeroed after the gather. */
1200 GEN_test_Ronly(VPGATHERQQ_128_1,
1201 "vpsllq $58, %%xmm7, %%xmm8;"
1202 "vpsrlq $58, %%xmm8, %%xmm8;"
1203 "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
1204 "leaq _randArray(%%rip), %%r14;"
1205 "vpgatherqq %%xmm6, 3(%%r14,%%xmm8,8), %%xmm9;"
1206 "xorl %%r14d, %%r14d")
1207
/* VPGATHERQQ, 256-bit, base+index addressing: gather four 64-bit
   integers from _randArray using the qword indices in ymm8, under the
   qword mask in ymm6, into ymm9.
   - vpsllq/vpsrlq $58 keep the low 6 bits (0..63) of each qword of
     ymm7 in ymm8; with scale 8 and displacement 3 that stays inside
     the 1027-byte array.
   - vblendvpd keeps the bounded index only where the ymm6 sign bit is
     set; other lanes receive the raw ymm7 qword.
     NOTE(review): presumably checks that masked-off lanes are never
     dereferenced -- confirm.
   - r14 holds the array address and is zeroed after the gather. */
1208 GEN_test_Ronly(VPGATHERQQ_256_1,
1209 "vpsllq $58, %%ymm7, %%ymm8;"
1210 "vpsrlq $58, %%ymm8, %%ymm8;"
1211 "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
1212 "leaq _randArray(%%rip), %%r14;"
1213 "vpgatherqq %%ymm6, 3(%%r14,%%ymm8,8), %%ymm9;"
1214 "xorl %%r14d, %%r14d")
1215
/* VPGATHERQQ, 128-bit, with no base register: the two qword index
   lanes hold complete addresses.
   - xmm8 gets 6-bit indices (vpsllq/vpsrlq $58) in lanes whose xmm6
     sign bit is set and raw xmm7 qwords elsewhere (vblendvpd).
   - The array address is copied from r14 into xmm7 and broadcast to
     both qwords; indices are multiplied by 4 (vpsllq $2) and the
     address is added, so the gather uses displacement 1, scale 1 and
     no base.
   - After the gather the address is subtracted from xmm8 again, ymm7
     is reloaded from offset 0 of asm operand %0 and r14 is zeroed.
     NOTE(review): presumably this keeps run-dependent addresses out
     of the printed registers, and %0 is presumably the test Block
     (offset 0 would be field a1) -- confirm. */
1216 GEN_test_Ronly(VPGATHERQQ_128_2,
1217 "vpsllq $58, %%xmm7, %%xmm8;"
1218 "vpsrlq $58, %%xmm8, %%xmm8;"
1219 "vblendvpd %%xmm6, %%xmm8, %%xmm7, %%xmm8;"
1220 "leaq _randArray(%%rip), %%r14;"
1221 "vmovq %%r14, %%xmm7;"
1222 "vpsllq $2, %%xmm8, %%xmm8;"
1223 "vpbroadcastq %%xmm7, %%xmm7;"
1224 "vpaddq %%xmm7, %%xmm8, %%xmm8;"
1225 "vpgatherqq %%xmm6, 1(,%%xmm8,1), %%xmm9;"
1226 "vpsubq %%xmm7, %%xmm8, %%xmm8;"
1227 "vmovdqa 0(%0), %%ymm7;"
1228 "xorl %%r14d, %%r14d")
1229
/* VPGATHERQQ, 256-bit, with no base register: the four qword index
   lanes hold complete addresses.
   - ymm8 gets 6-bit indices (vpsllq/vpsrlq $58) in lanes whose ymm6
     sign bit is set and raw ymm7 qwords elsewhere (vblendvpd).
   - The array address is copied from r14 into xmm7 and broadcast to
     all four qwords of ymm7; indices are multiplied by 4 (vpsllq $2)
     and the address is added, so the gather uses displacement 1,
     scale 1 and no base.
   - After the gather the address is subtracted from ymm8 again, ymm7
     is reloaded from offset 0 of asm operand %0 and r14 is zeroed.
     NOTE(review): presumably this keeps run-dependent addresses out
     of the printed registers, and %0 is presumably the test Block
     (offset 0 would be field a1) -- confirm. */
1230 GEN_test_Ronly(VPGATHERQQ_256_2,
1231 "vpsllq $58, %%ymm7, %%ymm8;"
1232 "vpsrlq $58, %%ymm8, %%ymm8;"
1233 "vblendvpd %%ymm6, %%ymm8, %%ymm7, %%ymm8;"
1234 "leaq _randArray(%%rip), %%r14;"
1235 "vmovq %%r14, %%xmm7;"
1236 "vpsllq $2, %%ymm8, %%ymm8;"
1237 "vpbroadcastq %%xmm7, %%ymm7;"
1238 "vpaddq %%ymm7, %%ymm8, %%ymm8;"
1239 "vpgatherqq %%ymm6, 1(,%%ymm8,1), %%ymm9;"
1240 "vpsubq %%ymm7, %%ymm8, %%ymm8;"
1241 "vmovdqa 0(%0), %%ymm7;"
1242 "xorl %%r14d, %%r14d")
1243
1244 /* Comment duplicated above, for convenient reference:
1245 Allowed operands in test insns:
1246 Reg form: %ymm6, %ymm7, %ymm8, %ymm9 and %r14.
1247 Mem form: (%rax), %ymm7, %ymm8, %ymm9 and %r14.
1248 Imm8 etc fields are also allowed, where they make sense.
1249 Both forms may use ymm0 as scratch. Mem form may also use
1250 ymm6 as scratch.
1251 */
1252
/* Number of consecutive runs DO_D gives each test function. */
#define N_DEFAULT_ITERS 3

/* DO_N(iters, NAME): call test_NAME() `iters` times back to back.
   The count expression is re-evaluated before every call, and the
   do/while(0) wrapper lets the macro be used as a single statement. */
#define DO_N(_iters, _testfn)                    \
   do {                                          \
      int _done = 0;                             \
      while (_done < (_iters)) {                 \
         test_##_testfn();                       \
         _done++;                                \
      }                                          \
   } while (0)

/* DO_D(NAME): DO_N with the default repetition count. */
#define DO_D(_testfn)  DO_N(N_DEFAULT_ITERS, _testfn)
1261
1262
main(void)1263 int main ( void )
1264 {
1265 DO_D( VPOR_256 );
1266 DO_D( VPXOR_256 );
1267 DO_D( VPSUBB_256 );
1268 DO_D( VPSUBD_256 );
1269 DO_D( VPADDD_256 );
1270 DO_D( VPMOVZXWD_256 );
1271 DO_D( VPMOVZXBW_256 );
1272 DO_D( VPBLENDVB_256 );
1273 DO_D( VPMINSD_256 );
1274 DO_D( VPMAXSD_256 );
1275 DO_D( VPSHUFB_256 );
1276 DO_D( VPUNPCKLBW_256 );
1277 DO_D( VPUNPCKHBW_256 );
1278 DO_D( VPABSD_256 );
1279 DO_D( VPACKUSWB_256 );
1280 DO_D( VPMOVMSKB_256 );
1281 DO_D( VPAND_256 );
1282 DO_D( VPCMPEQB_256 );
1283 DO_D( VPSHUFLW_0x39_256 );
1284 DO_D( VPSHUFHW_0x39_256 );
1285 DO_D( VPMULLW_256 );
1286 DO_D( VPADDUSW_256 );
1287 DO_D( VPMULHUW_256 );
1288 DO_D( VPADDUSB_256 );
1289 DO_D( VPUNPCKLWD_256 );
1290 DO_D( VPUNPCKHWD_256 );
1291 DO_D( VPSLLD_0x05_256 );
1292 DO_D( VPSRLD_0x05_256 );
1293 DO_D( VPSRAD_0x05_256 );
1294 DO_D( VPSUBUSB_256 );
1295 DO_D( VPSUBSB_256 );
1296 DO_D( VPSRLDQ_0x05_256 );
1297 DO_D( VPSLLDQ_0x05_256 );
1298 DO_D( VPANDN_256 );
1299 DO_D( VPUNPCKLQDQ_256 );
1300 DO_D( VPSRLW_0x05_256 );
1301 DO_D( VPSLLW_0x05_256 );
1302 DO_D( VPADDW_256 );
1303 DO_D( VPACKSSDW_256 );
1304 DO_D( VPUNPCKLDQ_256 );
1305 DO_D( VPCMPEQD_256 );
1306 DO_D( VPSHUFD_0x39_256 );
1307 DO_D( VPADDQ_256 );
1308 DO_D( VPSUBQ_256 );
1309 DO_D( VPSUBW_256 );
1310 DO_D( VPCMPEQQ_256 );
1311 DO_D( VPCMPGTQ_256 );
1312 DO_D( VPSRLQ_0x05_256 );
1313 DO_D( VPMULUDQ_256 );
1314 DO_D( VPMULDQ_256 );
1315 DO_D( VPSLLQ_0x05_256 );
1316 DO_D( VPMAXUD_256 );
1317 DO_D( VPMINUD_256 );
1318 DO_D( VPMULLD_256 );
1319 DO_D( VPMAXUW_256 );
1320 DO_D( VPMINUW_256 );
1321 DO_D( VPMAXSW_256 );
1322 DO_D( VPMINSW_256 );
1323 DO_D( VPMAXUB_256 );
1324 DO_D( VPMINUB_256 );
1325 DO_D( VPMAXSB_256 );
1326 DO_D( VPMINSB_256 );
1327 DO_D( VPMOVSXBW_256 );
1328 DO_D( VPSUBUSW_256 );
1329 DO_D( VPSUBSW_256 );
1330 DO_D( VPCMPEQW_256 );
1331 DO_D( VPADDB_256 );
1332 DO_D( VPUNPCKHDQ_256 );
1333 DO_D( VPMOVSXDQ_256 );
1334 DO_D( VPMOVSXWD_256 );
1335 DO_D( VPMULHW_256 );
1336 DO_D( VPUNPCKHQDQ_256 );
1337 DO_D( VPSRAW_0x05_256 );
1338 DO_D( VPCMPGTB_256 );
1339 DO_D( VPCMPGTW_256 );
1340 DO_D( VPCMPGTD_256 );
1341 DO_D( VPMOVZXBD_256 );
1342 DO_D( VPMOVSXBD_256 );
1343 DO_D( VPALIGNR_256_1of3 );
1344 DO_D( VPALIGNR_256_2of3 );
1345 DO_D( VPALIGNR_256_3of3 );
1346 DO_D( VPBLENDW_256_0x00 );
1347 DO_D( VPBLENDW_256_0xFE );
1348 DO_D( VPBLENDW_256_0x30 );
1349 DO_D( VPBLENDW_256_0x21 );
1350 DO_D( VPBLENDW_256_0xD7 );
1351 DO_D( VPBLENDW_256_0xB5 );
1352 DO_D( VPBLENDW_256_0x85 );
1353 DO_D( VPBLENDW_256_0x29 );
1354 DO_D( VPSLLW_256 );
1355 DO_D( VPSRLW_256 );
1356 DO_D( VPSRAW_256 );
1357 DO_D( VPSLLD_256 );
1358 DO_D( VPSRLD_256 );
1359 DO_D( VPSRAD_256 );
1360 DO_D( VPSLLQ_256 );
1361 DO_D( VPSRLQ_256 );
1362 DO_D( VPMADDWD_256 );
1363 DO_D( VMOVNTDQA_256 );
1364 DO_D( VPACKSSWB_256 );
1365 DO_D( VPAVGB_256 );
1366 DO_D( VPAVGW_256 );
1367 DO_D( VPADDSB_256 );
1368 DO_D( VPADDSW_256 );
1369 DO_D( VPHADDW_256 );
1370 DO_D( VPHADDD_256 );
1371 DO_D( VPHADDSW_256 );
1372 DO_D( VPMADDUBSW_256 );
1373 DO_D( VPHSUBW_256 );
1374 DO_D( VPHSUBD_256 );
1375 DO_D( VPHSUBSW_256 );
1376 DO_D( VPABSB_256 );
1377 DO_D( VPABSW_256 );
1378 DO_D( VPMOVSXBQ_256 );
1379 DO_D( VPMOVSXWQ_256 );
1380 DO_D( VPACKUSDW_256 );
1381 DO_D( VPMOVZXBQ_256 );
1382 DO_D( VPMOVZXWQ_256 );
1383 DO_D( VPMOVZXDQ_256 );
1384 DO_D( VMPSADBW_256_0x0 );
1385 DO_D( VMPSADBW_256_0x39 );
1386 DO_D( VMPSADBW_256_0x32 );
1387 DO_D( VMPSADBW_256_0x2b );
1388 DO_D( VMPSADBW_256_0x24 );
1389 DO_D( VMPSADBW_256_0x1d );
1390 DO_D( VMPSADBW_256_0x16 );
1391 DO_D( VMPSADBW_256_0x0f );
1392 DO_D( VPSADBW_256 );
1393 DO_D( VPSIGNB_256 );
1394 DO_D( VPSIGNW_256 );
1395 DO_D( VPSIGND_256 );
1396 DO_D( VPMULHRSW_256 );
1397 DO_D( VBROADCASTI128 );
1398 DO_D( VEXTRACTI128_0x0 );
1399 DO_D( VEXTRACTI128_0x1 );
1400 DO_D( VINSERTI128_0x0 );
1401 DO_D( VINSERTI128_0x1 );
1402 DO_D( VPERM2I128_0x00 );
1403 DO_D( VPERM2I128_0xFF );
1404 DO_D( VPERM2I128_0x30 );
1405 DO_D( VPERM2I128_0x21 );
1406 DO_D( VPERM2I128_0x12 );
1407 DO_D( VPERM2I128_0x03 );
1408 DO_D( VPERM2I128_0x85 );
1409 DO_D( VPERM2I128_0x5A );
1410 DO_D( VBROADCASTSS_128 );
1411 DO_D( VBROADCASTSS_256 );
1412 DO_D( VBROADCASTSD_256 );
1413 DO_D( VPERMD );
1414 DO_D( VPERMQ_0x00 );
1415 DO_D( VPERMQ_0xFE );
1416 DO_D( VPERMQ_0x30 );
1417 DO_D( VPERMQ_0x21 );
1418 DO_D( VPERMQ_0xD7 );
1419 DO_D( VPERMQ_0xB5 );
1420 DO_D( VPERMQ_0x85 );
1421 DO_D( VPERMQ_0x29 );
1422 DO_D( VPERMPS );
1423 DO_D( VPERMPD_0x00 );
1424 DO_D( VPERMPD_0xFE );
1425 DO_D( VPERMPD_0x30 );
1426 DO_D( VPERMPD_0x21 );
1427 DO_D( VPERMPD_0xD7 );
1428 DO_D( VPERMPD_0xB5 );
1429 DO_D( VPERMPD_0x85 );
1430 DO_D( VPERMPD_0x29 );
1431 DO_D( VPBLENDD_128_0x00 );
1432 DO_D( VPBLENDD_128_0x02 );
1433 DO_D( VPBLENDD_128_0x04 );
1434 DO_D( VPBLENDD_128_0x06 );
1435 DO_D( VPBLENDD_128_0x08 );
1436 DO_D( VPBLENDD_128_0x0A );
1437 DO_D( VPBLENDD_128_0x0C );
1438 DO_D( VPBLENDD_128_0x0E );
1439 DO_D( VPBLENDD_256_0x00 );
1440 DO_D( VPBLENDD_256_0xFE );
1441 DO_D( VPBLENDD_256_0x30 );
1442 DO_D( VPBLENDD_256_0x21 );
1443 DO_D( VPBLENDD_256_0xD7 );
1444 DO_D( VPBLENDD_256_0xB5 );
1445 DO_D( VPBLENDD_256_0x85 );
1446 DO_D( VPBLENDD_256_0x29 );
1447 DO_D( VPSLLVD_128 );
1448 DO_D( VPSLLVD_256 );
1449 DO_D( VPSLLVQ_128 );
1450 DO_D( VPSLLVQ_256 );
1451 DO_D( VPSRLVD_128 );
1452 DO_D( VPSRLVD_256 );
1453 DO_D( VPSRLVQ_128 );
1454 DO_D( VPSRLVQ_256 );
1455 DO_D( VPSRAVD_128 );
1456 DO_D( VPSRAVD_256 );
1457 DO_D( VPBROADCASTB_128 );
1458 DO_D( VPBROADCASTB_256 );
1459 DO_D( VPBROADCASTW_128 );
1460 DO_D( VPBROADCASTW_256 );
1461 DO_D( VPBROADCASTD_128 );
1462 DO_D( VPBROADCASTD_256 );
1463 DO_D( VPBROADCASTQ_128 );
1464 DO_D( VPBROADCASTQ_256 );
1465 DO_D( VPMASKMOVD_128_LoadForm );
1466 DO_D( VPMASKMOVD_256_LoadForm );
1467 DO_D( VPMASKMOVQ_128_LoadForm );
1468 DO_D( VPMASKMOVQ_256_LoadForm );
1469 DO_D( VPMASKMOVD_128_StoreForm );
1470 DO_D( VPMASKMOVD_256_StoreForm );
1471 DO_D( VPMASKMOVQ_128_StoreForm );
1472 DO_D( VPMASKMOVQ_256_StoreForm );
1473 #if defined(VGO_darwin)
1474 { int i; for (i = 0; i < sizeof(randArray); i++) randArray[i] = randUChar(); }
1475 #else
1476 { int i; for (i = 0; i < sizeof(_randArray); i++) _randArray[i] = randUChar(); }
1477 #endif
1478 DO_D( VGATHERDPS_128 );
1479 DO_D( VGATHERDPS_256 );
1480 DO_D( VGATHERQPS_128_1 );
1481 DO_D( VGATHERQPS_256_1 );
1482 DO_D( VGATHERQPS_128_2 );
1483 DO_D( VGATHERQPS_256_2 );
1484 DO_D( VGATHERDPD_128 );
1485 DO_D( VGATHERDPD_256 );
1486 DO_D( VGATHERQPD_128_1 );
1487 DO_D( VGATHERQPD_256_1 );
1488 DO_D( VGATHERQPD_128_2 );
1489 DO_D( VGATHERQPD_256_2 );
1490 DO_D( VPGATHERDD_128 );
1491 DO_D( VPGATHERDD_256 );
1492 DO_D( VPGATHERQD_128_1 );
1493 DO_D( VPGATHERQD_256_1 );
1494 DO_D( VPGATHERQD_128_2 );
1495 DO_D( VPGATHERQD_256_2 );
1496 DO_D( VPGATHERDQ_128 );
1497 DO_D( VPGATHERDQ_256 );
1498 DO_D( VPGATHERQQ_128_1 );
1499 DO_D( VPGATHERQQ_256_1 );
1500 DO_D( VPGATHERQQ_128_2 );
1501 DO_D( VPGATHERQQ_256_2 );
1502 return 0;
1503 }
1504