1
/* Test for a number of SSE instructions which were seen in the wild
   with a bogus (irrelevant) REX.W bit in their prefixes.  Some just
   have REX = 0x48 where REX.W is irrelevant, hence the whole REX
   prefix is pointless.  Probably related to #133962. */
6
7 #include <stdlib.h>
8 #include <stdio.h>
9 #include <assert.h>
10 #include "tests/malloc.h"
11
/* A single unsigned byte; the unit in which register and memory
   images are stored and printed. */
typedef  unsigned char  UChar;

/* One 128-bit value held as 16 bytes.  The byte array is aligned to
   16 bytes. */
typedef
   struct { __attribute__((aligned(16))) UChar b[16]; }
   UWord128;

/* Image of sixteen 128-bit registers (%xmm0 .. %xmm15), one UWord128
   per register. */
typedef
   struct { UWord128 reg[16]; }
   XMMRegs;

/* Scratch memory that the tested instructions read or write: five
   consecutive 128-bit words. */
typedef
   struct { UWord128 dqw[5]; }
   Mem;
25
/* Print the 16 bytes of *w to stdout as 32 hex digits, starting with
   b[15] and ending with b[0].  Every '0' digit is printed as '.'.
   No newline is emitted. */
void pp_UWord128 ( UWord128* w ) {
   int i;
   char buf[3];   /* two hex digits plus the terminating NUL */
   for (i = 15; i >= 0; i--) {
      /* snprintf bounds the write to buf and always NUL-terminates,
         so no separate terminator check is needed. */
      snprintf(buf, sizeof buf, "%02x", (unsigned int)w->b[i]);
      if (buf[0] == '0') buf[0] = '.';
      if (buf[1] == '0') buf[1] = '.';
      printf("%s", buf);
   }
}
38
/* Print all sixteen registers of *regs to stdout, one per line,
   inside a brace-delimited group headed by the caller-supplied label
   `who`.  Each register is rendered by pp_UWord128. */
void pp_XMMRegs ( char* who, XMMRegs* regs ) {
   int i;
   printf ("%s (xmms in order [15..0]) {\n", who );
   for (i = 0; i < 16; i++) {
      printf(" %%xmm%2d ", i);
      /* Pass the address of register i (the address-of operator had
         been lost from this expression). */
      pp_UWord128( &regs->reg[i] );
      printf("\n");
   }
   printf("}\n");
}
49
/* Print the five 128-bit words of *mem to stdout, one per line and
   prefixed by their index, inside a brace-delimited group headed by
   the caller-supplied label `who`.  Each word is rendered by
   pp_UWord128. */
void pp_Mem ( char* who, Mem* mem ) {
   int i;
   printf ("%s (dqws in order [15 .. 0]) {\n", who );
   for (i = 0; i < 5; i++) {
      printf(" [%d] ", i);
      pp_UWord128( &mem->dqw[i] );
      printf("\n");
   }
   printf("}\n");
}
60
/* Byte-wise dst ^= src over all 16 bytes.  *src is only read. */
void xor_UWord128( UWord128* src, UWord128* dst ) {
   int i;
   for (i = 0; i < 16; i++)
      dst->b[i] ^= src->b[i];
}
/* dst ^= src for each of the sixteen registers, via xor_UWord128. */
void xor_XMMRegs ( XMMRegs* src, XMMRegs* dst ) {
   int i;
   for (i = 0; i < 16; i++)
      xor_UWord128( &src->reg[i], &dst->reg[i] );
}
71
/* dst ^= src for each of the five 128-bit memory words, via
   xor_UWord128. */
void xor_Mem ( Mem* src, Mem* dst ) {
   int i;
   for (i = 0; i < 5; i++)
      xor_UWord128( &src->dqw[i], &dst->dqw[i] );
}
77
/* Fill *regs and *mem with a fixed, reproducible byte pattern.

   One counter runs across both structures: register bytes get
   0x51 + (ctr % 7) and memory bytes get 0x52 + (ctr % 13), with the
   counter continuing from where the register fill stopped.  Calling
   this twice therefore produces identical images, which is what
   after_test relies on to compute a difference. */
void setup_regs_mem ( XMMRegs* regs, Mem* mem ) {
   int ctr, i, j;
   ctr = 0;
   for (i = 0; i < 16; i++) {
      for (j = 0; j < 16; j++)
         regs->reg[i].b[j] = 0x51 + (ctr++ % 7);
   }
   for (i = 0; i < 5; i++) {
      for (j = 0; j < 16; j++)
         mem->dqw[i].b[j] = 0x52 + (ctr++ % 13);
   }
}
90
/* Put *regs and *mem into the known initial state (see
   setup_regs_mem) before an instruction under test is executed. */
void before_test ( XMMRegs* regs, Mem* mem ) {
   setup_regs_mem( regs, mem );
}
94
/* Report what an instruction changed.

   Rebuilds the initial register/memory images in local copies, XORs
   the observed state (*regs, *mem) into them, and prints the result:
   memory first, then registers, then a blank line.  Bytes that still
   hold their initial value XOR to zero.  `who` is the test label; it
   is printed as:  after "<who>". */
void after_test ( char* who, XMMRegs* regs, Mem* mem ) {
   XMMRegs rdiff;
   Mem     mdiff;
   char    s[128];
   setup_regs_mem( &rdiff, &mdiff );
   xor_XMMRegs( regs, &rdiff );
   xor_Mem( mem, &mdiff );
   /* Bounded formatting: an over-long label is truncated rather than
      written past the end of s. */
   snprintf(s, sizeof s, "after \"%s\"", who );
   pp_Mem( s, &mdiff );
   pp_XMMRegs( s, &rdiff );
   printf("\n");
}
107
/* Asm text that loads %xmm0 .. %xmm15 from the 256 bytes addressed by
   %r14, 16 bytes per register, using movupd.  Written for use inside
   an extended-asm template, hence the doubled '%'.  Every line ends
   in a newline so more asm text can be appended directly. */
#define LOAD_XMMREGS_from_r14 \
   "\tmovupd 0(%%r14), %%xmm0\n" \
   "\tmovupd 16(%%r14), %%xmm1\n" \
   "\tmovupd 32(%%r14), %%xmm2\n" \
   "\tmovupd 48(%%r14), %%xmm3\n" \
   "\tmovupd 64(%%r14), %%xmm4\n" \
   "\tmovupd 80(%%r14), %%xmm5\n" \
   "\tmovupd 96(%%r14), %%xmm6\n" \
   "\tmovupd 112(%%r14), %%xmm7\n" \
   "\tmovupd 128(%%r14), %%xmm8\n" \
   "\tmovupd 144(%%r14), %%xmm9\n" \
   "\tmovupd 160(%%r14), %%xmm10\n" \
   "\tmovupd 176(%%r14), %%xmm11\n" \
   "\tmovupd 192(%%r14), %%xmm12\n" \
   "\tmovupd 208(%%r14), %%xmm13\n" \
   "\tmovupd 224(%%r14), %%xmm14\n" \
   "\tmovupd 240(%%r14), %%xmm15\n"

/* Asm text that stores %xmm0 .. %xmm15 back to the 256 bytes
   addressed by %r14; the mirror image of LOAD_XMMREGS_from_r14.  The
   last line has no trailing newline, so this is meant to be the final
   piece of an asm template. */
#define SAVE_XMMREGS_to_r14 \
   "\tmovupd %%xmm0, 0(%%r14)\n" \
   "\tmovupd %%xmm1, 16(%%r14)\n" \
   "\tmovupd %%xmm2, 32(%%r14)\n" \
   "\tmovupd %%xmm3, 48(%%r14)\n" \
   "\tmovupd %%xmm4, 64(%%r14)\n" \
   "\tmovupd %%xmm5, 80(%%r14)\n" \
   "\tmovupd %%xmm6, 96(%%r14)\n" \
   "\tmovupd %%xmm7, 112(%%r14)\n" \
   "\tmovupd %%xmm8, 128(%%r14)\n" \
   "\tmovupd %%xmm9, 144(%%r14)\n" \
   "\tmovupd %%xmm10, 160(%%r14)\n" \
   "\tmovupd %%xmm11, 176(%%r14)\n" \
   "\tmovupd %%xmm12, 192(%%r14)\n" \
   "\tmovupd %%xmm13, 208(%%r14)\n" \
   "\tmovupd %%xmm14, 224(%%r14)\n" \
   "\tmovupd %%xmm15, 240(%%r14)"

/* Clobber-list entries naming all sixteen xmm registers, for asm
   statements that use the two macros above. */
#define XMMREGS \
   "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", \
   "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
147
#if 0
   /* Boilerplate for test.  Never compiled: copy it into main() and
      fill in the placeholders:
        %%rx        base register used by the encoded instruction
        .byte 0x    the raw instruction bytes, REX prefix included
        -x          the instruction's displacement, negated, so that
                    the effective address is mem->dqw[2]
        "x"         the base register again, as a clobber
        ""          the label passed to after_test */
   {
   before_test( regs, mem );
   __asm__ __volatile__(
      "movq %0, %%r14\n"
      "\tmovq %1, %%r15\n"
      LOAD_XMMREGS_from_r14
      "\tmovq %%r15, %%rx\n"
      "\t.byte 0x\n"
      SAVE_XMMREGS_to_r14
      : /*out*/ : /*in*/ "r"(regs), "r"( -x + (char*)&mem->dqw[2] )
      : /*trash*/ "r14","r15","memory", XMMREGS,
        "x"
   );
   after_test( "", regs, mem );
   }
#endif
166
main(void)167 int main ( void )
168 {
169 XMMRegs* regs;
170 Mem* mem;
171 regs = memalign16(sizeof(XMMRegs) + 16);
172 mem = memalign16(sizeof(Mem) + 16);
173
174 /* addpd mem, reg 66 49 0f 58 48 00 rex.WB addpd 0x0(%r8),%xmm1 */
175 {
176 before_test( regs, mem );
177 __asm__ __volatile__(
178 "movq %0, %%r14\n"
179 "\tmovq %1, %%r15\n"
180 LOAD_XMMREGS_from_r14
181 "\tmovq %%r15, %%r8\n"
182 "\t.byte 0x66,0x49,0x0f,0x58,0x48,0x00\n"
183 SAVE_XMMREGS_to_r14
184 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
185 : /*trash*/ "r14","r15","memory", XMMREGS,
186 "r8"
187 );
188 after_test( "rex.WB addpd 0x0(%r8),%xmm1", regs, mem );
189 }
190
191 /* addsd mem, reg f2 48 0f 58 27 rex.W addsd (%rdi),%xmm4 */
192 {
193 before_test( regs, mem );
194 __asm__ __volatile__(
195 "movq %0, %%r14\n"
196 "\tmovq %1, %%r15\n"
197 LOAD_XMMREGS_from_r14
198 "\tmovq %%r15, %%rdi\n"
199 "\t.byte 0xf2,0x48,0x0f,0x58,0x27\n"
200 SAVE_XMMREGS_to_r14
201 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
202 : /*trash*/ "r14","r15","memory", XMMREGS,
203 "rdi"
204 );
205 after_test( "rex.W addsd (%rdi),%xmm4", regs, mem );
206 }
207
208 /* movapd mem, reg 66 48 0f 28 0a rex.W movapd (%rdx),%xmm1 */
209 {
210 before_test( regs, mem );
211 __asm__ __volatile__(
212 "movq %0, %%r14\n"
213 "\tmovq %1, %%r15\n"
214 LOAD_XMMREGS_from_r14
215 "\tmovq %%r15, %%rdx\n"
216 "\t.byte 0x66,0x48,0x0f,0x28,0x0a\n"
217 SAVE_XMMREGS_to_r14
218 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
219 : /*trash*/ "r14","r15","memory", XMMREGS,
220 "rdx"
221 );
222 after_test( "rex.W movapd (%rdx),%xmm1", regs, mem );
223 }
224
225 /* movapd reg, mem 66 48 0f 29 0a rex.W movapd %xmm1,(%rdx) */
226 {
227 before_test( regs, mem );
228 __asm__ __volatile__(
229 "movq %0, %%r14\n"
230 "\tmovq %1, %%r15\n"
231 LOAD_XMMREGS_from_r14
232 "\tmovq %%r15, %%rdx\n"
233 "\t.byte 0x66,0x48,0x0f,0x29,0x0a\n"
234 SAVE_XMMREGS_to_r14
235 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
236 : /*trash*/ "r14","r15","memory", XMMREGS,
237 "rdx"
238 );
239 after_test( "rex.W movapd %xmm1,(%rdx)", regs, mem );
240 }
241
242 /* movaps mem, reg 48 0f 28 42 30 rex.W movaps 0x30(%rdx),%xmm0 */
243 {
244 before_test( regs, mem );
245 __asm__ __volatile__(
246 "movq %0, %%r14\n"
247 "\tmovq %1, %%r15\n"
248 LOAD_XMMREGS_from_r14
249 "\tmovq %%r15, %%rdx\n"
250 "\t.byte 0x48,0x0f,0x28,0x42,0x30\n"
251 SAVE_XMMREGS_to_r14
252 : /*out*/ : /*in*/ "r"(regs), "r"( -0x30 + (char*)&mem->dqw[2] )
253 : /*trash*/ "r14","r15","memory", XMMREGS,
254 "rdx"
255 );
256 after_test( "movaps 0x30(%rdx),%xmm0", regs, mem );
257 }
258
259 /* movaps reg, mem 49 0f 29 48 00 rex.WB movaps %xmm1,0x0(%r8) */
260 {
261 before_test( regs, mem );
262 __asm__ __volatile__(
263 "movq %0, %%r14\n"
264 "\tmovq %1, %%r15\n"
265 LOAD_XMMREGS_from_r14
266 "\tmovq %%r15, %%r8\n"
267 "\t.byte 0x49,0x0f,0x29,0x48,0x00\n"
268 SAVE_XMMREGS_to_r14
269 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
270 : /*trash*/ "r14","r15","memory", XMMREGS,
271 "r8"
272 );
273 after_test( "rex.WB movaps %xmm1,0x0(%r8)", regs, mem );
274 }
275
276 /* movddup mem, reg f2 48 0f 12 2a rex.W movddup (%rdx),%xmm5 */
277 {
278 before_test( regs, mem );
279 __asm__ __volatile__(
280 "movq %0, %%r14\n"
281 "\tmovq %1, %%r15\n"
282 LOAD_XMMREGS_from_r14
283 "\tmovq %%r15, %%rdx\n"
284 "\t.byte 0xf2,0x48,0x0f,0x12,0x2a\n"
285 SAVE_XMMREGS_to_r14
286 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
287 : /*trash*/ "r14","r15","memory", XMMREGS,
288 "rdx"
289 );
290 after_test( "movddup (%rdx),%xmm5", regs, mem );
291 }
292
293 /* movhpd mem, reg 66 48 0f 16 06 rex.W movhpd (%rsi),%xmm0 */
294 {
295 before_test( regs, mem );
296 __asm__ __volatile__(
297 "movq %0, %%r14\n"
298 "\tmovq %1, %%r15\n"
299 LOAD_XMMREGS_from_r14
300 "\tmovq %%r15, %%rsi\n"
301 "\t.byte 0x66,0x48,0x0f,0x16,0x06\n"
302 SAVE_XMMREGS_to_r14
303 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
304 : /*trash*/ "r14","r15","memory", XMMREGS,
305 "rsi"
306 );
307 after_test( "rex.W movhpd (%rsi),%xmm0", regs, mem );
308 }
309
310 /* movhpd reg, mem 66 48 0f 17 07 rex.W movhpd %xmm0,(%rdi) */
311 {
312 before_test( regs, mem );
313 __asm__ __volatile__(
314 "movq %0, %%r14\n"
315 "\tmovq %1, %%r15\n"
316 LOAD_XMMREGS_from_r14
317 "\tmovq %%r15, %%rdi\n"
318 "\t.byte 0x66,0x48,0x0f,0x17,0x07\n"
319 SAVE_XMMREGS_to_r14
320 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
321 : /*trash*/ "r14","r15","memory", XMMREGS,
322 "rdi"
323 );
324 after_test( "rex.W movhpd %xmm0,(%rdi)", regs, mem );
325 }
326
327 /* movhps mem, reg 48 0f 16 36 rex.W movhps (%rsi),%xmm6 */
328 {
329 before_test( regs, mem );
330 __asm__ __volatile__(
331 "movq %0, %%r14\n"
332 "\tmovq %1, %%r15\n"
333 LOAD_XMMREGS_from_r14
334 "\tmovq %%r15, %%rsi\n"
335 "\t.byte 0x48,0x0f,0x16,0x36\n"
336 SAVE_XMMREGS_to_r14
337 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
338 : /*trash*/ "r14","r15","memory", XMMREGS,
339 "rsi"
340 );
341 after_test( "rex.W movhps (%rsi),%xmm6", regs, mem );
342 }
343 /* movhps reg, mem 49 0f 17 03 rex.WB movhps %xmm0,(%r11) */
344 {
345 before_test( regs, mem );
346 __asm__ __volatile__(
347 "movq %0, %%r14\n"
348 "\tmovq %1, %%r15\n"
349 LOAD_XMMREGS_from_r14
350 "\tmovq %%r15, %%r11\n"
351 "\t.byte 0x49,0x0F,0x17,0x03\n" /* rex.WB movhps %xmm0,(%r11) */
352 SAVE_XMMREGS_to_r14
353 : /*out*/ : /*in*/ "r"(regs), "r"( 0 + (char*)&mem->dqw[2] )
354 : /*trash*/ "r14","r15","memory", XMMREGS,
355 "r11"
356 );
357 after_test( "rex.WB movhps %xmm0,(%r11)", regs, mem );
358 }
359
360 /* movlpd mem, reg 66 48 0f 12 4a 00 rex.W movlpd 0x0(%rdx),%xmm1 */
361 {
362 before_test( regs, mem );
363 __asm__ __volatile__(
364 "movq %0, %%r14\n"
365 "\tmovq %1, %%r15\n"
366 LOAD_XMMREGS_from_r14
367 "\tmovq %%r15, %%rdx\n"
368 "\t.byte 0x66,0x48,0x0f,0x12,0x4a,0x00\n"
369 SAVE_XMMREGS_to_r14
370 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
371 : /*trash*/ "r14","r15","memory", XMMREGS,
372 "rdx"
373 );
374 after_test( "rex.W movlpd 0x0(%rdx),%xmm1", regs, mem );
375 }
376
377 /* movlpd reg, mem 66 48 0f 13 30 rex.W movlpd %xmm6,(%rax) */
378 {
379 before_test( regs, mem );
380 __asm__ __volatile__(
381 "movq %0, %%r14\n"
382 "\tmovq %1, %%r15\n"
383 LOAD_XMMREGS_from_r14
384 "\tmovq %%r15, %%rax\n"
385 "\t.byte 0x66,0x48,0x0f,0x13,0x30\n"
386 SAVE_XMMREGS_to_r14
387 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
388 : /*trash*/ "r14","r15","memory", XMMREGS,
389 "rax"
390 );
391 after_test( "rex.W movlpd %xmm6,(%rax)", regs, mem );
392 }
393
394 /* movlps mem, reg 48 0f 12 07 rex.W movlps (%rdi),%xmm0 */
395 {
396 before_test( regs, mem );
397 __asm__ __volatile__(
398 "movq %0, %%r14\n"
399 "\tmovq %1, %%r15\n"
400 LOAD_XMMREGS_from_r14
401 "\tmovq %%r15, %%rdi\n"
402 "\t.byte 0x48,0x0f,0x12,0x07\n"
403 SAVE_XMMREGS_to_r14
404 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
405 : /*trash*/ "r14","r15","memory", XMMREGS,
406 "rdi"
407 );
408 after_test( "rex.W movlps (%rdi),%xmm0", regs, mem );
409 }
410
411 /* movlps reg, mem 49 0f 13 02 rex.WB movlps %xmm0,(%r10) */
412 {
413 before_test( regs, mem );
414 __asm__ __volatile__(
415 "movq %0, %%r14\n"
416 "\tmovq %1, %%r15\n"
417 LOAD_XMMREGS_from_r14
418 "\tmovq %%r15, %%r10\n"
419 "\t.byte 0x49,0x0f,0x13,0x02\n"
420 SAVE_XMMREGS_to_r14
421 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
422 : /*trash*/ "r14","r15","memory", XMMREGS,
423 "r10"
424 );
425 after_test( "rex.WB movlps %xmm0,(%r10)", regs, mem );
426 }
427
428 /* movq mem, reg f3 48 0f 7e 00 rex.W movq (%rax),%xmm0 */
429 {
430 before_test( regs, mem );
431 __asm__ __volatile__(
432 "movq %0, %%r14\n"
433 "\tmovq %1, %%r15\n"
434 LOAD_XMMREGS_from_r14
435 "\tmovq %%r15, %%rax\n"
436 "\t.byte 0xf3,0x48,0x0f,0x7e,0x00\n"
437 SAVE_XMMREGS_to_r14
438 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
439 : /*trash*/ "r14","r15","memory", XMMREGS,
440 "rax"
441 );
442 after_test( "rex.W movq (%rax),%xmm0", regs, mem );
443 }
444
445 /* movq reg, mem 66 48 0f d6 00 rex.W movq %xmm0,(%rax) */
446 {
447 before_test( regs, mem );
448 __asm__ __volatile__(
449 "movq %0, %%r14\n"
450 "\tmovq %1, %%r15\n"
451 LOAD_XMMREGS_from_r14
452 "\tmovq %%r15, %%rax\n"
453 "\t.byte 0x66,0x48,0x0f,0xd6,0x00\n"
454 SAVE_XMMREGS_to_r14
455 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
456 : /*trash*/ "r14","r15","memory", XMMREGS,
457 "rax"
458 );
459 after_test( "rex.W movq %xmm0,(%rax)", regs, mem );
460 }
461
462 /* movsd mem, reg f2 48 0f 10 11 rex.W movsd (%rcx),%xmm2 */
463 {
464 before_test( regs, mem );
465 __asm__ __volatile__(
466 "movq %0, %%r14\n"
467 "\tmovq %1, %%r15\n"
468 LOAD_XMMREGS_from_r14
469 "\tmovq %%r15, %%rcx\n"
470 "\t.byte 0xf2,0x48,0x0f,0x10,0x11\n"
471 SAVE_XMMREGS_to_r14
472 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
473 : /*trash*/ "r14","r15","memory", XMMREGS,
474 "rcx"
475 );
476 after_test( "rex.W movsd (%rcx),%xmm2", regs, mem );
477 }
478
479 /* movsd reg, mem f2 48 0f 11 3f rex.W movsd %xmm7,(%rdi) */
480 {
481 before_test( regs, mem );
482 __asm__ __volatile__(
483 "movq %0, %%r14\n"
484 "\tmovq %1, %%r15\n"
485 LOAD_XMMREGS_from_r14
486 "\tmovq %%r15, %%rdi\n"
487 "\t.byte 0xf2,0x48,0x0f,0x11,0x3f\n"
488 SAVE_XMMREGS_to_r14
489 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
490 : /*trash*/ "r14","r15","memory", XMMREGS,
491 "rdi"
492 );
493 after_test( "rex.W movsd %xmm7,(%rdi)", regs, mem );
494 }
495
496 /* movss mem, reg f3 48 0f 10 5e 04 rex.W movss 0x4(%rsi),%xmm3 */
497 {
498 before_test( regs, mem );
499 __asm__ __volatile__(
500 "movq %0, %%r14\n"
501 "\tmovq %1, %%r15\n"
502 LOAD_XMMREGS_from_r14
503 "\tmovq %%r15, %%rsi\n"
504 "\t.byte 0xf3,0x48,0x0f,0x10,0x5e,0x04\n"
505 SAVE_XMMREGS_to_r14
506 : /*out*/ : /*in*/ "r"(regs), "r"( -0x4 + (char*)&mem->dqw[2] )
507 : /*trash*/ "r14","r15","memory", XMMREGS,
508 "rsi"
509 );
510 after_test( "rex.W movss 0x4(%rsi),%xmm3", regs, mem );
511 }
512
513 /* movupd reg, mem 66 48 0f 11 07 rex.W movupd %xmm0,(%rdi) */
514 {
515 before_test( regs, mem );
516 __asm__ __volatile__(
517 "movq %0, %%r14\n"
518 "\tmovq %1, %%r15\n"
519 LOAD_XMMREGS_from_r14
520 "\tmovq %%r15, %%rdi\n"
521 "\t.byte 0x66,0x48,0x0f,0x11,0x07\n"
522 SAVE_XMMREGS_to_r14
523 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
524 : /*trash*/ "r14","r15","memory", XMMREGS,
525 "rdi"
526 );
527 after_test( "rex.W movupd %xmm0,(%rdi)", regs, mem );
528 }
529
530 /* mulpd mem, reg 66 48 0f 59 61 00 rex.W mulpd 0x0(%rcx),%xmm4 */
531 {
532 before_test( regs, mem );
533 __asm__ __volatile__(
534 "movq %0, %%r14\n"
535 "\tmovq %1, %%r15\n"
536 LOAD_XMMREGS_from_r14
537 "\tmovq %%r15, %%rcx\n"
538 "\t.byte 0x66,0x48,0x0f,0x59,0x61,0x00\n"
539 SAVE_XMMREGS_to_r14
540 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
541 : /*trash*/ "r14","r15","memory", XMMREGS,
542 "rcx"
543 );
544 after_test( "rex.W mulpd 0x0(%rcx),%xmm4", regs, mem );
545 }
546
547 /* mulsd mem, reg f2 48 0f 59 1f rex.W mulsd (%rdi),%xmm3 */
548 {
549 before_test( regs, mem );
550 __asm__ __volatile__(
551 "movq %0, %%r14\n"
552 "\tmovq %1, %%r15\n"
553 LOAD_XMMREGS_from_r14
554 "\tmovq %%r15, %%rdi\n"
555 "\t.byte 0xf2,0x48,0x0f,0x59,0x1f\n"
556 SAVE_XMMREGS_to_r14
557 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
558 : /*trash*/ "r14","r15","memory", XMMREGS,
559 "rdi"
560 );
561 after_test( "rex.W mulsd (%rdi),%xmm3", regs, mem );
562 }
563
564 /* prefetchnt0 49 0f 18 4c f2 a0 rex.WB prefetcht0 -0x60(%r10,%rsi,8) */
565 {
566 before_test( regs, mem );
567 __asm__ __volatile__(
568 "movq %0, %%r14\n"
569 "\tmovq %1, %%r15\n"
570 LOAD_XMMREGS_from_r14
571 "\tmovq %%r15, %%r10\n"
572 "\txorq %%rsi, %%rsi\n"
573 "\t.byte 0x49,0x0f,0x18,0x4c,0xf2,0xa0\n"
574 SAVE_XMMREGS_to_r14
575 : /*out*/ : /*in*/ "r"(regs), "r"( - -0x60 + (char*)&mem->dqw[2] )
576 : /*trash*/ "r14","r15","memory", XMMREGS,
577 "r10","rsi"
578 );
579 after_test( "rex.WB prefetcht0 -0x60(%r10,%rsi,8)", regs, mem );
580 }
581
582 /* subsd mem, reg f2 49 0f 5c 4d f8 rex.WB subsd -0x8(%r13),%xmm1 */
583 {
584 before_test( regs, mem );
585 __asm__ __volatile__(
586 "movq %0, %%r14\n"
587 "\tmovq %1, %%r15\n"
588 LOAD_XMMREGS_from_r14
589 "\tmovq %%r15, %%r13\n"
590 "\t.byte 0xf2,0x49,0x0f,0x5c,0x4d,0xf8\n"
591 SAVE_XMMREGS_to_r14
592 : /*out*/ : /*in*/ "r"(regs), "r"( - -0x8 + (char*)&mem->dqw[2] )
593 : /*trash*/ "r14","r15","memory", XMMREGS,
594 "r13"
595 );
596 after_test( "rex.WB subsd -0x8(%r13),%xmm1", regs, mem );
597 }
598
599 /* cvtps2pd mem, reg 48 0f 5a 07 rex.W cvtps2pd (%rdi),%xmm0 */
600 {
601 before_test( regs, mem );
602 __asm__ __volatile__(
603 "movq %0, %%r14\n"
604 "\tmovq %1, %%r15\n"
605 LOAD_XMMREGS_from_r14
606 "\tmovq %%r15, %%rdi\n"
607 "\t.byte 0x48,0x0f,0x5a,0x07\n"
608 SAVE_XMMREGS_to_r14
609 : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
610 : /*trash*/ "r14","r15","memory", XMMREGS,
611 "rdi"
612 );
613 after_test( "rex.W cvtps2pd (%rdi),%xmm0", regs, mem );
614 }
615
616 free(regs);
617 free(mem);
618 return 0;
619 }
620