/* Test for a number of SSE instructions which were seen in the wild
   with a bogus (irrelevant) REX.W bit in their prefixes.  Some just
   have REX = 0x48 where REX.W is irrelevant, hence the whole REX
   prefix is pointless.  Probably related to #133962. */
6 
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>
#include "tests/malloc.h"
11 
/* One raw byte of register or memory state. */
typedef  unsigned char  UChar;

/* A 128-bit value held as 16 individual bytes.  The aligned(16)
   attribute on the member gives the whole struct 16-byte alignment. */
typedef
   struct { __attribute__((aligned(16))) UChar b[16]; }
   UWord128;

/* Image of the sixteen registers %xmm0 .. %xmm15; reg[i] holds %xmm<i>. */
typedef
   struct { UWord128 reg[16]; }
   XMMRegs;

/* Five 128-bit slots of scratch memory; the tests in main address
   their memory operand relative to dqw[2]. */
typedef
   struct { UWord128 dqw[5]; }
   Mem;
25 
/* Print the 16 bytes of *w to stdout as 32 lowercase hex digits,
   starting with b[15] and ending with b[0].  Each '0' digit is
   printed as '.' instead.  No newline is emitted. */
void pp_UWord128 ( UWord128* w ) {
   int i;
   char buf[3];   /* two hex digits plus the terminating NUL */
   for (i = 15; i >= 0; i--) {
      buf[2] = 0;
      /* Bounded write: the value is a single byte, so "%02x" always
         produces exactly two digits and fits. */
      snprintf(buf, sizeof buf, "%02x", (unsigned int)w->b[i]);
      assert(buf[2] == 0);
      if (buf[0] == '0') buf[0] = '.';
      if (buf[1] == '0') buf[1] = '.';
      printf("%s", buf);
   }
}
38 
/* Print all sixteen register images in *regs, one per line, inside a
   brace-delimited group headed by the label `who'.  Each value is
   formatted by pp_UWord128. */
void pp_XMMRegs ( char* who, XMMRegs* regs ) {
   int i;
   printf ("%s (xmms in order [15..0]) {\n", who );
   for (i = 0; i < 16; i++) {
      printf("  %%xmm%2d ", i);
      pp_UWord128( &regs->reg[i] );
      printf("\n");
   }
   printf("}\n");
}
49 
/* Print the five 128-bit slots of *mem, one per line, inside a
   brace-delimited group headed by the label `who'.  Each value is
   formatted by pp_UWord128. */
void pp_Mem ( char* who, Mem* mem ) {
   int i;
   printf ("%s (dqws in order [15 .. 0]) {\n", who );
   for (i = 0; i < 5; i++) {
      printf("  [%d]    ", i);
      pp_UWord128( &mem->dqw[i] );
      printf("\n");
   }
   printf("}\n");
}
60 
/* dst ^= src, byte by byte, over all 16 bytes.  *src is only read. */
void xor_UWord128( UWord128* src, UWord128* dst ) {
   int i;
   for (i = 0; i < 16; i++)
      dst->b[i] ^= src->b[i];
}
/* XOR each of the sixteen register images in *src into the
   corresponding image in *dst. */
void xor_XMMRegs ( XMMRegs* src, XMMRegs* dst ) {
   int i;
   for (i = 0; i < 16; i++)
      xor_UWord128( &src->reg[i], &dst->reg[i] );
}
71 
/* XOR each of the five 128-bit slots in *src into the corresponding
   slot in *dst. */
void xor_Mem ( Mem* src, Mem* dst ) {
   int i;
   for (i = 0; i < 5; i++)
      xor_UWord128( &src->dqw[i], &dst->dqw[i] );
}
77 
/* Fill *regs and *mem with a fixed, reproducible byte pattern.
   A single counter runs across both areas: register bytes get
   0x51 + (ctr % 7), then memory bytes get 0x52 + (ctr % 13), the
   counter continuing from where the register fill left off.  Calling
   this twice therefore always yields identical contents. */
void setup_regs_mem ( XMMRegs* regs, Mem* mem ) {
   int ctr, i, j;
   ctr = 0;
   for (i = 0; i < 16; i++) {
      for (j = 0; j < 16; j++)
        regs->reg[i].b[j] = 0x51 + (ctr++ % 7);
   }
   for (i = 0; i < 5; i++) {
      for (j = 0; j < 16; j++)
        mem->dqw[i].b[j] = 0x52 + (ctr++ % 13);
   }
}
90 
/* Put *regs and *mem into the known starting pattern before a test
   instruction is executed. */
void before_test ( XMMRegs* regs, Mem* mem ) {
   setup_regs_mem( regs, mem );
}
94 
/* Report what a test changed.  A fresh copy of the starting pattern is
   built with setup_regs_mem and the observed *regs / *mem are XORed
   into it, so every byte the test left untouched becomes zero.  The
   memory difference is printed first, then the register difference,
   both labelled `after "<who>"', followed by a blank line. */
void after_test ( char* who, XMMRegs* regs, Mem* mem ) {
   XMMRegs rdiff;
   Mem     mdiff;
   char s[128];
   setup_regs_mem( &rdiff, &mdiff );
   xor_XMMRegs( regs, &rdiff );
   xor_Mem( mem, &mdiff );
   /* Bounded: an over-long label is truncated rather than overrunning s. */
   snprintf(s, sizeof s, "after \"%s\"", who );
   pp_Mem( s, &mdiff );
   pp_XMMRegs( s, &rdiff );
   printf("\n");
}
107 
/* asm template fragment: load %xmm0 .. %xmm15 from sixteen consecutive
   16-byte slots starting at the address held in %r14.  Register names
   carry a doubled '%' because the fragment is spliced into an extended
   asm template that has operands. */
#define LOAD_XMMREGS_from_r14       \
   "\tmovupd   0(%%r14),  %%xmm0\n" \
   "\tmovupd  16(%%r14),  %%xmm1\n" \
   "\tmovupd  32(%%r14),  %%xmm2\n" \
   "\tmovupd  48(%%r14),  %%xmm3\n" \
   "\tmovupd  64(%%r14),  %%xmm4\n" \
   "\tmovupd  80(%%r14),  %%xmm5\n" \
   "\tmovupd  96(%%r14),  %%xmm6\n" \
   "\tmovupd 112(%%r14),  %%xmm7\n" \
   "\tmovupd 128(%%r14),  %%xmm8\n" \
   "\tmovupd 144(%%r14),  %%xmm9\n" \
   "\tmovupd 160(%%r14), %%xmm10\n" \
   "\tmovupd 176(%%r14), %%xmm11\n" \
   "\tmovupd 192(%%r14), %%xmm12\n" \
   "\tmovupd 208(%%r14), %%xmm13\n" \
   "\tmovupd 224(%%r14), %%xmm14\n" \
   "\tmovupd 240(%%r14), %%xmm15\n"
125 
/* asm template fragment: store %xmm0 .. %xmm15 to sixteen consecutive
   16-byte slots starting at the address held in %r14.  Register names
   carry a doubled '%' because the fragment is spliced into an extended
   asm template that has operands.  Note that the final instruction has
   no trailing newline. */
#define SAVE_XMMREGS_to_r14         \
   "\tmovupd %%xmm0,    0(%%r14)\n" \
   "\tmovupd %%xmm1,   16(%%r14)\n" \
   "\tmovupd %%xmm2,   32(%%r14)\n" \
   "\tmovupd %%xmm3,   48(%%r14)\n" \
   "\tmovupd %%xmm4,   64(%%r14)\n" \
   "\tmovupd %%xmm5,   80(%%r14)\n" \
   "\tmovupd %%xmm6,   96(%%r14)\n" \
   "\tmovupd %%xmm7,  112(%%r14)\n" \
   "\tmovupd %%xmm8,  128(%%r14)\n" \
   "\tmovupd %%xmm9,  144(%%r14)\n" \
   "\tmovupd %%xmm10, 160(%%r14)\n" \
   "\tmovupd %%xmm11, 176(%%r14)\n" \
   "\tmovupd %%xmm12, 192(%%r14)\n" \
   "\tmovupd %%xmm13, 208(%%r14)\n" \
   "\tmovupd %%xmm14, 224(%%r14)\n" \
   "\tmovupd %%xmm15, 240(%%r14)"
143 
/* Comma-separated list naming all sixteen xmm registers, for use in
   the clobber list of an asm statement. */
#define XMMREGS \
   "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", \
   "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
147 
#if 0
   /* Boilerplate for test.  Never compiled; it is the template each
      test case in main follows.  Placeholders to fill in:
        %%rx / "x"  - the base register used by the instruction, which
                      must also be named in the trash list
        .byte 0x    - the raw encoding of the instruction under test
        -x          - the displacement encoded in the instruction, so
                      that base + displacement lands on mem->dqw[2]
        ""          - the description printed by after_test */
   {
     before_test( regs, mem );
     __asm__ __volatile__(
         "movq %0, %%r14\n"
       "\tmovq %1, %%r15\n"
       LOAD_XMMREGS_from_r14
       "\tmovq %%r15, %%rx\n"
       "\t.byte 0x\n"
       SAVE_XMMREGS_to_r14
          : /*out*/ : /*in*/ "r"(regs), "r"( -x + (char*)&mem->dqw[2] )
                    : /*trash*/ "r14","r15","memory", XMMREGS,
                                "x"
     );
     after_test( "", regs, mem );
   }
#endif
166 
main(void)167 int main ( void )
168 {
169    XMMRegs* regs;
170    Mem*     mem;
171    regs = memalign16(sizeof(XMMRegs) + 16);
172    mem  = memalign16(sizeof(Mem) + 16);
173 
174    /* addpd mem, reg   66 49 0f 58 48 00  rex.WB addpd  0x0(%r8),%xmm1 */
175    {
176      before_test( regs, mem );
177      __asm__ __volatile__(
178          "movq %0, %%r14\n"
179        "\tmovq %1, %%r15\n"
180        LOAD_XMMREGS_from_r14
181        "\tmovq %%r15, %%r8\n"
182        "\t.byte 0x66,0x49,0x0f,0x58,0x48,0x00\n"
183        SAVE_XMMREGS_to_r14
184           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
185                     : /*trash*/ "r14","r15","memory", XMMREGS,
186                                 "r8"
187      );
188      after_test( "rex.WB addpd  0x0(%r8),%xmm1", regs, mem );
189    }
190 
191    /* addsd mem, reg   f2 48 0f 58 27     rex.W addsd  (%rdi),%xmm4 */
192    {
193      before_test( regs, mem );
194      __asm__ __volatile__(
195          "movq %0, %%r14\n"
196        "\tmovq %1, %%r15\n"
197        LOAD_XMMREGS_from_r14
198        "\tmovq %%r15, %%rdi\n"
199        "\t.byte 0xf2,0x48,0x0f,0x58,0x27\n"
200        SAVE_XMMREGS_to_r14
201           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
202                     : /*trash*/ "r14","r15","memory", XMMREGS,
203                                 "rdi"
204      );
205      after_test( "rex.W addsd  (%rdi),%xmm4", regs, mem );
206    }
207 
208    /* movapd mem, reg  66 48 0f 28 0a     rex.W movapd (%rdx),%xmm1 */
209    {
210      before_test( regs, mem );
211      __asm__ __volatile__(
212          "movq %0, %%r14\n"
213        "\tmovq %1, %%r15\n"
214        LOAD_XMMREGS_from_r14
215        "\tmovq %%r15, %%rdx\n"
216        "\t.byte 0x66,0x48,0x0f,0x28,0x0a\n"
217        SAVE_XMMREGS_to_r14
218           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
219                     : /*trash*/ "r14","r15","memory", XMMREGS,
220                                 "rdx"
221      );
222      after_test( "rex.W movapd (%rdx),%xmm1", regs, mem );
223    }
224 
225    /* movapd reg, mem  66 48 0f 29 0a     rex.W movapd %xmm1,(%rdx) */
226    {
227      before_test( regs, mem );
228      __asm__ __volatile__(
229          "movq %0, %%r14\n"
230        "\tmovq %1, %%r15\n"
231        LOAD_XMMREGS_from_r14
232        "\tmovq %%r15, %%rdx\n"
233        "\t.byte 0x66,0x48,0x0f,0x29,0x0a\n"
234        SAVE_XMMREGS_to_r14
235           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
236                     : /*trash*/ "r14","r15","memory", XMMREGS,
237                                 "rdx"
238      );
239      after_test( "rex.W movapd %xmm1,(%rdx)", regs, mem );
240    }
241 
242    /* movaps mem, reg  48 0f 28 42 30     rex.W movaps 0x30(%rdx),%xmm0 */
243    {
244      before_test( regs, mem );
245      __asm__ __volatile__(
246          "movq %0, %%r14\n"
247        "\tmovq %1, %%r15\n"
248        LOAD_XMMREGS_from_r14
249        "\tmovq %%r15, %%rdx\n"
250        "\t.byte 0x48,0x0f,0x28,0x42,0x30\n"
251        SAVE_XMMREGS_to_r14
252           : /*out*/ : /*in*/ "r"(regs), "r"( -0x30 + (char*)&mem->dqw[2] )
253                     : /*trash*/ "r14","r15","memory", XMMREGS,
254                                 "rdx"
255      );
256      after_test( "movaps 0x30(%rdx),%xmm0", regs, mem );
257    }
258 
259    /* movaps reg, mem  49 0f 29 48 00     rex.WB movaps %xmm1,0x0(%r8) */
260    {
261      before_test( regs, mem );
262      __asm__ __volatile__(
263          "movq %0, %%r14\n"
264        "\tmovq %1, %%r15\n"
265        LOAD_XMMREGS_from_r14
266        "\tmovq %%r15, %%r8\n"
267        "\t.byte 0x49,0x0f,0x29,0x48,0x00\n"
268        SAVE_XMMREGS_to_r14
269           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
270                     : /*trash*/ "r14","r15","memory", XMMREGS,
271                                 "r8"
272      );
273      after_test( "rex.WB movaps %xmm1,0x0(%r8)", regs, mem );
274    }
275 
276    /* movddup mem, reg f2 48 0f 12 2a     rex.W movddup (%rdx),%xmm5 */
277    {
278      before_test( regs, mem );
279      __asm__ __volatile__(
280          "movq %0, %%r14\n"
281        "\tmovq %1, %%r15\n"
282        LOAD_XMMREGS_from_r14
283        "\tmovq %%r15, %%rdx\n"
284        "\t.byte 0xf2,0x48,0x0f,0x12,0x2a\n"
285        SAVE_XMMREGS_to_r14
286           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
287                     : /*trash*/ "r14","r15","memory", XMMREGS,
288                                 "rdx"
289      );
290      after_test( "movddup (%rdx),%xmm5", regs, mem );
291    }
292 
293    /* movhpd mem, reg  66 48 0f 16 06     rex.W movhpd (%rsi),%xmm0 */
294    {
295      before_test( regs, mem );
296      __asm__ __volatile__(
297          "movq %0, %%r14\n"
298        "\tmovq %1, %%r15\n"
299        LOAD_XMMREGS_from_r14
300        "\tmovq %%r15, %%rsi\n"
301        "\t.byte 0x66,0x48,0x0f,0x16,0x06\n"
302        SAVE_XMMREGS_to_r14
303           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
304                     : /*trash*/ "r14","r15","memory", XMMREGS,
305                                 "rsi"
306      );
307      after_test( "rex.W movhpd (%rsi),%xmm0", regs, mem );
308    }
309 
310    /* movhpd reg, mem  66 48 0f 17 07     rex.W movhpd %xmm0,(%rdi) */
311    {
312      before_test( regs, mem );
313      __asm__ __volatile__(
314          "movq %0, %%r14\n"
315        "\tmovq %1, %%r15\n"
316        LOAD_XMMREGS_from_r14
317        "\tmovq %%r15, %%rdi\n"
318        "\t.byte 0x66,0x48,0x0f,0x17,0x07\n"
319        SAVE_XMMREGS_to_r14
320           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
321                     : /*trash*/ "r14","r15","memory", XMMREGS,
322                                 "rdi"
323      );
324      after_test( "rex.W movhpd %xmm0,(%rdi)", regs, mem );
325    }
326 
327    /* movhps mem, reg  48 0f 16 36        rex.W movhps (%rsi),%xmm6 */
328    {
329      before_test( regs, mem );
330      __asm__ __volatile__(
331          "movq %0, %%r14\n"
332        "\tmovq %1, %%r15\n"
333        LOAD_XMMREGS_from_r14
334        "\tmovq %%r15, %%rsi\n"
335        "\t.byte 0x48,0x0f,0x16,0x36\n"
336        SAVE_XMMREGS_to_r14
337           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
338                     : /*trash*/ "r14","r15","memory", XMMREGS,
339                                 "rsi"
340      );
341      after_test( "rex.W movhps (%rsi),%xmm6", regs, mem );
342    }
343    /* movhps reg, mem  49 0f 17 03        rex.WB movhps %xmm0,(%r11) */
344    {
345      before_test( regs, mem );
346      __asm__ __volatile__(
347          "movq %0, %%r14\n"
348        "\tmovq %1, %%r15\n"
349        LOAD_XMMREGS_from_r14
350        "\tmovq %%r15, %%r11\n"
351        "\t.byte 0x49,0x0F,0x17,0x03\n" /* rex.WB movhps %xmm0,(%r11) */
352        SAVE_XMMREGS_to_r14
353          : /*out*/ : /*in*/ "r"(regs), "r"( 0 + (char*)&mem->dqw[2] )
354                     : /*trash*/ "r14","r15","memory", XMMREGS,
355                                 "r11"
356      );
357      after_test( "rex.WB movhps %xmm0,(%r11)", regs, mem );
358    }
359 
360    /* movlpd mem, reg  66 48 0f 12 4a 00  rex.W movlpd 0x0(%rdx),%xmm1 */
361    {
362      before_test( regs, mem );
363      __asm__ __volatile__(
364          "movq %0, %%r14\n"
365        "\tmovq %1, %%r15\n"
366        LOAD_XMMREGS_from_r14
367        "\tmovq %%r15, %%rdx\n"
368        "\t.byte 0x66,0x48,0x0f,0x12,0x4a,0x00\n"
369        SAVE_XMMREGS_to_r14
370           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
371                     : /*trash*/ "r14","r15","memory", XMMREGS,
372                                 "rdx"
373      );
374      after_test( "rex.W movlpd 0x0(%rdx),%xmm1", regs, mem );
375    }
376 
377    /* movlpd reg, mem  66 48 0f 13 30     rex.W movlpd %xmm6,(%rax) */
378    {
379      before_test( regs, mem );
380      __asm__ __volatile__(
381          "movq %0, %%r14\n"
382        "\tmovq %1, %%r15\n"
383        LOAD_XMMREGS_from_r14
384        "\tmovq %%r15, %%rax\n"
385        "\t.byte 0x66,0x48,0x0f,0x13,0x30\n"
386        SAVE_XMMREGS_to_r14
387           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
388                     : /*trash*/ "r14","r15","memory", XMMREGS,
389                                 "rax"
390      );
391      after_test( "rex.W movlpd %xmm6,(%rax)", regs, mem );
392    }
393 
394    /* movlps mem, reg  48 0f 12 07        rex.W movlps (%rdi),%xmm0 */
395    {
396      before_test( regs, mem );
397      __asm__ __volatile__(
398          "movq %0, %%r14\n"
399        "\tmovq %1, %%r15\n"
400        LOAD_XMMREGS_from_r14
401        "\tmovq %%r15, %%rdi\n"
402        "\t.byte 0x48,0x0f,0x12,0x07\n"
403        SAVE_XMMREGS_to_r14
404           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
405                     : /*trash*/ "r14","r15","memory", XMMREGS,
406                                 "rdi"
407      );
408      after_test( "rex.W movlps (%rdi),%xmm0", regs, mem );
409    }
410 
411    /* movlps reg, mem  49 0f 13 02        rex.WB movlps %xmm0,(%r10) */
412    {
413      before_test( regs, mem );
414      __asm__ __volatile__(
415          "movq %0, %%r14\n"
416        "\tmovq %1, %%r15\n"
417        LOAD_XMMREGS_from_r14
418        "\tmovq %%r15, %%r10\n"
419        "\t.byte 0x49,0x0f,0x13,0x02\n"
420        SAVE_XMMREGS_to_r14
421           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
422                     : /*trash*/ "r14","r15","memory", XMMREGS,
423                                 "r10"
424      );
425      after_test( "rex.WB movlps %xmm0,(%r10)", regs, mem );
426    }
427 
428    /* movq mem, reg    f3 48 0f 7e 00     rex.W movq   (%rax),%xmm0 */
429    {
430      before_test( regs, mem );
431      __asm__ __volatile__(
432          "movq %0, %%r14\n"
433        "\tmovq %1, %%r15\n"
434        LOAD_XMMREGS_from_r14
435        "\tmovq %%r15, %%rax\n"
436        "\t.byte 0xf3,0x48,0x0f,0x7e,0x00\n"
437        SAVE_XMMREGS_to_r14
438           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
439                     : /*trash*/ "r14","r15","memory", XMMREGS,
440                                 "rax"
441      );
442      after_test( "rex.W movq (%rax),%xmm0", regs, mem );
443    }
444 
445    /* movq reg, mem    66 48 0f d6 00     rex.W movq   %xmm0,(%rax) */
446    {
447      before_test( regs, mem );
448      __asm__ __volatile__(
449          "movq %0, %%r14\n"
450        "\tmovq %1, %%r15\n"
451        LOAD_XMMREGS_from_r14
452        "\tmovq %%r15, %%rax\n"
453        "\t.byte 0x66,0x48,0x0f,0xd6,0x00\n"
454        SAVE_XMMREGS_to_r14
455           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
456                     : /*trash*/ "r14","r15","memory", XMMREGS,
457                                 "rax"
458      );
459      after_test( "rex.W movq %xmm0,(%rax)", regs, mem );
460    }
461 
462    /* movsd mem, reg   f2 48 0f 10 11     rex.W movsd  (%rcx),%xmm2 */
463    {
464      before_test( regs, mem );
465      __asm__ __volatile__(
466          "movq %0, %%r14\n"
467        "\tmovq %1, %%r15\n"
468        LOAD_XMMREGS_from_r14
469        "\tmovq %%r15, %%rcx\n"
470        "\t.byte 0xf2,0x48,0x0f,0x10,0x11\n"
471        SAVE_XMMREGS_to_r14
472           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
473                     : /*trash*/ "r14","r15","memory", XMMREGS,
474                                 "rcx"
475      );
476      after_test( "rex.W movsd (%rcx),%xmm2", regs, mem );
477    }
478 
479    /* movsd reg, mem   f2 48 0f 11 3f     rex.W movsd  %xmm7,(%rdi) */
480    {
481      before_test( regs, mem );
482      __asm__ __volatile__(
483          "movq %0, %%r14\n"
484        "\tmovq %1, %%r15\n"
485        LOAD_XMMREGS_from_r14
486        "\tmovq %%r15, %%rdi\n"
487        "\t.byte 0xf2,0x48,0x0f,0x11,0x3f\n"
488        SAVE_XMMREGS_to_r14
489           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
490                     : /*trash*/ "r14","r15","memory", XMMREGS,
491                                 "rdi"
492      );
493      after_test( "rex.W movsd %xmm7,(%rdi)", regs, mem );
494    }
495 
496    /* movss mem, reg   f3 48 0f 10 5e 04  rex.W movss  0x4(%rsi),%xmm3 */
497    {
498      before_test( regs, mem );
499      __asm__ __volatile__(
500          "movq %0, %%r14\n"
501        "\tmovq %1, %%r15\n"
502        LOAD_XMMREGS_from_r14
503        "\tmovq %%r15, %%rsi\n"
504        "\t.byte 0xf3,0x48,0x0f,0x10,0x5e,0x04\n"
505        SAVE_XMMREGS_to_r14
506           : /*out*/ : /*in*/ "r"(regs), "r"( -0x4 + (char*)&mem->dqw[2] )
507                     : /*trash*/ "r14","r15","memory", XMMREGS,
508                                 "rsi"
509      );
510      after_test( "rex.W movss 0x4(%rsi),%xmm3", regs, mem );
511    }
512 
513    /* movupd reg, mem  66 48 0f 11 07     rex.W movupd %xmm0,(%rdi) */
514    {
515      before_test( regs, mem );
516      __asm__ __volatile__(
517          "movq %0, %%r14\n"
518        "\tmovq %1, %%r15\n"
519        LOAD_XMMREGS_from_r14
520        "\tmovq %%r15, %%rdi\n"
521        "\t.byte 0x66,0x48,0x0f,0x11,0x07\n"
522        SAVE_XMMREGS_to_r14
523           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
524                     : /*trash*/ "r14","r15","memory", XMMREGS,
525                                 "rdi"
526      );
527      after_test( "rex.W movupd %xmm0,(%rdi)", regs, mem );
528    }
529 
530    /* mulpd mem, reg   66 48 0f 59 61 00  rex.W mulpd  0x0(%rcx),%xmm4 */
531    {
532      before_test( regs, mem );
533      __asm__ __volatile__(
534          "movq %0, %%r14\n"
535        "\tmovq %1, %%r15\n"
536        LOAD_XMMREGS_from_r14
537        "\tmovq %%r15, %%rcx\n"
538        "\t.byte 0x66,0x48,0x0f,0x59,0x61,0x00\n"
539        SAVE_XMMREGS_to_r14
540           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
541                     : /*trash*/ "r14","r15","memory", XMMREGS,
542                                 "rcx"
543      );
544      after_test( "rex.W mulpd 0x0(%rcx),%xmm4", regs, mem );
545    }
546 
547    /* mulsd mem, reg   f2 48 0f 59 1f     rex.W mulsd  (%rdi),%xmm3 */
548    {
549      before_test( regs, mem );
550      __asm__ __volatile__(
551          "movq %0, %%r14\n"
552        "\tmovq %1, %%r15\n"
553        LOAD_XMMREGS_from_r14
554        "\tmovq %%r15, %%rdi\n"
555        "\t.byte 0xf2,0x48,0x0f,0x59,0x1f\n"
556        SAVE_XMMREGS_to_r14
557           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
558                     : /*trash*/ "r14","r15","memory", XMMREGS,
559                                 "rdi"
560      );
561      after_test( "rex.W mulsd (%rdi),%xmm3", regs, mem );
562    }
563 
564    /* prefetchnt0    49 0f 18 4c f2 a0  rex.WB prefetcht0 -0x60(%r10,%rsi,8) */
565    {
566      before_test( regs, mem );
567      __asm__ __volatile__(
568          "movq %0, %%r14\n"
569        "\tmovq %1, %%r15\n"
570        LOAD_XMMREGS_from_r14
571        "\tmovq %%r15, %%r10\n"
572        "\txorq %%rsi, %%rsi\n"
573        "\t.byte 0x49,0x0f,0x18,0x4c,0xf2,0xa0\n"
574        SAVE_XMMREGS_to_r14
575           : /*out*/ : /*in*/ "r"(regs), "r"( - -0x60 + (char*)&mem->dqw[2] )
576                     : /*trash*/ "r14","r15","memory", XMMREGS,
577                                 "r10","rsi"
578      );
579      after_test( "rex.WB prefetcht0 -0x60(%r10,%rsi,8)", regs, mem );
580    }
581 
582    /* subsd mem, reg   f2 49 0f 5c 4d f8  rex.WB subsd  -0x8(%r13),%xmm1 */
583    {
584      before_test( regs, mem );
585      __asm__ __volatile__(
586          "movq %0, %%r14\n"
587        "\tmovq %1, %%r15\n"
588        LOAD_XMMREGS_from_r14
589        "\tmovq %%r15, %%r13\n"
590        "\t.byte 0xf2,0x49,0x0f,0x5c,0x4d,0xf8\n"
591        SAVE_XMMREGS_to_r14
592           : /*out*/ : /*in*/ "r"(regs), "r"( - -0x8 + (char*)&mem->dqw[2] )
593                     : /*trash*/ "r14","r15","memory", XMMREGS,
594                                 "r13"
595      );
596      after_test( "rex.WB subsd  -0x8(%r13),%xmm1", regs, mem );
597    }
598 
599    /* cvtps2pd mem, reg   48 0f 5a 07     rex.W cvtps2pd  (%rdi),%xmm0 */
600    {
601      before_test( regs, mem );
602      __asm__ __volatile__(
603          "movq %0, %%r14\n"
604        "\tmovq %1, %%r15\n"
605        LOAD_XMMREGS_from_r14
606        "\tmovq %%r15, %%rdi\n"
607        "\t.byte 0x48,0x0f,0x5a,0x07\n"
608        SAVE_XMMREGS_to_r14
609           : /*out*/ : /*in*/ "r"(regs), "r"( -0 + (char*)&mem->dqw[2] )
610                     : /*trash*/ "r14","r15","memory", XMMREGS,
611                                 "rdi"
612      );
613      after_test( "rex.W cvtps2pd  (%rdi),%xmm0", regs, mem );
614    }
615 
616    free(regs);
617    free(mem);
618    return 0;
619 }
620