1 #include <stdlib.h>
2 #include <stdio.h>
3
4 typedef unsigned int UInt;
5 typedef unsigned long long int ULong;
6
/*
 * Execute a single "lock cmpxchg8b" against a private 8-byte memory
 * operand and report the machine state afterwards.
 *
 * Inputs:
 *   rdxIn, raxIn      values loaded into %rdx / %rax (the comparand pair)
 *   rcxIn, rbxIn      values loaded into %rcx / %rbx (the replacement pair)
 *   memHiIn, memLoIn  initial memory operand; each is truncated to 32 bits
 *                     because the operand is only 8 bytes wide
 *
 * Outputs:
 *   *rdxOut, *raxOut      full 64-bit %rdx / %rax after the instruction
 *   *memHiOut, *memLoOut  high / low 32-bit halves of the memory operand
 *                         after the instruction, zero-extended
 *   *zOut                 1 if the instruction set ZF, otherwise 0
 */
void do_cmpxchg8b ( /*OUT*/
                    ULong* rdxOut, ULong* raxOut,
                    ULong* memHiOut, ULong* memLoOut,
                    ULong* zOut,
                    /*IN*/
                    ULong rdxIn, ULong raxIn,
                    ULong memHiIn, ULong memLoIn,
                    ULong rcxIn, ULong rbxIn )
{
   /* The locked 8-byte operand.  A plain UInt[2] is only guaranteed
      4-byte alignment, which would allow the locked access to straddle
      a cache line (a "split lock").  Force natural 8-byte alignment,
      mirroring the aligned(16) used by the cmpxchg16b variant. */
   UInt  mem[2] __attribute__((aligned(8)));

   /* Parameter block handed to the asm through a single register:
        [0] rdx in/out   [1] rax in/out
        [2] rcx in       [3] rbx in
        [4] address of the memory operand
        [5] ZF result (pre-filled with all ones so that a missing
            write-back by the asm would be noticeable) */
   ULong block[6];

   mem[0] = (UInt)memLoIn;
   mem[1] = (UInt)memHiIn;
   block[0] = rdxIn;
   block[1] = raxIn;
   block[2] = rcxIn;
   block[3] = rbxIn;
   block[4] = (ULong)&mem[0];
   block[5] = ~(0ULL);

   __asm__ __volatile__(
      "movq %0,%%r11\n"
      "\tmovq 0(%%r11),%%rdx\n"
      "\tmovq 8(%%r11),%%rax\n"
      "\tmovq 16(%%r11),%%rcx\n"
      "\tmovq 24(%%r11),%%rbx\n"
      "\tmovq 32(%%r11),%%r10\n"
      "\tlock cmpxchg8b (%%r10)\n"
      /* mov leaves the flags alone, so ZF from cmpxchg8b survives
         until the setz below. */
      "\tmovabsq $0,%%r10\n"
      "\tsetz %%r10b\n"
      "\tmovq %%r10,40(%%r11)\n"
      "\tmovq %%rdx,0(%%r11)\n"
      "\tmovq %%rax,8(%%r11)\n"
      : /*out*/
      : /*in*/ "r"(&block[0])
      : /*trash*/ "%r11", "%r10", "%rax", "%rbx", "%rcx", "%rdx",
                  "cc", "memory" );

   *rdxOut = block[0];
   *raxOut = block[1];
   *memLoOut = (ULong)mem[0];
   *memHiOut = (ULong)mem[1];
   *zOut = block[5];
}
49
/*
 * Run one cmpxchg8b experiment through do_cmpxchg8b() and print a single
 * line showing the inputs (d:a, mem, c:b) followed by the results
 * (z flag, d:a, mem).
 */
void try8b ( ULong d, ULong a, ULong mHi, ULong mLo, ULong c, ULong b )
{
   ULong dd, aa, mmHi, mmLo, zz;
   do_cmpxchg8b( &dd, &aa, &mmHi, &mmLo, &zz,
                 d,a,mHi,mLo,c,b);
   /* zz is unsigned long long, so it is printed with %llu; the text
      produced for the values 0 and 1 is unchanged. */
   printf(" Q d:a=%llx:%llx mem=%llx:%llx c:b=%llx:%llx "
          "-> z=%llu d:a=%llx:%llx mem=%llx:%llx\n",
          d,a, mHi,mLo, c,b, zz, dd,aa, mmHi,mmLo );
}
59
/*
 * Execute a single "lock cmpxchg16b" against a private 16-byte memory
 * operand and report the machine state afterwards.
 *
 * Inputs:
 *   rdxIn, raxIn      values loaded into %rdx / %rax (the comparand pair)
 *   rcxIn, rbxIn      values loaded into %rcx / %rbx (the replacement pair)
 *   memHiIn, memLoIn  initial high / low 64-bit halves of the operand
 *
 * Outputs:
 *   *rdxOut, *raxOut      %rdx / %rax after the instruction
 *   *memHiOut, *memLoOut  high / low halves of the operand afterwards
 *   *zOut                 1 if the instruction set ZF, otherwise 0
 */
void do_cmpxchg16b ( /*OUT*/
                     ULong* rdxOut, ULong* raxOut,
                     ULong* memHiOut, ULong* memLoOut,
                     ULong* zOut,
                     /*IN*/
                     ULong rdxIn, ULong raxIn,
                     ULong memHiIn, ULong memLoIn,
                     ULong rcxIn, ULong rbxIn )
{
   /* The 16-byte memory operand: low half first, 16-byte aligned. */
   ULong target[2] __attribute__((aligned(16))) = { memLoIn, memHiIn };

   /* Register image passed to the asm via one pointer:
        slot 0/1  rdx/rax (read before, written back after)
        slot 2/3  rcx/rbx
        slot 4    address of the memory operand
        slot 5    receives the ZF result; starts as all ones */
   ULong regs[6] = {
      rdxIn, raxIn, rcxIn, rbxIn, (ULong)&target[0], ~(0ULL)
   };

   __asm__ __volatile__(
      "movq %0,%%r11\n"
      "\tmovq 0(%%r11),%%rdx\n"
      "\tmovq 8(%%r11),%%rax\n"
      "\tmovq 16(%%r11),%%rcx\n"
      "\tmovq 24(%%r11),%%rbx\n"
      "\tmovq 32(%%r11),%%r10\n"
      "\t.byte 0xf0, 0x49, 0x0f, 0xc7, 0x0a\n" /* lock cmpxchg16b (%%r10) */
      "\tmovabsq $0,%%r10\n"
      "\tsetz %%r10b\n"
      "\tmovq %%r10,40(%%r11)\n"
      "\tmovq %%rdx,0(%%r11)\n"
      "\tmovq %%rax,8(%%r11)\n"
      : /*out*/
      : /*in*/ "r"(regs)
      : /*trash*/ "%r11", "%r10", "%rax", "%rbx", "%rcx", "%rdx",
                  "cc", "memory" );

   /* Hand the results back to the caller. */
   *rdxOut   = regs[0];
   *raxOut   = regs[1];
   *memLoOut = target[0];
   *memHiOut = target[1];
   *zOut     = regs[5];
}
102
/*
 * Run one cmpxchg16b experiment through do_cmpxchg16b() and print a single
 * line showing the inputs (d:a, mem, c:b) followed by the results
 * (z flag, d:a, mem).
 */
void try16b ( ULong d, ULong a, ULong mHi, ULong mLo, ULong c, ULong b )
{
   ULong dd, aa, mmHi, mmLo, zz;
   do_cmpxchg16b( &dd, &aa, &mmHi, &mmLo, &zz,
                  d,a,mHi,mLo,c,b);
   /* zz is unsigned long long, so it is printed with %llu; the text
      produced for the values 0 and 1 is unchanged. */
   printf("QQ d:a=%llx:%llx mem=%llx:%llx c:b=%llx:%llx "
          "-> z=%llu d:a=%llx:%llx mem=%llx:%llx\n",
          d,a, mHi,mLo, c,b, zz, dd,aa, mmHi,mmLo );
}
112
main(void)113 int main(void)
114 {
115 ULong z = 0xDEADBEEF00000000ULL;
116
117 try8b( 0,1, 5,4, 3,2 );
118 try8b( 0,1, 0,1, 3,2 );
119
120 try8b( 0,1, 0,4, 3,2 );
121 try8b( 0,1, 0,0, 3,2 );
122
123 try8b( 0,1, 5,0, 3,2 );
124 try8b( 0,1, 1,1, 3,2 );
125
126 try8b( 0+z,1+z, 5+z,4+z, 3+z,2+z );
127 try8b( 0+z,1+z, 0+z,1+z, 3+z,2+z );
128
129 try8b( 0+z,1+z, 0+z,4+z, 3+z,2+z );
130 try8b( 0+z,1+z, 0+z,0+z, 3+z,2+z );
131
132 try8b( 0+z,1+z, 5+z,0+z, 3+z,2+z );
133 try8b( 0+z,1+z, 1+z,1+z, 3+z,2+z );
134
135 try16b( 0,1, 5,4, 3,2 );
136 try16b( 0,1, 0,1, 3,2 );
137
138 try16b( 0,1, 0,4, 3,2 );
139 try16b( 0,1, 0,0, 3,2 );
140
141 try16b( 0,1, 5,0, 3,2 );
142 try16b( 0,1, 1,1, 3,2 );
143
144 try16b( 0+z,1+z, 5+z,4+z, 3+z,2+z );
145 try16b( 0+z,1+z, 0+z,1+z, 3+z,2+z );
146
147 try16b( 0+z,1+z, 0+z,4+z, 3+z,2+z );
148 try16b( 0+z,1+z, 0+z,0+z, 3+z,2+z );
149
150 try16b( 0+z,1+z, 5+z,0+z, 3+z,2+z );
151 try16b( 0+z,1+z, 1+z,1+z, 3+z,2+z );
152
153 return 0;
154 }
155
156