
/* This is an example of a program which does atomic memory operations
   between two processes which share a page.  Valgrind 3.4.1 and
   earlier produce incorrect answers because it does not preserve
   atomicity of the relevant instructions in the generated code; but
   the post-DCAS-merge versions of Valgrind do behave correctly. */

/* On ARM, this can be compiled into either ARM or Thumb code, so as
   to test both A and T encodings of LDREX/STREX et al.  Also on ARM,
   it tests doubleword atomics (LDREXD, STREXD) which I don't think it
   does on any other platform. */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <unistd.h>
#include <sys/wait.h>
#include "tests/sys_mman.h"

#define NNN 3456987

#define IS_8_ALIGNED(_ptr)   (0 == (((unsigned long)(_ptr)) & 7))

atomic_add_8bit(char * p,int n)26 __attribute__((noinline)) void atomic_add_8bit ( char* p, int n )
27 {
28 #if defined(VGA_x86)
29    unsigned long block[2];
30    block[0] = (unsigned long)p;
31    block[1] = n;
32    __asm__ __volatile__(
33       "movl 0(%%esi),%%eax"      "\n\t"
34       "movl 4(%%esi),%%ebx"      "\n\t"
35       "lock; addb %%bl,(%%eax)"  "\n"
36       : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
37    );
38 #elif defined(VGA_amd64)
39    unsigned long block[2];
40    block[0] = (unsigned long)p;
41    block[1] = n;
42    __asm__ __volatile__(
43       "movq 0(%%rsi),%%rax"      "\n\t"
44       "movq 8(%%rsi),%%rbx"      "\n\t"
45       "lock; addb %%bl,(%%rax)"  "\n"
46       : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
47    );
48 #elif defined(VGA_ppc32)
49    /* Nasty hack.  Does correctly atomically do *p += n, but only if p
50       is 4-aligned -- guaranteed by caller. */
51    unsigned long success;
52    do {
53       __asm__ __volatile__(
54          "lwarx  15,0,%1"    "\n\t"
55          "add    15,15,%2"   "\n\t"
56          "stwcx. 15,0,%1"    "\n\t"
57          "mfcr   %0"         "\n\t"
58          "srwi   %0,%0,29"   "\n\t"
59          "andi.  %0,%0,1"    "\n"
60          : /*out*/"=b"(success)
61          : /*in*/ "b"(p), "b"(((unsigned long)n) << 24)
62          : /*trash*/ "memory", "cc", "r15"
63       );
64    } while (success != 1);
65 #elif defined(VGA_ppc64)
66    /* Nasty hack.  Does correctly atomically do *p += n, but only if p
67       is 8-aligned -- guaranteed by caller. */
68    unsigned long success;
69    do {
70       __asm__ __volatile__(
71          "ldarx  15,0,%1"    "\n\t"
72          "add    15,15,%2"   "\n\t"
73          "stdcx. 15,0,%1"    "\n\t"
74          "mfcr   %0"         "\n\t"
75          "srwi   %0,%0,29"   "\n\t"
76          "andi.  %0,%0,1"    "\n"
77          : /*out*/"=b"(success)
78          : /*in*/ "b"(p), "b"(((unsigned long)n) << 56)
79          : /*trash*/ "memory", "cc", "r15"
80       );
81    } while (success != 1);
82 #elif defined(VGA_arm)
83    unsigned int block[3]
84       = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
85    do {
86       __asm__ __volatile__(
87          "mov    r5, %0"         "\n\t"
88          "ldr    r9, [r5, #0]"   "\n\t" // p
89          "ldr    r10, [r5, #4]"  "\n\t" // n
90          "ldrexb r8, [r9]"       "\n\t"
91          "add    r8, r8, r10"    "\n\t"
92          "strexb r4, r8, [r9]"   "\n\t"
93          "str    r4, [r5, #8]"   "\n\t"
94          : /*out*/
95          : /*in*/ "r"(&block[0])
96          : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
97       );
98    } while (block[2] != 0);
99 #elif defined(VGA_arm64)
100    unsigned long long int block[3]
101       = { (unsigned long long int)p, (unsigned long long int)n,
102           0xFFFFFFFFFFFFFFFFULL};
103    do {
104       __asm__ __volatile__(
105          "mov   x5, %0"         "\n\t"
106          "ldr   x9, [x5, #0]"   "\n\t" // p
107          "ldr   x10, [x5, #8]"  "\n\t" // n
108          "ldxrb w8, [x9]"       "\n\t"
109          "add   x8, x8, x10"    "\n\t"
110          "stxrb w4, w8, [x9]"    "\n\t"
111          "str   x4, [x5, #16]"   "\n\t"
112          : /*out*/
113          : /*in*/ "r"(&block[0])
114          : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
115       );
116    } while (block[2] != 0);
117 #elif defined(VGA_s390x)
118    int dummy;
119    __asm__ __volatile__(
120       "   l	0,%0\n\t"
121       "0: st	0,%1\n\t"
122       "   icm	1,1,%1\n\t"
123       "   ar	1,%2\n\t"
124       "   stcm  1,1,%1\n\t"
125       "   l     1,%1\n\t"
126       "   cs	0,1,%0\n\t"
127       "   jl    0b\n\t"
128       : "+m" (*p), "+m" (dummy)
129       : "d" (n)
130       : "cc", "memory", "0", "1");
131 #elif defined(VGA_mips32)
132    /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
133       exception that can cause this function to fail. */
134 #if defined (_MIPSEL)
135    unsigned int block[3]
136       = { (unsigned int)p, (unsigned int)n, 0x0 };
137    do {
138       __asm__ __volatile__(
139          "move $t0, %0"           "\n\t"
140          "lw   $t1, 0($t0)"       "\n\t"  // p
141          "lw   $t2, 4($t0)"       "\n\t"  // n
142          "andi $t2, $t2, 0xFF"    "\n\t"  // n = n and 0xFF
143          "li   $t4, 0xFF"         "\n\t"
144          "nor  $t4, $t4, $zero"   "\n\t"  // $t4 = 0xFFFFFF00
145          "ll   $t3, 0($t1)"       "\n\t"  // $t3 = old value
146          "and  $t4, $t4, $t3"     "\n\t"  // $t4 = $t3 and 0xFFFFFF00
147          "addu $t3, $t3, $t2"     "\n\t"  // $t3 = $t3 + n
148          "andi $t3, $t3, 0xFF"    "\n\t"  // $t3 = $t3 and 0xFF
149          "or   $t3, $t3, $t4"     "\n\t"  // $t3 = $t3 or $t4
150          "sc   $t3, 0($t1)"       "\n\t"
151          "sw   $t3, 8($t0)"       "\n\t"  // save result
152          : /*out*/
153          : /*in*/ "r"(&block[0])
154          : /*trash*/ "memory", "t0", "t1", "t2", "t3", "t4"
155       );
156    } while (block[2] != 1);
157 #elif defined (_MIPSEB)
158    unsigned int block[3]
159       = { (unsigned int)p, (unsigned int)n << 24, 0x0 };
160    do {
161       __asm__ __volatile__(
162          "move $t0, %0"          "\n\t"
163          "lw   $t1, 0($t0)"      "\n\t"  // p
164          "lw   $t2, 4($t0)"      "\n\t"  // n
165          "ll   $t3, 0($t1)"      "\n\t"
166          "addu $t3, $t3, $t2"    "\n\t"
167          "sc   $t3, 0($t1)"      "\n\t"
168          "sw   $t3, 8($t0)"      "\n\t"
169          : /*out*/
170          : /*in*/ "r"(&block[0])
171          : /*trash*/ "memory", "t0", "t1", "t2", "t3"
172       );
173    } while (block[2] != 1);
174 #endif
175 #elif defined(VGA_mips64)
176    /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
177       exception that can cause this function to fail. */
178 #if defined (_MIPSEL)
179    unsigned long block[3]
180       = { (unsigned long)p, (unsigned long)n, 0x0ULL };
181    do {
182       __asm__ __volatile__(
183          "move $t0, %0"           "\n\t"
184          "ld   $t1, 0($t0)"       "\n\t"  // p
185          "ld   $t2, 8($t0)"       "\n\t"  // n
186          "andi $t2, $t2, 0xFF"    "\n\t"  // n = n and 0xFF
187          "li   $s0, 0xFF"         "\n\t"
188          "nor  $s0, $s0, $zero"   "\n\t"  // $s0 = 0xFFFFFF00
189          "ll   $t3, 0($t1)"       "\n\t"  // $t3 = old value
190          "and  $s0, $s0, $t3"     "\n\t"  // $s0 = $t3 and 0xFFFFFF00
191          "addu $t3, $t3, $t2"     "\n\t"  // $t3 = $t3 + n
192          "andi $t3, $t3, 0xFF"    "\n\t"  // $t3 = $t3 and 0xFF
193          "or   $t3, $t3, $s0"     "\n\t"  // $t3 = $t3 or $s0
194          "sc   $t3, 0($t1)"       "\n\t"
195          "sw   $t3, 16($t0)"      "\n\t"  // save result
196          : /*out*/
197          : /*in*/ "r"(&block[0])
198          : /*trash*/ "memory", "t0", "t1", "t2", "t3", "s0"
199       );
200    } while (block[2] != 1);
201 #elif defined (_MIPSEB)
202    unsigned long block[3]
203       = { (unsigned long)p, (unsigned long)n << 56, 0x0 };
204    do {
205       __asm__ __volatile__(
206          "move  $t0, %0"          "\n\t"
207          "ld    $t1, 0($t0)"      "\n\t"  // p
208          "ld    $t2, 8($t0)"      "\n\t"  // n
209          "lld   $t3, 0($t1)"      "\n\t"
210          "daddu $t3, $t3, $t2"    "\n\t"
211          "scd   $t3, 0($t1)"      "\n\t"
212          "sd    $t3, 16($t0)"     "\n\t"
213          : /*out*/
214          : /*in*/ "r"(&block[0])
215          : /*trash*/ "memory", "t0", "t1", "t2", "t3"
216       );
217    } while (block[2] != 1);
218 #endif
219 #else
220 # error "Unsupported arch"
221 #endif
222 }


atomic_add_16bit(short * p,int n)225 __attribute__((noinline)) void atomic_add_16bit ( short* p, int n )
226 {
227 #if defined(VGA_x86)
228    unsigned long block[2];
229    block[0] = (unsigned long)p;
230    block[1] = n;
231    __asm__ __volatile__(
232       "movl 0(%%esi),%%eax"      "\n\t"
233       "movl 4(%%esi),%%ebx"      "\n\t"
234       "lock; addw %%bx,(%%eax)"  "\n"
235       : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
236    );
237 #elif defined(VGA_amd64)
238    unsigned long block[2];
239    block[0] = (unsigned long)p;
240    block[1] = n;
241    __asm__ __volatile__(
242       "movq 0(%%rsi),%%rax"      "\n\t"
243       "movq 8(%%rsi),%%rbx"      "\n\t"
244       "lock; addw %%bx,(%%rax)"  "\n"
245       : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
246    );
247 #elif defined(VGA_ppc32)
248    /* Nasty hack.  Does correctly atomically do *p += n, but only if p
249       is 8-aligned -- guaranteed by caller. */
250    unsigned long success;
251    do {
252       __asm__ __volatile__(
253          "lwarx  15,0,%1"    "\n\t"
254          "add    15,15,%2"   "\n\t"
255          "stwcx. 15,0,%1"    "\n\t"
256          "mfcr   %0"         "\n\t"
257          "srwi   %0,%0,29"   "\n\t"
258          "andi.  %0,%0,1"    "\n"
259          : /*out*/"=b"(success)
260          : /*in*/ "b"(p), "b"(((unsigned long)n) << 16)
261          : /*trash*/ "memory", "cc", "r15"
262       );
263    } while (success != 1);
264 #elif defined(VGA_ppc64)
265    /* Nasty hack.  Does correctly atomically do *p += n, but only if p
266       is 8-aligned -- guaranteed by caller. */
267    unsigned long success;
268    do {
269       __asm__ __volatile__(
270          "ldarx  15,0,%1"    "\n\t"
271          "add    15,15,%2"   "\n\t"
272          "stdcx. 15,0,%1"    "\n\t"
273          "mfcr   %0"         "\n\t"
274          "srwi   %0,%0,29"   "\n\t"
275          "andi.  %0,%0,1"    "\n"
276          : /*out*/"=b"(success)
277          : /*in*/ "b"(p), "b"(((unsigned long)n) << 48)
278          : /*trash*/ "memory", "cc", "r15"
279       );
280    } while (success != 1);
281 #elif defined(VGA_arm)
282    unsigned int block[3]
283       = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
284    do {
285       __asm__ __volatile__(
286          "mov    r5, %0"         "\n\t"
287          "ldr    r9, [r5, #0]"   "\n\t" // p
288          "ldr    r10, [r5, #4]"  "\n\t" // n
289          "ldrexh r8, [r9]"       "\n\t"
290          "add    r8, r8, r10"    "\n\t"
291          "strexh r4, r8, [r9]"   "\n\t"
292          "str    r4, [r5, #8]"   "\n\t"
293          : /*out*/
294          : /*in*/ "r"(&block[0])
295          : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
296       );
297    } while (block[2] != 0);
298 #elif defined(VGA_arm64)
299    unsigned long long int block[3]
300       = { (unsigned long long int)p, (unsigned long long int)n,
301           0xFFFFFFFFFFFFFFFFULL};
302    do {
303       __asm__ __volatile__(
304          "mov   x5, %0"         "\n\t"
305          "ldr   x9, [x5, #0]"   "\n\t" // p
306          "ldr   x10, [x5, #8]"  "\n\t" // n
307          "ldxrh w8, [x9]"       "\n\t"
308          "add   x8, x8, x10"    "\n\t"
309          "stxrh w4, w8, [x9]"    "\n\t"
310          "str   x4, [x5, #16]"   "\n\t"
311          : /*out*/
312          : /*in*/ "r"(&block[0])
313          : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
314       );
315    } while (block[2] != 0);
316 #elif defined(VGA_s390x)
317    int dummy;
318    __asm__ __volatile__(
319       "   l	0,%0\n\t"
320       "0: st	0,%1\n\t"
321       "   icm	1,3,%1\n\t"
322       "   ar	1,%2\n\t"
323       "   stcm  1,3,%1\n\t"
324       "   l     1,%1\n\t"
325       "   cs	0,1,%0\n\t"
326       "   jl    0b\n\t"
327       : "+m" (*p), "+m" (dummy)
328       : "d" (n)
329       : "cc", "memory", "0", "1");
330 #elif defined(VGA_mips32)
331    /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
332       exception that can cause this function to fail. */
333 #if defined (_MIPSEL)
334    unsigned int block[3]
335       = { (unsigned int)p, (unsigned int)n, 0x0 };
336    do {
337       __asm__ __volatile__(
338          "move $t0, %0"           "\n\t"
339          "lw   $t1, 0($t0)"       "\n\t"  // p
340          "lw   $t2, 4($t0)"       "\n\t"  // n
341          "andi $t2, $t2, 0xFFFF"  "\n\t"  // n = n and 0xFFFF
342          "li   $t4, 0xFFFF"       "\n\t"
343          "nor  $t4, $t4, $zero"   "\n\t"  // $t4 = 0xFFFF0000
344          "ll   $t3, 0($t1)"       "\n\t"  // $t3 = old value
345          "and  $t4, $t4, $t3"     "\n\t"  // $t4 = $t3 and 0xFFFF0000
346          "addu $t3, $t3, $t2"     "\n\t"  // $t3 = $t3 + n
347          "andi $t3, $t3, 0xFFFF"  "\n\t"  // $t3 = $t3 and 0xFFFF
348          "or   $t3, $t3, $t4"     "\n\t"  // $t3 = $t3 or $t4
349          "sc   $t3, 0($t1)"       "\n\t"
350          "sw   $t3, 8($t0)"       "\n\t"  // save result
351          : /*out*/
352          : /*in*/ "r"(&block[0])
353          : /*trash*/ "memory", "t0", "t1", "t2", "t3", "t4"
354       );
355    } while (block[2] != 1);
356 #elif defined (_MIPSEB)
357    unsigned int block[3]
358       = { (unsigned int)p, (unsigned int)n << 16, 0x0 };
359    do {
360       __asm__ __volatile__(
361          "move $t0, %0"          "\n\t"
362          "lw   $t1, 0($t0)"      "\n\t"  // p
363          "lw   $t2, 4($t0)"      "\n\t"  // n
364          "ll   $t3, 0($t1)"      "\n\t"
365          "addu $t3, $t3, $t2"    "\n\t"
366          "sc   $t3, 0($t1)"      "\n\t"
367          "sw   $t3, 8($t0)"      "\n\t"
368          : /*out*/
369          : /*in*/ "r"(&block[0])
370          : /*trash*/ "memory", "t0", "t1", "t2", "t3"
371       );
372    } while (block[2] != 1);
373 #endif
374 #elif defined(VGA_mips64)
375    /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
376       exception that can cause this function to fail. */
377 #if defined (_MIPSEL)
378    unsigned long block[3]
379       = { (unsigned long)p, (unsigned long)n, 0x0ULL };
380    do {
381       __asm__ __volatile__(
382          "move $t0, %0"           "\n\t"
383          "ld   $t1, 0($t0)"       "\n\t"  // p
384          "ld   $t2, 8($t0)"       "\n\t"  // n
385          "andi $t2, $t2, 0xFFFF"  "\n\t"  // n = n and 0xFFFF
386          "li   $s0, 0xFFFF"       "\n\t"
387          "nor  $s0, $s0, $zero"   "\n\t"  // $s0= 0xFFFF0000
388          "ll   $t3, 0($t1)"       "\n\t"  // $t3 = old value
389          "and  $s0, $s0, $t3"     "\n\t"  // $s0 = $t3 and 0xFFFF0000
390          "addu $t3, $t3, $t2"     "\n\t"  // $t3 = $t3 + n
391          "andi $t3, $t3, 0xFFFF"  "\n\t"  // $t3 = $t3 and 0xFFFF
392          "or   $t3, $t3, $s0"     "\n\t"  // $t3 = $t3 or $s0
393          "sc   $t3, 0($t1)"       "\n\t"
394          "sw   $t3, 16($t0)"      "\n\t"  // save result
395          : /*out*/
396          : /*in*/ "r"(&block[0])
397          : /*trash*/ "memory", "t0", "t1", "t2", "t3", "s0"
398       );
399    } while (block[2] != 1);
400 #elif defined (_MIPSEB)
401    unsigned long block[3]
402       = { (unsigned long)p, (unsigned long)n << 48, 0x0 };
403    do {
404       __asm__ __volatile__(
405          "move  $t0, %0"          "\n\t"
406          "ld    $t1, 0($t0)"      "\n\t"  // p
407          "ld    $t2, 8($t0)"      "\n\t"  // n
408          "lld   $t3, 0($t1)"      "\n\t"
409          "daddu $t3, $t3, $t2"    "\n\t"
410          "scd   $t3, 0($t1)"      "\n\t"
411          "sd    $t3, 16($t0)"     "\n\t"
412          : /*out*/
413          : /*in*/ "r"(&block[0])
414          : /*trash*/ "memory", "t0", "t1", "t2", "t3"
415       );
416    } while (block[2] != 1);
417 #endif
418 #else
419 # error "Unsupported arch"
420 #endif
421 }
atomic_add_32bit(int * p,int n)423 __attribute__((noinline)) void atomic_add_32bit ( int* p, int n )
424 {
425 #if defined(VGA_x86)
426    unsigned long block[2];
427    block[0] = (unsigned long)p;
428    block[1] = n;
429    __asm__ __volatile__(
430       "movl 0(%%esi),%%eax"       "\n\t"
431       "movl 4(%%esi),%%ebx"       "\n\t"
432       "lock; addl %%ebx,(%%eax)"  "\n"
433       : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
434    );
435 #elif defined(VGA_amd64)
436    unsigned long block[2];
437    block[0] = (unsigned long)p;
438    block[1] = n;
439    __asm__ __volatile__(
440       "movq 0(%%rsi),%%rax"       "\n\t"
441       "movq 8(%%rsi),%%rbx"       "\n\t"
442       "lock; addl %%ebx,(%%rax)"  "\n"
443       : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
444    );
445 #elif defined(VGA_ppc32)
446    unsigned long success;
447    do {
448       __asm__ __volatile__(
449          "lwarx  15,0,%1"    "\n\t"
450          "add    15,15,%2"   "\n\t"
451          "stwcx. 15,0,%1"    "\n\t"
452          "mfcr   %0"         "\n\t"
453          "srwi   %0,%0,29"   "\n\t"
454          "andi.  %0,%0,1"    "\n"
455          : /*out*/"=b"(success)
456          : /*in*/ "b"(p), "b"(n)
457          : /*trash*/ "memory", "cc", "r15"
458       );
459    } while (success != 1);
460 #elif defined(VGA_ppc64)
461    /* Nasty hack.  Does correctly atomically do *p += n, but only if p
462       is 8-aligned -- guaranteed by caller. */
463    unsigned long success;
464    do {
465       __asm__ __volatile__(
466          "ldarx  15,0,%1"    "\n\t"
467          "add    15,15,%2"   "\n\t"
468          "stdcx. 15,0,%1"    "\n\t"
469          "mfcr   %0"         "\n\t"
470          "srwi   %0,%0,29"   "\n\t"
471          "andi.  %0,%0,1"    "\n"
472          : /*out*/"=b"(success)
473          : /*in*/ "b"(p), "b"(((unsigned long)n) << 32)
474          : /*trash*/ "memory", "cc", "r15"
475       );
476    } while (success != 1);
477 #elif defined(VGA_arm)
478    unsigned int block[3]
479       = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
480    do {
481       __asm__ __volatile__(
482          "mov   r5, %0"         "\n\t"
483          "ldr   r9, [r5, #0]"   "\n\t" // p
484          "ldr   r10, [r5, #4]"  "\n\t" // n
485          "ldrex r8, [r9]"       "\n\t"
486          "add   r8, r8, r10"    "\n\t"
487          "strex r4, r8, [r9]"   "\n\t"
488          "str   r4, [r5, #8]"   "\n\t"
489          : /*out*/
490          : /*in*/ "r"(&block[0])
491          : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
492       );
493    } while (block[2] != 0);
494 #elif defined(VGA_arm64)
495    unsigned long long int block[3]
496       = { (unsigned long long int)p, (unsigned long long int)n,
497           0xFFFFFFFFFFFFFFFFULL};
498    do {
499       __asm__ __volatile__(
500          "mov   x5, %0"         "\n\t"
501          "ldr   x9, [x5, #0]"   "\n\t" // p
502          "ldr   x10, [x5, #8]"  "\n\t" // n
503          "ldxr  w8, [x9]"       "\n\t"
504          "add   x8, x8, x10"    "\n\t"
505          "stxr  w4, w8, [x9]"    "\n\t"
506          "str   x4, [x5, #16]"   "\n\t"
507          : /*out*/
508          : /*in*/ "r"(&block[0])
509          : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
510       );
511    } while (block[2] != 0);
512 #elif defined(VGA_s390x)
513    __asm__ __volatile__(
514       "   l	0,%0\n\t"
515       "0: lr	1,0\n\t"
516       "   ar	1,%1\n\t"
517       "   cs	0,1,%0\n\t"
518       "   jl    0b\n\t"
519       : "+m" (*p)
520       : "d" (n)
521       : "cc", "memory", "0", "1");
522 #elif defined(VGA_mips32)
523    unsigned int block[3]
524       = { (unsigned int)p, (unsigned int)n, 0x0 };
525    do {
526       __asm__ __volatile__(
527          "move $t0, %0"        "\n\t"
528          "lw   $t1, 0($t0)"    "\n\t"  // p
529          "lw   $t2, 4($t0)"    "\n\t"  // n
530          "ll   $t3, 0($t1)"    "\n\t"
531          "addu $t3, $t3, $t2"  "\n\t"
532          "sc   $t3, 0($t1)"    "\n\t"
533          "sw   $t3, 8($t0)"    "\n\t"
534          : /*out*/
535          : /*in*/ "r"(&block[0])
536          : /*trash*/ "memory", "t0", "t1", "t2", "t3"
537       );
538    } while (block[2] != 1);
539 #elif defined(VGA_mips64)
540    unsigned long block[3]
541       = { (unsigned long)p, (unsigned long)n, 0x0ULL };
542    do {
543       __asm__ __volatile__(
544          "move  $t0, %0"        "\n\t"
545          "ld    $t1, 0($t0)"    "\n\t"  // p
546          "ld    $t2, 8($t0)"    "\n\t"  // n
547          "ll    $t3, 0($t1)"    "\n\t"
548          "addu  $t3, $t3, $t2"  "\n\t"
549          "sc    $t3, 0($t1)"    "\n\t"
550          "sd    $t3, 16($t0)"   "\n\t"
551          : /*out*/
552          : /*in*/ "r"(&block[0])
553          : /*trash*/ "memory", "t0", "t1", "t2", "t3"
554       );
555    } while (block[2] != 1);
556 #else
557 # error "Unsupported arch"
558 #endif
559 }
atomic_add_64bit(long long int * p,int n)561 __attribute__((noinline)) void atomic_add_64bit ( long long int* p, int n )
562 {
563 #if defined(VGA_x86) || defined(VGA_ppc32) || defined(VGA_mips32)
564    /* do nothing; is not supported */
565 #elif defined(VGA_amd64)
566    // this is a bit subtle.  It relies on the fact that, on a 64-bit platform,
567    // sizeof(unsigned long long int) == sizeof(unsigned long) == sizeof(void*)
568    unsigned long long int block[2];
569    block[0] = (unsigned long long int)(unsigned long)p;
570    block[1] = n;
571    __asm__ __volatile__(
572       "movq 0(%%rsi),%%rax"      "\n\t"
573       "movq 8(%%rsi),%%rbx"      "\n\t"
574       "lock; addq %%rbx,(%%rax)" "\n"
575       : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
576    );
577 #elif defined(VGA_ppc64)
578    unsigned long success;
579    do {
580       __asm__ __volatile__(
581          "ldarx  15,0,%1"    "\n\t"
582          "add    15,15,%2"   "\n\t"
583          "stdcx. 15,0,%1"    "\n\t"
584          "mfcr   %0"         "\n\t"
585          "srwi   %0,%0,29"   "\n\t"
586          "andi.  %0,%0,1"    "\n"
587          : /*out*/"=b"(success)
588          : /*in*/ "b"(p), "b"(n)
589          : /*trash*/ "memory", "cc", "r15"
590       );
591    } while (success != 1);
592 #elif defined(VGA_arm)
593    unsigned long long int block[3]
594      = { (unsigned long long int)(unsigned long)p,
595          (unsigned long long int)n,
596          0xFFFFFFFFFFFFFFFFULL };
597    do {
598       __asm__ __volatile__(
599          "mov    r5, %0"             "\n\t"
600          "ldr    r8,     [r5, #0]"   "\n\t" // p
601          "ldrd   r2, r3, [r5, #8]"   "\n\t" // n
602          "ldrexd r0, r1, [r8]"       "\n\t"
603          "adds   r2, r2, r0"         "\n\t"
604          "adc    r3, r3, r1"         "\n\t"
605          "strexd r1, r2, r3, [r8]"   "\n\t"
606          "str    r1, [r5, #16]"      "\n\t"
607          : /*out*/
608          : /*in*/ "r"(&block[0])
609          : /*trash*/ "memory", "cc", "r5", "r0", "r1", "r8", "r2", "r3"
610       );
611    } while (block[2] != 0xFFFFFFFF00000000ULL);
612 #elif defined(VGA_arm64)
613    unsigned long long int block[3]
614       = { (unsigned long long int)p, (unsigned long long int)n,
615           0xFFFFFFFFFFFFFFFFULL};
616    do {
617       __asm__ __volatile__(
618          "mov   x5, %0"         "\n\t"
619          "ldr   x9, [x5, #0]"   "\n\t" // p
620          "ldr   x10, [x5, #8]"  "\n\t" // n
621          "ldxr  x8, [x9]"       "\n\t"
622          "add   x8, x8, x10"    "\n\t"
623          "stxr  w4, x8, [x9]"   "\n\t"
624          "str   x4, [x5, #16]"   "\n\t"
625          : /*out*/
626          : /*in*/ "r"(&block[0])
627          : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
628       );
629    } while (block[2] != 0);
630 #elif defined(VGA_s390x)
631    __asm__ __volatile__(
632       "   lg	0,%0\n\t"
633       "0: lgr	1,0\n\t"
634       "   agr	1,%1\n\t"
635       "   csg	0,1,%0\n\t"
636       "   jl    0b\n\t"
637       : "+m" (*p)
638       : "d" (n)
639       : "cc", "memory", "0", "1");
640 #elif defined(VGA_mips64)
641    unsigned long block[3]
642       = { (unsigned long)p, (unsigned long)n, 0x0ULL };
643    do {
644       __asm__ __volatile__(
645          "move  $t0, %0"        "\n\t"
646          "ld    $t1, 0($t0)"    "\n\t" // p
647          "ld    $t2, 8($t0)"    "\n\t" // n
648          "lld   $t3, 0($t1)"    "\n\t"
649          "daddu $t3, $t3, $t2"  "\n\t"
650          "scd   $t3, 0($t1)"    "\n\t"
651          "sd    $t3, 16($t0)"   "\n\t"
652          : /*out*/
653          : /*in*/ "r"(&block[0])
654          : /*trash*/ "memory", "t0", "t1", "t2", "t3"
655       );
656    } while (block[2] != 1);
657 #else
658 # error "Unsupported arch"
659 #endif
660 }
main(int argc,char ** argv)662 int main ( int argc, char** argv )
663 {
664    int    i, status;
665    char*  page;
666    char*  p8;
667    short* p16;
668    int*   p32;
669    long long int* p64;
670    pid_t  child, p2;
671 
672    printf("parent, pre-fork\n");
673 
674    page = mmap( 0, sysconf(_SC_PAGESIZE),
675                    PROT_READ|PROT_WRITE,
676                    MAP_ANONYMOUS|MAP_SHARED, -1, 0 );
677    if (page == MAP_FAILED) {
678       perror("mmap failed");
679       exit(1);
680    }
681 
682    p8  = (char*)(page+0);
683    p16 = (short*)(page+256);
684    p32 = (int*)(page+512);
685    p64 = (long long int*)(page+768);
686 
687    assert( IS_8_ALIGNED(p8) );
688    assert( IS_8_ALIGNED(p16) );
689    assert( IS_8_ALIGNED(p32) );
690    assert( IS_8_ALIGNED(p64) );
691 
692    memset(page, 0, 1024);
693 
694    *p8  = 0;
695    *p16 = 0;
696    *p32 = 0;
697    *p64 = 0;
698 
699    child = fork();
700    if (child == -1) {
701       perror("fork() failed\n");
702       return 1;
703    }
704 
705    if (child == 0) {
706       /* --- CHILD --- */
707       printf("child\n");
708       for (i = 0; i < NNN; i++) {
709          atomic_add_8bit(p8, 1);
710          atomic_add_16bit(p16, 1);
711          atomic_add_32bit(p32, 1);
712          atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
713       }
714       return 1;
715       /* NOTREACHED */
716 
717    }
718 
719    /* --- PARENT --- */
720 
721    printf("parent\n");
722 
723    for (i = 0; i < NNN; i++) {
724       atomic_add_8bit(p8, 1);
725       atomic_add_16bit(p16, 1);
726       atomic_add_32bit(p32, 1);
727       atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
728    }
729 
730    p2 = waitpid(child, &status, 0);
731    assert(p2 == child);
732 
733    /* assert that child finished normally */
734    assert(WIFEXITED(status));
735 
736    printf("FINAL VALUES:  8 bit %d,  16 bit %d,  32 bit %d,  64 bit %lld\n",
737           (int)(*(signed char*)p8), (int)(*p16), *p32, *p64 );
738 
739    if (-74 == (int)(*(signed char*)p8)
740        && 32694 == (int)(*p16)
741        && 6913974 == *p32
742        && (0LL == *p64 || 682858642110LL == *p64)) {
743       printf("PASS\n");
744    } else {
745       printf("FAIL -- see source code for expected values\n");
746    }
747 
748    printf("parent exits\n");
749 
750    return 0;
751 }