/* This is an example of a program which does atomic memory operations
   between two processes which share a page.  Valgrind 3.4.1 and
   earlier produce incorrect answers because they do not preserve
   atomicity of the relevant instructions in the generated code; but
   the post-DCAS-merge versions of Valgrind do behave correctly. */

/* On ARM, this can be compiled into either ARM or Thumb code, so as
   to test both A and T encodings of LDREX/STREX et al.  Also on ARM,
   it tests doubleword atomics (LDREXD, STREXD) which I don't think it
   does on any other platform. */
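/* Overall shape of the test: map one page shared between parent and child,
   fork, have both processes perform NNN atomic increments on an 8-, 16-,
   32- and 64-bit counter in that page, then check in the parent that no
   increment was lost. */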

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <unistd.h>
#include <sys/wait.h>
#include "tests/sys_mman.h"

#define NNN 3456987

#define IS_8_ALIGNED(_ptr)   (0 == (((unsigned long)(_ptr)) & 7))


__attribute__((noinline)) void atomic_add_8bit ( char* p, int n )
{
#if defined(VGA_x86)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"      "\n\t"
      "movl 4(%%esi),%%ebx"      "\n\t"
      "lock; addb %%bl,(%%eax)"  "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"      "\n\t"
      "movq 8(%%rsi),%%rbx"      "\n\t"
      "lock; addb %%bl,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 4-aligned -- guaranteed by caller. */
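   /* lwarx/stwcx. form a load-reserved/store-conditional pair on the whole
      4-byte word containing *p.  This is big-endian and p is word-aligned,
      so the addressed byte is the most significant byte of that word, and
      adding n << 24 increments just that byte.  stwcx. sets CR0.EQ on
      success; the mfcr/srwi/andi. sequence copies that bit into 'success',
      so the loop retries until the store-conditional goes through. */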
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stwcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 24)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64be)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 56)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64le)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n))
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
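   /* block[0] = p, block[1] = n; block[2] receives the STREXB status flag,
      which is 0 if the exclusive store succeeded and 1 if it was interrupted,
      so the loop below retries until the byte has been updated atomically. */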
   do {
      __asm__ __volatile__(
         "mov    r5, %0"         "\n\t"
         "ldr    r9, [r5, #0]"   "\n\t" // p
         "ldr    r10, [r5, #4]"  "\n\t" // n
         "ldrexb r8, [r9]"       "\n\t"
         "add    r8, r8, r10"    "\n\t"
         "strexb r4, r8, [r9]"   "\n\t"
         "str    r4, [r5, #8]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_arm64)
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov   x5, %0"         "\n\t"
         "ldr   x9, [x5, #0]"   "\n\t" // p
         "ldr   x10, [x5, #8]"  "\n\t" // n
         "ldxrb w8, [x9]"       "\n\t"
         "add   x8, x8, x10"    "\n\t"
         "stxrb w4, w8, [x9]"    "\n\t"
         "str   x4, [x5, #16]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   int dummy;
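   /* Compare-and-swap loop: load the containing 4-byte word into r0, copy it
      to 'dummy', patch the addressed byte via icm/stcm after adding n, then
      'cs' installs the updated word only if *p still holds the old value;
      otherwise it reloads r0 and the code retries from label 0. */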
   __asm__ __volatile__(
      "   l	0,%0\n\t"
      "0: st	0,%1\n\t"
      "   icm	1,1,%1\n\t"
      "   ar	1,%2\n\t"
      "   stcm  1,1,%1\n\t"
      "   l     1,%1\n\t"
      "   cs	0,1,%0\n\t"
      "   jl    0b\n\t"
      : "+m" (*p), "+m" (dummy)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
      exception that can cause this function to fail. */
#if defined (_MIPSEL)
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"           "\n\t"
         "lw   $t1, 0($t0)"       "\n\t"  // p
         "lw   $t2, 4($t0)"       "\n\t"  // n
         "andi $t2, $t2, 0xFF"    "\n\t"  // n = n and 0xFF
         "li   $t4, 0xFF"         "\n\t"
         "nor  $t4, $t4, $zero"   "\n\t"  // $t4 = 0xFFFFFF00
         "ll   $t3, 0($t1)"       "\n\t"  // $t3 = old value
         "and  $t4, $t4, $t3"     "\n\t"  // $t4 = $t3 and 0xFFFFFF00
         "addu $t3, $t3, $t2"     "\n\t"  // $t3 = $t3 + n
         "andi $t3, $t3, 0xFF"    "\n\t"  // $t3 = $t3 and 0xFF
         "or   $t3, $t3, $t4"     "\n\t"  // $t3 = $t3 or $t4
         "sc   $t3, 0($t1)"       "\n\t"
         "sw   $t3, 8($t0)"       "\n\t"  // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "t4"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n << 24, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"          "\n\t"
         "lw   $t1, 0($t0)"      "\n\t"  // p
         "lw   $t2, 4($t0)"      "\n\t"  // n
         "ll   $t3, 0($t1)"      "\n\t"
         "addu $t3, $t3, $t2"    "\n\t"
         "sc   $t3, 0($t1)"      "\n\t"
         "sw   $t3, 8($t0)"      "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#endif
#elif defined(VGA_mips64)
   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
      exception that can cause this function to fail. */
#if defined (_MIPSEL)
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move $t0, %0"           "\n\t"
         "ld   $t1, 0($t0)"       "\n\t"  // p
         "ld   $t2, 8($t0)"       "\n\t"  // n
         "andi $t2, $t2, 0xFF"    "\n\t"  // n = n and 0xFF
         "li   $s0, 0xFF"         "\n\t"
         "nor  $s0, $s0, $zero"   "\n\t"  // $s0 = 0xFFFFFF00
         "ll   $t3, 0($t1)"       "\n\t"  // $t3 = old value
         "and  $s0, $s0, $t3"     "\n\t"  // $s0 = $t3 and 0xFFFFFF00
         "addu $t3, $t3, $t2"     "\n\t"  // $t3 = $t3 + n
         "andi $t3, $t3, 0xFF"    "\n\t"  // $t3 = $t3 and 0xFF
         "or   $t3, $t3, $s0"     "\n\t"  // $t3 = $t3 or $s0
         "sc   $t3, 0($t1)"       "\n\t"
         "sw   $t3, 16($t0)"      "\n\t"  // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "s0"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n << 56, 0x0 };
   do {
      __asm__ __volatile__(
         "move  $t0, %0"          "\n\t"
         "ld    $t1, 0($t0)"      "\n\t"  // p
         "ld    $t2, 8($t0)"      "\n\t"  // n
         "lld   $t3, 0($t1)"      "\n\t"
         "daddu $t3, $t3, $t2"    "\n\t"
         "scd   $t3, 0($t1)"      "\n\t"
         "sd    $t3, 16($t0)"     "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#endif
#elif defined(VGA_tilegx)
   int i;
   unsigned int *p4 = (unsigned int *)(((unsigned long long)p + 3) & (~3ULL));
   unsigned int  mask = (0xff) << ((int)p & 3);
   unsigned int  add = (n & 0xff) << ((int)p & 3);
   unsigned int x, new;
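   /* There is no sub-word fetch-and-add on this target, so: read the aligned
      4-byte word holding *p, build the new word with only that byte changed,
      and install it with a 32-bit compare-and-exchange.  The mtspr of 0x2780
      (the CMPEXCH_VALUE SPR) supplies the expected old value for
      __insn_cmpexch4, and the loop retries until no other writer intervened.
      Only byte offset 0 is exercised here, since the callers pass 8-aligned
      pointers. */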

   while(1) {
      x = *p4;
      new = (x & (~mask)) | ((x + add) & mask);
      __insn_mtspr(0x2780, x);
      if ( __insn_cmpexch4(p4, new) == x)
         break;
   }
#else
# error "Unsupported arch"
#endif
}


__attribute__((noinline)) void atomic_add_16bit ( short* p, int n )
{
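   /* Identical in structure to atomic_add_8bit above, but operating on a
      16-bit halfword. */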
#if defined(VGA_x86)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"      "\n\t"
      "movl 4(%%esi),%%ebx"      "\n\t"
      "lock; addw %%bx,(%%eax)"  "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"      "\n\t"
      "movq 8(%%rsi),%%rbx"      "\n\t"
      "lock; addw %%bx,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stwcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 16)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64be)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 48)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64le)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n))
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov    r5, %0"         "\n\t"
         "ldr    r9, [r5, #0]"   "\n\t" // p
         "ldr    r10, [r5, #4]"  "\n\t" // n
         "ldrexh r8, [r9]"       "\n\t"
         "add    r8, r8, r10"    "\n\t"
         "strexh r4, r8, [r9]"   "\n\t"
         "str    r4, [r5, #8]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_arm64)
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov   x5, %0"         "\n\t"
         "ldr   x9, [x5, #0]"   "\n\t" // p
         "ldr   x10, [x5, #8]"  "\n\t" // n
         "ldxrh w8, [x9]"       "\n\t"
         "add   x8, x8, x10"    "\n\t"
         "stxrh w4, w8, [x9]"    "\n\t"
         "str   x4, [x5, #16]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   int dummy;
   __asm__ __volatile__(
      "   l	0,%0\n\t"
      "0: st	0,%1\n\t"
      "   icm	1,3,%1\n\t"
      "   ar	1,%2\n\t"
      "   stcm  1,3,%1\n\t"
      "   l     1,%1\n\t"
      "   cs	0,1,%0\n\t"
      "   jl    0b\n\t"
      : "+m" (*p), "+m" (dummy)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
      exception that can cause this function to fail. */
#if defined (_MIPSEL)
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"           "\n\t"
         "lw   $t1, 0($t0)"       "\n\t"  // p
         "lw   $t2, 4($t0)"       "\n\t"  // n
         "andi $t2, $t2, 0xFFFF"  "\n\t"  // n = n and 0xFFFF
         "li   $t4, 0xFFFF"       "\n\t"
         "nor  $t4, $t4, $zero"   "\n\t"  // $t4 = 0xFFFF0000
         "ll   $t3, 0($t1)"       "\n\t"  // $t3 = old value
         "and  $t4, $t4, $t3"     "\n\t"  // $t4 = $t3 and 0xFFFF0000
         "addu $t3, $t3, $t2"     "\n\t"  // $t3 = $t3 + n
         "andi $t3, $t3, 0xFFFF"  "\n\t"  // $t3 = $t3 and 0xFFFF
         "or   $t3, $t3, $t4"     "\n\t"  // $t3 = $t3 or $t4
         "sc   $t3, 0($t1)"       "\n\t"
         "sw   $t3, 8($t0)"       "\n\t"  // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "t4"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n << 16, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"          "\n\t"
         "lw   $t1, 0($t0)"      "\n\t"  // p
         "lw   $t2, 4($t0)"      "\n\t"  // n
         "ll   $t3, 0($t1)"      "\n\t"
         "addu $t3, $t3, $t2"    "\n\t"
         "sc   $t3, 0($t1)"      "\n\t"
         "sw   $t3, 8($t0)"      "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#endif
#elif defined(VGA_mips64)
   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
      exception that can cause this function to fail. */
#if defined (_MIPSEL)
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move $t0, %0"           "\n\t"
         "ld   $t1, 0($t0)"       "\n\t"  // p
         "ld   $t2, 8($t0)"       "\n\t"  // n
         "andi $t2, $t2, 0xFFFF"  "\n\t"  // n = n and 0xFFFF
         "li   $s0, 0xFFFF"       "\n\t"
         "nor  $s0, $s0, $zero"   "\n\t"  // $s0 = 0xFFFF0000
         "ll   $t3, 0($t1)"       "\n\t"  // $t3 = old value
         "and  $s0, $s0, $t3"     "\n\t"  // $s0 = $t3 and 0xFFFF0000
         "addu $t3, $t3, $t2"     "\n\t"  // $t3 = $t3 + n
         "andi $t3, $t3, 0xFFFF"  "\n\t"  // $t3 = $t3 and 0xFFFF
         "or   $t3, $t3, $s0"     "\n\t"  // $t3 = $t3 or $s0
         "sc   $t3, 0($t1)"       "\n\t"
         "sw   $t3, 16($t0)"      "\n\t"  // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "s0"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n << 48, 0x0 };
   do {
      __asm__ __volatile__(
         "move  $t0, %0"          "\n\t"
         "ld    $t1, 0($t0)"      "\n\t"  // p
         "ld    $t2, 8($t0)"      "\n\t"  // n
         "lld   $t3, 0($t1)"      "\n\t"
         "daddu $t3, $t3, $t2"    "\n\t"
         "scd   $t3, 0($t1)"      "\n\t"
         "sd    $t3, 16($t0)"     "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#endif
#elif defined(VGA_tilegx)
   int i;
   unsigned int *p4 = (unsigned int *)(((unsigned long long)p + 3) & (~3ULL));
   unsigned int  mask = (0xffff) << ((int)p & 3);
   unsigned int  add = (n & 0xffff) << ((int)p & 3);
   unsigned int x, new;

   while(1) {
      x = *p4;
      new = (x & (~mask)) | ((x + add) & mask);
      __insn_mtspr(0x2780, x);
      if ( __insn_cmpexch4(p4, new) == x)
         break;
   }
#else
# error "Unsupported arch"
#endif
}

__attribute__((noinline)) void atomic_add_32bit ( int* p, int n )
{
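   /* Same pattern once more, this time adding to a full 32-bit word. */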
#if defined(VGA_x86)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"       "\n\t"
      "movl 4(%%esi),%%ebx"       "\n\t"
      "lock; addl %%ebx,(%%eax)"  "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"       "\n\t"
      "movq 8(%%rsi),%%rbx"       "\n\t"
      "lock; addl %%ebx,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stwcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(n)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64be)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 32)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64le)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n))
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov   r5, %0"         "\n\t"
         "ldr   r9, [r5, #0]"   "\n\t" // p
         "ldr   r10, [r5, #4]"  "\n\t" // n
         "ldrex r8, [r9]"       "\n\t"
         "add   r8, r8, r10"    "\n\t"
         "strex r4, r8, [r9]"   "\n\t"
         "str   r4, [r5, #8]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_arm64)
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov   x5, %0"         "\n\t"
         "ldr   x9, [x5, #0]"   "\n\t" // p
         "ldr   x10, [x5, #8]"  "\n\t" // n
         "ldxr  w8, [x9]"       "\n\t"
         "add   x8, x8, x10"    "\n\t"
         "stxr  w4, w8, [x9]"    "\n\t"
         "str   x4, [x5, #16]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   __asm__ __volatile__(
      "   l	0,%0\n\t"
      "0: lr	1,0\n\t"
      "   ar	1,%1\n\t"
      "   cs	0,1,%0\n\t"
      "   jl    0b\n\t"
      : "+m" (*p)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"        "\n\t"
         "lw   $t1, 0($t0)"    "\n\t"  // p
         "lw   $t2, 4($t0)"    "\n\t"  // n
         "ll   $t3, 0($t1)"    "\n\t"
         "addu $t3, $t3, $t2"  "\n\t"
         "sc   $t3, 0($t1)"    "\n\t"
         "sw   $t3, 8($t0)"    "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#elif defined(VGA_mips64)
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move  $t0, %0"        "\n\t"
         "ld    $t1, 0($t0)"    "\n\t"  // p
         "ld    $t2, 8($t0)"    "\n\t"  // n
         "ll    $t3, 0($t1)"    "\n\t"
         "addu  $t3, $t3, $t2"  "\n\t"
         "sc    $t3, 0($t1)"    "\n\t"
         "sd    $t3, 16($t0)"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#elif defined(VGA_tilegx)
    __insn_fetchadd4(p, n);
#else
# error "Unsupported arch"
#endif
}

__attribute__((noinline)) void atomic_add_64bit ( long long int* p, int n )
{
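   /* 64-bit add.  On 32-bit targets with no doubleword atomics this is
      deliberately a no-op; the expected values checked in main() allow for
      that (*p64 may legitimately stay 0). */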
#if defined(VGA_x86) || defined(VGA_ppc32) || defined(VGA_mips32)
   /* do nothing; is not supported */
#elif defined(VGA_amd64)
   // this is a bit subtle.  It relies on the fact that, on a 64-bit platform,
   // sizeof(unsigned long long int) == sizeof(unsigned long) == sizeof(void*)
   unsigned long long int block[2];
   block[0] = (unsigned long long int)(unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"      "\n\t"
      "movq 8(%%rsi),%%rbx"      "\n\t"
      "lock; addq %%rbx,(%%rax)" "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(n)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   unsigned long long int block[3]
     = { (unsigned long long int)(unsigned long)p,
         (unsigned long long int)n,
         0xFFFFFFFFFFFFFFFFULL };
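   /* Doubleword LDREXD/STREXD case (the one the header comment mentions).
      The 32-bit STREXD status flag is stored into the low word of block[2];
      on little-endian ARM the high word keeps its initial 0xFFFFFFFF, so
      block[2] equals 0xFFFFFFFF00000000 exactly when the exclusive store
      succeeded with status 0. */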
   do {
      __asm__ __volatile__(
         "mov    r5, %0"             "\n\t"
         "ldr    r8,     [r5, #0]"   "\n\t" // p
         "ldrd   r2, r3, [r5, #8]"   "\n\t" // n
         "ldrexd r0, r1, [r8]"       "\n\t"
         "adds   r2, r2, r0"         "\n\t"
         "adc    r3, r3, r1"         "\n\t"
         "strexd r1, r2, r3, [r8]"   "\n\t"
         "str    r1, [r5, #16]"      "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r0", "r1", "r8", "r2", "r3"
      );
   } while (block[2] != 0xFFFFFFFF00000000ULL);
#elif defined(VGA_arm64)
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov   x5, %0"         "\n\t"
         "ldr   x9, [x5, #0]"   "\n\t" // p
         "ldr   x10, [x5, #8]"  "\n\t" // n
         "ldxr  x8, [x9]"       "\n\t"
         "add   x8, x8, x10"    "\n\t"
         "stxr  w4, x8, [x9]"   "\n\t"
         "str   x4, [x5, #16]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   __asm__ __volatile__(
      "   lg	0,%0\n\t"
      "0: lgr	1,0\n\t"
      "   agr	1,%1\n\t"
      "   csg	0,1,%0\n\t"
      "   jl    0b\n\t"
      : "+m" (*p)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips64)
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move  $t0, %0"        "\n\t"
         "ld    $t1, 0($t0)"    "\n\t" // p
         "ld    $t2, 8($t0)"    "\n\t" // n
         "lld   $t3, 0($t1)"    "\n\t"
         "daddu $t3, $t3, $t2"  "\n\t"
         "scd   $t3, 0($t1)"    "\n\t"
         "sd    $t3, 16($t0)"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#elif defined(VGA_tilegx)
    __insn_fetchadd(p, n);
#else
# error "Unsupported arch"
#endif
}

int main ( int argc, char** argv )
{
   int    i, status;
   char*  page;
   char*  p8;
   short* p16;
   int*   p32;
   long long int* p64;
   pid_t  child, p2;

   printf("parent, pre-fork\n");

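   /* One anonymous page mapped MAP_SHARED: after fork() the parent and the
      child update the same physical page, so their atomic adds race against
      each other. */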
   page = mmap( 0, sysconf(_SC_PAGESIZE),
                   PROT_READ|PROT_WRITE,
                   MAP_ANONYMOUS|MAP_SHARED, -1, 0 );
   if (page == MAP_FAILED) {
      perror("mmap failed");
      exit(1);
   }

   p8  = (char*)(page+0);
   p16 = (short*)(page+256);
   p32 = (int*)(page+512);
   p64 = (long long int*)(page+768);

   assert( IS_8_ALIGNED(p8) );
   assert( IS_8_ALIGNED(p16) );
   assert( IS_8_ALIGNED(p32) );
   assert( IS_8_ALIGNED(p64) );

   memset(page, 0, 1024);

   *p8  = 0;
   *p16 = 0;
   *p32 = 0;
   *p64 = 0;

   child = fork();
   if (child == -1) {
      perror("fork() failed\n");
      return 1;
   }

   if (child == 0) {
      /* --- CHILD --- */
      printf("child\n");
      for (i = 0; i < NNN; i++) {
         atomic_add_8bit(p8, 1);
         atomic_add_16bit(p16, 1);
         atomic_add_32bit(p32, 1);
         atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
      }
      return 1;
      /* NOTREACHED */

   }

   /* --- PARENT --- */

   printf("parent\n");

   for (i = 0; i < NNN; i++) {
      atomic_add_8bit(p8, 1);
      atomic_add_16bit(p16, 1);
      atomic_add_32bit(p32, 1);
      atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
   }

   p2 = waitpid(child, &status, 0);
   assert(p2 == child);

   /* assert that child finished normally */
   assert(WIFEXITED(status));

   printf("FINAL VALUES:  8 bit %d,  16 bit %d,  32 bit %d,  64 bit %lld\n",
          (int)(*(signed char*)p8), (int)(*p16), *p32, *p64 );

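   /* Expected totals: parent and child each perform NNN (3456987) additions,
      2*NNN = 6913974 in total.  Truncated to 8 bits that is 182, i.e. -74 as
      a signed char; truncated to 16 bits it is 32694; the 32-bit counter is
      6913974 exactly; and the 64-bit counter is 2*NNN*98765 = 682858642110
      (or 0 on targets where atomic_add_64bit is unsupported). */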
   if (-74 == (int)(*(signed char*)p8)
       && 32694 == (int)(*p16)
       && 6913974 == *p32
       && (0LL == *p64 || 682858642110LL == *p64)) {
      printf("PASS\n");
   } else {
      printf("FAIL -- see source code for expected values\n");
   }

   printf("parent exits\n");

   return 0;
}