• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
2 /* This is an example of a program which does atomic memory operations
3    between two processes which share a page.  Valgrind 3.4.1 and
4    earlier produce incorrect answers because it does not preserve
5    atomicity of the relevant instructions in the generated code; but
6    the post-DCAS-merge versions of Valgrind do behave correctly. */
7 
8 /* On ARM, this can be compiled into either ARM or Thumb code, so as
9    to test both A and T encodings of LDREX/STREX et al.  Also on ARM,
10    it tests doubleword atomics (LDREXD, STREXD) which I don't think it
11    does on any other platform. */
12 
13 #include <stdlib.h>
14 #include <stdio.h>
15 #include <string.h>
16 #include <assert.h>
17 #include <unistd.h>
18 #include <sys/wait.h>
19 #include "tests/sys_mman.h"
20 
21 #define NNN 3456987
22 
23 #define IS_8_ALIGNED(_ptr)   (0 == (((unsigned long)(_ptr)) & 7))
24 
25 
/* Atomically perform *p += n on a single byte, using each target
   architecture's native atomic primitive (x86 LOCK prefix, PPC/ARM/MIPS
   load-linked/store-conditional, s390x compare-and-swap).  noinline keeps
   each asm sequence a single recognisable unit for the tool under test. */
__attribute__((noinline)) void atomic_add_8bit ( char* p, int n )
{
#if defined(VGA_x86)
   /* p and n are passed to the asm through one memory block so that a
      single input register (esi) suffices. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"      "\n\t"
      "movl 4(%%esi),%%ebx"      "\n\t"
      "lock; addb %%bl,(%%eax)"  "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"      "\n\t"
      "movq 8(%%rsi),%%rbx"      "\n\t"
      "lock; addb %%bl,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 4-aligned -- guaranteed by caller. */
   /* n is pre-shifted into the most-significant byte of the word
      (n << 24) so the word-wide add touches only *p on this
      big-endian layout.  The mfcr/srwi/andi. sequence extracts
      CR0.EQ, which stwcx. sets to 1 on success. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stwcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 24)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64be)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   /* Same trick as ppc32, but doubleword-wide: n << 56 puts n in the
      most-significant (lowest-addressed, big-endian) byte. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 56)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64le)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   /* Little-endian: the addressed byte is the least significant one,
      so n needs no shift here. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n))
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* block = { p, n, strexb status }; status starts as all-ones and is
      overwritten by strexb's result: 0 = store succeeded, so retry
      until block[2] == 0. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov    r5, %0"         "\n\t"
         "ldr    r9, [r5, #0]"   "\n\t" // p
         "ldr    r10, [r5, #4]"  "\n\t" // n
         "ldrexb r8, [r9]"       "\n\t"
         "add    r8, r8, r10"    "\n\t"
         "strexb r4, r8, [r9]"   "\n\t"
         "str    r4, [r5, #8]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_arm64)
   /* Same scheme as the 32-bit ARM case, using ldxrb/stxrb; the stxrb
      status (0 = success) lands in block[2]. */
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov   x5, %0"         "\n\t"
         "ldr   x9, [x5, #0]"   "\n\t" // p
         "ldr   x10, [x5, #8]"  "\n\t" // n
         "ldxrb w8, [x9]"       "\n\t"
         "add   x8, x8, x10"    "\n\t"
         "stxrb w4, w8, [x9]"    "\n\t"
         "str   x4, [x5, #16]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* Compare-and-swap loop: stage the word in 'dummy', patch just one
      byte with icm/stcm (mask 1), then cs retries from label 0 if *p
      changed underneath us. */
   int dummy;
   __asm__ __volatile__(
      "   l	0,%0\n\t"
      "0: st	0,%1\n\t"
      "   icm	1,1,%1\n\t"
      "   ar	1,%2\n\t"
      "   stcm  1,1,%1\n\t"
      "   l     1,%1\n\t"
      "   cs	0,1,%0\n\t"
      "   jl    0b\n\t"
      : "+m" (*p), "+m" (dummy)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
      exception that can cause this function to fail. */
#if defined (_MIPSEL)
   /* block[2] receives the sc result: 1 = store succeeded. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"           "\n\t"
         "lw   $t1, 0($t0)"       "\n\t"  // p
         "lw   $t2, 4($t0)"       "\n\t"  // n
         "andi $t2, $t2, 0xFF"    "\n\t"  // n = n and 0xFF
         "li   $t4, 0xFF"         "\n\t"
         "nor  $t4, $t4, $zero"   "\n\t"  // $t4 = 0xFFFFFF00
         "ll   $t3, 0($t1)"       "\n\t"  // $t3 = old value
         "and  $t4, $t4, $t3"     "\n\t"  // $t4 = $t3 and 0xFFFFFF00
         "addu $t3, $t3, $t2"     "\n\t"  // $t3 = $t3 + n
         "andi $t3, $t3, 0xFF"    "\n\t"  // $t3 = $t3 and 0xFF
         "or   $t3, $t3, $t4"     "\n\t"  // $t3 = $t3 or $t4
         "sc   $t3, 0($t1)"       "\n\t"
         "sw   $t3, 8($t0)"       "\n\t"  // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "t4"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   /* Big-endian: n << 24 targets the addressed (most significant) byte
      of the word, so a plain word add suffices. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n << 24, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"          "\n\t"
         "lw   $t1, 0($t0)"      "\n\t"  // p
         "lw   $t2, 4($t0)"      "\n\t"  // n
         "ll   $t3, 0($t1)"      "\n\t"
         "addu $t3, $t3, $t2"    "\n\t"
         "sc   $t3, 0($t1)"      "\n\t"
         "sw   $t3, 8($t0)"      "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#endif
#elif defined(VGA_mips64)
   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
      exception that can cause this function to fail. */
#if defined (_MIPSEL)
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move $t0, %0"           "\n\t"
         "ld   $t1, 0($t0)"       "\n\t"  // p
         "ld   $t2, 8($t0)"       "\n\t"  // n
         "andi $t2, $t2, 0xFF"    "\n\t"  // n = n and 0xFF
         "li   $s0, 0xFF"         "\n\t"
         "nor  $s0, $s0, $zero"   "\n\t"  // $s0 = 0xFFFFFF00
         "ll   $t3, 0($t1)"       "\n\t"  // $t3 = old value
         "and  $s0, $s0, $t3"     "\n\t"  // $s0 = $t3 and 0xFFFFFF00
         "addu $t3, $t3, $t2"     "\n\t"  // $t3 = $t3 + n
         "andi $t3, $t3, 0xFF"    "\n\t"  // $t3 = $t3 and 0xFF
         "or   $t3, $t3, $s0"     "\n\t"  // $t3 = $t3 or $s0
         "sc   $t3, 0($t1)"       "\n\t"
         "sw   $t3, 16($t0)"      "\n\t"  // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "s0"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   /* NOTE(review): this branch uses doubleword lld/scd, which needs p
      8-aligned, stronger than the 4-aligned claim above -- the caller
      does guarantee 8-alignment, but confirm if reused elsewhere. */
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n << 56, 0x0 };
   do {
      __asm__ __volatile__(
         "move  $t0, %0"          "\n\t"
         "ld    $t1, 0($t0)"      "\n\t"  // p
         "ld    $t2, 8($t0)"      "\n\t"  // n
         "lld   $t3, 0($t1)"      "\n\t"
         "daddu $t3, $t3, $t2"    "\n\t"
         "scd   $t3, 0($t1)"      "\n\t"
         "sd    $t3, 16($t0)"     "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#endif
#else
# error "Unsupported arch"
#endif
}
240 
241 
/* Atomically perform *p += n on a halfword.  Structure mirrors
   atomic_add_8bit: per-architecture inline asm, with the halfword
   selected by a wider LOCK'd add (x86), a shifted addend inside an
   LL/SC word or doubleword (PPC/MIPS), halfword-exclusive loads
   (ARM ldrexh/ldxrh), or icm/stcm mask 3 (s390x). */
__attribute__((noinline)) void atomic_add_16bit ( short* p, int n )
{
#if defined(VGA_x86)
   /* p and n travel through one memory block so only esi is needed. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"      "\n\t"
      "movl 4(%%esi),%%ebx"      "\n\t"
      "lock; addw %%bx,(%%eax)"  "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"      "\n\t"
      "movq 8(%%rsi),%%rbx"      "\n\t"
      "lock; addw %%bx,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   /* n << 16 places the addend in the high (addressed, big-endian)
      halfword of the reservation word; CR0.EQ is extracted as the
      success flag. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stwcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 16)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64be)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   /* Doubleword variant: n << 48 targets the big-endian leading
      halfword. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 48)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64le)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   /* Little-endian: addressed halfword is the least significant, so n
      is used unshifted. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n))
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* block[2] receives the strexh status: 0 = success, retry otherwise. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov    r5, %0"         "\n\t"
         "ldr    r9, [r5, #0]"   "\n\t" // p
         "ldr    r10, [r5, #4]"  "\n\t" // n
         "ldrexh r8, [r9]"       "\n\t"
         "add    r8, r8, r10"    "\n\t"
         "strexh r4, r8, [r9]"   "\n\t"
         "str    r4, [r5, #8]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_arm64)
   /* ldxrh/stxrh halfword-exclusive pair; stxrh status 0 = success. */
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov   x5, %0"         "\n\t"
         "ldr   x9, [x5, #0]"   "\n\t" // p
         "ldr   x10, [x5, #8]"  "\n\t" // n
         "ldxrh w8, [x9]"       "\n\t"
         "add   x8, x8, x10"    "\n\t"
         "stxrh w4, w8, [x9]"    "\n\t"
         "str   x4, [x5, #16]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* Compare-and-swap loop as in the 8-bit case, but icm/stcm mask 3
      selects the two rightmost bytes (one halfword). */
   int dummy;
   __asm__ __volatile__(
      "   l	0,%0\n\t"
      "0: st	0,%1\n\t"
      "   icm	1,3,%1\n\t"
      "   ar	1,%2\n\t"
      "   stcm  1,3,%1\n\t"
      "   l     1,%1\n\t"
      "   cs	0,1,%0\n\t"
      "   jl    0b\n\t"
      : "+m" (*p), "+m" (dummy)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
      exception that can cause this function to fail. */
#if defined (_MIPSEL)
   /* block[2] receives the sc result: 1 = store succeeded. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"           "\n\t"
         "lw   $t1, 0($t0)"       "\n\t"  // p
         "lw   $t2, 4($t0)"       "\n\t"  // n
         "andi $t2, $t2, 0xFFFF"  "\n\t"  // n = n and 0xFFFF
         "li   $t4, 0xFFFF"       "\n\t"
         "nor  $t4, $t4, $zero"   "\n\t"  // $t4 = 0xFFFF0000
         "ll   $t3, 0($t1)"       "\n\t"  // $t3 = old value
         "and  $t4, $t4, $t3"     "\n\t"  // $t4 = $t3 and 0xFFFF0000
         "addu $t3, $t3, $t2"     "\n\t"  // $t3 = $t3 + n
         "andi $t3, $t3, 0xFFFF"  "\n\t"  // $t3 = $t3 and 0xFFFF
         "or   $t3, $t3, $t4"     "\n\t"  // $t3 = $t3 or $t4
         "sc   $t3, 0($t1)"       "\n\t"
         "sw   $t3, 8($t0)"       "\n\t"  // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "t4"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   /* Big-endian: n << 16 targets the addressed halfword of the word. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n << 16, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"          "\n\t"
         "lw   $t1, 0($t0)"      "\n\t"  // p
         "lw   $t2, 4($t0)"      "\n\t"  // n
         "ll   $t3, 0($t1)"      "\n\t"
         "addu $t3, $t3, $t2"    "\n\t"
         "sc   $t3, 0($t1)"      "\n\t"
         "sw   $t3, 8($t0)"      "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#endif
#elif defined(VGA_mips64)
   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
      exception that can cause this function to fail. */
#if defined (_MIPSEL)
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move $t0, %0"           "\n\t"
         "ld   $t1, 0($t0)"       "\n\t"  // p
         "ld   $t2, 8($t0)"       "\n\t"  // n
         "andi $t2, $t2, 0xFFFF"  "\n\t"  // n = n and 0xFFFF
         "li   $s0, 0xFFFF"       "\n\t"
         "nor  $s0, $s0, $zero"   "\n\t"  // $s0= 0xFFFF0000
         "ll   $t3, 0($t1)"       "\n\t"  // $t3 = old value
         "and  $s0, $s0, $t3"     "\n\t"  // $s0 = $t3 and 0xFFFF0000
         "addu $t3, $t3, $t2"     "\n\t"  // $t3 = $t3 + n
         "andi $t3, $t3, 0xFFFF"  "\n\t"  // $t3 = $t3 and 0xFFFF
         "or   $t3, $t3, $s0"     "\n\t"  // $t3 = $t3 or $s0
         "sc   $t3, 0($t1)"       "\n\t"
         "sw   $t3, 16($t0)"      "\n\t"  // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "s0"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   /* NOTE(review): lld/scd require 8-alignment of p, stronger than the
      4-aligned claim above -- guaranteed by this test's caller. */
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n << 48, 0x0 };
   do {
      __asm__ __volatile__(
         "move  $t0, %0"          "\n\t"
         "ld    $t1, 0($t0)"      "\n\t"  // p
         "ld    $t2, 8($t0)"      "\n\t"  // n
         "lld   $t3, 0($t1)"      "\n\t"
         "daddu $t3, $t3, $t2"    "\n\t"
         "scd   $t3, 0($t1)"      "\n\t"
         "sd    $t3, 16($t0)"     "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#endif
#else
# error "Unsupported arch"
#endif
}
456 
/* Atomically perform *p += n on a 32-bit word.  Simpler than the 8/16
   bit cases since every target has a native word-sized atomic: x86
   lock;addl, PPC lwarx/stwcx. (or ldarx/stdcx. with a shifted addend),
   ARM ldrex/strex, s390x cs, MIPS ll/sc. */
__attribute__((noinline)) void atomic_add_32bit ( int* p, int n )
{
#if defined(VGA_x86)
   /* p and n travel through one memory block so only esi is needed. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"       "\n\t"
      "movl 4(%%esi),%%ebx"       "\n\t"
      "lock; addl %%ebx,(%%eax)"  "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"       "\n\t"
      "movq 8(%%rsi),%%rbx"       "\n\t"
      "lock; addl %%ebx,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Word-sized LL/SC; CR0.EQ (stwcx. success) is extracted via
      mfcr/srwi/andi. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stwcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(n)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64be)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   /* Doubleword LL/SC with n << 32 so the add lands in the addressed
      (big-endian, upper) word. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 32)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64le)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   /* Little-endian: addressed word is the least significant, so n is
      used unshifted. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n))
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* block[2] receives the strex status: 0 = success, retry otherwise. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov   r5, %0"         "\n\t"
         "ldr   r9, [r5, #0]"   "\n\t" // p
         "ldr   r10, [r5, #4]"  "\n\t" // n
         "ldrex r8, [r9]"       "\n\t"
         "add   r8, r8, r10"    "\n\t"
         "strex r4, r8, [r9]"   "\n\t"
         "str   r4, [r5, #8]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_arm64)
   /* ldxr/stxr word pair; stxr status 0 = success. */
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov   x5, %0"         "\n\t"
         "ldr   x9, [x5, #0]"   "\n\t" // p
         "ldr   x10, [x5, #8]"  "\n\t" // n
         "ldxr  w8, [x9]"       "\n\t"
         "add   x8, x8, x10"    "\n\t"
         "stxr  w4, w8, [x9]"    "\n\t"
         "str   x4, [x5, #16]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* Classic cs loop: reload, add, compare-and-swap, retry on mismatch. */
   __asm__ __volatile__(
      "   l	0,%0\n\t"
      "0: lr	1,0\n\t"
      "   ar	1,%1\n\t"
      "   cs	0,1,%0\n\t"
      "   jl    0b\n\t"
      : "+m" (*p)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
   /* block[2] receives the sc result: 1 = store succeeded. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"        "\n\t"
         "lw   $t1, 0($t0)"    "\n\t"  // p
         "lw   $t2, 4($t0)"    "\n\t"  // n
         "ll   $t3, 0($t1)"    "\n\t"
         "addu $t3, $t3, $t2"  "\n\t"
         "sc   $t3, 0($t1)"    "\n\t"
         "sw   $t3, 8($t0)"    "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#elif defined(VGA_mips64)
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move  $t0, %0"        "\n\t"
         "ld    $t1, 0($t0)"    "\n\t"  // p
         "ld    $t2, 8($t0)"    "\n\t"  // n
         "ll    $t3, 0($t1)"    "\n\t"
         "addu  $t3, $t3, $t2"  "\n\t"
         "sc    $t3, 0($t1)"    "\n\t"
         "sd    $t3, 16($t0)"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#else
# error "Unsupported arch"
#endif
}
611 
/* Atomically perform *p += n on a 64-bit doubleword.  On 32-bit
   platforms without doubleword atomics (x86, ppc32, mips32) this is a
   deliberate no-op; the test's PASS check accepts *p64 == 0 for that
   reason.  ARM exercises the doubleword pair LDREXD/STREXD. */
__attribute__((noinline)) void atomic_add_64bit ( long long int* p, int n )
{
#if defined(VGA_x86) || defined(VGA_ppc32) || defined(VGA_mips32)
   /* do nothing; is not supported */
#elif defined(VGA_amd64)
   // this is a bit subtle.  It relies on the fact that, on a 64-bit platform,
   // sizeof(unsigned long long int) == sizeof(unsigned long) == sizeof(void*)
   unsigned long long int block[2];
   block[0] = (unsigned long long int)(unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"      "\n\t"
      "movq 8(%%rsi),%%rbx"      "\n\t"
      "lock; addq %%rbx,(%%rax)" "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   /* Doubleword LL/SC; no shift needed since the operand is the whole
      doubleword.  CR0.EQ (stdcx. success) is extracted as before. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(n)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* 64-bit add on 32-bit ARM via LDREXD/STREXD with an adds/adc
      carry-propagating pair.  The 32-bit strexd status (0 = success)
      is stored into the LOW word of block[2], leaving the initial
      0xFFFFFFFF in the high word; so on success block[2] reads back as
      0xFFFFFFFF00000000.  NOTE(review): this encoding assumes a
      little-endian word order in memory -- confirm if ever built
      big-endian. */
   unsigned long long int block[3]
     = { (unsigned long long int)(unsigned long)p,
         (unsigned long long int)n,
         0xFFFFFFFFFFFFFFFFULL };
   do {
      __asm__ __volatile__(
         "mov    r5, %0"             "\n\t"
         "ldr    r8,     [r5, #0]"   "\n\t" // p
         "ldrd   r2, r3, [r5, #8]"   "\n\t" // n
         "ldrexd r0, r1, [r8]"       "\n\t"
         "adds   r2, r2, r0"         "\n\t"
         "adc    r3, r3, r1"         "\n\t"
         "strexd r1, r2, r3, [r8]"   "\n\t"
         "str    r1, [r5, #16]"      "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r0", "r1", "r8", "r2", "r3"
      );
   } while (block[2] != 0xFFFFFFFF00000000ULL);
#elif defined(VGA_arm64)
   /* ldxr/stxr doubleword pair; stxr status 0 = success. */
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov   x5, %0"         "\n\t"
         "ldr   x9, [x5, #0]"   "\n\t" // p
         "ldr   x10, [x5, #8]"  "\n\t" // n
         "ldxr  x8, [x9]"       "\n\t"
         "add   x8, x8, x10"    "\n\t"
         "stxr  w4, x8, [x9]"   "\n\t"
         "str   x4, [x5, #16]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* 64-bit cs loop using the g-suffixed (grande) forms: lg/lgr/agr/csg. */
   __asm__ __volatile__(
      "   lg	0,%0\n\t"
      "0: lgr	1,0\n\t"
      "   agr	1,%1\n\t"
      "   csg	0,1,%0\n\t"
      "   jl    0b\n\t"
      : "+m" (*p)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips64)
   /* Doubleword LL/SC (lld/scd); block[2] gets the scd result, 1 =
      store succeeded. */
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move  $t0, %0"        "\n\t"
         "ld    $t1, 0($t0)"    "\n\t" // p
         "ld    $t2, 8($t0)"    "\n\t" // n
         "lld   $t3, 0($t1)"    "\n\t"
         "daddu $t3, $t3, $t2"  "\n\t"
         "scd   $t3, 0($t1)"    "\n\t"
         "sd    $t3, 16($t0)"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#else
# error "Unsupported arch"
#endif
}
712 
main(int argc,char ** argv)713 int main ( int argc, char** argv )
714 {
715    int    i, status;
716    char*  page;
717    char*  p8;
718    short* p16;
719    int*   p32;
720    long long int* p64;
721    pid_t  child, p2;
722 
723    printf("parent, pre-fork\n");
724 
725    page = mmap( 0, sysconf(_SC_PAGESIZE),
726                    PROT_READ|PROT_WRITE,
727                    MAP_ANONYMOUS|MAP_SHARED, -1, 0 );
728    if (page == MAP_FAILED) {
729       perror("mmap failed");
730       exit(1);
731    }
732 
733    p8  = (char*)(page+0);
734    p16 = (short*)(page+256);
735    p32 = (int*)(page+512);
736    p64 = (long long int*)(page+768);
737 
738    assert( IS_8_ALIGNED(p8) );
739    assert( IS_8_ALIGNED(p16) );
740    assert( IS_8_ALIGNED(p32) );
741    assert( IS_8_ALIGNED(p64) );
742 
743    memset(page, 0, 1024);
744 
745    *p8  = 0;
746    *p16 = 0;
747    *p32 = 0;
748    *p64 = 0;
749 
750    child = fork();
751    if (child == -1) {
752       perror("fork() failed\n");
753       return 1;
754    }
755 
756    if (child == 0) {
757       /* --- CHILD --- */
758       printf("child\n");
759       for (i = 0; i < NNN; i++) {
760          atomic_add_8bit(p8, 1);
761          atomic_add_16bit(p16, 1);
762          atomic_add_32bit(p32, 1);
763          atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
764       }
765       return 1;
766       /* NOTREACHED */
767 
768    }
769 
770    /* --- PARENT --- */
771 
772    printf("parent\n");
773 
774    for (i = 0; i < NNN; i++) {
775       atomic_add_8bit(p8, 1);
776       atomic_add_16bit(p16, 1);
777       atomic_add_32bit(p32, 1);
778       atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
779    }
780 
781    p2 = waitpid(child, &status, 0);
782    assert(p2 == child);
783 
784    /* assert that child finished normally */
785    assert(WIFEXITED(status));
786 
787    printf("FINAL VALUES:  8 bit %d,  16 bit %d,  32 bit %d,  64 bit %lld\n",
788           (int)(*(signed char*)p8), (int)(*p16), *p32, *p64 );
789 
790    if (-74 == (int)(*(signed char*)p8)
791        && 32694 == (int)(*p16)
792        && 6913974 == *p32
793        && (0LL == *p64 || 682858642110LL == *p64)) {
794       printf("PASS\n");
795    } else {
796       printf("FAIL -- see source code for expected values\n");
797    }
798 
799    printf("parent exits\n");
800 
801    return 0;
802 }
803