
/* This is an example of a program which does atomic memory operations
   between two processes which share a page.  Valgrind 3.4.1 and
   earlier produce incorrect answers because it does not preserve
   atomicity of the relevant instructions in the generated code; but
   the post-DCAS-merge versions of Valgrind do behave correctly. */

/* On ARM, this can be compiled into either ARM or Thumb code, so as
   to test both A and T encodings of LDREX/STREX et al.  Also on ARM,
   it tests doubleword atomics (LDREXD, STREXD) which I don't think it
   does on any other platform. */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <unistd.h>
#include <sys/wait.h>
#include "tests/sys_mman.h"

#define NNN 3456987

#define IS_8_ALIGNED(_ptr)   (0 == (((unsigned long)(_ptr)) & 7))


atomic_add_8bit(char * p,int n)26 __attribute__((noinline)) void atomic_add_8bit ( char* p, int n )
27 {
28 #if defined(VGA_x86)
29    unsigned long block[2];
30    block[0] = (unsigned long)p;
31    block[1] = n;
32    __asm__ __volatile__(
33       "movl 0(%%esi),%%eax"      "\n\t"
34       "movl 4(%%esi),%%ebx"      "\n\t"
35       "lock; addb %%bl,(%%eax)"  "\n"
36       : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
37    );
38 #elif defined(VGA_amd64)
39    unsigned long block[2];
40    block[0] = (unsigned long)p;
41    block[1] = n;
42    __asm__ __volatile__(
43       "movq 0(%%rsi),%%rax"      "\n\t"
44       "movq 8(%%rsi),%%rbx"      "\n\t"
45       "lock; addb %%bl,(%%rax)"  "\n"
46       : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
47    );
48 #elif defined(VGA_ppc32)
49    /* Nasty hack.  Does correctly atomically do *p += n, but only if p
50       is 4-aligned -- guaranteed by caller. */
51    unsigned long success;
52    do {
53       __asm__ __volatile__(
54          "lwarx  15,0,%1"    "\n\t"
55          "add    15,15,%2"   "\n\t"
56          "stwcx. 15,0,%1"    "\n\t"
57          "mfcr   %0"         "\n\t"
58          "srwi   %0,%0,29"   "\n\t"
59          "andi.  %0,%0,1"    "\n"
60          : /*out*/"=b"(success)
61          : /*in*/ "b"(p), "b"(((unsigned long)n) << 24)
62          : /*trash*/ "memory", "cc", "r15"
63       );
64    } while (success != 1);
65 #elif defined(VGA_ppc64)
66    /* Nasty hack.  Does correctly atomically do *p += n, but only if p
67       is 8-aligned -- guaranteed by caller. */
68    unsigned long success;
69    do {
70       __asm__ __volatile__(
71          "ldarx  15,0,%1"    "\n\t"
72          "add    15,15,%2"   "\n\t"
73          "stdcx. 15,0,%1"    "\n\t"
74          "mfcr   %0"         "\n\t"
75          "srwi   %0,%0,29"   "\n\t"
76          "andi.  %0,%0,1"    "\n"
77          : /*out*/"=b"(success)
78          : /*in*/ "b"(p), "b"(((unsigned long)n) << 56)
79          : /*trash*/ "memory", "cc", "r15"
80       );
81    } while (success != 1);
82 #elif defined(VGA_arm)
83    unsigned int block[3]
84       = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
85    do {
86       __asm__ __volatile__(
87          "mov    r5, %0"         "\n\t"
88          "ldr    r9, [r5, #0]"   "\n\t" // p
89          "ldr    r10, [r5, #4]"  "\n\t" // n
90          "ldrexb r8, [r9]"       "\n\t"
91          "add    r8, r8, r10"    "\n\t"
92          "strexb r4, r8, [r9]"   "\n\t"
93          "str    r4, [r5, #8]"   "\n\t"
94          : /*out*/
95          : /*in*/ "r"(&block[0])
96          : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
97       );
98    } while (block[2] != 0);
99 #elif defined(VGA_s390x)
100    int dummy;
101    __asm__ __volatile__(
102       "   l	0,%0\n\t"
103       "0: st	0,%1\n\t"
104       "   icm	1,1,%1\n\t"
105       "   ar	1,%2\n\t"
106       "   stcm  1,1,%1\n\t"
107       "   l     1,%1\n\t"
108       "   cs	0,1,%0\n\t"
109       "   jl    0b\n\t"
110       : "+m" (*p), "+m" (dummy)
111       : "d" (n)
112       : "cc", "memory", "0", "1");
113 #elif defined(VGA_mips32)
114 #if defined (_MIPSEL)
115    unsigned int block[3]
116       = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
117    do {
118       __asm__ __volatile__(
119          "move   $t0, %0"         "\n\t"
120          "lw   $t1, 0($t0)"       "\n\t" // p
121          "lw   $t2, 4($t0)"       "\n\t" // n
122          "ll   $t3, 0($t1)"       "\n\t"
123          "addu   $t3, $t3, $t2"   "\n\t"
124          "andi   $t3, $t3, 0xFF"  "\n\t"
125          "sc   $t3, 0($t1)"       "\n\t"
126          "sw $t3, 8($t0)"         "\n\t"
127          : /*out*/
128          : /*in*/ "r"(&block[0])
129          : /*trash*/ "memory", "cc", "t0", "t1", "t2", "t3"
130       );
131    } while (block[2] != 1);
132 #elif defined (_MIPSEB)
133    unsigned int block[3]
134       = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
135    do {
136       __asm__ __volatile__(
137          "move   $t0, %0"               "\n\t"
138          "lw   $t1, 0($t0)"             "\n\t" // p
139          "lw   $t2, 4($t0)"             "\n\t" // n
140          "li   $t4, 0x000000FF"         "\n\t"
141          "ll   $t3, 0($t1)"             "\n\t"
142          "addu $t3, $t3, $t2"           "\n\t"
143          "and  $t3, $t3, $t4"           "\n\t"
144          "wsbh $t4, $t3"                "\n\t"
145          "rotr $t4, $t4, 16"            "\n\t"
146          "or   $t3, $t4, $t3"           "\n\t"
147          "sc   $t3, 0($t1)"             "\n\t"
148          "sw $t3, 8($t0)"               "\n\t"
149          : /*out*/
150          : /*in*/ "r"(&block[0])
151          : /*trash*/ "memory", "cc", "t0", "t1", "t2", "t3", "t4"
152       );
153    } while (block[2] != 1);
154 #endif
155 #else
156 # error "Unsupported arch"
157 #endif
158 }
159 
160 
atomic_add_16bit(short * p,int n)161 __attribute__((noinline)) void atomic_add_16bit ( short* p, int n )
162 {
163 #if defined(VGA_x86)
164    unsigned long block[2];
165    block[0] = (unsigned long)p;
166    block[1] = n;
167    __asm__ __volatile__(
168       "movl 0(%%esi),%%eax"      "\n\t"
169       "movl 4(%%esi),%%ebx"      "\n\t"
170       "lock; addw %%bx,(%%eax)"  "\n"
171       : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
172    );
173 #elif defined(VGA_amd64)
174    unsigned long block[2];
175    block[0] = (unsigned long)p;
176    block[1] = n;
177    __asm__ __volatile__(
178       "movq 0(%%rsi),%%rax"      "\n\t"
179       "movq 8(%%rsi),%%rbx"      "\n\t"
180       "lock; addw %%bx,(%%rax)"  "\n"
181       : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
182    );
183 #elif defined(VGA_ppc32)
184    /* Nasty hack.  Does correctly atomically do *p += n, but only if p
185       is 8-aligned -- guaranteed by caller. */
186    unsigned long success;
187    do {
188       __asm__ __volatile__(
189          "lwarx  15,0,%1"    "\n\t"
190          "add    15,15,%2"   "\n\t"
191          "stwcx. 15,0,%1"    "\n\t"
192          "mfcr   %0"         "\n\t"
193          "srwi   %0,%0,29"   "\n\t"
194          "andi.  %0,%0,1"    "\n"
195          : /*out*/"=b"(success)
196          : /*in*/ "b"(p), "b"(((unsigned long)n) << 16)
197          : /*trash*/ "memory", "cc", "r15"
198       );
199    } while (success != 1);
200 #elif defined(VGA_ppc64)
201    /* Nasty hack.  Does correctly atomically do *p += n, but only if p
202       is 8-aligned -- guaranteed by caller. */
203    unsigned long success;
204    do {
205       __asm__ __volatile__(
206          "ldarx  15,0,%1"    "\n\t"
207          "add    15,15,%2"   "\n\t"
208          "stdcx. 15,0,%1"    "\n\t"
209          "mfcr   %0"         "\n\t"
210          "srwi   %0,%0,29"   "\n\t"
211          "andi.  %0,%0,1"    "\n"
212          : /*out*/"=b"(success)
213          : /*in*/ "b"(p), "b"(((unsigned long)n) << 48)
214          : /*trash*/ "memory", "cc", "r15"
215       );
216    } while (success != 1);
217 #elif defined(VGA_arm)
218    unsigned int block[3]
219       = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
220    do {
221       __asm__ __volatile__(
222          "mov    r5, %0"         "\n\t"
223          "ldr    r9, [r5, #0]"   "\n\t" // p
224          "ldr    r10, [r5, #4]"  "\n\t" // n
225          "ldrexh r8, [r9]"       "\n\t"
226          "add    r8, r8, r10"    "\n\t"
227          "strexh r4, r8, [r9]"   "\n\t"
228          "str    r4, [r5, #8]"   "\n\t"
229          : /*out*/
230          : /*in*/ "r"(&block[0])
231          : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
232       );
233    } while (block[2] != 0);
234 #elif defined(VGA_s390x)
235    int dummy;
236    __asm__ __volatile__(
237       "   l	0,%0\n\t"
238       "0: st	0,%1\n\t"
239       "   icm	1,3,%1\n\t"
240       "   ar	1,%2\n\t"
241       "   stcm  1,3,%1\n\t"
242       "   l     1,%1\n\t"
243       "   cs	0,1,%0\n\t"
244       "   jl    0b\n\t"
245       : "+m" (*p), "+m" (dummy)
246       : "d" (n)
247       : "cc", "memory", "0", "1");
248 #elif defined(VGA_mips32)
249 #if defined (_MIPSEL)
250    unsigned int block[3]
251       = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
252    do {
253       __asm__ __volatile__(
254          "move   $t0, %0"         "\n\t"
255          "lw   $t1, 0($t0)"       "\n\t" // p
256          "lw   $t2, 4($t0)"       "\n\t" // n
257          "ll   $t3, 0($t1)"       "\n\t"
258          "addu   $t3, $t3, $t2"   "\n\t"
259          "andi   $t3, $t3, 0xFFFF"  "\n\t"
260          "sc   $t3, 0($t1)"       "\n\t"
261          "sw $t3, 8($t0)"         "\n\t"
262          : /*out*/
263          : /*in*/ "r"(&block[0])
264          : /*trash*/ "memory", "cc", "t0", "t1", "t2", "t3"
265       );
266    } while (block[2] != 1);
267 #elif defined (_MIPSEB)
268    unsigned int block[3]
269       = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
270    do {
271       __asm__ __volatile__(
272          "move   $t0, %0"         "\n\t"
273          "lw   $t1, 0($t0)"       "\n\t" // p
274          "li   $t2, 32694"        "\n\t" // n
275          "li   $t3, 0x1"          "\n\t"
276          "sll  $t2, $t2, 16"      "\n\t"
277          "sw   $t2, 0($t1)"       "\n\t"
278          "sw $t3, 8($t0)"         "\n\t"
279          : /*out*/
280          : /*in*/ "r"(&block[0])
281          : /*trash*/ "memory", "cc", "t0", "t1", "t2", "t3"
282       );
283    } while (block[2] != 1);
284 #endif
285 #else
286 # error "Unsupported arch"
287 #endif
288 }
289 
atomic_add_32bit(int * p,int n)290 __attribute__((noinline)) void atomic_add_32bit ( int* p, int n )
291 {
292 #if defined(VGA_x86)
293    unsigned long block[2];
294    block[0] = (unsigned long)p;
295    block[1] = n;
296    __asm__ __volatile__(
297       "movl 0(%%esi),%%eax"       "\n\t"
298       "movl 4(%%esi),%%ebx"       "\n\t"
299       "lock; addl %%ebx,(%%eax)"  "\n"
300       : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
301    );
302 #elif defined(VGA_amd64)
303    unsigned long block[2];
304    block[0] = (unsigned long)p;
305    block[1] = n;
306    __asm__ __volatile__(
307       "movq 0(%%rsi),%%rax"       "\n\t"
308       "movq 8(%%rsi),%%rbx"       "\n\t"
309       "lock; addl %%ebx,(%%rax)"  "\n"
310       : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
311    );
312 #elif defined(VGA_ppc32)
313    unsigned long success;
314    do {
315       __asm__ __volatile__(
316          "lwarx  15,0,%1"    "\n\t"
317          "add    15,15,%2"   "\n\t"
318          "stwcx. 15,0,%1"    "\n\t"
319          "mfcr   %0"         "\n\t"
320          "srwi   %0,%0,29"   "\n\t"
321          "andi.  %0,%0,1"    "\n"
322          : /*out*/"=b"(success)
323          : /*in*/ "b"(p), "b"(n)
324          : /*trash*/ "memory", "cc", "r15"
325       );
326    } while (success != 1);
327 #elif defined(VGA_ppc64)
328    /* Nasty hack.  Does correctly atomically do *p += n, but only if p
329       is 8-aligned -- guaranteed by caller. */
330    unsigned long success;
331    do {
332       __asm__ __volatile__(
333          "ldarx  15,0,%1"    "\n\t"
334          "add    15,15,%2"   "\n\t"
335          "stdcx. 15,0,%1"    "\n\t"
336          "mfcr   %0"         "\n\t"
337          "srwi   %0,%0,29"   "\n\t"
338          "andi.  %0,%0,1"    "\n"
339          : /*out*/"=b"(success)
340          : /*in*/ "b"(p), "b"(((unsigned long)n) << 32)
341          : /*trash*/ "memory", "cc", "r15"
342       );
343    } while (success != 1);
344 #elif defined(VGA_arm)
345    unsigned int block[3]
346       = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
347    do {
348       __asm__ __volatile__(
349          "mov   r5, %0"         "\n\t"
350          "ldr   r9, [r5, #0]"   "\n\t" // p
351          "ldr   r10, [r5, #4]"  "\n\t" // n
352          "ldrex r8, [r9]"       "\n\t"
353          "add   r8, r8, r10"    "\n\t"
354          "strex r4, r8, [r9]"   "\n\t"
355          "str   r4, [r5, #8]"   "\n\t"
356          : /*out*/
357          : /*in*/ "r"(&block[0])
358          : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
359       );
360    } while (block[2] != 0);
361 #elif defined(VGA_s390x)
362    __asm__ __volatile__(
363       "   l	0,%0\n\t"
364       "0: lr	1,0\n\t"
365       "   ar	1,%1\n\t"
366       "   cs	0,1,%0\n\t"
367       "   jl    0b\n\t"
368       : "+m" (*p)
369       : "d" (n)
370       : "cc", "memory", "0", "1");
371 #elif defined(VGA_mips32)
372    unsigned int block[3]
373       = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
374    do {
375       __asm__ __volatile__(
376          "move   $t0, %0"         "\n\t"
377          "lw   $t1, 0($t0)"       "\n\t" // p
378          "lw   $t2, 4($t0)"       "\n\t" // n
379          "ll   $t3, 0($t1)"       "\n\t"
380          "addu   $t3, $t3, $t2"   "\n\t"
381          "sc   $t3, 0($t1)"       "\n\t"
382          "sw $t3, 8($t0)"         "\n\t"
383          : /*out*/
384          : /*in*/ "r"(&block[0])
385          : /*trash*/ "memory", "cc", "t0", "t1", "t2", "t3"
386       );
387    } while (block[2] != 1);
388 #else
389 # error "Unsupported arch"
390 #endif
391 }
392 
atomic_add_64bit(long long int * p,int n)393 __attribute__((noinline)) void atomic_add_64bit ( long long int* p, int n )
394 {
395 #if defined(VGA_x86) || defined(VGA_ppc32) || defined(VGA_mips32)
396    /* do nothing; is not supported */
397 #elif defined(VGA_amd64)
398    // this is a bit subtle.  It relies on the fact that, on a 64-bit platform,
399    // sizeof(unsigned long long int) == sizeof(unsigned long) == sizeof(void*)
400    unsigned long long int block[2];
401    block[0] = (unsigned long long int)(unsigned long)p;
402    block[1] = n;
403    __asm__ __volatile__(
404       "movq 0(%%rsi),%%rax"      "\n\t"
405       "movq 8(%%rsi),%%rbx"      "\n\t"
406       "lock; addq %%rbx,(%%rax)" "\n"
407       : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
408    );
409 #elif defined(VGA_ppc64)
410    unsigned long success;
411    do {
412       __asm__ __volatile__(
413          "ldarx  15,0,%1"    "\n\t"
414          "add    15,15,%2"   "\n\t"
415          "stdcx. 15,0,%1"    "\n\t"
416          "mfcr   %0"         "\n\t"
417          "srwi   %0,%0,29"   "\n\t"
418          "andi.  %0,%0,1"    "\n"
419          : /*out*/"=b"(success)
420          : /*in*/ "b"(p), "b"(n)
421          : /*trash*/ "memory", "cc", "r15"
422       );
423    } while (success != 1);
424 #elif defined(VGA_arm)
425    unsigned long long int block[3]
426      = { (unsigned long long int)(unsigned long)p,
427          (unsigned long long int)n,
428          0xFFFFFFFFFFFFFFFFULL };
429    do {
430       __asm__ __volatile__(
431          "mov    r5, %0"             "\n\t"
432          "ldr    r8,     [r5, #0]"   "\n\t" // p
433          "ldrd   r2, r3, [r5, #8]"   "\n\t" // n
434          "ldrexd r0, r1, [r8]"       "\n\t"
435          "adds   r2, r2, r0"         "\n\t"
436          "adc    r3, r3, r1"         "\n\t"
437          "strexd r1, r2, r3, [r8]"   "\n\t"
438          "str    r1, [r5, #16]"      "\n\t"
439          : /*out*/
440          : /*in*/ "r"(&block[0])
441          : /*trash*/ "memory", "cc", "r5", "r0", "r1", "r8", "r2", "r3"
442       );
443    } while (block[2] != 0xFFFFFFFF00000000ULL);
444 #elif defined(VGA_s390x)
445    __asm__ __volatile__(
446       "   lg	0,%0\n\t"
447       "0: lgr	1,0\n\t"
448       "   agr	1,%1\n\t"
449       "   csg	0,1,%0\n\t"
450       "   jl    0b\n\t"
451       : "+m" (*p)
452       : "d" (n)
453       : "cc", "memory", "0", "1");
454 #else
455 # error "Unsupported arch"
456 #endif
457 }
458 
main(int argc,char ** argv)459 int main ( int argc, char** argv )
460 {
461    int    i, status;
462    char*  page;
463    char*  p8;
464    short* p16;
465    int*   p32;
466    long long int* p64;
467    pid_t  child, p2;
468 
469    printf("parent, pre-fork\n");
470 
471    page = mmap( 0, sysconf(_SC_PAGESIZE),
472                    PROT_READ|PROT_WRITE,
473                    MAP_ANONYMOUS|MAP_SHARED, -1, 0 );
474    if (page == MAP_FAILED) {
475       perror("mmap failed");
476       exit(1);
477    }
478 
479    p8  = (char*)(page+0);
480    p16 = (short*)(page+256);
481    p32 = (int*)(page+512);
482    p64 = (long long int*)(page+768);
483 
484    assert( IS_8_ALIGNED(p8) );
485    assert( IS_8_ALIGNED(p16) );
486    assert( IS_8_ALIGNED(p32) );
487    assert( IS_8_ALIGNED(p64) );
488 
489    memset(page, 0, 1024);
490 
491    *p8  = 0;
492    *p16 = 0;
493    *p32 = 0;
494    *p64 = 0;
495 
496    child = fork();
497    if (child == -1) {
498       perror("fork() failed\n");
499       return 1;
500    }
501 
502    if (child == 0) {
503       /* --- CHILD --- */
504       printf("child\n");
505       for (i = 0; i < NNN; i++) {
506          atomic_add_8bit(p8, 1);
507          atomic_add_16bit(p16, 1);
508          atomic_add_32bit(p32, 1);
509          atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
510       }
511       return 1;
512       /* NOTREACHED */
513 
514    }
515 
516    /* --- PARENT --- */
517 
518    printf("parent\n");
519 
520    for (i = 0; i < NNN; i++) {
521       atomic_add_8bit(p8, 1);
522       atomic_add_16bit(p16, 1);
523       atomic_add_32bit(p32, 1);
524       atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
525    }
526 
527    p2 = waitpid(child, &status, 0);
528    assert(p2 == child);
529 
530    /* assert that child finished normally */
531    assert(WIFEXITED(status));
532 
533    printf("FINAL VALUES:  8 bit %d,  16 bit %d,  32 bit %d,  64 bit %lld\n",
534           (int)(*(signed char*)p8), (int)(*p16), *p32, *p64 );
535 
536    if (-74 == (int)(*(signed char*)p8)
537        && 32694 == (int)(*p16)
538        && 6913974 == *p32
539        && (0LL == *p64 || 682858642110LL == *p64)) {
540       printf("PASS\n");
541    } else {
542       printf("FAIL -- see source code for expected values\n");
543    }
544 
545    printf("parent exits\n");
546 
547    return 0;
548 }
549