1
2 /* This is an example of a program which does atomic memory operations
3 between two processes which share a page. Valgrind 3.4.1 and
4 earlier produce incorrect answers because it does not preserve
5 atomicity of the relevant instructions in the generated code; but
6 the post-DCAS-merge versions of Valgrind do behave correctly. */
7
8 /* On ARM, this can be compiled into either ARM or Thumb code, so as
9 to test both A and T encodings of LDREX/STREX et al. Also on ARM,
10 it tests doubleword atomics (LDREXD, STREXD) which I don't think it
11 does on any other platform. */
12
13 #include <stdlib.h>
14 #include <stdio.h>
15 #include <string.h>
16 #include <assert.h>
17 #include <unistd.h>
18 #include <sys/wait.h>
19 #include "tests/sys_mman.h"
20
21 #define NNN 3456987
22
23 #define IS_8_ALIGNED(_ptr) (0 == (((unsigned long)(_ptr)) & 7))
24
25
/* Atomically do *p += n on an 8-bit location, using the target's
   native atomic read-modify-write sequence.  noinline so every call
   really executes this code rather than anything the compiler might
   fold away. */
__attribute__((noinline)) void atomic_add_8bit ( char* p, int n )
{
#if defined(VGA_x86)
   /* p and n are passed to the asm in a two-word block; the asm loads
      them itself and performs one LOCK-prefixed byte add. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax" "\n\t"
      "movl 4(%%esi),%%ebx" "\n\t"
      "lock; addb %%bl,(%%eax)" "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   /* Same scheme as x86, with 8-byte block slots. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax" "\n\t"
      "movq 8(%%rsi),%%rbx" "\n\t"
      "lock; addb %%bl,(%%rax)" "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Nasty hack. Does correctly atomically do *p += n, but only if p
      is 4-aligned -- guaranteed by caller. */
   /* lwarx/stwcx. operate on the whole 4-byte word at p; n is
      pre-shifted left by 24 so the add lands in the word's most
      significant byte — presumably the byte *p on this big-endian
      target (TODO confirm).  success is CR0.EQ extracted via
      mfcr/srwi/andi., i.e. 1 iff the stwcx. reservation held. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx 15,0,%1" "\n\t"
         "add 15,15,%2" "\n\t"
         "stwcx. 15,0,%1" "\n\t"
         "mfcr %0" "\n\t"
         "srwi %0,%0,29" "\n\t"
         "andi. %0,%0,1" "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 24)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64)
   /* Nasty hack. Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   /* 64-bit variant of the above: ldarx/stdcx. on the 8-byte
      doubleword, with n shifted into the top byte (<< 56). */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx 15,0,%1" "\n\t"
         "add 15,15,%2" "\n\t"
         "stdcx. 15,0,%1" "\n\t"
         "mfcr %0" "\n\t"
         "srwi %0,%0,29" "\n\t"
         "andi. %0,%0,1" "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 56)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* block = { p, n, strex status }.  ldrexb/strexb do the byte RMW;
      strexb writes 0 to r4 on success, which the asm stores into
      block[2], so the loop retries until the exclusive store took. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov    r5, %0"         "\n\t"
         "ldr    r9, [r5, #0]"   "\n\t" // p
         "ldr    r10, [r5, #4]"  "\n\t" // n
         "ldrexb r8, [r9]"       "\n\t"
         "add    r8, r8, r10"    "\n\t"
         "strexb r4, r8, [r9]"   "\n\t"
         "str    r4, [r5, #8]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* Compare-and-swap loop: snapshot the word at *p into r0, build
      the incremented word in the dummy temp (icm/ar/stcm touch only
      the byte selected by mask 1), then CS the new word back,
      retrying (jl 0b) if *p changed underneath. */
   int dummy;
   __asm__ __volatile__(
      "   l	0,%0\n\t"
      "0: st	0,%1\n\t"
      "   icm	1,1,%1\n\t"
      "   ar	1,%2\n\t"
      "   stcm	1,1,%1\n\t"
      "   l	1,%1\n\t"
      "   cs	0,1,%0\n\t"
      "   jl	0b\n\t"
      : "+m" (*p), "+m" (dummy)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
#if defined (_MIPSEL)
   /* block = { p, n, sc status }.  ll/sc RMW on the containing word;
      the result is masked to 8 bits before the store.  sc writes 1 to
      $t3 on success, copied into block[2], hence the retry loop.
      NOTE(review): sc stores a full 32-bit word, so bytes 1..3 of the
      word at p are clobbered (zeroed) — presumably acceptable for
      this test's memory layout; confirm against the caller. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "move $t0, %0"          "\n\t"
         "lw   $t1, 0($t0)"      "\n\t" // p
         "lw   $t2, 4($t0)"      "\n\t" // n
         "ll   $t3, 0($t1)"      "\n\t"
         "addu $t3, $t3, $t2"    "\n\t"
         "andi $t3, $t3, 0xFF"   "\n\t"
         "sc   $t3, 0($t1)"      "\n\t"
         "sw   $t3, 8($t0)"      "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   /* Big-endian variant: after masking to 8 bits, wsbh/rotr build a
      byte-swapped copy which is OR-ed back in — presumably to place
      the result byte at the big-endian end of the word as well
      (TODO confirm against a MIPSEB run). */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "move $t0, %0"          "\n\t"
         "lw   $t1, 0($t0)"      "\n\t" // p
         "lw   $t2, 4($t0)"      "\n\t" // n
         "li   $t4, 0x000000FF"  "\n\t"
         "ll   $t3, 0($t1)"      "\n\t"
         "addu $t3, $t3, $t2"    "\n\t"
         "and  $t3, $t3, $t4"    "\n\t"
         "wsbh $t4, $t3"         "\n\t"
         "rotr $t4, $t4, 16"     "\n\t"
         "or   $t3, $t4, $t3"    "\n\t"
         "sc   $t3, 0($t1)"      "\n\t"
         "sw   $t3, 8($t0)"      "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "t0", "t1", "t2", "t3", "t4"
      );
   } while (block[2] != 1);
#endif
#else
# error "Unsupported arch"
#endif
}
159
160
/* Atomically do *p += n on a 16-bit location.  Same per-architecture
   structure as atomic_add_8bit, with halfword-sized operations. */
__attribute__((noinline)) void atomic_add_16bit ( short* p, int n )
{
#if defined(VGA_x86)
   /* Parameters handed to the asm via a block; one LOCK-prefixed
      16-bit add does the work. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax" "\n\t"
      "movl 4(%%esi),%%ebx" "\n\t"
      "lock; addw %%bx,(%%eax)" "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax" "\n\t"
      "movq 8(%%rsi),%%rbx" "\n\t"
      "lock; addw %%bx,(%%rax)" "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Nasty hack. Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   /* Word-sized lwarx/stwcx. with n shifted into the upper halfword
      (<< 16) — presumably the halfword *p on this big-endian target
      (TODO confirm).  success = CR0.EQ from the stwcx. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx 15,0,%1" "\n\t"
         "add 15,15,%2" "\n\t"
         "stwcx. 15,0,%1" "\n\t"
         "mfcr %0" "\n\t"
         "srwi %0,%0,29" "\n\t"
         "andi. %0,%0,1" "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 16)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64)
   /* Nasty hack. Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   /* Doubleword ldarx/stdcx. with n shifted into the top halfword. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx 15,0,%1" "\n\t"
         "add 15,15,%2" "\n\t"
         "stdcx. 15,0,%1" "\n\t"
         "mfcr %0" "\n\t"
         "srwi %0,%0,29" "\n\t"
         "andi. %0,%0,1" "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 48)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* ldrexh/strexh halfword RMW; block[2] receives the strexh status
      (0 on success), driving the retry loop. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov    r5, %0"         "\n\t"
         "ldr    r9, [r5, #0]"   "\n\t" // p
         "ldr    r10, [r5, #4]"  "\n\t" // n
         "ldrexh r8, [r9]"       "\n\t"
         "add    r8, r8, r10"    "\n\t"
         "strexh r4, r8, [r9]"   "\n\t"
         "str    r4, [r5, #8]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* CS loop as in the 8-bit case, but icm/stcm use mask 3 so two
      bytes (one halfword) are inserted/stored. */
   int dummy;
   __asm__ __volatile__(
      "   l	0,%0\n\t"
      "0: st	0,%1\n\t"
      "   icm	1,3,%1\n\t"
      "   ar	1,%2\n\t"
      "   stcm	1,3,%1\n\t"
      "   l	1,%1\n\t"
      "   cs	0,1,%0\n\t"
      "   jl	0b\n\t"
      : "+m" (*p), "+m" (dummy)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
#if defined (_MIPSEL)
   /* ll/sc word RMW, result masked to 16 bits; block[2] gets the sc
      status (1 on success).  As with the 8-bit case, the sc clobbers
      the upper halfword of the containing word. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "move $t0, %0"          "\n\t"
         "lw   $t1, 0($t0)"      "\n\t" // p
         "lw   $t2, 4($t0)"      "\n\t" // n
         "ll   $t3, 0($t1)"      "\n\t"
         "addu $t3, $t3, $t2"    "\n\t"
         "andi $t3, $t3, 0xFFFF" "\n\t"
         "sc   $t3, 0($t1)"      "\n\t"
         "sw   $t3, 8($t0)"      "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   /* NOTE(review): this branch performs no add at all — it plainly
      stores the constant 32694 << 16 into the word at p and forces
      the "success" flag to 1.  32694 is exactly the final 16-bit
      value main() checks for, so this looks like a hard-coded
      workaround rather than an atomic add; confirm against upstream
      before relying on it. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "move $t0, %0"          "\n\t"
         "lw   $t1, 0($t0)"      "\n\t" // p
         "li   $t2, 32694"       "\n\t" // n
         "li   $t3, 0x1"         "\n\t"
         "sll  $t2, $t2, 16"     "\n\t"
         "sw   $t2, 0($t1)"      "\n\t"
         "sw   $t3, 8($t0)"      "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#endif
#else
# error "Unsupported arch"
#endif
}
289
/* Atomically do *p += n on a 32-bit location.  On word-sized targets
   this needs no shifting hacks — the primitive's natural width
   matches the operand. */
__attribute__((noinline)) void atomic_add_32bit ( int* p, int n )
{
#if defined(VGA_x86)
   /* One LOCK-prefixed 32-bit add, operands fetched from the block. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax" "\n\t"
      "movl 4(%%esi),%%ebx" "\n\t"
      "lock; addl %%ebx,(%%eax)" "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax" "\n\t"
      "movq 8(%%rsi),%%rbx" "\n\t"
      "lock; addl %%ebx,(%%rax)" "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Plain lwarx/stwcx. word RMW; n needs no shift here since the
      operand is a full word.  success = CR0.EQ from the stwcx. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx 15,0,%1" "\n\t"
         "add 15,15,%2" "\n\t"
         "stwcx. 15,0,%1" "\n\t"
         "mfcr %0" "\n\t"
         "srwi %0,%0,29" "\n\t"
         "andi. %0,%0,1" "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(n)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64)
   /* Nasty hack. Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   /* Doubleword ldarx/stdcx., n shifted into the upper word. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx 15,0,%1" "\n\t"
         "add 15,15,%2" "\n\t"
         "stdcx. 15,0,%1" "\n\t"
         "mfcr %0" "\n\t"
         "srwi %0,%0,29" "\n\t"
         "andi. %0,%0,1" "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 32)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* ldrex/strex word RMW; block[2] receives the strex status
      (0 on success), driving the retry loop. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov   r5, %0"         "\n\t"
         "ldr   r9, [r5, #0]"   "\n\t" // p
         "ldr   r10, [r5, #4]"  "\n\t" // n
         "ldrex r8, [r9]"       "\n\t"
         "add   r8, r8, r10"    "\n\t"
         "strex r4, r8, [r9]"   "\n\t"
         "str   r4, [r5, #8]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* Classic CS loop on a full word: old value in r0, old+n in r1,
      compare-and-swap, retry (jl 0b) if *p changed. */
   __asm__ __volatile__(
      "   l	0,%0\n\t"
      "0: lr	1,0\n\t"
      "   ar	1,%1\n\t"
      "   cs	0,1,%0\n\t"
      "   jl	0b\n\t"
      : "+m" (*p)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
   /* ll/sc word RMW; endianness-independent at full word width, so
      one branch serves both MIPSEL and MIPSEB.  block[2] holds the
      sc status (1 on success). */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "move $t0, %0"       "\n\t"
         "lw   $t1, 0($t0)"   "\n\t" // p
         "lw   $t2, 4($t0)"   "\n\t" // n
         "ll   $t3, 0($t1)"   "\n\t"
         "addu $t3, $t3, $t2" "\n\t"
         "sc   $t3, 0($t1)"   "\n\t"
         "sw   $t3, 8($t0)"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#else
# error "Unsupported arch"
#endif
}
392
/* Atomically do *p += n on a 64-bit location.  A deliberate no-op on
   the 32-bit targets that lack a 64-bit atomic primitive (x86,
   ppc32, mips32); main() accepts *p64 == 0 for that reason. */
__attribute__((noinline)) void atomic_add_64bit ( long long int* p, int n )
{
#if defined(VGA_x86) || defined(VGA_ppc32) || defined(VGA_mips32)
   /* do nothing; is not supported */
#elif defined(VGA_amd64)
   // this is a bit subtle. It relies on the fact that, on a 64-bit platform,
   // sizeof(unsigned long long int) == sizeof(unsigned long) == sizeof(void*)
   unsigned long long int block[2];
   block[0] = (unsigned long long int)(unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax" "\n\t"
      "movq 8(%%rsi),%%rbx" "\n\t"
      "lock; addq %%rbx,(%%rax)" "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc64)
   /* Natural-width ldarx/stdcx. doubleword RMW; no shift needed.
      success = CR0.EQ from the stdcx. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx 15,0,%1" "\n\t"
         "add 15,15,%2" "\n\t"
         "stdcx. 15,0,%1" "\n\t"
         "mfcr %0" "\n\t"
         "srwi %0,%0,29" "\n\t"
         "andi. %0,%0,1" "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(n)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* Doubleword exclusive pair: ldrexd/strexd with a 64-bit add done
      as adds/adc across the r2:r3 pair.  The 32-bit strexd status is
      stored into the low half of block[2] (offset 16) — presumably
      the low word on this little-endian target (TODO confirm) — so
      block[2] becomes 0xFFFFFFFF00000000 exactly when the store
      succeeded (status 0). */
   unsigned long long int block[3]
     = { (unsigned long long int)(unsigned long)p,
         (unsigned long long int)n,
         0xFFFFFFFFFFFFFFFFULL };
   do {
      __asm__ __volatile__(
         "mov    r5, %0"             "\n\t"
         "ldr    r8, [r5, #0]"       "\n\t" // p
         "ldrd   r2, r3, [r5, #8]"   "\n\t" // n
         "ldrexd r0, r1, [r8]"       "\n\t"
         "adds   r2, r2, r0"         "\n\t"
         "adc    r3, r3, r1"         "\n\t"
         "strexd r1, r2, r3, [r8]"   "\n\t"
         "str    r1, [r5, #16]"      "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r0", "r1", "r8", "r2", "r3"
      );
   } while (block[2] != 0xFFFFFFFF00000000ULL);
#elif defined(VGA_s390x)
   /* 64-bit CSG loop: old doubleword in r0, old+n in r1,
      compare-and-swap-grande, retry on contention. */
   __asm__ __volatile__(
      "   lg	0,%0\n\t"
      "0: lgr	1,0\n\t"
      "   agr	1,%1\n\t"
      "   csg	0,1,%0\n\t"
      "   jl	0b\n\t"
      : "+m" (*p)
      : "d" (n)
      : "cc", "memory", "0", "1");
#else
# error "Unsupported arch"
#endif
}
458
main(int argc,char ** argv)459 int main ( int argc, char** argv )
460 {
461 int i, status;
462 char* page;
463 char* p8;
464 short* p16;
465 int* p32;
466 long long int* p64;
467 pid_t child, p2;
468
469 printf("parent, pre-fork\n");
470
471 page = mmap( 0, sysconf(_SC_PAGESIZE),
472 PROT_READ|PROT_WRITE,
473 MAP_ANONYMOUS|MAP_SHARED, -1, 0 );
474 if (page == MAP_FAILED) {
475 perror("mmap failed");
476 exit(1);
477 }
478
479 p8 = (char*)(page+0);
480 p16 = (short*)(page+256);
481 p32 = (int*)(page+512);
482 p64 = (long long int*)(page+768);
483
484 assert( IS_8_ALIGNED(p8) );
485 assert( IS_8_ALIGNED(p16) );
486 assert( IS_8_ALIGNED(p32) );
487 assert( IS_8_ALIGNED(p64) );
488
489 memset(page, 0, 1024);
490
491 *p8 = 0;
492 *p16 = 0;
493 *p32 = 0;
494 *p64 = 0;
495
496 child = fork();
497 if (child == -1) {
498 perror("fork() failed\n");
499 return 1;
500 }
501
502 if (child == 0) {
503 /* --- CHILD --- */
504 printf("child\n");
505 for (i = 0; i < NNN; i++) {
506 atomic_add_8bit(p8, 1);
507 atomic_add_16bit(p16, 1);
508 atomic_add_32bit(p32, 1);
509 atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
510 }
511 return 1;
512 /* NOTREACHED */
513
514 }
515
516 /* --- PARENT --- */
517
518 printf("parent\n");
519
520 for (i = 0; i < NNN; i++) {
521 atomic_add_8bit(p8, 1);
522 atomic_add_16bit(p16, 1);
523 atomic_add_32bit(p32, 1);
524 atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
525 }
526
527 p2 = waitpid(child, &status, 0);
528 assert(p2 == child);
529
530 /* assert that child finished normally */
531 assert(WIFEXITED(status));
532
533 printf("FINAL VALUES: 8 bit %d, 16 bit %d, 32 bit %d, 64 bit %lld\n",
534 (int)(*(signed char*)p8), (int)(*p16), *p32, *p64 );
535
536 if (-74 == (int)(*(signed char*)p8)
537 && 32694 == (int)(*p16)
538 && 6913974 == *p32
539 && (0LL == *p64 || 682858642110LL == *p64)) {
540 printf("PASS\n");
541 } else {
542 printf("FAIL -- see source code for expected values\n");
543 }
544
545 printf("parent exits\n");
546
547 return 0;
548 }
549