/* This is an example of a program which does atomic memory operations
   between two processes which share a page.  Valgrind 3.4.1 and
   earlier produce incorrect answers because they do not preserve
   atomicity of the relevant instructions in the generated code; but
   the post-DCAS-merge versions of Valgrind do behave correctly. */

/* On ARM, this can be compiled into either ARM or Thumb code, so as
   to test both A and T encodings of LDREX/STREX et al.  Also on ARM,
   it tests doubleword atomics (LDREXD, STREXD) which I don't think it
   does on any other platform. */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <unistd.h>
#include <sys/wait.h>
#include "tests/sys_mman.h"

#define NNN 3456987

#define IS_8_ALIGNED(_ptr)   (0 == (((unsigned long)(_ptr)) & 7))


__attribute__((noinline)) void atomic_add_8bit ( char* p, int n )
{
#if defined(VGA_x86)
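   /* Pass p and n to the asm via a two-word in-memory block so that
      only one input register is needed; the LOCK prefix is what makes
      the byte-sized add atomic. */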
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"      "\n\t"
      "movl 4(%%esi),%%ebx"      "\n\t"
      "lock; addb %%bl,(%%eax)"  "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"      "\n\t"
      "movq 8(%%rsi),%%rbx"      "\n\t"
      "lock; addb %%bl,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 4-aligned -- guaranteed by caller. */
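   /* The mfcr/srwi/andi. sequence extracts CR0.EQ, which stwcx. sets
      iff the store-conditional succeeded; loop until it reads 1. */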
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stwcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 24)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64be)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
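   /* n << 56 places the addend in the most significant byte of the
      doubleword, which on big-endian is the byte p points at. */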
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 56)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64le)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
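   /* Little-endian: the byte p points at is the least significant byte
      of the doubleword, so no shift of n is needed here. */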
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n))
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
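   /* block[2] receives the strexb status flag: 0 means the exclusive
      store succeeded, nonzero means it must be retried. */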
   do {
      __asm__ __volatile__(
         "mov    r5, %0"         "\n\t"
         "ldr    r9, [r5, #0]"   "\n\t" // p
         "ldr    r10, [r5, #4]"  "\n\t" // n
         "ldrexb r8, [r9]"       "\n\t"
         "add    r8, r8, r10"    "\n\t"
         "strexb r4, r8, [r9]"   "\n\t"
         "str    r4, [r5, #8]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_arm64)
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
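   /* Same scheme as the ARM case: ldxrb/stxrb on the byte, with the
      stxrb status word (0 on success) landing in block[2]. */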
   do {
      __asm__ __volatile__(
         "mov   x5, %0"         "\n\t"
         "ldr   x9, [x5, #0]"   "\n\t" // p
         "ldr   x10, [x5, #8]"  "\n\t" // n
         "ldxrb w8, [x9]"       "\n\t"
         "add   x8, x8, x10"    "\n\t"
         "stxrb w4, w8, [x9]"   "\n\t"
         "str   x4, [x5, #16]"  "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   int dummy;
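   /* Compare-and-swap loop: load the word at p, update just the byte
      p addresses (via icm/stcm through 'dummy'), then use cs to
      install the new word; jl loops back if *p changed meanwhile. */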
   __asm__ __volatile__(
      "   l     0,%0\n\t"
      "0: st    0,%1\n\t"
      "   icm   1,1,%1\n\t"
      "   ar    1,%2\n\t"
      "   stcm  1,1,%1\n\t"
      "   l     1,%1\n\t"
      "   cs    0,1,%0\n\t"
      "   jl    0b\n\t"
      : "+m" (*p), "+m" (dummy)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
   /* We rely on the fact that p is 4-aligned.  Otherwise 'll' may
      throw an exception that can cause this function to fail. */
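   /* 'll'/'sc' work on whole words, so update the byte by masking it
      out of the loaded word, adding n to just that byte, and merging
      the result back in before the store-conditional. */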
#if defined (_MIPSEL)
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"           "\n\t"
         "lw   $t1, 0($t0)"       "\n\t" // p
         "lw   $t2, 4($t0)"       "\n\t" // n
         "andi $t2, $t2, 0xFF"    "\n\t" // n = n and 0xFF
         "li   $t4, 0xFF"         "\n\t"
         "nor  $t4, $t4, $zero"   "\n\t" // $t4 = 0xFFFFFF00
         "ll   $t3, 0($t1)"       "\n\t" // $t3 = old value
         "and  $t4, $t4, $t3"     "\n\t" // $t4 = $t3 and 0xFFFFFF00
         "addu $t3, $t3, $t2"     "\n\t" // $t3 = $t3 + n
         "andi $t3, $t3, 0xFF"    "\n\t" // $t3 = $t3 and 0xFF
         "or   $t3, $t3, $t4"     "\n\t" // $t3 = $t3 or $t4
         "sc   $t3, 0($t1)"       "\n\t"
         "sw   $t3, 8($t0)"       "\n\t" // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "t4"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n << 24, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"        "\n\t"
         "lw   $t1, 0($t0)"    "\n\t" // p
         "lw   $t2, 4($t0)"    "\n\t" // n
         "ll   $t3, 0($t1)"    "\n\t"
         "addu $t3, $t3, $t2"  "\n\t"
         "sc   $t3, 0($t1)"    "\n\t"
         "sw   $t3, 8($t0)"    "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#endif
#elif defined(VGA_mips64)
   /* We rely on the fact that p is 4-aligned.  Otherwise 'll' may
      throw an exception that can cause this function to fail. */
#if defined (_MIPSEL)
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move $t0, %0"           "\n\t"
         "ld   $t1, 0($t0)"       "\n\t" // p
         "ld   $t2, 8($t0)"       "\n\t" // n
         "andi $t2, $t2, 0xFF"    "\n\t" // n = n and 0xFF
         "li   $s0, 0xFF"         "\n\t"
         "nor  $s0, $s0, $zero"   "\n\t" // $s0 = 0xFFFFFF00
         "ll   $t3, 0($t1)"       "\n\t" // $t3 = old value
         "and  $s0, $s0, $t3"     "\n\t" // $s0 = $t3 and 0xFFFFFF00
         "addu $t3, $t3, $t2"     "\n\t" // $t3 = $t3 + n
         "andi $t3, $t3, 0xFF"    "\n\t" // $t3 = $t3 and 0xFF
         "or   $t3, $t3, $s0"     "\n\t" // $t3 = $t3 or $s0
         "sc   $t3, 0($t1)"       "\n\t"
         "sw   $t3, 16($t0)"      "\n\t" // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "s0"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n << 56, 0x0 };
   do {
      __asm__ __volatile__(
         "move  $t0, %0"        "\n\t"
         "ld    $t1, 0($t0)"    "\n\t" // p
         "ld    $t2, 8($t0)"    "\n\t" // n
         "lld   $t3, 0($t1)"    "\n\t"
         "daddu $t3, $t3, $t2"  "\n\t"
         "scd   $t3, 0($t1)"    "\n\t"
         "sd    $t3, 16($t0)"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#endif
#elif defined(VGA_tilegx)
   unsigned int *p4 = (unsigned int *)(((unsigned long long)p + 3) & (~3ULL));
   unsigned int mask = (0xff) << ((int)p & 3);
   unsigned int add = (n & 0xff) << ((int)p & 3);
   unsigned int x, new;

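   /* Build the new word with only the addressed byte changed, then
      publish it with cmpexch4; writing the expected old value to SPR
      0x2780 (presumably CMPEXCH_VALUE) first is what cmpexch4
      compares against.  As above, this relies on p being 4-aligned. */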
   while (1) {
      x = *p4;
      new = (x & (~mask)) | ((x + add) & mask);
      __insn_mtspr(0x2780, x);
      if (__insn_cmpexch4(p4, new) == x)
         break;
   }
#else
# error "Unsupported arch"
#endif
}


__attribute__((noinline)) void atomic_add_16bit ( short* p, int n )
{
#if defined(VGA_x86)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"      "\n\t"
      "movl 4(%%esi),%%ebx"      "\n\t"
      "lock; addw %%bx,(%%eax)"  "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"      "\n\t"
      "movq 8(%%rsi),%%rbx"      "\n\t"
      "lock; addw %%bx,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stwcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 16)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64be)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 48)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64le)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n))
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov    r5, %0"         "\n\t"
         "ldr    r9, [r5, #0]"   "\n\t" // p
         "ldr    r10, [r5, #4]"  "\n\t" // n
         "ldrexh r8, [r9]"       "\n\t"
         "add    r8, r8, r10"    "\n\t"
         "strexh r4, r8, [r9]"   "\n\t"
         "str    r4, [r5, #8]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_arm64)
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov   x5, %0"         "\n\t"
         "ldr   x9, [x5, #0]"   "\n\t" // p
         "ldr   x10, [x5, #8]"  "\n\t" // n
         "ldxrh w8, [x9]"       "\n\t"
         "add   x8, x8, x10"    "\n\t"
         "stxrh w4, w8, [x9]"   "\n\t"
         "str   x4, [x5, #16]"  "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   int dummy;
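   /* As for the 8-bit case, but icm/stcm use mask 3 (a two-byte
      mask), so the halfword p addresses is updated. */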
   __asm__ __volatile__(
      "   l     0,%0\n\t"
      "0: st    0,%1\n\t"
      "   icm   1,3,%1\n\t"
      "   ar    1,%2\n\t"
      "   stcm  1,3,%1\n\t"
      "   l     1,%1\n\t"
      "   cs    0,1,%0\n\t"
      "   jl    0b\n\t"
      : "+m" (*p), "+m" (dummy)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
   /* We rely on the fact that p is 4-aligned.  Otherwise 'll' may
      throw an exception that can cause this function to fail. */
#if defined (_MIPSEL)
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"           "\n\t"
         "lw   $t1, 0($t0)"       "\n\t" // p
         "lw   $t2, 4($t0)"       "\n\t" // n
         "andi $t2, $t2, 0xFFFF"  "\n\t" // n = n and 0xFFFF
         "li   $t4, 0xFFFF"       "\n\t"
         "nor  $t4, $t4, $zero"   "\n\t" // $t4 = 0xFFFF0000
         "ll   $t3, 0($t1)"       "\n\t" // $t3 = old value
         "and  $t4, $t4, $t3"     "\n\t" // $t4 = $t3 and 0xFFFF0000
         "addu $t3, $t3, $t2"     "\n\t" // $t3 = $t3 + n
         "andi $t3, $t3, 0xFFFF"  "\n\t" // $t3 = $t3 and 0xFFFF
         "or   $t3, $t3, $t4"     "\n\t" // $t3 = $t3 or $t4
         "sc   $t3, 0($t1)"       "\n\t"
         "sw   $t3, 8($t0)"       "\n\t" // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "t4"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n << 16, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"        "\n\t"
         "lw   $t1, 0($t0)"    "\n\t" // p
         "lw   $t2, 4($t0)"    "\n\t" // n
         "ll   $t3, 0($t1)"    "\n\t"
         "addu $t3, $t3, $t2"  "\n\t"
         "sc   $t3, 0($t1)"    "\n\t"
         "sw   $t3, 8($t0)"    "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#endif
#elif defined(VGA_mips64)
   /* We rely on the fact that p is 4-aligned.  Otherwise 'll' may
      throw an exception that can cause this function to fail. */
#if defined (_MIPSEL)
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move $t0, %0"           "\n\t"
         "ld   $t1, 0($t0)"       "\n\t" // p
         "ld   $t2, 8($t0)"       "\n\t" // n
         "andi $t2, $t2, 0xFFFF"  "\n\t" // n = n and 0xFFFF
         "li   $s0, 0xFFFF"       "\n\t"
         "nor  $s0, $s0, $zero"   "\n\t" // $s0 = 0xFFFF0000
         "ll   $t3, 0($t1)"       "\n\t" // $t3 = old value
         "and  $s0, $s0, $t3"     "\n\t" // $s0 = $t3 and 0xFFFF0000
         "addu $t3, $t3, $t2"     "\n\t" // $t3 = $t3 + n
         "andi $t3, $t3, 0xFFFF"  "\n\t" // $t3 = $t3 and 0xFFFF
         "or   $t3, $t3, $s0"     "\n\t" // $t3 = $t3 or $s0
         "sc   $t3, 0($t1)"       "\n\t"
         "sw   $t3, 16($t0)"      "\n\t" // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "s0"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n << 48, 0x0 };
   do {
      __asm__ __volatile__(
         "move  $t0, %0"        "\n\t"
         "ld    $t1, 0($t0)"    "\n\t" // p
         "ld    $t2, 8($t0)"    "\n\t" // n
         "lld   $t3, 0($t1)"    "\n\t"
         "daddu $t3, $t3, $t2"  "\n\t"
         "scd   $t3, 0($t1)"    "\n\t"
         "sd    $t3, 16($t0)"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#endif
#elif defined(VGA_tilegx)
   unsigned int *p4 = (unsigned int *)(((unsigned long long)p + 3) & (~3ULL));
   unsigned int mask = (0xffff) << ((int)p & 3);
   unsigned int add = (n & 0xffff) << ((int)p & 3);
   unsigned int x, new;

   while (1) {
      x = *p4;
      new = (x & (~mask)) | ((x + add) & mask);
      __insn_mtspr(0x2780, x);
      if (__insn_cmpexch4(p4, new) == x)
         break;
   }
#else
# error "Unsupported arch"
#endif
}

__attribute__((noinline)) void atomic_add_32bit ( int* p, int n )
{
#if defined(VGA_x86)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"      "\n\t"
      "movl 4(%%esi),%%ebx"      "\n\t"
      "lock; addl %%ebx,(%%eax)" "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"      "\n\t"
      "movq 8(%%rsi),%%rbx"      "\n\t"
      "lock; addl %%ebx,(%%rax)" "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stwcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(n)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64be)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 32)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64le)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n))
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov   r5, %0"         "\n\t"
         "ldr   r9, [r5, #0]"   "\n\t" // p
         "ldr   r10, [r5, #4]"  "\n\t" // n
         "ldrex r8, [r9]"       "\n\t"
         "add   r8, r8, r10"    "\n\t"
         "strex r4, r8, [r9]"   "\n\t"
         "str   r4, [r5, #8]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_arm64)
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov  x5, %0"         "\n\t"
         "ldr  x9, [x5, #0]"   "\n\t" // p
         "ldr  x10, [x5, #8]"  "\n\t" // n
         "ldxr w8, [x9]"       "\n\t"
         "add  x8, x8, x10"    "\n\t"
         "stxr w4, w8, [x9]"   "\n\t"
         "str  x4, [x5, #16]"  "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
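   /* Full-word case: no byte insert/extract needed, just a plain
      compare-and-swap loop. */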
   __asm__ __volatile__(
      "   l     0,%0\n\t"
      "0: lr    1,0\n\t"
      "   ar    1,%1\n\t"
      "   cs    0,1,%0\n\t"
      "   jl    0b\n\t"
      : "+m" (*p)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"        "\n\t"
         "lw   $t1, 0($t0)"    "\n\t" // p
         "lw   $t2, 4($t0)"    "\n\t" // n
         "ll   $t3, 0($t1)"    "\n\t"
         "addu $t3, $t3, $t2"  "\n\t"
         "sc   $t3, 0($t1)"    "\n\t"
         "sw   $t3, 8($t0)"    "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#elif defined(VGA_mips64)
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move $t0, %0"        "\n\t"
         "ld   $t1, 0($t0)"    "\n\t" // p
         "ld   $t2, 8($t0)"    "\n\t" // n
         "ll   $t3, 0($t1)"    "\n\t"
         "addu $t3, $t3, $t2"  "\n\t"
         "sc   $t3, 0($t1)"    "\n\t"
         "sd   $t3, 16($t0)"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#elif defined(VGA_tilegx)
   __insn_fetchadd4(p, n);
#else
# error "Unsupported arch"
#endif
}

__attribute__((noinline)) void atomic_add_64bit ( long long int* p, int n )
{
#if defined(VGA_x86) || defined(VGA_ppc32) || defined(VGA_mips32)
   /* do nothing; 64-bit atomics are not supported on these 32-bit
      targets, and the final check accepts 0 for this case */
#elif defined(VGA_amd64)
   // this is a bit subtle.  It relies on the fact that, on a 64-bit
   // platform, sizeof(unsigned long long int) == sizeof(unsigned long)
   // == sizeof(void*)
   unsigned long long int block[2];
   block[0] = (unsigned long long int)(unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"      "\n\t"
      "movq 8(%%rsi),%%rbx"      "\n\t"
      "lock; addq %%rbx,(%%rax)" "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc64be) || defined(VGA_ppc64le)
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(n)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   unsigned long long int block[3]
      = { (unsigned long long int)(unsigned long)p,
          (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL };
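   /* On little-endian ARM, strexd writes its 32-bit status (0 on
      success) into r1, and the final 'str' stores it over the low
      word of block[2]; the high word keeps its initial 0xFFFFFFFF,
      hence the comparison against 0xFFFFFFFF00000000 below. */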
   do {
      __asm__ __volatile__(
         "mov    r5, %0"            "\n\t"
         "ldr    r8, [r5, #0]"      "\n\t" // p
         "ldrd   r2, r3, [r5, #8]"  "\n\t" // n
         "ldrexd r0, r1, [r8]"      "\n\t"
         "adds   r2, r2, r0"        "\n\t"
         "adc    r3, r3, r1"        "\n\t"
         "strexd r1, r2, r3, [r8]"  "\n\t"
         "str    r1, [r5, #16]"     "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r0", "r1", "r8", "r2", "r3"
      );
   } while (block[2] != 0xFFFFFFFF00000000ULL);
#elif defined(VGA_arm64)
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov  x5, %0"         "\n\t"
         "ldr  x9, [x5, #0]"   "\n\t" // p
         "ldr  x10, [x5, #8]"  "\n\t" // n
         "ldxr x8, [x9]"       "\n\t"
         "add  x8, x8, x10"    "\n\t"
         "stxr w4, x8, [x9]"   "\n\t"
         "str  x4, [x5, #16]"  "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   __asm__ __volatile__(
      "   lg    0,%0\n\t"
      "0: lgr   1,0\n\t"
      "   agr   1,%1\n\t"
      "   csg   0,1,%0\n\t"
      "   jl    0b\n\t"
      : "+m" (*p)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips64)
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move  $t0, %0"        "\n\t"
         "ld    $t1, 0($t0)"    "\n\t" // p
         "ld    $t2, 8($t0)"    "\n\t" // n
         "lld   $t3, 0($t1)"    "\n\t"
         "daddu $t3, $t3, $t2"  "\n\t"
         "scd   $t3, 0($t1)"    "\n\t"
         "sd    $t3, 16($t0)"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#elif defined(VGA_tilegx)
   __insn_fetchadd(p, n);
#else
# error "Unsupported arch"
#endif
}

int main ( int argc, char** argv )
{
   int    i, status;
   char*  page;
   char*  p8;
   short* p16;
   int*   p32;
   long long int* p64;
   pid_t  child, p2;

   printf("parent, pre-fork\n");

   page = mmap( 0, sysconf(_SC_PAGESIZE),
                PROT_READ|PROT_WRITE,
                MAP_ANONYMOUS|MAP_SHARED, -1, 0 );
   if (page == MAP_FAILED) {
      perror("mmap failed");
      exit(1);
   }
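
   /* Because the page is MAP_SHARED|MAP_ANONYMOUS, the parent and the
      child forked below operate on the very same memory, so their
      increments genuinely race against each other. */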

   p8  = (char*)(page+0);
   p16 = (short*)(page+256);
   p32 = (int*)(page+512);
   p64 = (long long int*)(page+768);

   assert( IS_8_ALIGNED(p8) );
   assert( IS_8_ALIGNED(p16) );
   assert( IS_8_ALIGNED(p32) );
   assert( IS_8_ALIGNED(p64) );

   memset(page, 0, 1024);

   *p8  = 0;
   *p16 = 0;
   *p32 = 0;
   *p64 = 0;

   child = fork();
   if (child == -1) {
      perror("fork() failed");
      return 1;
   }

   if (child == 0) {
      /* --- CHILD --- */
      printf("child\n");
      for (i = 0; i < NNN; i++) {
         atomic_add_8bit(p8, 1);
         atomic_add_16bit(p16, 1);
         atomic_add_32bit(p32, 1);
         atomic_add_64bit(p64, 98765); /* ensure we hit the upper 32 bits */
      }
      return 1;
      /* NOTREACHED */
   }

   /* --- PARENT --- */

   printf("parent\n");

   for (i = 0; i < NNN; i++) {
      atomic_add_8bit(p8, 1);
      atomic_add_16bit(p16, 1);
      atomic_add_32bit(p32, 1);
      atomic_add_64bit(p64, 98765); /* ensure we hit the upper 32 bits */
   }

   p2 = waitpid(child, &status, 0);
   assert(p2 == child);

   /* assert that child finished normally */
   assert(WIFEXITED(status));

   printf("FINAL VALUES: 8 bit %d, 16 bit %d, 32 bit %d, 64 bit %lld\n",
          (int)(*(signed char*)p8), (int)(*p16), *p32, *p64 );

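   /* Expected values: parent and child each do NNN = 3456987 adds of
      1, so the 8-bit counter ends at (2*NNN) % 256 = 182, i.e. -74 as
      a signed char; the 16-bit one at (2*NNN) % 65536 = 32694; and
      the 32-bit one at 2*NNN = 6913974.  The 64-bit counter is
      2*NNN*98765 = 682858642110, or 0 where 64-bit atomics are
      unsupported. */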
   if (-74 == (int)(*(signed char*)p8)
       && 32694 == (int)(*p16)
       && 6913974 == *p32
       && (0LL == *p64 || 682858642110LL == *p64)) {
      printf("PASS\n");
   } else {
      printf("FAIL -- see source code for expected values\n");
   }

   printf("parent exits\n");

   return 0;
}