
/* This is an example of a program which does atomic memory operations
   between two processes which share a page.  Valgrind 3.4.1 and
   earlier produce incorrect answers because it does not preserve
   atomicity of the relevant instructions in the generated code; but
   the post-DCAS-merge versions of Valgrind do behave correctly. */

/* On ARM, this can be compiled into either ARM or Thumb code, so as
   to test both A and T encodings of LDREX/STREX et al.  Also on ARM,
   it tests doubleword atomics (LDREXD, STREXD) which I don't think it
   does on any other platform. */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <unistd.h>
#include <sys/wait.h>
#include "tests/sys_mman.h"

#define NNN 3456987

#define IS_8_ALIGNED(_ptr)   (0 == (((unsigned long)(_ptr)) & 7))
25
/* Atomically perform *p += n on an 8-bit counter, using an
   architecture-specific locked-add, LL/SC or CAS sequence.  The
   caller guarantees p is suitably aligned (8-aligned; some variants
   below rely on at least 4-alignment).  On LL/SC targets the update
   is retried until the store-conditional succeeds.  Only the low 8
   bits of n are meaningful to the result. */
__attribute__((noinline)) void atomic_add_8bit ( char* p, int n )
{
#if defined(VGA_x86)
   /* x86: a single "lock addb" does the whole job; no retry loop.
      block[] passes p and n in via a single register (esi). */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"      "\n\t"
      "movl 4(%%esi),%%ebx"      "\n\t"
      "lock; addb %%bl,(%%eax)"  "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"      "\n\t"
      "movq 8(%%rsi),%%rbx"      "\n\t"
      "lock; addb %%bl,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 4-aligned -- guaranteed by caller. */
   /* n is pre-shifted into the top byte of the word so that a
      full-word lwarx/add/stwcx. sequence updates the byte at *p
      (big-endian word layout -- presumably; mirrors the ppc64
      variant's << 56). */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stwcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"   /* move CR0 field down ...        */
         "andi.  %0,%0,1"    "\n"     /* ... and isolate the EQ bit,
                                         set iff stwcx. succeeded */
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 24)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 56)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* block[2] receives the strexb status: 0 on success, nonzero if
      the exclusive monitor was lost and we must retry. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov    r5, %0"         "\n\t"
         "ldr    r9, [r5, #0]"   "\n\t" // p
         "ldr    r10, [r5, #4]"  "\n\t" // n
         "ldrexb r8, [r9]"       "\n\t"
         "add    r8, r8, r10"    "\n\t"
         "strexb r4, r8, [r9]"   "\n\t"
         "str    r4, [r5, #8]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_arm64)
   /* Same scheme as the 32-bit ARM version, with ldxrb/stxrb. */
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov   x5, %0"         "\n\t"
         "ldr   x9, [x5, #0]"   "\n\t" // p
         "ldr   x10, [x5, #8]"  "\n\t" // n
         "ldxrb w8, [x9]"       "\n\t"
         "add   x8, x8, x10"    "\n\t"
         "stxrb w4, w8, [x9]"   "\n\t"
         "str   x4, [x5, #16]"  "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* Compare-and-swap loop on the word containing *p: icm/stcm
      splice the single byte out of / back into a scratch word in
      'dummy', and cs retries if the word at *p changed underneath us.
      NOTE(review): "l 0,%0" reads a full word through a char lvalue;
      relies on p's alignment as guaranteed by the caller. */
   int dummy;
   __asm__ __volatile__(
      " l	0,%0\n\t"
      "0: st	0,%1\n\t"
      " icm	1,1,%1\n\t"
      " ar	1,%2\n\t"
      " stcm	1,1,%1\n\t"
      " l	1,%1\n\t"
      " cs	0,1,%0\n\t"
      " jl	0b\n\t"
      : "+m" (*p), "+m" (dummy)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
      exception that can cause this function to fail. */
#if defined (_MIPSEL)
   /* Little-endian: mask the update into the low byte of the word.
      block[2] receives the sc status: 1 on success. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"           "\n\t"
         "lw   $t1, 0($t0)"       "\n\t"  // p
         "lw   $t2, 4($t0)"       "\n\t"  // n
         "andi $t2, $t2, 0xFF"    "\n\t"  // n = n and 0xFF
         "li   $t4, 0xFF"         "\n\t"
         "nor  $t4, $t4, $zero"   "\n\t"  // $t4 = 0xFFFFFF00
         "ll   $t3, 0($t1)"       "\n\t"  // $t3 = old value
         "and  $t4, $t4, $t3"     "\n\t"  // $t4 = $t3 and 0xFFFFFF00
         "addu $t3, $t3, $t2"     "\n\t"  // $t3 = $t3 + n
         "andi $t3, $t3, 0xFF"    "\n\t"  // $t3 = $t3 and 0xFF
         "or   $t3, $t3, $t4"     "\n\t"  // $t3 = $t3 or $t4
         "sc   $t3, 0($t1)"       "\n\t"
         "sw   $t3, 8($t0)"       "\n\t"  // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "t4"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   /* Big-endian: shift n up so a plain full-word add updates the
      byte at *p. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n << 24, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"        "\n\t"
         "lw   $t1, 0($t0)"    "\n\t"  // p
         "lw   $t2, 4($t0)"    "\n\t"  // n
         "ll   $t3, 0($t1)"    "\n\t"
         "addu $t3, $t3, $t2"  "\n\t"
         "sc   $t3, 0($t1)"    "\n\t"
         "sw   $t3, 8($t0)"    "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#endif
#elif defined(VGA_mips64)
   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
      exception that can cause this function to fail. */
#if defined (_MIPSEL)
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move $t0, %0"           "\n\t"
         "ld   $t1, 0($t0)"       "\n\t"  // p
         "ld   $t2, 8($t0)"       "\n\t"  // n
         "andi $t2, $t2, 0xFF"    "\n\t"  // n = n and 0xFF
         "li   $s0, 0xFF"         "\n\t"
         "nor  $s0, $s0, $zero"   "\n\t"  // $s0 = 0xFFFFFF00
         "ll   $t3, 0($t1)"       "\n\t"  // $t3 = old value
         "and  $s0, $s0, $t3"     "\n\t"  // $s0 = $t3 and 0xFFFFFF00
         "addu $t3, $t3, $t2"     "\n\t"  // $t3 = $t3 + n
         "andi $t3, $t3, 0xFF"    "\n\t"  // $t3 = $t3 and 0xFF
         "or   $t3, $t3, $s0"     "\n\t"  // $t3 = $t3 or $s0
         "sc   $t3, 0($t1)"       "\n\t"
         "sw   $t3, 16($t0)"      "\n\t"  // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "s0"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n << 56, 0x0 };
   do {
      __asm__ __volatile__(
         "move  $t0, %0"        "\n\t"
         "ld    $t1, 0($t0)"    "\n\t"  // p
         "ld    $t2, 8($t0)"    "\n\t"  // n
         "lld   $t3, 0($t1)"    "\n\t"
         "daddu $t3, $t3, $t2"  "\n\t"
         "scd   $t3, 0($t1)"    "\n\t"
         "sd    $t3, 16($t0)"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#endif
#else
# error "Unsupported arch"
#endif
}
223
224
/* Atomically perform *p += n on a 16-bit counter.  Structurally
   identical to atomic_add_8bit above, but operating on halfwords:
   x86/amd64 use "lock addw", ARM uses ldrexh/strexh, arm64 uses
   ldxrh/stxrh, s390x splices two bytes with icm/stcm, and the
   PPC/MIPS word-wide LL/SC hacks shift/mask by 16 bits instead of 8.
   The caller guarantees p is suitably aligned. */
__attribute__((noinline)) void atomic_add_16bit ( short* p, int n )
{
#if defined(VGA_x86)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"      "\n\t"
      "movl 4(%%esi),%%ebx"      "\n\t"
      "lock; addw %%bx,(%%eax)"  "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"      "\n\t"
      "movq 8(%%rsi),%%rbx"      "\n\t"
      "lock; addw %%bx,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stwcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"   /* isolate CR0.EQ: set iff the  */
         "andi.  %0,%0,1"    "\n"     /* stwcx. above succeeded       */
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 16)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 48)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* block[2] receives the strexh status: 0 = success, else retry. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov    r5, %0"         "\n\t"
         "ldr    r9, [r5, #0]"   "\n\t" // p
         "ldr    r10, [r5, #4]"  "\n\t" // n
         "ldrexh r8, [r9]"       "\n\t"
         "add    r8, r8, r10"    "\n\t"
         "strexh r4, r8, [r9]"   "\n\t"
         "str    r4, [r5, #8]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_arm64)
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov   x5, %0"         "\n\t"
         "ldr   x9, [x5, #0]"   "\n\t" // p
         "ldr   x10, [x5, #8]"  "\n\t" // n
         "ldxrh w8, [x9]"       "\n\t"
         "add   x8, x8, x10"    "\n\t"
         "stxrh w4, w8, [x9]"   "\n\t"
         "str   x4, [x5, #16]"  "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* CAS loop on the containing word; icm/stcm mask 3 selects the
      top two bytes, i.e. the halfword being updated. */
   int dummy;
   __asm__ __volatile__(
      " l	0,%0\n\t"
      "0: st	0,%1\n\t"
      " icm	1,3,%1\n\t"
      " ar	1,%2\n\t"
      " stcm	1,3,%1\n\t"
      " l	1,%1\n\t"
      " cs	0,1,%0\n\t"
      " jl	0b\n\t"
      : "+m" (*p), "+m" (dummy)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
      exception that can cause this function to fail. */
#if defined (_MIPSEL)
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"           "\n\t"
         "lw   $t1, 0($t0)"       "\n\t"  // p
         "lw   $t2, 4($t0)"       "\n\t"  // n
         "andi $t2, $t2, 0xFFFF"  "\n\t"  // n = n and 0xFFFF
         "li   $t4, 0xFFFF"       "\n\t"
         "nor  $t4, $t4, $zero"   "\n\t"  // $t4 = 0xFFFF0000
         "ll   $t3, 0($t1)"       "\n\t"  // $t3 = old value
         "and  $t4, $t4, $t3"     "\n\t"  // $t4 = $t3 and 0xFFFF0000
         "addu $t3, $t3, $t2"     "\n\t"  // $t3 = $t3 + n
         "andi $t3, $t3, 0xFFFF"  "\n\t"  // $t3 = $t3 and 0xFFFF
         "or   $t3, $t3, $t4"     "\n\t"  // $t3 = $t3 or $t4
         "sc   $t3, 0($t1)"       "\n\t"
         "sw   $t3, 8($t0)"       "\n\t"  // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "t4"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n << 16, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"        "\n\t"
         "lw   $t1, 0($t0)"    "\n\t"  // p
         "lw   $t2, 4($t0)"    "\n\t"  // n
         "ll   $t3, 0($t1)"    "\n\t"
         "addu $t3, $t3, $t2"  "\n\t"
         "sc   $t3, 0($t1)"    "\n\t"
         "sw   $t3, 8($t0)"    "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#endif
#elif defined(VGA_mips64)
   /* We rely on the fact that p is 4-aligned. Otherwise 'll' may throw an
      exception that can cause this function to fail. */
#if defined (_MIPSEL)
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move $t0, %0"           "\n\t"
         "ld   $t1, 0($t0)"       "\n\t"  // p
         "ld   $t2, 8($t0)"       "\n\t"  // n
         "andi $t2, $t2, 0xFFFF"  "\n\t"  // n = n and 0xFFFF
         "li   $s0, 0xFFFF"       "\n\t"
         "nor  $s0, $s0, $zero"   "\n\t"  // $s0 = 0xFFFF0000
         "ll   $t3, 0($t1)"       "\n\t"  // $t3 = old value
         "and  $s0, $s0, $t3"     "\n\t"  // $s0 = $t3 and 0xFFFF0000
         "addu $t3, $t3, $t2"     "\n\t"  // $t3 = $t3 + n
         "andi $t3, $t3, 0xFFFF"  "\n\t"  // $t3 = $t3 and 0xFFFF
         "or   $t3, $t3, $s0"     "\n\t"  // $t3 = $t3 or $s0
         "sc   $t3, 0($t1)"       "\n\t"
         "sw   $t3, 16($t0)"      "\n\t"  // save result
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3", "s0"
      );
   } while (block[2] != 1);
#elif defined (_MIPSEB)
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n << 48, 0x0 };
   do {
      __asm__ __volatile__(
         "move  $t0, %0"        "\n\t"
         "ld    $t1, 0($t0)"    "\n\t"  // p
         "ld    $t2, 8($t0)"    "\n\t"  // n
         "lld   $t3, 0($t1)"    "\n\t"
         "daddu $t3, $t3, $t2"  "\n\t"
         "scd   $t3, 0($t1)"    "\n\t"
         "sd    $t3, 16($t0)"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#endif
#else
# error "Unsupported arch"
#endif
}
422
/* Atomically perform *p += n on a 32-bit counter.  Simpler than the
   8/16-bit variants since a whole word is the natural LL/SC and CAS
   granule on every supported target: no byte masking or shifting is
   needed (except ppc64, which works on a doubleword and shifts n up
   by 32).  The caller guarantees p is suitably aligned. */
__attribute__((noinline)) void atomic_add_32bit ( int* p, int n )
{
#if defined(VGA_x86)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"       "\n\t"
      "movl 4(%%esi),%%ebx"       "\n\t"
      "lock; addl %%ebx,(%%eax)"  "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"       "\n\t"
      "movq 8(%%rsi),%%rbx"       "\n\t"
      "lock; addl %%ebx,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stwcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"   /* isolate CR0.EQ: set iff the  */
         "andi.  %0,%0,1"    "\n"     /* stwcx. above succeeded       */
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(n)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 32)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* block[2] receives the strex status: 0 = success, else retry. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov   r5, %0"         "\n\t"
         "ldr   r9, [r5, #0]"   "\n\t" // p
         "ldr   r10, [r5, #4]"  "\n\t" // n
         "ldrex r8, [r9]"       "\n\t"
         "add   r8, r8, r10"    "\n\t"
         "strex r4, r8, [r9]"   "\n\t"
         "str   r4, [r5, #8]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r4"
      );
   } while (block[2] != 0);
#elif defined(VGA_arm64)
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov  x5, %0"         "\n\t"
         "ldr  x9, [x5, #0]"   "\n\t" // p
         "ldr  x10, [x5, #8]"  "\n\t" // n
         "ldxr w8, [x9]"       "\n\t"
         "add  x8, x8, x10"    "\n\t"
         "stxr w4, w8, [x9]"   "\n\t"
         "str  x4, [x5, #16]"  "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* Plain word compare-and-swap loop: retry while cs reports a
      mismatch (condition code "low" -> jl). */
   __asm__ __volatile__(
      " l	0,%0\n\t"
      "0: lr	1,0\n\t"
      " ar	1,%1\n\t"
      " cs	0,1,%0\n\t"
      " jl	0b\n\t"
      : "+m" (*p)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips32)
   /* block[2] receives the sc status: 1 = success, 0 = retry. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0x0 };
   do {
      __asm__ __volatile__(
         "move $t0, %0"        "\n\t"
         "lw   $t1, 0($t0)"    "\n\t"  // p
         "lw   $t2, 4($t0)"    "\n\t"  // n
         "ll   $t3, 0($t1)"    "\n\t"
         "addu $t3, $t3, $t2"  "\n\t"
         "sc   $t3, 0($t1)"    "\n\t"
         "sw   $t3, 8($t0)"    "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#elif defined(VGA_mips64)
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move $t0, %0"        "\n\t"
         "ld   $t1, 0($t0)"    "\n\t"  // p
         "ld   $t2, 8($t0)"    "\n\t"  // n
         "ll   $t3, 0($t1)"    "\n\t"
         "addu $t3, $t3, $t2"  "\n\t"
         "sc   $t3, 0($t1)"    "\n\t"
         "sd   $t3, 16($t0)"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#else
# error "Unsupported arch"
#endif
}
560
/* Atomically perform *p += n on a 64-bit counter.  Deliberately a
   no-op on 32-bit targets without doubleword atomics (x86, ppc32,
   mips32) -- the caller's expected-value check accounts for this by
   also accepting a final value of 0.  On 32-bit ARM this exercises
   the doubleword exclusives LDREXD/STREXD. */
__attribute__((noinline)) void atomic_add_64bit ( long long int* p, int n )
{
#if defined(VGA_x86) || defined(VGA_ppc32) || defined(VGA_mips32)
   /* do nothing; is not supported */
#elif defined(VGA_amd64)
   // this is a bit subtle. It relies on the fact that, on a 64-bit platform,
   // sizeof(unsigned long long int) == sizeof(unsigned long) == sizeof(void*)
   unsigned long long int block[2];
   block[0] = (unsigned long long int)(unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"       "\n\t"
      "movq 8(%%rsi),%%rbx"       "\n\t"
      "lock; addq %%rbx,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc64)
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"   /* isolate CR0.EQ: set iff the  */
         "andi.  %0,%0,1"    "\n"     /* stdcx. above succeeded       */
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(n)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* 64-bit add built from adds/adc across an even/odd register pair,
      guarded by the doubleword exclusives.  block[2]'s low word gets
      the strexd status (0 = success); the loop condition checks that
      only the low 32 bits changed from the 0xFF..FF sentinel. */
   unsigned long long int block[3]
      = { (unsigned long long int)(unsigned long)p,
          (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL };
   do {
      __asm__ __volatile__(
         "mov    r5, %0"             "\n\t"
         "ldr    r8, [r5, #0]"       "\n\t"  // p
         "ldrd   r2, r3, [r5, #8]"   "\n\t"  // n
         "ldrexd r0, r1, [r8]"       "\n\t"
         "adds   r2, r2, r0"         "\n\t"
         "adc    r3, r3, r1"         "\n\t"
         "strexd r1, r2, r3, [r8]"   "\n\t"
         "str    r1, [r5, #16]"      "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r0", "r1", "r8", "r2", "r3"
      );
   } while (block[2] != 0xFFFFFFFF00000000ULL);
#elif defined(VGA_arm64)
   unsigned long long int block[3]
      = { (unsigned long long int)p, (unsigned long long int)n,
          0xFFFFFFFFFFFFFFFFULL};
   do {
      __asm__ __volatile__(
         "mov  x5, %0"         "\n\t"
         "ldr  x9, [x5, #0]"   "\n\t" // p
         "ldr  x10, [x5, #8]"  "\n\t" // n
         "ldxr x8, [x9]"       "\n\t"
         "add  x8, x8, x10"    "\n\t"
         "stxr w4, x8, [x9]"   "\n\t"
         "str  x4, [x5, #16]"  "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "x5", "x8", "x9", "x10", "x4"
      );
   } while (block[2] != 0);
#elif defined(VGA_s390x)
   /* Doubleword compare-and-swap loop (csg). */
   __asm__ __volatile__(
      " lg	0,%0\n\t"
      "0: lgr	1,0\n\t"
      " agr	1,%1\n\t"
      " csg	0,1,%0\n\t"
      " jl	0b\n\t"
      : "+m" (*p)
      : "d" (n)
      : "cc", "memory", "0", "1");
#elif defined(VGA_mips64)
   unsigned long block[3]
      = { (unsigned long)p, (unsigned long)n, 0x0ULL };
   do {
      __asm__ __volatile__(
         "move  $t0, %0"        "\n\t"
         "ld    $t1, 0($t0)"    "\n\t"  // p
         "ld    $t2, 8($t0)"    "\n\t"  // n
         "lld   $t3, 0($t1)"    "\n\t"
         "daddu $t3, $t3, $t2"  "\n\t"
         "scd   $t3, 0($t1)"    "\n\t"
         "sd    $t3, 16($t0)"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "t0", "t1", "t2", "t3"
      );
   } while (block[2] != 1);
#else
# error "Unsupported arch"
#endif
}
661
main(int argc,char ** argv)662 int main ( int argc, char** argv )
663 {
664 int i, status;
665 char* page;
666 char* p8;
667 short* p16;
668 int* p32;
669 long long int* p64;
670 pid_t child, p2;
671
672 printf("parent, pre-fork\n");
673
674 page = mmap( 0, sysconf(_SC_PAGESIZE),
675 PROT_READ|PROT_WRITE,
676 MAP_ANONYMOUS|MAP_SHARED, -1, 0 );
677 if (page == MAP_FAILED) {
678 perror("mmap failed");
679 exit(1);
680 }
681
682 p8 = (char*)(page+0);
683 p16 = (short*)(page+256);
684 p32 = (int*)(page+512);
685 p64 = (long long int*)(page+768);
686
687 assert( IS_8_ALIGNED(p8) );
688 assert( IS_8_ALIGNED(p16) );
689 assert( IS_8_ALIGNED(p32) );
690 assert( IS_8_ALIGNED(p64) );
691
692 memset(page, 0, 1024);
693
694 *p8 = 0;
695 *p16 = 0;
696 *p32 = 0;
697 *p64 = 0;
698
699 child = fork();
700 if (child == -1) {
701 perror("fork() failed\n");
702 return 1;
703 }
704
705 if (child == 0) {
706 /* --- CHILD --- */
707 printf("child\n");
708 for (i = 0; i < NNN; i++) {
709 atomic_add_8bit(p8, 1);
710 atomic_add_16bit(p16, 1);
711 atomic_add_32bit(p32, 1);
712 atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
713 }
714 return 1;
715 /* NOTREACHED */
716
717 }
718
719 /* --- PARENT --- */
720
721 printf("parent\n");
722
723 for (i = 0; i < NNN; i++) {
724 atomic_add_8bit(p8, 1);
725 atomic_add_16bit(p16, 1);
726 atomic_add_32bit(p32, 1);
727 atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
728 }
729
730 p2 = waitpid(child, &status, 0);
731 assert(p2 == child);
732
733 /* assert that child finished normally */
734 assert(WIFEXITED(status));
735
736 printf("FINAL VALUES: 8 bit %d, 16 bit %d, 32 bit %d, 64 bit %lld\n",
737 (int)(*(signed char*)p8), (int)(*p16), *p32, *p64 );
738
739 if (-74 == (int)(*(signed char*)p8)
740 && 32694 == (int)(*p16)
741 && 6913974 == *p32
742 && (0LL == *p64 || 682858642110LL == *p64)) {
743 printf("PASS\n");
744 } else {
745 printf("FAIL -- see source code for expected values\n");
746 }
747
748 printf("parent exits\n");
749
750 return 0;
751 }
752