
/* This is an example of a program which does atomic memory operations
   between two processes which share a page.  Valgrind 3.4.1 and
   earlier produce incorrect answers because they do not preserve
   atomicity of the relevant instructions in the generated code; but
   the post-DCAS-merge versions of Valgrind do behave correctly. */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <unistd.h>
#include <sys/wait.h>
#include "tests/sys_mman.h"

#define NNN 3456987

#define IS_8_ALIGNED(_ptr)  (0 == (((unsigned long)(_ptr)) & 7))


__attribute__((noinline)) void atomic_add_8bit ( char* p, int n )
{
#if defined(VGA_x86)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
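   /* The asm below loads p into %eax and n into %ebx from block[]
      (addressed via %esi), then performs a single lock'd byte add, so
      the increment of *p is carried out by exactly one atomic
      instruction. */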
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"      "\n\t"
      "movl 4(%%esi),%%ebx"      "\n\t"
      "lock; addb %%bl,(%%eax)"  "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"      "\n\t"
      "movq 8(%%rsi),%%rbx"      "\n\t"
      "lock; addb %%bl,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 4-aligned -- guaranteed by caller. */
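   /* lwarx/stwcx. form a load-reserve/store-conditional loop on the
      4-byte word containing *p.  Because p is word-aligned (see above)
      and this target is big-endian, *p is the most significant byte of
      that word, so adding n << 24 adds n to *p.  stwcx. sets CR0.EQ to
      1 only if the store succeeded; mfcr/srwi/andi. extract that bit
      into 'success', and the loop retries until the update has been
      applied atomically. */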
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"   "\n\t"
         "add    15,15,%2"  "\n\t"
         "stwcx. 15,0,%1"   "\n\t"
         "mfcr   %0"        "\n\t"
         "srwi   %0,%0,29"  "\n\t"
         "andi.  %0,%0,1"   "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 24)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"   "\n\t"
         "add    15,15,%2"  "\n\t"
         "stdcx. 15,0,%1"   "\n\t"
         "mfcr   %0"        "\n\t"
         "srwi   %0,%0,29"  "\n\t"
         "andi.  %0,%0,1"   "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 56)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   *p += n;
#else
# error "Unsupported arch"
#endif
}


__attribute__((noinline)) void atomic_add_16bit ( short* p, int n )
{
#if defined(VGA_x86)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"      "\n\t"
      "movl 4(%%esi),%%ebx"      "\n\t"
      "lock; addw %%bx,(%%eax)"  "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"      "\n\t"
      "movq 8(%%rsi),%%rbx"      "\n\t"
      "lock; addw %%bx,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"   "\n\t"
         "add    15,15,%2"  "\n\t"
         "stwcx. 15,0,%1"   "\n\t"
         "mfcr   %0"        "\n\t"
         "srwi   %0,%0,29"  "\n\t"
         "andi.  %0,%0,1"   "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 16)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"   "\n\t"
         "add    15,15,%2"  "\n\t"
         "stdcx. 15,0,%1"   "\n\t"
         "mfcr   %0"        "\n\t"
         "srwi   %0,%0,29"  "\n\t"
         "andi.  %0,%0,1"   "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 48)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   *p += n;
#else
# error "Unsupported arch"
#endif
}

__attribute__((noinline)) void atomic_add_32bit ( int* p, int n )
{
#if defined(VGA_x86)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"       "\n\t"
      "movl 4(%%esi),%%ebx"       "\n\t"
      "lock; addl %%ebx,(%%eax)"  "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"       "\n\t"
      "movq 8(%%rsi),%%rbx"       "\n\t"
      "lock; addl %%ebx,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"   "\n\t"
         "add    15,15,%2"  "\n\t"
         "stwcx. 15,0,%1"   "\n\t"
         "mfcr   %0"        "\n\t"
         "srwi   %0,%0,29"  "\n\t"
         "andi.  %0,%0,1"   "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(n)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"   "\n\t"
         "add    15,15,%2"  "\n\t"
         "stdcx. 15,0,%1"   "\n\t"
         "mfcr   %0"        "\n\t"
         "srwi   %0,%0,29"  "\n\t"
         "andi.  %0,%0,1"   "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 32)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
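   /* ldrex/strex form a load-exclusive/store-exclusive loop: strex
      writes 0 to r11 if the exclusive store succeeded and 1 if it did
      not.  That flag is copied out to block[2], and the loop below
      repeats until it reads 0, i.e. until the add has been applied
      atomically. */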
   do {
      __asm__ __volatile__(
         "mov   r5, %0"         "\n\t"
         "ldr   r9, [r5, #0]"   "\n\t" // p
         "ldr   r10, [r5, #4]"  "\n\t" // n
         "ldrex r8, [r9]"       "\n\t"
         "add   r8, r8, r10"    "\n\t"
         "strex r11, r8, [r9]"  "\n\t"
         "str   r11, [r5, #8]"  "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r5", "r8", "r9", "r10", "r11"
      );
   } while (block[2] != 0);
#else
# error "Unsupported arch"
#endif
}

__attribute__((noinline)) void atomic_add_64bit ( long long int* p, int n )
{
#if defined(VGA_x86) || defined(VGA_ppc32) || defined(VGA_arm)
   /* do nothing; 64-bit atomics are not supported on these targets */
#elif defined(VGA_amd64)
   // this is a bit subtle.  It relies on the fact that, on a 64-bit platform,
   // sizeof(unsigned long long int) == sizeof(unsigned long) == sizeof(void*)
   unsigned long long int block[2];
   block[0] = (unsigned long long int)(unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"       "\n\t"
      "movq 8(%%rsi),%%rbx"       "\n\t"
      "lock; addq %%rbx,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc64)
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"   "\n\t"
         "add    15,15,%2"  "\n\t"
         "stdcx. 15,0,%1"   "\n\t"
         "mfcr   %0"        "\n\t"
         "srwi   %0,%0,29"  "\n\t"
         "andi.  %0,%0,1"   "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(n)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#else
# error "Unsupported arch"
#endif
}

int main ( int argc, char** argv )
{
   int i, status;
   char* page;
   char* p8;
   short* p16;
   int* p32;
   long long int* p64;
   pid_t child, p2;

   printf("parent, pre-fork\n");

   page = mmap( 0, sysconf(_SC_PAGESIZE),
                PROT_READ|PROT_WRITE,
                MAP_ANONYMOUS|MAP_SHARED, -1, 0 );
   if (page == MAP_FAILED) {
      perror("mmap failed");
      exit(1);
   }

   p8  = (char*)(page+0);
   p16 = (short*)(page+256);
   p32 = (int*)(page+512);
   p64 = (long long int*)(page+768);
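   /* The four counters are spaced 256 bytes apart in the shared page;
      mmap returns page-aligned memory, so each pointer is 8-aligned,
      which the asserts below verify and the ppc paths rely on. */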

   assert( IS_8_ALIGNED(p8) );
   assert( IS_8_ALIGNED(p16) );
   assert( IS_8_ALIGNED(p32) );
   assert( IS_8_ALIGNED(p64) );

   memset(page, 0, 1024);

   *p8  = 0;
   *p16 = 0;
   *p32 = 0;
   *p64 = 0;

   child = fork();
   if (child == -1) {
      perror("fork() failed");
      return 1;
   }

   if (child == 0) {
      /* --- CHILD --- */
      printf("child\n");
      for (i = 0; i < NNN; i++) {
         atomic_add_8bit(p8, 1);
         atomic_add_16bit(p16, 1);
         atomic_add_32bit(p32, 1);
         atomic_add_64bit(p64, 98765); /* ensure we hit the upper 32 bits */
      }
      return 1;
      /* NOTREACHED */

   }

   /* --- PARENT --- */

   printf("parent\n");

   for (i = 0; i < NNN; i++) {
      atomic_add_8bit(p8, 1);
      atomic_add_16bit(p16, 1);
      atomic_add_32bit(p32, 1);
      atomic_add_64bit(p64, 98765); /* ensure we hit the upper 32 bits */
   }

   p2 = waitpid(child, &status, 0);
   assert(p2 == child);

   /* assert that child finished normally */
   assert(WIFEXITED(status));

   printf("FINAL VALUES: 8 bit %d, 16 bit %d, 32 bit %d, 64 bit %lld\n",
          (int)(*(signed char*)p8), (int)(*p16), *p32, *p64 );

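   /* Expected results: each process performs NNN (3456987) increments,
      so each counter should end up at 2*NNN = 6913974, truncated to its
      width: 6913974 mod 256 = 182 = -74 as a signed char, and
      6913974 mod 65536 = 32694 for the 16-bit counter.  The 64-bit
      counter reaches 2*NNN*98765 = 682858642110 on targets where
      atomic_add_64bit is implemented, and stays 0 where it is a no-op. */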
   if (-74 == (int)(*(signed char*)p8)
       && 32694 == (int)(*p16)
       && 6913974 == *p32
       && (0LL == *p64 || 682858642110LL == *p64)) {
      printf("PASS\n");
   } else {
      printf("FAIL -- see source code for expected values\n");
   }

   printf("parent exits\n");

   return 0;
}