• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 
/* This is an example of a program which does atomic memory operations
   between two processes which share a page.  Valgrind 3.4.1 and
   earlier produce incorrect answers because they do not preserve the
   atomicity of the relevant instructions in the generated code; but
   the post-DCAS-merge versions of Valgrind do behave correctly. */
7 
8 #include <stdlib.h>
9 #include <stdio.h>
10 #include <string.h>
11 #include <assert.h>
12 #include <unistd.h>
13 #include <sys/wait.h>
14 #include "tests/sys_mman.h"
15 
16 #define NNN 3456987
17 
18 #define IS_8_ALIGNED(_ptr)   (0 == (((unsigned long)(_ptr)) & 7))
19 
20 
/* Atomically perform *p += n on the single byte at p.

   p  address of the byte to update.  The ppc32/ppc64 paths operate on
      the whole word/doubleword that starts at p, so there p must be
      4-aligned (ppc32) or 8-aligned (ppc64).
   n  addend; only its low 8 bits influence the stored byte.

   The operation is implemented separately for each supported
   architecture (selected by the VGA_* macro); any other architecture
   is rejected at compile time. */
__attribute__((noinline)) void atomic_add_8bit ( char* p, int n )
{
#if defined(VGA_x86)
   /* Pass both arguments through a small in-memory block so the asm
      only needs one input register. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"      "\n\t"
      "movl 4(%%esi),%%ebx"      "\n\t"
      "lock; addb %%bl,(%%eax)"  "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"      "\n\t"
      "movq 8(%%rsi),%%rbx"      "\n\t"
      "lock; addb %%bl,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 4-aligned -- guaranteed by caller.  The add is done on the
      whole word with n shifted into its most significant byte, which
      is the byte at p on a big-endian target; any carry falls off the
      top of the word, so the neighbouring bytes are left alone. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stwcx. 15,0,%1"    "\n\t"
         /* Extract CR0.EQ, which stwcx. sets when the store happened. */
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 24)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller.  Same scheme as ppc32, but
      on the enclosing doubleword. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 56)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* A plain "*p += n" is a separate load and store and therefore not
      atomic with respect to the other process.  Use a byte-sized
      exclusive load/store pair instead (needs ARMv6K or later) and
      retry until the exclusive store succeeds.  block[2] receives the
      strexb status: 0 means the store was performed. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov    r5, %0"         "\n\t"
         "ldr    r9, [r5, #0]"   "\n\t" // p
         "ldr    r10, [r5, #4]"  "\n\t" // n
         "ldrexb r8, [r9]"       "\n\t"
         "add    r8, r8, r10"    "\n\t"
         "strexb r4, r8, [r9]"   "\n\t"
         "str    r4, [r5, #8]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r4", "r5", "r8", "r9", "r10"
      );
   } while (block[2] != 0);
#else
# error "Unsupported arch"
#endif
}
83 
84 
/* Atomically perform *p += n on the 16-bit value at p.

   p  address of the halfword to update.  The ppc32/ppc64 paths
      operate on the whole word/doubleword that starts at p, so there
      p must be 4-aligned (ppc32) or 8-aligned (ppc64).
   n  addend; only its low 16 bits influence the stored value.

   The operation is implemented separately for each supported
   architecture (selected by the VGA_* macro); any other architecture
   is rejected at compile time. */
__attribute__((noinline)) void atomic_add_16bit ( short* p, int n )
{
#if defined(VGA_x86)
   /* Pass both arguments through a small in-memory block so the asm
      only needs one input register. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"      "\n\t"
      "movl 4(%%esi),%%ebx"      "\n\t"
      "lock; addw %%bx,(%%eax)"  "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"      "\n\t"
      "movq 8(%%rsi),%%rbx"      "\n\t"
      "lock; addw %%bx,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 4-aligned -- guaranteed by caller (which passes an 8-aligned
      pointer).  The add is done on the whole word with n shifted
      into its upper halfword, which is the halfword at p on a
      big-endian target; any carry falls off the top of the word. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stwcx. 15,0,%1"    "\n\t"
         /* Extract CR0.EQ, which stwcx. sets when the store happened. */
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 16)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller.  Same scheme as ppc32, but
      on the enclosing doubleword. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 48)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* A plain "*p += n" is a separate load and store and therefore not
      atomic with respect to the other process.  Use a halfword-sized
      exclusive load/store pair instead (needs ARMv6K or later) and
      retry until the exclusive store succeeds.  block[2] receives the
      strexh status: 0 means the store was performed. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov    r5, %0"         "\n\t"
         "ldr    r9, [r5, #0]"   "\n\t" // p
         "ldr    r10, [r5, #4]"  "\n\t" // n
         "ldrexh r8, [r9]"       "\n\t"
         "add    r8, r8, r10"    "\n\t"
         "strexh r4, r8, [r9]"   "\n\t"
         "str    r4, [r5, #8]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r4", "r5", "r8", "r9", "r10"
      );
   } while (block[2] != 0);
#else
# error "Unsupported arch"
#endif
}
147 
/* Atomically perform *p += n on the 32-bit value at p.

   p  address of the word to update.  The ppc64 path operates on the
      whole doubleword that starts at p, so there p must be 8-aligned.
   n  addend.

   The operation is implemented separately for each supported
   architecture (selected by the VGA_* macro); any other architecture
   is rejected at compile time. */
__attribute__((noinline)) void atomic_add_32bit ( int* p, int n )
{
#if defined(VGA_x86)
   /* Pass both arguments through a small in-memory block so the asm
      only needs one input register. */
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movl 0(%%esi),%%eax"       "\n\t"
      "movl 4(%%esi),%%ebx"       "\n\t"
      "lock; addl %%ebx,(%%eax)"  "\n"
      : : "S"(&block[0])/* S means "esi only" */ : "memory","cc","eax","ebx"
   );
#elif defined(VGA_amd64)
   unsigned long block[2];
   block[0] = (unsigned long)p;
   block[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"       "\n\t"
      "movq 8(%%rsi),%%rbx"       "\n\t"
      "lock; addl %%ebx,(%%rax)"  "\n"
      : : "S"(&block[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc32)
   /* Load-reserve / store-conditional loop on the word itself. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "lwarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stwcx. 15,0,%1"    "\n\t"
         /* Extract CR0.EQ, which stwcx. sets when the store happened. */
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(n)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_ppc64)
   /* Nasty hack.  Does correctly atomically do *p += n, but only if p
      is 8-aligned -- guaranteed by caller.  The add is done on the
      whole doubleword with n shifted into its upper word, which is
      the word at p on a big-endian target; any carry falls off the
      top of the doubleword. */
   unsigned long success;
   do {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(success)
         : /*in*/ "b"(p), "b"(((unsigned long)n) << 32)
         : /*trash*/ "memory", "cc", "r15"
      );
   } while (success != 1);
#elif defined(VGA_arm)
   /* Exclusive load/store retry loop.  block[2] receives the strex
      status: 0 means the store was performed.  The status goes
      through r4, which is declared as clobbered; every register the
      asm writes must appear in the clobber list, and r11 is avoided
      because it can serve as the frame pointer. */
   unsigned int block[3]
      = { (unsigned int)p, (unsigned int)n, 0xFFFFFFFF };
   do {
      __asm__ __volatile__(
         "mov   r5, %0"         "\n\t"
         "ldr   r9, [r5, #0]"   "\n\t" // p
         "ldr   r10, [r5, #4]"  "\n\t" // n
         "ldrex r8, [r9]"       "\n\t"
         "add   r8, r8, r10"    "\n\t"
         "strex r4, r8, [r9]"   "\n\t"
         "str   r4, [r5, #8]"   "\n\t"
         : /*out*/
         : /*in*/ "r"(&block[0])
         : /*trash*/ "memory", "cc", "r4", "r5", "r8", "r9", "r10"
      );
   } while (block[2] != 0);
#else
# error "Unsupported arch"
#endif
}
223 
/* Atomically perform *p += n on the 64-bit value at p, on the
   architectures that have a 64-bit path here (amd64, ppc64).

   p  address of the 64-bit value to update.
   n  addend.

   On x86, ppc32 and arm this function deliberately does nothing, so
   *p is left untouched there.  Any other architecture is rejected at
   compile time. */
__attribute__((noinline)) void atomic_add_64bit ( long long int* p, int n )
{
#if defined(VGA_x86) || defined(VGA_ppc32) || defined(VGA_arm)
   /* 64-bit atomic add is not supported here: intentionally empty. */
#elif defined(VGA_amd64)
   /* Subtle: this depends on unsigned long long int, unsigned long
      and void* all having the same size on a 64-bit platform, so the
      pointer and the addend each fill exactly one 8-byte slot of the
      argument block read by the asm below. */
   unsigned long long int args[2];
   args[0] = (unsigned long long int)(unsigned long)p;
   args[1] = n;
   __asm__ __volatile__(
      "movq 0(%%rsi),%%rax"      "\n\t"
      "movq 8(%%rsi),%%rbx"      "\n\t"
      "lock; addq %%rbx,(%%rax)" "\n"
      : : "S"(&args[0])/* S means "rsi only" */ : "memory","cc","rax","rbx"
   );
#elif defined(VGA_ppc64)
   /* Load-reserve / store-conditional on the doubleword; keep trying
      until the conditional store reports success. */
   unsigned long stored;
   for (;;) {
      __asm__ __volatile__(
         "ldarx  15,0,%1"    "\n\t"
         "add    15,15,%2"   "\n\t"
         "stdcx. 15,0,%1"    "\n\t"
         "mfcr   %0"         "\n\t"
         "srwi   %0,%0,29"   "\n\t"
         "andi.  %0,%0,1"    "\n"
         : /*out*/"=b"(stored)
         : /*in*/ "b"(p), "b"(n)
         : /*trash*/ "memory", "cc", "r15"
      );
      if (stored == 1)
         break;
   }
#else
# error "Unsupported arch"
#endif
}
259 
main(int argc,char ** argv)260 int main ( int argc, char** argv )
261 {
262    int    i, status;
263    char*  page;
264    char*  p8;
265    short* p16;
266    int*   p32;
267    long long int* p64;
268    pid_t  child, p2;
269 
270    printf("parent, pre-fork\n");
271 
272    page = mmap( 0, sysconf(_SC_PAGESIZE),
273                    PROT_READ|PROT_WRITE,
274                    MAP_ANONYMOUS|MAP_SHARED, -1, 0 );
275    if (page == MAP_FAILED) {
276       perror("mmap failed");
277       exit(1);
278    }
279 
280    p8  = (char*)(page+0);
281    p16 = (short*)(page+256);
282    p32 = (int*)(page+512);
283    p64 = (long long int*)(page+768);
284 
285    assert( IS_8_ALIGNED(p8) );
286    assert( IS_8_ALIGNED(p16) );
287    assert( IS_8_ALIGNED(p32) );
288    assert( IS_8_ALIGNED(p64) );
289 
290    memset(page, 0, 1024);
291 
292    *p8  = 0;
293    *p16 = 0;
294    *p32 = 0;
295    *p64 = 0;
296 
297    child = fork();
298    if (child == -1) {
299       perror("fork() failed\n");
300       return 1;
301    }
302 
303    if (child == 0) {
304       /* --- CHILD --- */
305       printf("child\n");
306       for (i = 0; i < NNN; i++) {
307          atomic_add_8bit(p8, 1);
308          atomic_add_16bit(p16, 1);
309          atomic_add_32bit(p32, 1);
310          atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
311       }
312       return 1;
313       /* NOTREACHED */
314 
315    }
316 
317    /* --- PARENT --- */
318 
319    printf("parent\n");
320 
321    for (i = 0; i < NNN; i++) {
322       atomic_add_8bit(p8, 1);
323       atomic_add_16bit(p16, 1);
324       atomic_add_32bit(p32, 1);
325       atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
326    }
327 
328    p2 = waitpid(child, &status, 0);
329    assert(p2 == child);
330 
331    /* assert that child finished normally */
332    assert(WIFEXITED(status));
333 
334    printf("FINAL VALUES:  8 bit %d,  16 bit %d,  32 bit %d,  64 bit %lld\n",
335           (int)(*(signed char*)p8), (int)(*p16), *p32, *p64 );
336 
337    if (-74 == (int)(*(signed char*)p8)
338        && 32694 == (int)(*p16)
339        && 6913974 == *p32
340        && (0LL == *p64 || 682858642110LL == *p64)) {
341       printf("PASS\n");
342    } else {
343       printf("FAIL -- see source code for expected values\n");
344    }
345 
346    printf("parent exits\n");
347 
348    return 0;
349 }
350