/* This is an example of a program which does Cavium atomic memory operations
   between two processes which share a page. This test is based on:
   memcheck/tests/atomic_incs.c */
4
5 #include <stdlib.h>
6 #include <stdio.h>
7 #include <string.h>
8 #include <assert.h>
9 #include <unistd.h>
10 #include <sys/wait.h>
11 #include "tests/sys_mman.h"
12
/* Number of test slots.  main() maps one shared page per slot and places one
   32-bit and one 64-bit test word in it. */
#define N 19

/* Number of repetitions each process performs. */
#define NNN 3456987

/* Expected final values of the 32-bit test words, indexed by slot (the test
   number printed by main() is slot + 1).  main() prints these words with %u;
   entries above INT_MAX rely on the implementation's wrap-around conversion
   to int. */
int p1_expd[N] = {
   2156643710,   /* Test  1: saa              */
   2156643710,   /* Test  2: laa              */
   3456986,      /* Test  3: law              */
   6913974,      /* Test  4: lai              */
   4288053322,   /* Test  5: lad              */
   0,            /* Test  6: lac              */
   4294967295,   /* Test  7: las              */
   6913974,      /* Test  8: laa + saa, base1 */
   21777111,     /* Test  9: laa + saa, base2 */
   3456986,      /* Test 10: law + saa, base1 */
   2153186724,   /* Test 11: law + saa, base2 */
   6913974,      /* Test 12: lai + saa, base1 */
   21777111,     /* Test 13: lai + saa, base2 */
   4294967295,   /* Test 14: las + saa, base1 */
   4288053323,   /* Test 15: las + saa, base2 */
   4288053322,   /* Test 16: lad + saa, base1 */
   4273190185,   /* Test 17: lad + saa, base2 */
   0,            /* Test 18: lac + saa, base1 */
   0             /* Test 19: lac + saa, base2 */
};

/* Expected final values of the 64-bit test words, indexed like p1_expd. */
long long int p2_expd[N] = {
   12633614303292,    /* Test  1: saad               */
   12633614303292,    /* Test  2: laad               */
   3555751,           /* Test  3: lawd               */
   6913974,           /* Test  4: laid               */
   -6913974,          /* Test  5: ladd               */
   0,                 /* Test  6: lacd               */
   -1,                /* Test  7: lasd               */
   6913974,           /* Test  8: laad + saad, base1 */
   23901514779351,    /* Test  9: laad + saad, base2 */
   3456986,           /* Test 10: lawd + saad, base1 */
   11950752204196,    /* Test 11: lawd + saad, base2 */
   6913974,           /* Test 12: laid + saad, base1 */
   23901514779351,    /* Test 13: laid + saad, base2 */
   -1,                /* Test 14: lasd + saad, base1 */
   -6913973,          /* Test 15: lasd + saad, base2 */
   -6913974,          /* Test 16: ladd + saad, base1 */
   -23901514779351,   /* Test 17: ladd + saad, base2 */
   0,                 /* Test 18: lacd + saad, base1 */
   0                  /* Test 19: lacd + saad, base2 */
};
34
/* Non-zero when the three lowest bits of the pointer value are all clear,
   i.e. the address is a multiple of 8. */
#define IS_8_ALIGNED(ptr) ((((unsigned long)(ptr)) & 7UL) == 0)
36
/* Store Atomic Add, 32-bit: issues the `saa` instruction on the word at p
   with n as its register operand.  When _MIPS_ARCH_OCTEON2 is not set the
   body is compiled out and the call does nothing. */
__attribute__((noinline)) void atomic_saa ( int* p, int n )
{
#if (_MIPS_ARCH_OCTEON2)
   /* The operands are staged in memory and re-loaded by the asm itself:
      args[0] = target address, args[1] = n.  The loads at offsets 0 and 8
      assume unsigned long is 8 bytes wide. */
   unsigned long args[2];

   args[0] = (unsigned long)p;
   args[1] = (unsigned long)n;
   __asm__ __volatile__(
      "move $t0, %[args]" "\n\t"
      "ld $t1, 0($t0)" "\n\t"   /* $t1 = p */
      "ld $t2, 8($t0)" "\n\t"   /* $t2 = n */
      "saa $t2, ($t1)" "\n\t"
      : /* no outputs */
      : [args] "r" (args)
      : "memory", "t0", "t1", "t2"
   );
#endif
}
52
/* Store Atomic Add, 64-bit: issues the `saad` instruction on the doubleword
   at p with n as its register operand.  When _MIPS_ARCH_OCTEON2 is not set
   the body is compiled out and the call does nothing. */
__attribute__((noinline)) void atomic_saad ( long long int* p, int n )
{
#if (_MIPS_ARCH_OCTEON2)
   /* args[0] = target address, args[1] = n; the asm reads both back with
      8-byte loads, which assumes unsigned long is 8 bytes wide. */
   unsigned long args[2];

   args[0] = (unsigned long)p;
   args[1] = (unsigned long)n;
   __asm__ __volatile__(
      "move $t0, %[args]" "\n\t"
      "ld $t1, 0($t0)" "\n\t"   /* $t1 = p */
      "ld $t2, 8($t0)" "\n\t"   /* $t2 = n */
      "saad $t2, ($t1)" "\n\t"
      : /* no outputs */
      : [args] "r" (args)
      : "memory", "t0", "t1", "t2"
   );
#endif
}
68
/* Load Atomic Add, 32-bit: issues the `laa` instruction on the word at p
   with n as its register operand.  The value the instruction loads into
   $t3 is discarded.  When _MIPS_ARCH_OCTEON2 is not set the body is
   compiled out and the call does nothing. */
__attribute__((noinline)) void atomic_laa ( int* p, int n )
{
#if (_MIPS_ARCH_OCTEON2)
   /* block[0] = target address, block[1] = n; the asm reads both back with
      8-byte loads, which assumes unsigned long is 8 bytes wide. */
   unsigned long block[2] = { (unsigned long)p, (unsigned long)n };
   __asm__ __volatile__(
      "move $t0, %0" "\n\t"
      "ld $t1, 0($t0)" "\n\t"   /* $t1 = p */
      "ld $t2, 8($t0)" "\n\t"   /* $t2 = n */
      "laa $t3, ($t1), $t2" "\n\t"
      : /*out*/
      : /*in*/ "r"(&block[0])
      /* $t3 is written by `laa`, so it must be declared clobbered; without
         it the compiler may keep a live value in that register across the
         asm statement. */
      : /*trash*/ "memory", "t0", "t1", "t2", "t3"
   );
#endif
}
84
/* Load Atomic Add, 64-bit: issues the `laad` instruction on the doubleword
   at p with n as its register operand; the value loaded into $t3 is
   discarded.  When _MIPS_ARCH_OCTEON2 is not set the body is compiled out
   and the call does nothing. */
__attribute__((noinline)) void atomic_laad ( long long int* p, int n )
{
#if (_MIPS_ARCH_OCTEON2)
   /* args[0] = target address, args[1] = n; the asm reads both back with
      8-byte loads, which assumes unsigned long is 8 bytes wide. */
   unsigned long args[2];

   args[0] = (unsigned long)p;
   args[1] = (unsigned long)n;
   __asm__ __volatile__(
      "move $t0, %[args]" "\n\t"
      "ld $t1, 0($t0)" "\n\t"   /* $t1 = p */
      "ld $t2, 8($t0)" "\n\t"   /* $t2 = n */
      "laad $t3, ($t1), $t2" "\n\t"
      : /* no outputs */
      : [args] "r" (args)
      : "memory", "t0", "t1", "t2", "t3"
   );
#endif
}
100
/* Load Atomic Swap, 32-bit: issues the `law` instruction on the word at p
   with n as its register operand.  The value the instruction loads into
   $t3 is discarded.  When _MIPS_ARCH_OCTEON2 is not set the body is
   compiled out and the call does nothing. */
__attribute__((noinline)) void atomic_law ( int* p, int n )
{
#if (_MIPS_ARCH_OCTEON2)
   /* block[0] = target address, block[1] = n; the asm reads both back with
      8-byte loads, which assumes unsigned long is 8 bytes wide. */
   unsigned long block[2] = { (unsigned long)p, (unsigned long)n };
   __asm__ __volatile__(
      "move $t0, %0" "\n\t"
      "ld $t1, 0($t0)" "\n\t"   /* $t1 = p */
      "ld $t2, 8($t0)" "\n\t"   /* $t2 = n */
      "law $t3, ($t1), $t2" "\n\t"
      : /*out*/
      : /*in*/ "r"(&block[0])
      /* $t3 is written by `law`, so it must be declared clobbered; without
         it the compiler may keep a live value in that register across the
         asm statement. */
      : /*trash*/ "memory", "t0", "t1", "t2", "t3"
   );
#endif
}
116
/* Load Atomic Swap, 64-bit: issues the `lawd` instruction on the doubleword
   at p with n as its register operand; the value loaded into $t3 is
   discarded.  When _MIPS_ARCH_OCTEON2 is not set the body is compiled out
   and the call does nothing. */
__attribute__((noinline)) void atomic_lawd ( long long int* p, int n )
{
#if (_MIPS_ARCH_OCTEON2)
   /* args[0] = target address, args[1] = n; the asm reads both back with
      8-byte loads, which assumes unsigned long is 8 bytes wide. */
   unsigned long args[2];

   args[0] = (unsigned long)p;
   args[1] = (unsigned long)n;
   __asm__ __volatile__(
      "move $t0, %[args]" "\n\t"
      "ld $t1, 0($t0)" "\n\t"   /* $t1 = p */
      "ld $t2, 8($t0)" "\n\t"   /* $t2 = n */
      "lawd $t3, ($t1), $t2" "\n\t"
      : /* no outputs */
      : [args] "r" (args)
      : "memory", "t0", "t1", "t2", "t3"
   );
#endif
}
132
/* Load Atomic Increment, 32-bit: issues the `lai` instruction on the word
   at p and discards the value it loads.  When _MIPS_ARCH_OCTEON2 is not set
   the body is compiled out and the call does nothing. */
__attribute__((noinline)) void atomic_lai ( int* p )
{
#if (_MIPS_ARCH_OCTEON2)
   /* args[0] = target address.  args[1] is just zero: the asm still loads
      it into $t2, but $t2 is then the register `lai` loads into, so the
      zero is never used.  The 8-byte loads assume unsigned long is 8 bytes
      wide. */
   unsigned long args[2];

   args[0] = (unsigned long)p;
   args[1] = 0;
   __asm__ __volatile__(
      "move $t0, %[args]" "\n\t"
      "ld $t1, 0($t0)" "\n\t"   /* $t1 = p */
      "ld $t2, 8($t0)" "\n\t"   /* $t2 = 0 (unused) */
      "lai $t2, ($t1)" "\n\t"
      : /* no outputs */
      : [args] "r" (args)
      : "memory", "t0", "t1", "t2"
   );
#endif
}
148
/* Load Atomic Increment, 64-bit: issues the `laid` instruction on the
   doubleword at p and discards the value it loads.  When
   _MIPS_ARCH_OCTEON2 is not set the body is compiled out and the call does
   nothing. */
__attribute__((noinline)) void atomic_laid ( long long int* p )
{
#if (_MIPS_ARCH_OCTEON2)
   /* args[0] = target address.  args[1] is just zero: the asm still loads
      it into $t2, but $t2 is then the register `laid` loads into, so the
      zero is never used.  The 8-byte loads assume unsigned long is 8 bytes
      wide. */
   unsigned long args[2];

   args[0] = (unsigned long)p;
   args[1] = 0;
   __asm__ __volatile__(
      "move $t0, %[args]" "\n\t"
      "ld $t1, 0($t0)" "\n\t"   /* $t1 = p */
      "ld $t2, 8($t0)" "\n\t"   /* $t2 = 0 (unused) */
      "laid $t2, ($t1)" "\n\t"
      : /* no outputs */
      : [args] "r" (args)
      : "memory", "t0", "t1", "t2"
   );
#endif
}
164
/* Load Atomic Decrement, 32-bit: issues the `lad` instruction on the word
   at p and discards the value it loads.  When _MIPS_ARCH_OCTEON2 is not set
   the body is compiled out and the call does nothing. */
__attribute__((noinline)) void atomic_lad ( int* p )
{
#if (_MIPS_ARCH_OCTEON2)
   /* args[0] = target address.  args[1] is just zero: the asm still loads
      it into $t2, but $t2 is then the register `lad` loads into, so the
      zero is never used.  The 8-byte loads assume unsigned long is 8 bytes
      wide. */
   unsigned long args[2];

   args[0] = (unsigned long)p;
   args[1] = 0;
   __asm__ __volatile__(
      "move $t0, %[args]" "\n\t"
      "ld $t1, 0($t0)" "\n\t"   /* $t1 = p */
      "ld $t2, 8($t0)" "\n\t"   /* $t2 = 0 (unused) */
      "lad $t2, ($t1)" "\n\t"
      : /* no outputs */
      : [args] "r" (args)
      : "memory", "t0", "t1", "t2"
   );
#endif
}
180
/* Load Atomic Decrement, 64-bit: issues the `ladd` instruction on the
   doubleword at p and discards the value it loads.  When
   _MIPS_ARCH_OCTEON2 is not set the body is compiled out and the call does
   nothing. */
__attribute__((noinline)) void atomic_ladd ( long long int* p )
{
#if (_MIPS_ARCH_OCTEON2)
   /* args[0] = target address.  args[1] is just zero: the asm still loads
      it into $t2, but $t2 is then the register `ladd` loads into, so the
      zero is never used.  The 8-byte loads assume unsigned long is 8 bytes
      wide. */
   unsigned long args[2];

   args[0] = (unsigned long)p;
   args[1] = 0;
   __asm__ __volatile__(
      "move $t0, %[args]" "\n\t"
      "ld $t1, 0($t0)" "\n\t"   /* $t1 = p */
      "ld $t2, 8($t0)" "\n\t"   /* $t2 = 0 (unused) */
      "ladd $t2, ($t1)" "\n\t"
      : /* no outputs */
      : [args] "r" (args)
      : "memory", "t0", "t1", "t2"
   );
#endif
}
196
/* Load Atomic Clear, 32-bit: issues the `lac` instruction on the word at p
   and discards the value it loads.  When _MIPS_ARCH_OCTEON2 is not set the
   body is compiled out and the call does nothing. */
__attribute__((noinline)) void atomic_lac ( int* p )
{
#if (_MIPS_ARCH_OCTEON2)
   /* args[0] = target address.  args[1] is just zero: the asm still loads
      it into $t2, but $t2 is then the register `lac` loads into, so the
      zero is never used.  The 8-byte loads assume unsigned long is 8 bytes
      wide. */
   unsigned long args[2];

   args[0] = (unsigned long)p;
   args[1] = 0;
   __asm__ __volatile__(
      "move $t0, %[args]" "\n\t"
      "ld $t1, 0($t0)" "\n\t"   /* $t1 = p */
      "ld $t2, 8($t0)" "\n\t"   /* $t2 = 0 (unused) */
      "lac $t2, ($t1)" "\n\t"
      : /* no outputs */
      : [args] "r" (args)
      : "memory", "t0", "t1", "t2"
   );
#endif
}
212
/* Load Atomic Clear, 64-bit: issues the `lacd` instruction on the
   doubleword at p and discards the value it loads.  When
   _MIPS_ARCH_OCTEON2 is not set the body is compiled out and the call does
   nothing. */
__attribute__((noinline)) void atomic_lacd ( long long int* p )
{
#if (_MIPS_ARCH_OCTEON2)
   /* args[0] = target address.  args[1] is just zero: the asm still loads
      it into $t2, but $t2 is then the register `lacd` loads into, so the
      zero is never used.  The 8-byte loads assume unsigned long is 8 bytes
      wide. */
   unsigned long args[2];

   args[0] = (unsigned long)p;
   args[1] = 0;
   __asm__ __volatile__(
      "move $t0, %[args]" "\n\t"
      "ld $t1, 0($t0)" "\n\t"   /* $t1 = p */
      "ld $t2, 8($t0)" "\n\t"   /* $t2 = 0 (unused) */
      "lacd $t2, ($t1)" "\n\t"
      : /* no outputs */
      : [args] "r" (args)
      : "memory", "t0", "t1", "t2"
   );
#endif
}
228
/* Load Atomic Set, 32-bit: issues the `las` instruction on the word at p
   and discards the value it loads.  When _MIPS_ARCH_OCTEON2 is not set the
   body is compiled out and the call does nothing. */
__attribute__((noinline)) void atomic_las ( int* p )
{
#if (_MIPS_ARCH_OCTEON2)
   /* args[0] = target address.  args[1] is just zero: the asm still loads
      it into $t2, but $t2 is then the register `las` loads into, so the
      zero is never used.  The 8-byte loads assume unsigned long is 8 bytes
      wide. */
   unsigned long args[2];

   args[0] = (unsigned long)p;
   args[1] = 0;
   __asm__ __volatile__(
      "move $t0, %[args]" "\n\t"
      "ld $t1, 0($t0)" "\n\t"   /* $t1 = p */
      "ld $t2, 8($t0)" "\n\t"   /* $t2 = 0 (unused) */
      "las $t2, ($t1)" "\n\t"
      : /* no outputs */
      : [args] "r" (args)
      : "memory", "t0", "t1", "t2"
   );
#endif
}
244
/* Load Atomic Set, 64-bit: issues the `lasd` instruction on the doubleword
   at p and discards the value it loads.  When _MIPS_ARCH_OCTEON2 is not set
   the body is compiled out and the call does nothing. */
__attribute__((noinline)) void atomic_lasd ( long long int* p )
{
#if (_MIPS_ARCH_OCTEON2)
   /* args[0] = target address.  args[1] is just zero: the asm still loads
      it into $t2, but $t2 is then the register `lasd` loads into, so the
      zero is never used.  The 8-byte loads assume unsigned long is 8 bytes
      wide. */
   unsigned long args[2];

   args[0] = (unsigned long)p;
   args[1] = 0;
   __asm__ __volatile__(
      "move $t0, %[args]" "\n\t"
      "ld $t1, 0($t0)" "\n\t"   /* $t1 = p */
      "ld $t2, 8($t0)" "\n\t"   /* $t2 = 0 (unused) */
      "lasd $t2, ($t1)" "\n\t"
      : /* no outputs */
      : [args] "r" (args)
      : "memory", "t0", "t1", "t2"
   );
#endif
}
260
/* Runs `instruction` (a string such as "laa " or "law ") on the word at
   base1 with n as its register operand, leaving its result in $t0, and then
   issues `saa` with that $t0 on the word at base2.
   The expansion is deliberately a bare brace block: the call sites in this
   file invoke the macro without a trailing semicolon. */
#define TRIOP_AND_SAA(instruction, base1, base2, n) \
   { \
      __asm__ __volatile__( \
         instruction " $t0, (%[src]), %[val]" "\n\t" \
         "saa $t0, (%[dst])" "\n\t" \
         : /* no outputs */ \
         : [src] "r" (base1), [dst] "r" (base2), [val] "r" (n) \
         : "memory", "t0" \
      ); \
   }
271
/* 64-bit counterpart of the three-operand combination: runs `instruction`
   (a string such as "laad " or "lawd ") on the doubleword at base1 with n
   as its register operand, leaving its result in $t0, and then issues
   `saad` with that $t0 on the doubleword at base2.
   The expansion is deliberately a bare brace block: the call sites in this
   file invoke the macro without a trailing semicolon. */
#define TRIOP_AND_SAAD(instruction, base1, base2, n) \
   { \
      __asm__ __volatile__( \
         instruction " $t0, (%[src]), %[val]" "\n\t" \
         "saad $t0, (%[dst])" "\n\t" \
         : /* no outputs */ \
         : [src] "r" (base1), [dst] "r" (base2), [val] "r" (n) \
         : "memory", "t0" \
      ); \
   }
282
/* Runs `instruction` (a string such as "lai ", "las ", "lad " or "lac ") on
   the word at base1, leaving its result in $t0, and then issues `saa` with
   that $t0 on the word at base2.
   The expansion is deliberately a bare brace block: the call sites in this
   file invoke the macro without a trailing semicolon. */
#define BINOP_AND_SAA(instruction, base1, base2) \
   { \
      __asm__ __volatile__( \
         instruction " $t0, (%[src])" "\n\t" \
         "saa $t0, (%[dst])" "\n\t" \
         : /* no outputs */ \
         : [src] "r" (base1), [dst] "r" (base2) \
         : "memory", "t0" \
      ); \
   }
293
/* 64-bit counterpart of the two-operand combination: runs `instruction`
   (a string such as "laid ", "lasd ", "ladd " or "lacd ") on the doubleword
   at base1, leaving its result in $t0, and then issues `saad` with that $t0
   on the doubleword at base2.
   The expansion is deliberately a bare brace block: the call sites in this
   file invoke the macro without a trailing semicolon. */
#define BINOP_AND_SAAD(instruction, base1, base2) \
   { \
      __asm__ __volatile__( \
         instruction " $t0, (%[src])" "\n\t" \
         "saad $t0, (%[dst])" "\n\t" \
         : /* no outputs */ \
         : [src] "r" (base1), [dst] "r" (base2) \
         : "memory", "t0" \
      ); \
   }
304
#if (_MIPS_ARCH_OCTEON2)
/* Performs NNN rounds of every atomic operation under test.  words32[k] and
   words64[k] point at the 32-bit and 64-bit test words of slot k.  The
   parent and the forked child both run this identical sequence on the same
   shared words. */
static void run_atomic_ops ( int* words32[], long long int* words64[] )
{
   int i;

   for (i = 0; i < NNN; i++) {
      /* Slots 0-6: a single instruction per slot. */
      atomic_saa(words32[0], i);
      atomic_saad(words64[0], i + 98765); /* ensure we hit the upper 32 bits */
      atomic_laa(words32[1], i);
      atomic_laad(words64[1], i + 98765); /* ensure we hit the upper 32 bits */
      atomic_law(words32[2], i);
      atomic_lawd(words64[2], i + 98765); /* ensure we hit the upper 32 bits */
      atomic_lai(words32[3]);
      atomic_laid(words64[3]);
      atomic_lad(words32[4]);
      atomic_ladd(words64[4]);
      atomic_lac(words32[5]);
      atomic_lacd(words64[5]);
      atomic_las(words32[6]);
      atomic_lasd(words64[6]);

      /* Slots 7-18, in pairs: a load-atomic on the first slot whose result
         is then passed to saa/saad on the second slot.  The macros expand
         to brace blocks, so the trailing ';' is just an empty statement. */
      TRIOP_AND_SAA("laa ", words32[7], words32[8], 1);
      TRIOP_AND_SAAD("laad ", words64[7], words64[8], 1);
      TRIOP_AND_SAA("law ", words32[9], words32[10], i);
      TRIOP_AND_SAAD("lawd ", words64[9], words64[10], i);
      BINOP_AND_SAA("lai ", words32[11], words32[12]);
      BINOP_AND_SAAD("laid ", words64[11], words64[12]);
      BINOP_AND_SAA("las ", words32[13], words32[14]);
      BINOP_AND_SAAD("lasd ", words64[13], words64[14]);
      BINOP_AND_SAA("lad ", words32[15], words32[16]);
      BINOP_AND_SAAD("ladd ", words64[15], words64[16]);
      BINOP_AND_SAA("lac ", words32[17], words32[18]);
      BINOP_AND_SAAD("lacd ", words64[17], words64[18]);
   }
}
#endif

/* Test driver.  On an Octeon II build it maps N shared anonymous pages,
   forks, lets parent and child hammer the same test words with the atomic
   instructions, then prints the final values and a PASS/FAIL line per slot
   by comparing against p1_expd / p2_expd.  On any other build it does
   nothing.

   Returns 0 from the parent (also when a slot FAILs; the verdict is only
   reported on stdout), 1 if mmap() or fork() fails, and 1 from the child. */
int main ( int argc, char** argv )
{
#if (_MIPS_ARCH_OCTEON2)
   int i, status;
   long page_size;
   char* page[N];
   int* p1[N];
   long long int* p2[N];
   pid_t child, pc2;

   page_size = sysconf(_SC_PAGESIZE);

   for (i = 0; i < N; i++) {
      /* MAP_SHARED so that the words stay common to both processes after
         the fork. */
      page[i] = mmap( 0, page_size,
                      PROT_READ|PROT_WRITE,
                      MAP_ANONYMOUS|MAP_SHARED, -1, 0 );
      if (page[i] == MAP_FAILED) {
         perror("mmap failed");
         exit(1);
      }
      p1[i] = (int*)(page[i] + 0);
      p2[i] = (long long int*)(page[i] + 256);

      assert( IS_8_ALIGNED(p1[i]) );
      assert( IS_8_ALIGNED(p2[i]) );

      /* Clear the region holding both test words. */
      memset(page[i], 0, 1024);

      *p1[i] = 0;
      *p2[i] = 0;
   }

   child = fork();
   if (child == -1) {
      /* perror() appends ": <error text>\n" itself, so no newline here. */
      perror("fork() failed");
      return 1;
   }

   if (child == 0) {
      /* --- CHILD --- */
      run_atomic_ops(p1, p2);
      /* The parent only checks WIFEXITED, so this value is not inspected. */
      return 1;
   }

   /* --- PARENT --- */
   run_atomic_ops(p1, p2);

   pc2 = waitpid(child, &status, 0);
   assert(pc2 == child);

   /* assert that child finished normally */
   assert(WIFEXITED(status));

   /* The 32-bit words are stored as int but reported as unsigned, hence the
      casts to match %u. */
   printf("Store Atomic Add: 32 bit %u, 64 bit %lld\n", (unsigned)*p1[0], *p2[0]);
   printf("Load Atomic Add: 32 bit %u, 64 bit %lld\n", (unsigned)*p1[1], *p2[1]);
   printf("Load Atomic Swap: 32 bit %u, 64 bit %lld\n", (unsigned)*p1[2], *p2[2]);
   printf("Load Atomic Increment: 32 bit %u, 64 bit %lld\n", (unsigned)*p1[3], *p2[3]);
   printf("Load Atomic Decrement: 32 bit %u, 64 bit %lld\n", (unsigned)*p1[4], *p2[4]);
   printf("Load Atomic Clear: 32 bit %u, 64 bit %lld\n", (unsigned)*p1[5], *p2[5]);
   printf("Load Atomic Set: 32 bit %u, 64 bit %lld\n", (unsigned)*p1[6], *p2[6]);
   printf("laa and saa: base1: %u, base2: %u\n", (unsigned)*p1[7], (unsigned)*p1[8]);
   printf("laad and saad: base1: %lld, base2: %lld\n", *p2[7], *p2[8]);
   printf("law and saa: base1: %u, base2: %u\n", (unsigned)*p1[9], (unsigned)*p1[10]);
   printf("lawd and saad: base1: %lld, base2: %lld\n", *p2[9], *p2[10]);
   printf("lai and saa: base1: %u, base2: %u\n", (unsigned)*p1[11], (unsigned)*p1[12]);
   printf("laid and saad: base1: %lld, base2: %lld\n", *p2[11], *p2[12]);
   printf("las and saa: base1: %u, base2: %u\n", (unsigned)*p1[13], (unsigned)*p1[14]);
   printf("lasd and saad: base1: %lld, base2: %lld\n", *p2[13], *p2[14]);
   printf("lad and saa: base1: %u, base2: %u\n", (unsigned)*p1[15], (unsigned)*p1[16]);
   printf("ladd and saad: base1: %lld, base2: %lld\n", *p2[15], *p2[16]);
   printf("lac and saa: base1: %u, base2: %u\n", (unsigned)*p1[17], (unsigned)*p1[18]);
   printf("lacd and saad: base1: %lld, base2: %lld\n", *p2[17], *p2[18]);

   for (i = 0; i < N; i++) {
      if (p1_expd[i] == *p1[i] && p2_expd[i] == *p2[i]) {
         printf("PASS %d\n", i+1);
      } else {
         printf("FAIL %d -- see source code for expected values\n", i+1);
      }
   }

   printf("parent exits\n");
#endif
   return 0;
}
444