1 // SPDX-License-Identifier: LGPL-2.1
2 #define _GNU_SOURCE
3 #include <assert.h>
4 #include <linux/membarrier.h>
5 #include <pthread.h>
6 #include <sched.h>
7 #include <stdatomic.h>
8 #include <stdint.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <syscall.h>
13 #include <unistd.h>
14 #include <poll.h>
15 #include <sys/types.h>
16 #include <signal.h>
17 #include <errno.h>
18 #include <stddef.h>
19
20 static inline pid_t rseq_gettid(void)
21 {
22 return syscall(__NR_gettid);
23 }
24
25 #define NR_INJECT 9
26 static int loop_cnt[NR_INJECT + 1];
27
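/*
 * Copies of loop_cnt[1..6] exported under fixed asm names (filled in by
 * main()) so that the x86 RSEQ_INJECT_ASM() delay loops can load them by
 * symbol; other architectures pass loop_cnt[] as "m" operands through
 * RSEQ_INJECT_INPUT instead.
 */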
28 static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
29 static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
30 static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
31 static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
32 static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
33 static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));
34
35 static int opt_modulo, verbose;
36
37 static int opt_yield, opt_signal, opt_sleep,
38 opt_disable_rseq, opt_threads = 200,
39 opt_disable_mod = 0, opt_test = 's', opt_mb = 0;
40
41 #ifndef RSEQ_SKIP_FASTPATH
42 static long long opt_reps = 5000;
43 #else
44 static long long opt_reps = 100;
45 #endif
46
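/* Per-thread count of SIGUSR1 deliveries, incremented by the -k signal handler. */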
47 static __thread __attribute__((tls_model("initial-exec")))
48 unsigned int signals_delivered;
49
50 #ifndef BENCHMARK
51
52 static __thread __attribute__((tls_model("initial-exec"), unused))
53 unsigned int yield_mod_cnt, nr_abort;
54
55 #define printf_verbose(fmt, ...) \
56 do { \
57 if (verbose) \
58 printf(fmt, ## __VA_ARGS__); \
59 } while (0)
60
61 #ifdef __i386__
62
63 #define INJECT_ASM_REG "eax"
64
65 #define RSEQ_INJECT_CLOBBER \
66 , INJECT_ASM_REG
67
68 #define RSEQ_INJECT_ASM(n) \
69 "mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
70 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
71 "jz 333f\n\t" \
72 "222:\n\t" \
73 "dec %%" INJECT_ASM_REG "\n\t" \
74 "jnz 222b\n\t" \
75 "333:\n\t"
76
77 #elif defined(__x86_64__)
78
79 #define INJECT_ASM_REG_P "rax"
80 #define INJECT_ASM_REG "eax"
81
82 #define RSEQ_INJECT_CLOBBER \
83 , INJECT_ASM_REG_P \
84 , INJECT_ASM_REG
85
86 #define RSEQ_INJECT_ASM(n) \
87 "lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
88 "mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
89 "test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
90 "jz 333f\n\t" \
91 "222:\n\t" \
92 "dec %%" INJECT_ASM_REG "\n\t" \
93 "jnz 222b\n\t" \
94 "333:\n\t"
95
96 #elif defined(__s390__)
97
98 #define RSEQ_INJECT_INPUT \
99 , [loop_cnt_1]"m"(loop_cnt[1]) \
100 , [loop_cnt_2]"m"(loop_cnt[2]) \
101 , [loop_cnt_3]"m"(loop_cnt[3]) \
102 , [loop_cnt_4]"m"(loop_cnt[4]) \
103 , [loop_cnt_5]"m"(loop_cnt[5]) \
104 , [loop_cnt_6]"m"(loop_cnt[6])
105
106 #define INJECT_ASM_REG "r12"
107
108 #define RSEQ_INJECT_CLOBBER \
109 , INJECT_ASM_REG
110
111 #define RSEQ_INJECT_ASM(n) \
112 "l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
113 "ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
114 "je 333f\n\t" \
115 "222:\n\t" \
116 "ahi %%" INJECT_ASM_REG ", -1\n\t" \
117 "jnz 222b\n\t" \
118 "333:\n\t"
119
120 #elif defined(__ARMEL__)
121
122 #define RSEQ_INJECT_INPUT \
123 , [loop_cnt_1]"m"(loop_cnt[1]) \
124 , [loop_cnt_2]"m"(loop_cnt[2]) \
125 , [loop_cnt_3]"m"(loop_cnt[3]) \
126 , [loop_cnt_4]"m"(loop_cnt[4]) \
127 , [loop_cnt_5]"m"(loop_cnt[5]) \
128 , [loop_cnt_6]"m"(loop_cnt[6])
129
130 #define INJECT_ASM_REG "r4"
131
132 #define RSEQ_INJECT_CLOBBER \
133 , INJECT_ASM_REG
134
135 #define RSEQ_INJECT_ASM(n) \
136 "ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
137 "cmp " INJECT_ASM_REG ", #0\n\t" \
138 "beq 333f\n\t" \
139 "222:\n\t" \
140 "subs " INJECT_ASM_REG ", #1\n\t" \
141 "bne 222b\n\t" \
142 "333:\n\t"
143
144 #elif defined(__AARCH64EL__)
145
146 #define RSEQ_INJECT_INPUT \
147 , [loop_cnt_1] "Qo" (loop_cnt[1]) \
148 , [loop_cnt_2] "Qo" (loop_cnt[2]) \
149 , [loop_cnt_3] "Qo" (loop_cnt[3]) \
150 , [loop_cnt_4] "Qo" (loop_cnt[4]) \
151 , [loop_cnt_5] "Qo" (loop_cnt[5]) \
152 , [loop_cnt_6] "Qo" (loop_cnt[6])
153
154 #define INJECT_ASM_REG RSEQ_ASM_TMP_REG32
155
156 #define RSEQ_INJECT_ASM(n) \
157 " ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
158 " cbz " INJECT_ASM_REG ", 333f\n" \
159 "222:\n" \
160 " sub " INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
161 " cbnz " INJECT_ASM_REG ", 222b\n" \
162 "333:\n"
163
164 #elif defined(__PPC__)
165
166 #define RSEQ_INJECT_INPUT \
167 , [loop_cnt_1]"m"(loop_cnt[1]) \
168 , [loop_cnt_2]"m"(loop_cnt[2]) \
169 , [loop_cnt_3]"m"(loop_cnt[3]) \
170 , [loop_cnt_4]"m"(loop_cnt[4]) \
171 , [loop_cnt_5]"m"(loop_cnt[5]) \
172 , [loop_cnt_6]"m"(loop_cnt[6])
173
174 #define INJECT_ASM_REG "r18"
175
176 #define RSEQ_INJECT_CLOBBER \
177 , INJECT_ASM_REG
178
179 #define RSEQ_INJECT_ASM(n) \
180 "lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
181 "cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
182 "beq 333f\n\t" \
183 "222:\n\t" \
184 "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
185 "bne 222b\n\t" \
186 "333:\n\t"
187
188 #elif defined(__mips__)
189
190 #define RSEQ_INJECT_INPUT \
191 , [loop_cnt_1]"m"(loop_cnt[1]) \
192 , [loop_cnt_2]"m"(loop_cnt[2]) \
193 , [loop_cnt_3]"m"(loop_cnt[3]) \
194 , [loop_cnt_4]"m"(loop_cnt[4]) \
195 , [loop_cnt_5]"m"(loop_cnt[5]) \
196 , [loop_cnt_6]"m"(loop_cnt[6])
197
198 #define INJECT_ASM_REG "$5"
199
200 #define RSEQ_INJECT_CLOBBER \
201 , INJECT_ASM_REG
202
203 #define RSEQ_INJECT_ASM(n) \
204 "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
205 "beqz " INJECT_ASM_REG ", 333f\n\t" \
206 "222:\n\t" \
207 "addiu " INJECT_ASM_REG ", -1\n\t" \
208 "bnez " INJECT_ASM_REG ", 222b\n\t" \
209 "333:\n\t"
210
211 #else
212 #error unsupported target
213 #endif
214
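/*
 * Hooks picked up by the rseq.h fast paths included below:
 * RSEQ_INJECT_FAILED counts rseq aborts, and RSEQ_INJECT_C(n) runs at
 * injection point n, spinning loop_cnt[n] times or, when loop_cnt[n] is
 * -1, periodically sleeping/yielding/raising SIGUSR1 per the -s/-y/-k
 * and -m options.
 */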
215 #define RSEQ_INJECT_FAILED \
216 nr_abort++;
217
218 #define RSEQ_INJECT_C(n) \
219 { \
220 int loc_i, loc_nr_loops = loop_cnt[n]; \
221 \
222 for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
223 rseq_barrier(); \
224 } \
225 if (loc_nr_loops == -1 && opt_modulo) { \
226 if (yield_mod_cnt == opt_modulo - 1) { \
227 if (opt_sleep > 0) \
228 poll(NULL, 0, opt_sleep); \
229 if (opt_yield) \
230 sched_yield(); \
231 if (opt_signal) \
232 raise(SIGUSR1); \
233 yield_mod_cnt = 0; \
234 } else { \
235 yield_mod_cnt++; \
236 } \
237 } \
238 }
239
240 #else
241
242 #define printf_verbose(fmt, ...)
243
244 #endif /* BENCHMARK */
245
246 #include "rseq.h"
247
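/*
 * Per-cpu data structures used by the tests. Each per-cpu entry is
 * aligned to 128 bytes, presumably so that entries for different CPUs
 * sit on distinct cache lines and avoid false sharing.
 */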
248 struct percpu_lock_entry {
249 intptr_t v;
250 } __attribute__((aligned(128)));
251
252 struct percpu_lock {
253 struct percpu_lock_entry c[CPU_SETSIZE];
254 };
255
256 struct test_data_entry {
257 intptr_t count;
258 } __attribute__((aligned(128)));
259
260 struct spinlock_test_data {
261 struct percpu_lock lock;
262 struct test_data_entry c[CPU_SETSIZE];
263 };
264
265 struct spinlock_thread_test_data {
266 struct spinlock_test_data *data;
267 long long reps;
268 int reg;
269 };
270
271 struct inc_test_data {
272 struct test_data_entry c[CPU_SETSIZE];
273 };
274
275 struct inc_thread_test_data {
276 struct inc_test_data *data;
277 long long reps;
278 int reg;
279 };
280
281 struct percpu_list_node {
282 intptr_t data;
283 struct percpu_list_node *next;
284 };
285
286 struct percpu_list_entry {
287 struct percpu_list_node *head;
288 } __attribute__((aligned(128)));
289
290 struct percpu_list {
291 struct percpu_list_entry c[CPU_SETSIZE];
292 };
293
294 #define BUFFER_ITEM_PER_CPU 100
295
296 struct percpu_buffer_node {
297 intptr_t data;
298 };
299
300 struct percpu_buffer_entry {
301 intptr_t offset;
302 intptr_t buflen;
303 struct percpu_buffer_node **array;
304 } __attribute__((aligned(128)));
305
306 struct percpu_buffer {
307 struct percpu_buffer_entry c[CPU_SETSIZE];
308 };
309
310 #define MEMCPY_BUFFER_ITEM_PER_CPU 100
311
312 struct percpu_memcpy_buffer_node {
313 intptr_t data1;
314 uint64_t data2;
315 };
316
317 struct percpu_memcpy_buffer_entry {
318 intptr_t offset;
319 intptr_t buflen;
320 struct percpu_memcpy_buffer_node *array;
321 } __attribute__((aligned(128)));
322
323 struct percpu_memcpy_buffer {
324 struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
325 };
326
327 /* A simple percpu spinlock. Grabs lock on current cpu. */
328 static int rseq_this_cpu_lock(struct percpu_lock *lock)
329 {
330 int cpu;
331
332 for (;;) {
333 int ret;
334
335 cpu = rseq_cpu_start();
336 ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
337 0, 1, cpu);
338 if (rseq_likely(!ret))
339 break;
340 /* Retry if comparison fails or rseq aborts. */
341 }
342 /*
343 * Acquire semantic when taking lock after control dependency.
344 * Matches rseq_smp_store_release().
345 */
346 rseq_smp_acquire__after_ctrl_dep();
347 return cpu;
348 }
349
350 static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
351 {
352 assert(lock->c[cpu].v == 1);
353 /*
354 * Release lock, with release semantic. Matches
355 * rseq_smp_acquire__after_ctrl_dep().
356 */
357 rseq_smp_store_release(&lock->c[cpu].v, 0);
358 }
359
360 void *test_percpu_spinlock_thread(void *arg)
361 {
362 struct spinlock_thread_test_data *thread_data = arg;
363 struct spinlock_test_data *data = thread_data->data;
364 long long i, reps;
365
366 if (!opt_disable_rseq && thread_data->reg &&
367 rseq_register_current_thread())
368 abort();
369 reps = thread_data->reps;
370 for (i = 0; i < reps; i++) {
371 int cpu = rseq_this_cpu_lock(&data->lock);
372 data->c[cpu].count++;
373 rseq_percpu_unlock(&data->lock, cpu);
374 #ifndef BENCHMARK
375 if (i != 0 && !(i % (reps / 10)))
376 printf_verbose("tid %d: count %lld\n",
377 (int) rseq_gettid(), i);
378 #endif
379 }
380 printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
381 (int) rseq_gettid(), nr_abort, signals_delivered);
382 if (!opt_disable_rseq && thread_data->reg &&
383 rseq_unregister_current_thread())
384 abort();
385 return NULL;
386 }
387
388 /*
389 * A simple test which implements a sharded counter using a per-cpu
390 * lock. Obviously real applications might prefer to simply use a
391 * per-cpu increment; however, this is reasonable for a test and the
392 * lock can be extended to synchronize more complicated operations.
393 */
394 void test_percpu_spinlock(void)
395 {
396 const int num_threads = opt_threads;
397 int i, ret;
398 uint64_t sum;
399 pthread_t test_threads[num_threads];
400 struct spinlock_test_data data;
401 struct spinlock_thread_test_data thread_data[num_threads];
402
403 memset(&data, 0, sizeof(data));
404 for (i = 0; i < num_threads; i++) {
405 thread_data[i].reps = opt_reps;
406 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
407 thread_data[i].reg = 1;
408 else
409 thread_data[i].reg = 0;
410 thread_data[i].data = &data;
411 ret = pthread_create(&test_threads[i], NULL,
412 test_percpu_spinlock_thread,
413 &thread_data[i]);
414 if (ret) {
415 errno = ret;
416 perror("pthread_create");
417 abort();
418 }
419 }
420
421 for (i = 0; i < num_threads; i++) {
422 ret = pthread_join(test_threads[i], NULL);
423 if (ret) {
424 errno = ret;
425 perror("pthread_join");
426 abort();
427 }
428 }
429
430 sum = 0;
431 for (i = 0; i < CPU_SETSIZE; i++)
432 sum += data.c[i].count;
433
434 assert(sum == (uint64_t)opt_reps * num_threads);
435 }
436
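/*
 * Per-cpu counter increment: rseq_addv() adds 1 to the current CPU's
 * counter and is retried whenever the rseq critical section aborts
 * (e.g. on preemption, migration or signal delivery).
 */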
437 void *test_percpu_inc_thread(void *arg)
438 {
439 struct inc_thread_test_data *thread_data = arg;
440 struct inc_test_data *data = thread_data->data;
441 long long i, reps;
442
443 if (!opt_disable_rseq && thread_data->reg &&
444 rseq_register_current_thread())
445 abort();
446 reps = thread_data->reps;
447 for (i = 0; i < reps; i++) {
448 int ret;
449
450 do {
451 int cpu;
452
453 cpu = rseq_cpu_start();
454 ret = rseq_addv(&data->c[cpu].count, 1, cpu);
455 } while (rseq_unlikely(ret));
456 #ifndef BENCHMARK
457 if (i != 0 && !(i % (reps / 10)))
458 printf_verbose("tid %d: count %lld\n",
459 (int) rseq_gettid(), i);
460 #endif
461 }
462 printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
463 (int) rseq_gettid(), nr_abort, signals_delivered);
464 if (!opt_disable_rseq && thread_data->reg &&
465 rseq_unregister_current_thread())
466 abort();
467 return NULL;
468 }
469
470 void test_percpu_inc(void)
471 {
472 const int num_threads = opt_threads;
473 int i, ret;
474 uint64_t sum;
475 pthread_t test_threads[num_threads];
476 struct inc_test_data data;
477 struct inc_thread_test_data thread_data[num_threads];
478
479 memset(&data, 0, sizeof(data));
480 for (i = 0; i < num_threads; i++) {
481 thread_data[i].reps = opt_reps;
482 if (opt_disable_mod <= 0 || (i % opt_disable_mod))
483 thread_data[i].reg = 1;
484 else
485 thread_data[i].reg = 0;
486 thread_data[i].data = &data;
487 ret = pthread_create(&test_threads[i], NULL,
488 test_percpu_inc_thread,
489 &thread_data[i]);
490 if (ret) {
491 errno = ret;
492 perror("pthread_create");
493 abort();
494 }
495 }
496
497 for (i = 0; i < num_threads; i++) {
498 ret = pthread_join(test_threads[i], NULL);
499 if (ret) {
500 errno = ret;
501 perror("pthread_join");
502 abort();
503 }
504 }
505
506 sum = 0;
507 for (i = 0; i < CPU_SETSIZE; i++)
508 sum += data.c[i].count;
509
510 assert(sum == (uint64_t)opt_reps * num_threads);
511 }
512
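/*
 * Push a node onto the list of the CPU the thread currently runs on.
 * The head snapshot and the compare-and-store commit belong to the same
 * rseq critical section, so the loop simply retries if the thread is
 * migrated or preempted in between. A hypothetical caller:
 *
 *	node = malloc(sizeof(*node));
 *	node->data = 42;
 *	this_cpu_list_push(&list, node, NULL);
 */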
513 void this_cpu_list_push(struct percpu_list *list,
514 struct percpu_list_node *node,
515 int *_cpu)
516 {
517 int cpu;
518
519 for (;;) {
520 intptr_t *targetptr, newval, expect;
521 int ret;
522
523 cpu = rseq_cpu_start();
524 /* Load list->c[cpu].head with single-copy atomicity. */
525 expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
526 newval = (intptr_t)node;
527 targetptr = (intptr_t *)&list->c[cpu].head;
528 node->next = (struct percpu_list_node *)expect;
529 ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
530 if (rseq_likely(!ret))
531 break;
532 /* Retry if comparison fails or rseq aborts. */
533 }
534 if (_cpu)
535 *_cpu = cpu;
536 }
537
538 /*
539 * Unlike a traditional lock-less linked list, the availability of an
540 * rseq primitive allows us to implement pop without concerns over
541 * ABA-type races.
542 */
543 struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
544 int *_cpu)
545 {
546 struct percpu_list_node *node = NULL;
547 int cpu;
548
549 for (;;) {
550 struct percpu_list_node *head;
551 intptr_t *targetptr, expectnot, *load;
552 long offset;
553 int ret;
554
555 cpu = rseq_cpu_start();
556 targetptr = (intptr_t *)&list->c[cpu].head;
557 expectnot = (intptr_t)NULL;
558 offset = offsetof(struct percpu_list_node, next);
559 load = (intptr_t *)&head;
560 ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
561 offset, load, cpu);
562 if (rseq_likely(!ret)) {
563 node = head;
564 break;
565 }
566 if (ret > 0)
567 break;
568 /* Retry if rseq aborts. */
569 }
570 if (_cpu)
571 *_cpu = cpu;
572 return node;
573 }
574
575 /*
576 * __percpu_list_pop is not safe against concurrent accesses. Should
577 * only be used on lists that are not concurrently modified.
578 */
579 struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
580 {
581 struct percpu_list_node *node;
582
583 node = list->c[cpu].head;
584 if (!node)
585 return NULL;
586 list->c[cpu].head = node->next;
587 return node;
588 }
589
590 void *test_percpu_list_thread(void *arg)
591 {
592 long long i, reps;
593 struct percpu_list *list = (struct percpu_list *)arg;
594
595 if (!opt_disable_rseq && rseq_register_current_thread())
596 abort();
597
598 reps = opt_reps;
599 for (i = 0; i < reps; i++) {
600 struct percpu_list_node *node;
601
602 node = this_cpu_list_pop(list, NULL);
603 if (opt_yield)
604 sched_yield(); /* encourage shuffling */
605 if (node)
606 this_cpu_list_push(list, node, NULL);
607 }
608
609 printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
610 (int) rseq_gettid(), nr_abort, signals_delivered);
611 if (!opt_disable_rseq && rseq_unregister_current_thread())
612 abort();
613
614 return NULL;
615 }
616
617 /* Simultaneous modification to a per-cpu linked list from many threads. */
618 void test_percpu_list(void)
619 {
620 const int num_threads = opt_threads;
621 int i, j, ret;
622 uint64_t sum = 0, expected_sum = 0;
623 struct percpu_list list;
624 pthread_t test_threads[num_threads];
625 cpu_set_t allowed_cpus;
626
627 memset(&list, 0, sizeof(list));
628
629 /* Generate list entries for every usable cpu. */
630 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
631 for (i = 0; i < CPU_SETSIZE; i++) {
632 if (!CPU_ISSET(i, &allowed_cpus))
633 continue;
634 for (j = 1; j <= 100; j++) {
635 struct percpu_list_node *node;
636
637 expected_sum += j;
638
639 node = malloc(sizeof(*node));
640 assert(node);
641 node->data = j;
642 node->next = list.c[i].head;
643 list.c[i].head = node;
644 }
645 }
646
647 for (i = 0; i < num_threads; i++) {
648 ret = pthread_create(&test_threads[i], NULL,
649 test_percpu_list_thread, &list);
650 if (ret) {
651 errno = ret;
652 perror("pthread_create");
653 abort();
654 }
655 }
656
657 for (i = 0; i < num_threads; i++) {
658 ret = pthread_join(test_threads[i], NULL);
659 if (ret) {
660 errno = ret;
661 perror("pthread_join");
662 abort();
663 }
664 }
665
666 for (i = 0; i < CPU_SETSIZE; i++) {
667 struct percpu_list_node *node;
668
669 if (!CPU_ISSET(i, &allowed_cpus))
670 continue;
671
672 while ((node = __percpu_list_pop(&list, i))) {
673 sum += node->data;
674 free(node);
675 }
676 }
677
678 /*
679 * All entries should now be accounted for (unless some external
680 * actor is interfering with our allowed affinity while this
681 * test is running).
682 */
683 assert(sum == expected_sum);
684 }
685
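/*
 * Push into the current CPU's buffer: speculatively store the node
 * pointer into array[offset], then commit by storing offset + 1, both
 * within one rseq critical section. With -M (opt_mb), the commit uses
 * the release-ordered variant so a concurrent pop that observes the new
 * offset also observes the stored pointer.
 */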
686 bool this_cpu_buffer_push(struct percpu_buffer *buffer,
687 struct percpu_buffer_node *node,
688 int *_cpu)
689 {
690 bool result = false;
691 int cpu;
692
693 for (;;) {
694 intptr_t *targetptr_spec, newval_spec;
695 intptr_t *targetptr_final, newval_final;
696 intptr_t offset;
697 int ret;
698
699 cpu = rseq_cpu_start();
700 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
701 if (offset == buffer->c[cpu].buflen)
702 break;
703 newval_spec = (intptr_t)node;
704 targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
705 newval_final = offset + 1;
706 targetptr_final = &buffer->c[cpu].offset;
707 if (opt_mb)
708 ret = rseq_cmpeqv_trystorev_storev_release(
709 targetptr_final, offset, targetptr_spec,
710 newval_spec, newval_final, cpu);
711 else
712 ret = rseq_cmpeqv_trystorev_storev(targetptr_final,
713 offset, targetptr_spec, newval_spec,
714 newval_final, cpu);
715 if (rseq_likely(!ret)) {
716 result = true;
717 break;
718 }
719 /* Retry if comparison fails or rseq aborts. */
720 }
721 if (_cpu)
722 *_cpu = cpu;
723 return result;
724 }
725
726 struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
727 int *_cpu)
728 {
729 struct percpu_buffer_node *head;
730 int cpu;
731
732 for (;;) {
733 intptr_t *targetptr, newval;
734 intptr_t offset;
735 int ret;
736
737 cpu = rseq_cpu_start();
738 /* Load offset with single-copy atomicity. */
739 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
740 if (offset == 0) {
741 head = NULL;
742 break;
743 }
744 head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
745 newval = offset - 1;
746 targetptr = (intptr_t *)&buffer->c[cpu].offset;
747 ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset,
748 (intptr_t *)&buffer->c[cpu].array[offset - 1],
749 (intptr_t)head, newval, cpu);
750 if (rseq_likely(!ret))
751 break;
752 /* Retry if comparison fails or rseq aborts. */
753 }
754 if (_cpu)
755 *_cpu = cpu;
756 return head;
757 }
758
759 /*
760 * __percpu_buffer_pop is not safe against concurrent accesses. Should
761 * only be used on buffers that are not concurrently modified.
762 */
763 struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
764 int cpu)
765 {
766 struct percpu_buffer_node *head;
767 intptr_t offset;
768
769 offset = buffer->c[cpu].offset;
770 if (offset == 0)
771 return NULL;
772 head = buffer->c[cpu].array[offset - 1];
773 buffer->c[cpu].offset = offset - 1;
774 return head;
775 }
776
777 void *test_percpu_buffer_thread(void *arg)
778 {
779 long long i, reps;
780 struct percpu_buffer *buffer = (struct percpu_buffer *)arg;
781
782 if (!opt_disable_rseq && rseq_register_current_thread())
783 abort();
784
785 reps = opt_reps;
786 for (i = 0; i < reps; i++) {
787 struct percpu_buffer_node *node;
788
789 node = this_cpu_buffer_pop(buffer, NULL);
790 if (opt_yield)
791 sched_yield(); /* encourage shuffling */
792 if (node) {
793 if (!this_cpu_buffer_push(buffer, node, NULL)) {
794 /* Should increase buffer size. */
795 abort();
796 }
797 }
798 }
799
800 printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
801 (int) rseq_gettid(), nr_abort, signals_delivered);
802 if (!opt_disable_rseq && rseq_unregister_current_thread())
803 abort();
804
805 return NULL;
806 }
807
808 /* Simultaneous modification to a per-cpu buffer from many threads. */
809 void test_percpu_buffer(void)
810 {
811 const int num_threads = opt_threads;
812 int i, j, ret;
813 uint64_t sum = 0, expected_sum = 0;
814 struct percpu_buffer buffer;
815 pthread_t test_threads[num_threads];
816 cpu_set_t allowed_cpus;
817
818 memset(&buffer, 0, sizeof(buffer));
819
820 /* Generate buffer entries for every usable cpu. */
821 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
822 for (i = 0; i < CPU_SETSIZE; i++) {
823 if (!CPU_ISSET(i, &allowed_cpus))
824 continue;
825 /* Worst case is every item in the same CPU. */
826 buffer.c[i].array =
827 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
828 BUFFER_ITEM_PER_CPU);
829 assert(buffer.c[i].array);
830 buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
831 for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
832 struct percpu_buffer_node *node;
833
834 expected_sum += j;
835
836 /*
837 * We could theoretically put the word-sized
838 * "data" directly in the buffer. However, we
839 * want to model objects that would not fit
840 * within a single word, so allocate an object
841 * for each node.
842 */
843 node = malloc(sizeof(*node));
844 assert(node);
845 node->data = j;
846 buffer.c[i].array[j - 1] = node;
847 buffer.c[i].offset++;
848 }
849 }
850
851 for (i = 0; i < num_threads; i++) {
852 ret = pthread_create(&test_threads[i], NULL,
853 test_percpu_buffer_thread, &buffer);
854 if (ret) {
855 errno = ret;
856 perror("pthread_create");
857 abort();
858 }
859 }
860
861 for (i = 0; i < num_threads; i++) {
862 ret = pthread_join(test_threads[i], NULL);
863 if (ret) {
864 errno = ret;
865 perror("pthread_join");
866 abort();
867 }
868 }
869
870 for (i = 0; i < CPU_SETSIZE; i++) {
871 struct percpu_buffer_node *node;
872
873 if (!CPU_ISSET(i, &allowed_cpus))
874 continue;
875
876 while ((node = __percpu_buffer_pop(&buffer, i))) {
877 sum += node->data;
878 free(node);
879 }
880 free(buffer.c[i].array);
881 }
882
883 /*
884 * All entries should now be accounted for (unless some external
885 * actor is interfering with our allowed affinity while this
886 * test is running).
887 */
888 assert(sum == expected_sum);
889 }
890
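/*
 * Same scheme as this_cpu_buffer_push(), except the item is copied by
 * value into the ring via rseq_cmpeqv_trymemcpy_storev(), modelling
 * payloads larger than a single word.
 */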
891 bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
892 struct percpu_memcpy_buffer_node item,
893 int *_cpu)
894 {
895 bool result = false;
896 int cpu;
897
898 for (;;) {
899 intptr_t *targetptr_final, newval_final, offset;
900 char *destptr, *srcptr;
901 size_t copylen;
902 int ret;
903
904 cpu = rseq_cpu_start();
905 /* Load offset with single-copy atomicity. */
906 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
907 if (offset == buffer->c[cpu].buflen)
908 break;
909 destptr = (char *)&buffer->c[cpu].array[offset];
910 srcptr = (char *)&item;
911 /* copylen must be <= 4kB. */
912 copylen = sizeof(item);
913 newval_final = offset + 1;
914 targetptr_final = &buffer->c[cpu].offset;
915 if (opt_mb)
916 ret = rseq_cmpeqv_trymemcpy_storev_release(
917 targetptr_final, offset,
918 destptr, srcptr, copylen,
919 newval_final, cpu);
920 else
921 ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
922 offset, destptr, srcptr, copylen,
923 newval_final, cpu);
924 if (rseq_likely(!ret)) {
925 result = true;
926 break;
927 }
928 /* Retry if comparison fails or rseq aborts. */
929 }
930 if (_cpu)
931 *_cpu = cpu;
932 return result;
933 }
934
935 bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
936 struct percpu_memcpy_buffer_node *item,
937 int *_cpu)
938 {
939 bool result = false;
940 int cpu;
941
942 for (;;) {
943 intptr_t *targetptr_final, newval_final, offset;
944 char *destptr, *srcptr;
945 size_t copylen;
946 int ret;
947
948 cpu = rseq_cpu_start();
949 /* Load offset with single-copy atomicity. */
950 offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
951 if (offset == 0)
952 break;
953 destptr = (char *)item;
954 srcptr = (char *)&buffer->c[cpu].array[offset - 1];
955 /* copylen must be <= 4kB. */
956 copylen = sizeof(*item);
957 newval_final = offset - 1;
958 targetptr_final = &buffer->c[cpu].offset;
959 ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
960 offset, destptr, srcptr, copylen,
961 newval_final, cpu);
962 if (rseq_likely(!ret)) {
963 result = true;
964 break;
965 }
966 /* Retry if comparison fails or rseq aborts. */
967 }
968 if (_cpu)
969 *_cpu = cpu;
970 return result;
971 }
972
973 /*
974 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
975 * only be used on buffers that are not concurrently modified.
976 */
977 bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
978 struct percpu_memcpy_buffer_node *item,
979 int cpu)
980 {
981 intptr_t offset;
982
983 offset = buffer->c[cpu].offset;
984 if (offset == 0)
985 return false;
986 memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
987 buffer->c[cpu].offset = offset - 1;
988 return true;
989 }
990
991 void *test_percpu_memcpy_buffer_thread(void *arg)
992 {
993 long long i, reps;
994 struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;
995
996 if (!opt_disable_rseq && rseq_register_current_thread())
997 abort();
998
999 reps = opt_reps;
1000 for (i = 0; i < reps; i++) {
1001 struct percpu_memcpy_buffer_node item;
1002 bool result;
1003
1004 result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
1005 if (opt_yield)
1006 sched_yield(); /* encourage shuffling */
1007 if (result) {
1008 if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
1009 /* Should increase buffer size. */
1010 abort();
1011 }
1012 }
1013 }
1014
1015 printf_verbose("tid %d: number of rseq aborts: %d, signals delivered: %u\n",
1016 (int) rseq_gettid(), nr_abort, signals_delivered);
1017 if (!opt_disable_rseq && rseq_unregister_current_thread())
1018 abort();
1019
1020 return NULL;
1021 }
1022
1023 /* Simultaneous modification to a per-cpu buffer from many threads. */
1024 void test_percpu_memcpy_buffer(void)
1025 {
1026 const int num_threads = opt_threads;
1027 int i, j, ret;
1028 uint64_t sum = 0, expected_sum = 0;
1029 struct percpu_memcpy_buffer buffer;
1030 pthread_t test_threads[num_threads];
1031 cpu_set_t allowed_cpus;
1032
1033 memset(&buffer, 0, sizeof(buffer));
1034
1035 /* Generate buffer entries for every usable cpu. */
1036 sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
1037 for (i = 0; i < CPU_SETSIZE; i++) {
1038 if (!CPU_ISSET(i, &allowed_cpus))
1039 continue;
1040 /* Worst case is every item in the same CPU. */
1041 buffer.c[i].array =
1042 malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
1043 MEMCPY_BUFFER_ITEM_PER_CPU);
1044 assert(buffer.c[i].array);
1045 buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
1046 for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
1047 expected_sum += 2 * j + 1;
1048
1049 /*
1050 * We could theoretically put the word-sized
1051 * "data" directly in the buffer. However, we
1052 * want to model objects that would not fit
1053 * within a single word, so each item carries
1054 * two fields that are copied by value.
1055 */
1056 buffer.c[i].array[j - 1].data1 = j;
1057 buffer.c[i].array[j - 1].data2 = j + 1;
1058 buffer.c[i].offset++;
1059 }
1060 }
1061
1062 for (i = 0; i < num_threads; i++) {
1063 ret = pthread_create(&test_threads[i], NULL,
1064 test_percpu_memcpy_buffer_thread,
1065 &buffer);
1066 if (ret) {
1067 errno = ret;
1068 perror("pthread_create");
1069 abort();
1070 }
1071 }
1072
1073 for (i = 0; i < num_threads; i++) {
1074 ret = pthread_join(test_threads[i], NULL);
1075 if (ret) {
1076 errno = ret;
1077 perror("pthread_join");
1078 abort();
1079 }
1080 }
1081
1082 for (i = 0; i < CPU_SETSIZE; i++) {
1083 struct percpu_memcpy_buffer_node item;
1084
1085 if (!CPU_ISSET(i, &allowed_cpus))
1086 continue;
1087
1088 while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
1089 sum += item.data1;
1090 sum += item.data2;
1091 }
1092 free(buffer.c[i].array);
1093 }
1094
1095 /*
1096 * All entries should now be accounted for (unless some external
1097 * actor is interfering with our allowed affinity while this
1098 * test is running).
1099 */
1100 assert(sum == expected_sum);
1101 }
1102
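/* SIGUSR1 handler installed by set_signal_handler(); exercised by -k. */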
1103 static void test_signal_interrupt_handler(int signo)
1104 {
1105 signals_delivered++;
1106 }
1107
1108 static int set_signal_handler(void)
1109 {
1110 int ret = 0;
1111 struct sigaction sa;
1112 sigset_t sigset;
1113
1114 ret = sigemptyset(&sigset);
1115 if (ret < 0) {
1116 perror("sigemptyset");
1117 return ret;
1118 }
1119
1120 sa.sa_handler = test_signal_interrupt_handler;
1121 sa.sa_mask = sigset;
1122 sa.sa_flags = 0;
1123 ret = sigaction(SIGUSR1, &sa, NULL);
1124 if (ret < 0) {
1125 perror("sigaction");
1126 return ret;
1127 }
1128
1129 printf_verbose("Signal handler set for SIGUSR1\n");
1130
1131 return ret;
1132 }
1133
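/*
 * Shared state for the membarrier test: the manager thread publishes the
 * currently "active" struct percpu_list through percpu_list_ptr, and
 * worker threads dereference that pointer inside their rseq critical
 * sections.
 */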
1134 struct test_membarrier_thread_args {
1135 int stop;
1136 intptr_t percpu_list_ptr;
1137 };
1138
1139 /* Worker threads modify data in their "active" percpu lists. */
1140 void *test_membarrier_worker_thread(void *arg)
1141 {
1142 struct test_membarrier_thread_args *args =
1143 (struct test_membarrier_thread_args *)arg;
1144 const int iters = opt_reps;
1145 int i;
1146
1147 if (rseq_register_current_thread()) {
1148 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1149 errno, strerror(errno));
1150 abort();
1151 }
1152
1153 /* Wait for initialization. */
1154 while (!atomic_load(&args->percpu_list_ptr)) {}
1155
1156 for (i = 0; i < iters; ++i) {
1157 int ret;
1158
1159 do {
1160 int cpu = rseq_cpu_start();
1161
1162 ret = rseq_offset_deref_addv(&args->percpu_list_ptr,
1163 sizeof(struct percpu_list_entry) * cpu, 1, cpu);
1164 } while (rseq_unlikely(ret));
1165 }
1166
1167 if (rseq_unregister_current_thread()) {
1168 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1169 errno, strerror(errno));
1170 abort();
1171 }
1172 return NULL;
1173 }
1174
1175 void test_membarrier_init_percpu_list(struct percpu_list *list)
1176 {
1177 int i;
1178
1179 memset(list, 0, sizeof(*list));
1180 for (i = 0; i < CPU_SETSIZE; i++) {
1181 struct percpu_list_node *node;
1182
1183 node = malloc(sizeof(*node));
1184 assert(node);
1185 node->data = 0;
1186 node->next = NULL;
1187 list->c[i].head = node;
1188 }
1189 }
1190
1191 void test_membarrier_free_percpu_list(struct percpu_list *list)
1192 {
1193 int i;
1194
1195 for (i = 0; i < CPU_SETSIZE; i++)
1196 free(list->c[i].head);
1197 }
1198
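/* Raw syscall wrapper; membarrier(2) typically has no libc wrapper. */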
1199 static int sys_membarrier(int cmd, int flags, int cpu_id)
1200 {
1201 return syscall(__NR_membarrier, cmd, flags, cpu_id);
1202 }
1203
1204 /*
1205 * The manager thread swaps per-cpu lists that worker threads see,
1206 * and validates that there are no unexpected modifications.
1207 */
1208 void *test_membarrier_manager_thread(void *arg)
1209 {
1210 struct test_membarrier_thread_args *args =
1211 (struct test_membarrier_thread_args *)arg;
1212 struct percpu_list list_a, list_b;
1213 intptr_t expect_a = 0, expect_b = 0;
1214 int cpu_a = 0, cpu_b = 0;
1215
1216 if (rseq_register_current_thread()) {
1217 fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
1218 errno, strerror(errno));
1219 abort();
1220 }
1221
1222 /* Init lists. */
1223 test_membarrier_init_percpu_list(&list_a);
1224 test_membarrier_init_percpu_list(&list_b);
1225
1226 atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
1227
1228 while (!atomic_load(&args->stop)) {
1229 /* list_a is "active". */
1230 cpu_a = rand() % CPU_SETSIZE;
1231 /*
1232 * As list_b is "inactive", we should never see changes
1233 * to list_b.
1234 */
1235 if (expect_b != atomic_load(&list_b.c[cpu_b].head->data)) {
1236 fprintf(stderr, "Membarrier test failed\n");
1237 abort();
1238 }
1239
1240 /* Make list_b "active". */
1241 atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b);
1242 if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
1243 MEMBARRIER_CMD_FLAG_CPU, cpu_a) &&
1244 errno != ENXIO /* missing CPU */) {
1245 perror("sys_membarrier");
1246 abort();
1247 }
1248 /*
1249 * Cpu A should now only modify list_b, so the values
1250 * in list_a should be stable.
1251 */
1252 expect_a = atomic_load(&list_a.c[cpu_a].head->data);
1253
1254 cpu_b = rand() % CPU_SETSIZE;
1255 /*
1256 * As list_a is "inactive", we should never see changes
1257 * to list_a.
1258 */
1259 if (expect_a != atomic_load(&list_a.c[cpu_a].head->data)) {
1260 fprintf(stderr, "Membarrier test failed\n");
1261 abort();
1262 }
1263
1264 /* Make list_a "active". */
1265 atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
1266 if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
1267 MEMBARRIER_CMD_FLAG_CPU, cpu_b) &&
1268 errno != ENXIO /* missing CPU */) {
1269 perror("sys_membarrier");
1270 abort();
1271 }
1272 /* Remember a value from list_b. */
1273 expect_b = atomic_load(&list_b.c[cpu_b].head->data);
1274 }
1275
1276 test_membarrier_free_percpu_list(&list_a);
1277 test_membarrier_free_percpu_list(&list_b);
1278
1279 if (rseq_unregister_current_thread()) {
1280 fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
1281 errno, strerror(errno));
1282 abort();
1283 }
1284 return NULL;
1285 }
1286
1287 /* Test the MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ membarrier command. */
1288 #ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
1289 void test_membarrier(void)
1290 {
1291 const int num_threads = opt_threads;
1292 struct test_membarrier_thread_args thread_args;
1293 pthread_t worker_threads[num_threads];
1294 pthread_t manager_thread;
1295 int i, ret;
1296
1297 if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
1298 perror("sys_membarrier");
1299 abort();
1300 }
1301
1302 thread_args.stop = 0;
1303 thread_args.percpu_list_ptr = 0;
1304 ret = pthread_create(&manager_thread, NULL,
1305 test_membarrier_manager_thread, &thread_args);
1306 if (ret) {
1307 errno = ret;
1308 perror("pthread_create");
1309 abort();
1310 }
1311
1312 for (i = 0; i < num_threads; i++) {
1313 ret = pthread_create(&worker_threads[i], NULL,
1314 test_membarrier_worker_thread, &thread_args);
1315 if (ret) {
1316 errno = ret;
1317 perror("pthread_create");
1318 abort();
1319 }
1320 }
1321
1322
1323 for (i = 0; i < num_threads; i++) {
1324 ret = pthread_join(worker_threads[i], NULL);
1325 if (ret) {
1326 errno = ret;
1327 perror("pthread_join");
1328 abort();
1329 }
1330 }
1331
1332 atomic_store(&thread_args.stop, 1);
1333 ret = pthread_join(manager_thread, NULL);
1334 if (ret) {
1335 errno = ret;
1336 perror("pthread_join");
1337 abort();
1338 }
1339 }
1340 #else /* RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV */
1341 void test_membarrier(void)
1342 {
1343 fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
1344 "Skipping membarrier test.\n");
1345 }
1346 #endif
1347
1348 static void show_usage(int argc, char **argv)
1349 {
1350 printf("Usage : %s <OPTIONS>\n",
1351 argv[0]);
1352 printf("OPTIONS:\n");
1353 printf(" [-1 loops] Number of loops for delay injection 1\n");
1354 printf(" [-2 loops] Number of loops for delay injection 2\n");
1355 printf(" [-3 loops] Number of loops for delay injection 3\n");
1356 printf(" [-4 loops] Number of loops for delay injection 4\n");
1357 printf(" [-5 loops] Number of loops for delay injection 5\n");
1358 printf(" [-6 loops] Number of loops for delay injection 6\n");
1359 printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
1360 printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
1361 printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
1362 printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
1363 printf(" [-y] Yield\n");
1364 printf(" [-k] Kill thread with signal\n");
1365 printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
1366 printf(" [-t N] Number of threads (default 200)\n");
1367 printf(" [-r N] Number of repetitions per thread (default 5000)\n");
1368 printf(" [-d] Disable rseq system call (no initialization)\n");
1369 printf(" [-D M] Disable rseq for each M threads\n");
1370 printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
1371 printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n");
1372 printf(" [-v] Verbose output.\n");
1373 printf(" [-h] Show this help.\n");
1374 printf("\n");
1375 }
1376
1377 int main(int argc, char **argv)
1378 {
1379 int i;
1380
1381 for (i = 1; i < argc; i++) {
1382 if (argv[i][0] != '-')
1383 continue;
1384 switch (argv[i][1]) {
1385 case '1':
1386 case '2':
1387 case '3':
1388 case '4':
1389 case '5':
1390 case '6':
1391 case '7':
1392 case '8':
1393 case '9':
1394 if (argc < i + 2) {
1395 show_usage(argc, argv);
1396 goto error;
1397 }
1398 loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
1399 i++;
1400 break;
1401 case 'm':
1402 if (argc < i + 2) {
1403 show_usage(argc, argv);
1404 goto error;
1405 }
1406 opt_modulo = atol(argv[i + 1]);
1407 if (opt_modulo < 0) {
1408 show_usage(argc, argv);
1409 goto error;
1410 }
1411 i++;
1412 break;
1413 case 's':
1414 if (argc < i + 2) {
1415 show_usage(argc, argv);
1416 goto error;
1417 }
1418 opt_sleep = atol(argv[i + 1]);
1419 if (opt_sleep < 0) {
1420 show_usage(argc, argv);
1421 goto error;
1422 }
1423 i++;
1424 break;
1425 case 'y':
1426 opt_yield = 1;
1427 break;
1428 case 'k':
1429 opt_signal = 1;
1430 break;
1431 case 'd':
1432 opt_disable_rseq = 1;
1433 break;
1434 case 'D':
1435 if (argc < i + 2) {
1436 show_usage(argc, argv);
1437 goto error;
1438 }
1439 opt_disable_mod = atol(argv[i + 1]);
1440 if (opt_disable_mod < 0) {
1441 show_usage(argc, argv);
1442 goto error;
1443 }
1444 i++;
1445 break;
1446 case 't':
1447 if (argc < i + 2) {
1448 show_usage(argc, argv);
1449 goto error;
1450 }
1451 opt_threads = atol(argv[i + 1]);
1452 if (opt_threads < 0) {
1453 show_usage(argc, argv);
1454 goto error;
1455 }
1456 i++;
1457 break;
1458 case 'r':
1459 if (argc < i + 2) {
1460 show_usage(argc, argv);
1461 goto error;
1462 }
1463 opt_reps = atoll(argv[i + 1]);
1464 if (opt_reps < 0) {
1465 show_usage(argc, argv);
1466 goto error;
1467 }
1468 i++;
1469 break;
1470 case 'h':
1471 show_usage(argc, argv);
1472 goto end;
1473 case 'T':
1474 if (argc < i + 2) {
1475 show_usage(argc, argv);
1476 goto error;
1477 }
1478 opt_test = *argv[i + 1];
1479 switch (opt_test) {
1480 case 's':
1481 case 'l':
1482 case 'i':
1483 case 'b':
1484 case 'm':
1485 case 'r':
1486 break;
1487 default:
1488 show_usage(argc, argv);
1489 goto error;
1490 }
1491 i++;
1492 break;
1493 case 'v':
1494 verbose = 1;
1495 break;
1496 case 'M':
1497 opt_mb = 1;
1498 break;
1499 default:
1500 show_usage(argc, argv);
1501 goto error;
1502 }
1503 }
1504
1505 loop_cnt_1 = loop_cnt[1];
1506 loop_cnt_2 = loop_cnt[2];
1507 loop_cnt_3 = loop_cnt[3];
1508 loop_cnt_4 = loop_cnt[4];
1509 loop_cnt_5 = loop_cnt[5];
1510 loop_cnt_6 = loop_cnt[6];
1511
1512 if (set_signal_handler())
1513 goto error;
1514
1515 if (!opt_disable_rseq && rseq_register_current_thread())
1516 goto error;
1517 switch (opt_test) {
1518 case 's':
1519 printf_verbose("spinlock\n");
1520 test_percpu_spinlock();
1521 break;
1522 case 'l':
1523 printf_verbose("linked list\n");
1524 test_percpu_list();
1525 break;
1526 case 'b':
1527 printf_verbose("buffer\n");
1528 test_percpu_buffer();
1529 break;
1530 case 'm':
1531 printf_verbose("memcpy buffer\n");
1532 test_percpu_memcpy_buffer();
1533 break;
1534 case 'i':
1535 printf_verbose("counter increment\n");
1536 test_percpu_inc();
1537 break;
1538 case 'r':
1539 printf_verbose("membarrier\n");
1540 test_membarrier();
1541 break;
1542 }
1543 if (!opt_disable_rseq && rseq_unregister_current_thread())
1544 abort();
1545 end:
1546 return 0;
1547
1548 error:
1549 return -1;
1550 }
1551