// SPDX-License-Identifier: LGPL-2.1
#define _GNU_SOURCE
#include <assert.h>
#include <pthread.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <unistd.h>
#include <poll.h>
#include <sys/types.h>
#include <signal.h>
#include <errno.h>
#include <stddef.h>

static inline pid_t gettid(void)
{
	return syscall(__NR_gettid);
}

#define NR_INJECT	9
static int loop_cnt[NR_INJECT + 1];

static int loop_cnt_1 asm("asm_loop_cnt_1") __attribute__((used));
static int loop_cnt_2 asm("asm_loop_cnt_2") __attribute__((used));
static int loop_cnt_3 asm("asm_loop_cnt_3") __attribute__((used));
static int loop_cnt_4 asm("asm_loop_cnt_4") __attribute__((used));
static int loop_cnt_5 asm("asm_loop_cnt_5") __attribute__((used));
static int loop_cnt_6 asm("asm_loop_cnt_6") __attribute__((used));

static int opt_modulo, verbose;

static int opt_yield, opt_signal, opt_sleep,
		opt_disable_rseq, opt_threads = 200,
		opt_disable_mod = 0, opt_test = 's', opt_mb = 0;

#ifndef RSEQ_SKIP_FASTPATH
static long long opt_reps = 5000;
#else
static long long opt_reps = 100;
#endif

static __thread __attribute__((tls_model("initial-exec")))
unsigned int signals_delivered;

#ifndef BENCHMARK

static __thread __attribute__((tls_model("initial-exec"), unused))
unsigned int yield_mod_cnt, nr_abort;

#define printf_verbose(fmt, ...)			\
	do {						\
		if (verbose)				\
			printf(fmt, ## __VA_ARGS__);	\
	} while (0)

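/*
 * Delay injection, wired up by rseq.h: the rseq fast paths invoke
 * RSEQ_INJECT_ASM(n) and RSEQ_INJECT_C(n) at numbered injection points,
 * and loop_cnt[n] gives the number of busy-loop iterations to inject at
 * point n (options -1 through -9). Points 1-6 are busy loops in the
 * per-arch assembly below (x86 reads them through the asm_loop_cnt_*
 * aliases); setting one of the C-level points (-7/-8/-9) to -1 instead
 * triggers the yield/sleep/signal disturbance selected by -m, -y, -s
 * and -k in RSEQ_INJECT_C().
 */
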
#ifdef __i386__

#define INJECT_ASM_REG	"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"mov asm_loop_cnt_" #n ", %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__x86_64__)

#define INJECT_ASM_REG_P	"rax"
#define INJECT_ASM_REG		"eax"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG_P \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lea asm_loop_cnt_" #n "(%%rip), %%" INJECT_ASM_REG_P "\n\t" \
	"mov (%%" INJECT_ASM_REG_P "), %%" INJECT_ASM_REG "\n\t" \
	"test %%" INJECT_ASM_REG ",%%" INJECT_ASM_REG "\n\t" \
	"jz 333f\n\t" \
	"222:\n\t" \
	"dec %%" INJECT_ASM_REG "\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__s390__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r12"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"l %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"ltr %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG "\n\t" \
	"je 333f\n\t" \
	"222:\n\t" \
	"ahi %%" INJECT_ASM_REG ", -1\n\t" \
	"jnz 222b\n\t" \
	"333:\n\t"

#elif defined(__ARMEL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r4"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"ldr " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmp " INJECT_ASM_REG ", #0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subs " INJECT_ASM_REG ", #1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__AARCH64EL__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1] "Qo" (loop_cnt[1]) \
	, [loop_cnt_2] "Qo" (loop_cnt[2]) \
	, [loop_cnt_3] "Qo" (loop_cnt[3]) \
	, [loop_cnt_4] "Qo" (loop_cnt[4]) \
	, [loop_cnt_5] "Qo" (loop_cnt[5]) \
	, [loop_cnt_6] "Qo" (loop_cnt[6])

#define INJECT_ASM_REG	RSEQ_ASM_TMP_REG32

#define RSEQ_INJECT_ASM(n) \
	"	ldr	" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n" \
	"	cbz	" INJECT_ASM_REG ", 333f\n" \
	"222:\n" \
	"	sub	" INJECT_ASM_REG ", " INJECT_ASM_REG ", #1\n" \
	"	cbnz	" INJECT_ASM_REG ", 222b\n" \
	"333:\n"

#elif defined(__PPC__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"r18"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lwz %%" INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"cmpwi %%" INJECT_ASM_REG ", 0\n\t" \
	"beq 333f\n\t" \
	"222:\n\t" \
	"subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
	"bne 222b\n\t" \
	"333:\n\t"

#elif defined(__mips__)

#define RSEQ_INJECT_INPUT \
	, [loop_cnt_1]"m"(loop_cnt[1]) \
	, [loop_cnt_2]"m"(loop_cnt[2]) \
	, [loop_cnt_3]"m"(loop_cnt[3]) \
	, [loop_cnt_4]"m"(loop_cnt[4]) \
	, [loop_cnt_5]"m"(loop_cnt[5]) \
	, [loop_cnt_6]"m"(loop_cnt[6])

#define INJECT_ASM_REG	"$5"

#define RSEQ_INJECT_CLOBBER \
	, INJECT_ASM_REG

#define RSEQ_INJECT_ASM(n) \
	"lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
	"beqz " INJECT_ASM_REG ", 333f\n\t" \
	"222:\n\t" \
	"addiu " INJECT_ASM_REG ", -1\n\t" \
	"bnez " INJECT_ASM_REG ", 222b\n\t" \
	"333:\n\t"

#else
#error unsupported target
#endif

#define RSEQ_INJECT_FAILED \
	nr_abort++;

#define RSEQ_INJECT_C(n) \
{ \
	int loc_i, loc_nr_loops = loop_cnt[n]; \
	\
	for (loc_i = 0; loc_i < loc_nr_loops; loc_i++) { \
		rseq_barrier(); \
	} \
	if (loc_nr_loops == -1 && opt_modulo) { \
		if (yield_mod_cnt == opt_modulo - 1) { \
			if (opt_sleep > 0) \
				poll(NULL, 0, opt_sleep); \
			if (opt_yield) \
				sched_yield(); \
			if (opt_signal) \
				raise(SIGUSR1); \
			yield_mod_cnt = 0; \
		} else { \
			yield_mod_cnt++; \
		} \
	} \
}

#else

#define printf_verbose(fmt, ...)

#endif /* BENCHMARK */

#include "rseq.h"

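/*
 * The per-cpu entry types below are aligned on 128 bytes so that entries
 * belonging to different CPUs never share a cache line, avoiding false
 * sharing (128 bytes covers a pair of 64-byte lines, which adjacent-line
 * hardware prefetchers commonly treat as a unit).
 */
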
struct percpu_lock_entry {
	intptr_t v;
} __attribute__((aligned(128)));

struct percpu_lock {
	struct percpu_lock_entry c[CPU_SETSIZE];
};

struct test_data_entry {
	intptr_t count;
} __attribute__((aligned(128)));

struct spinlock_test_data {
	struct percpu_lock lock;
	struct test_data_entry c[CPU_SETSIZE];
};

struct spinlock_thread_test_data {
	struct spinlock_test_data *data;
	long long reps;
	int reg;
};

struct inc_test_data {
	struct test_data_entry c[CPU_SETSIZE];
};

struct inc_thread_test_data {
	struct inc_test_data *data;
	long long reps;
	int reg;
};

struct percpu_list_node {
	intptr_t data;
	struct percpu_list_node *next;
};

struct percpu_list_entry {
	struct percpu_list_node *head;
} __attribute__((aligned(128)));

struct percpu_list {
	struct percpu_list_entry c[CPU_SETSIZE];
};

#define BUFFER_ITEM_PER_CPU	100

struct percpu_buffer_node {
	intptr_t data;
};

struct percpu_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_buffer_node **array;
} __attribute__((aligned(128)));

struct percpu_buffer {
	struct percpu_buffer_entry c[CPU_SETSIZE];
};

#define MEMCPY_BUFFER_ITEM_PER_CPU	100

struct percpu_memcpy_buffer_node {
	intptr_t data1;
	uint64_t data2;
};

struct percpu_memcpy_buffer_entry {
	intptr_t offset;
	intptr_t buflen;
	struct percpu_memcpy_buffer_node *array;
} __attribute__((aligned(128)));

struct percpu_memcpy_buffer {
	struct percpu_memcpy_buffer_entry c[CPU_SETSIZE];
};

/* A simple percpu spinlock. Grabs lock on current cpu. */
static int rseq_this_cpu_lock(struct percpu_lock *lock)
{
	int cpu;

	for (;;) {
		int ret;

		cpu = rseq_cpu_start();
		ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
					 0, 1, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	/*
	 * Acquire semantic when taking lock after control dependency.
	 * Matches rseq_smp_store_release().
	 */
	rseq_smp_acquire__after_ctrl_dep();
	return cpu;
}

static void rseq_percpu_unlock(struct percpu_lock *lock, int cpu)
{
	assert(lock->c[cpu].v == 1);
	/*
	 * Release lock, with release semantic. Matches
	 * rseq_smp_acquire__after_ctrl_dep().
	 */
	rseq_smp_store_release(&lock->c[cpu].v, 0);
}
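
/*
 * Example usage of the lock pair above (a hypothetical helper, not
 * exercised by the tests below): the cpu number returned by
 * rseq_this_cpu_lock() identifies which per-cpu lock was taken, and
 * must be passed back to rseq_percpu_unlock(), since the thread may
 * migrate to another cpu while holding the lock.
 */
static inline __attribute__((unused))
void percpu_lock_example_inc(struct percpu_lock *lock, intptr_t *counters)
{
	int cpu = rseq_this_cpu_lock(lock);

	counters[cpu]++;	/* Critical section protected by c[cpu].v. */
	rseq_percpu_unlock(lock, cpu);
}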

void *test_percpu_spinlock_thread(void *arg)
{
	struct spinlock_thread_test_data *thread_data = arg;
	struct spinlock_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		/* Take the lock on whichever cpu this thread runs on. */
		int cpu = rseq_this_cpu_lock(&data->lock);

		data->c[cpu].count++;
		rseq_percpu_unlock(&data->lock, cpu);
#ifndef BENCHMARK
		if (i != 0 && reps >= 10 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n", (int) gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
void test_percpu_spinlock(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;
	struct spinlock_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
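		/*
		 * With -D M, every M-th thread (starting with thread 0)
		 * runs without registering rseq, to exercise a mix of
		 * registered and unregistered threads.
		 */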
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_spinlock_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}

void *test_percpu_inc_thread(void *arg)
{
	struct inc_thread_test_data *thread_data = arg;
	struct inc_test_data *data = thread_data->data;
	long long i, reps;

	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_register_current_thread())
		abort();
	reps = thread_data->reps;
	for (i = 0; i < reps; i++) {
		int ret;

		do {
			int cpu;

			cpu = rseq_cpu_start();
			ret = rseq_addv(&data->c[cpu].count, 1, cpu);
		} while (rseq_unlikely(ret));
#ifndef BENCHMARK
		if (i != 0 && reps >= 10 && !(i % (reps / 10)))
			printf_verbose("tid %d: count %lld\n", (int) gettid(), i);
#endif
	}
	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && thread_data->reg &&
	    rseq_unregister_current_thread())
		abort();
	return NULL;
}

void test_percpu_inc(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct inc_test_data data;
	struct inc_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_inc_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}

void this_cpu_list_push(struct percpu_list *list,
			struct percpu_list_node *node,
			int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval, expect;
		int ret;

		cpu = rseq_cpu_start();
		/* Load list->c[cpu].head with single-copy atomicity. */
		expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		newval = (intptr_t)node;
		targetptr = (intptr_t *)&list->c[cpu].head;
		node->next = (struct percpu_list_node *)expect;
		ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}

/*
 * Unlike a traditional lock-less linked list, the availability of a
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races: the comparison of the head pointer, the dereference
 * of head->next and the store of the new head all happen within a
 * single rseq critical section, which aborts (and is retried) if the
 * thread is preempted, signalled or migrated in between.
 */
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
					   int *_cpu)
{
	struct percpu_list_node *node = NULL;
	int cpu;

	for (;;) {
		struct percpu_list_node *head;
		intptr_t *targetptr, expectnot, *load;
		off_t offset;
		int ret;

		cpu = rseq_cpu_start();
		targetptr = (intptr_t *)&list->c[cpu].head;
		expectnot = (intptr_t)NULL;
		offset = offsetof(struct percpu_list_node, next);
		load = (intptr_t *)&head;
		ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
						 offset, load, cpu);
		if (rseq_likely(!ret)) {
			node = head;
			break;
		}
		if (ret > 0)
			break;
		/* Retry if rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return node;
}

/*
 * __percpu_list_pop is not safe against concurrent accesses. Should
 * only be used on lists that are not concurrently modified.
 */
struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{
	struct percpu_list_node *node;

	node = list->c[cpu].head;
	if (!node)
		return NULL;
	list->c[cpu].head = node->next;
	return node;
}

void *test_percpu_list_thread(void *arg)
{
	long long i, reps;
	struct percpu_list *list = (struct percpu_list *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_list_node *node;

		node = this_cpu_list_pop(list, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (node)
			this_cpu_list_push(list, node, NULL);
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu linked list from many threads. */
void test_percpu_list(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_list list;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&list, 0, sizeof(list));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		for (j = 1; j <= 100; j++) {
			struct percpu_list_node *node;

			expected_sum += j;

			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			node->next = list.c[i].head;
			list.c[i].head = node;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_list_thread, &list);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_list_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_list_pop(&list, i))) {
			sum += node->data;
			free(node);
		}
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

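/*
 * Per-cpu buffer push: speculatively store the node pointer into the
 * next free array slot, then publish it by storing the incremented
 * offset. The offset comparison and both stores commit within a single
 * rseq critical section, so a preempted attempt is simply retried;
 * with -M, the release variant orders the slot store before the offset
 * publication.
 */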
bool this_cpu_buffer_push(struct percpu_buffer *buffer,
			  struct percpu_buffer_node *node,
			  int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_spec, newval_spec;
		intptr_t *targetptr_final, newval_final;
		intptr_t offset;
		int ret;

		cpu = rseq_cpu_start();
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		newval_spec = (intptr_t)node;
		targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		if (opt_mb)
			ret = rseq_cmpeqv_trystorev_storev_release(
				targetptr_final, offset, targetptr_spec,
				newval_spec, newval_final, cpu);
		else
			ret = rseq_cmpeqv_trystorev_storev(targetptr_final,
				offset, targetptr_spec, newval_spec,
				newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
					       int *_cpu)
{
	struct percpu_buffer_node *head;
	int cpu;

	for (;;) {
		intptr_t *targetptr, newval;
		intptr_t offset;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0) {
			head = NULL;
			break;
		}
		head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
		newval = offset - 1;
		targetptr = (intptr_t *)&buffer->c[cpu].offset;
		ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset,
			(intptr_t *)&buffer->c[cpu].array[offset - 1],
			(intptr_t)head, newval, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return head;
}

/*
 * __percpu_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer,
					       int cpu)
{
	struct percpu_buffer_node *head;
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return NULL;
	head = buffer->c[cpu].array[offset - 1];
	buffer->c[cpu].offset = offset - 1;
	return head;
}

void *test_percpu_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_buffer *buffer = (struct percpu_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_buffer_node *node;

		node = this_cpu_buffer_pop(buffer, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (node) {
			if (!this_cpu_buffer_push(buffer, node, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst case is every item in the same CPU. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
			struct percpu_buffer_node *node;

			expected_sum += j;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			buffer.c[i].array[j - 1] = node;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_buffer_thread, &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_buffer_node *node;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_buffer_pop(&buffer, i))) {
			sum += node->data;
			free(node);
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

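/*
 * memcpy-based push: like the pointer buffer above, but the whole item
 * (two words here) is copied by value into the array slot before the
 * offset update publishes it, all within one rseq critical section.
 * The copy length handed to the trymemcpy helpers is bounded (it must
 * not exceed 4kB, as noted at each call site).
 */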
bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
				 struct percpu_memcpy_buffer_node item,
				 int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == buffer->c[cpu].buflen)
			break;
		destptr = (char *)&buffer->c[cpu].array[offset];
		srcptr = (char *)&item;
		/* copylen must be <= 4kB. */
		copylen = sizeof(item);
		newval_final = offset + 1;
		targetptr_final = &buffer->c[cpu].offset;
		if (opt_mb)
			ret = rseq_cmpeqv_trymemcpy_storev_release(
				targetptr_final, offset,
				destptr, srcptr, copylen,
				newval_final, cpu);
		else
			ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
				offset, destptr, srcptr, copylen,
				newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
				int *_cpu)
{
	bool result = false;
	int cpu;

	for (;;) {
		intptr_t *targetptr_final, newval_final, offset;
		char *destptr, *srcptr;
		size_t copylen;
		int ret;

		cpu = rseq_cpu_start();
		/* Load offset with single-copy atomicity. */
		offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (offset == 0)
			break;
		destptr = (char *)item;
		srcptr = (char *)&buffer->c[cpu].array[offset - 1];
		/* copylen must be <= 4kB. */
		copylen = sizeof(*item);
		newval_final = offset - 1;
		targetptr_final = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
			offset, destptr, srcptr, copylen,
			newval_final, cpu);
		if (rseq_likely(!ret)) {
			result = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return result;
}

/*
 * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should
 * only be used on buffers that are not concurrently modified.
 */
bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
				int cpu)
{
	intptr_t offset;

	offset = buffer->c[cpu].offset;
	if (offset == 0)
		return false;
	memcpy(item, &buffer->c[cpu].array[offset - 1], sizeof(*item));
	buffer->c[cpu].offset = offset - 1;
	return true;
}

void *test_percpu_memcpy_buffer_thread(void *arg)
{
	long long i, reps;
	struct percpu_memcpy_buffer *buffer = (struct percpu_memcpy_buffer *)arg;

	if (!opt_disable_rseq && rseq_register_current_thread())
		abort();

	reps = opt_reps;
	for (i = 0; i < reps; i++) {
		struct percpu_memcpy_buffer_node item;
		bool result;

		result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL);
		if (opt_yield)
			sched_yield();	/* encourage shuffling */
		if (result) {
			if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) {
				/* Should increase buffer size. */
				abort();
			}
		}
	}

	printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
		       (int) gettid(), nr_abort, signals_delivered);
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();

	return NULL;
}

/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_memcpy_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_memcpy_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (!CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worst case is every item in the same CPU. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       MEMCPY_BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
			expected_sum += 2 * j + 1;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so each node carries
			 * two fields, copied in and out by memcpy.
			 */
			buffer.c[i].array[j - 1].data1 = j;
			buffer.c[i].array[j - 1].data2 = j + 1;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_memcpy_buffer_thread,
				     &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_memcpy_buffer_node item;

		if (!CPU_ISSET(i, &allowed_cpus))
			continue;

		while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
			sum += item.data1;
			sum += item.data2;
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}

static void test_signal_interrupt_handler(int signo)
{
	signals_delivered++;
}

static int set_signal_handler(void)
{
	int ret = 0;
	struct sigaction sa;
	sigset_t sigset;

	ret = sigemptyset(&sigset);
	if (ret < 0) {
		perror("sigemptyset");
		return ret;
	}

	sa.sa_handler = test_signal_interrupt_handler;
	sa.sa_mask = sigset;
	sa.sa_flags = 0;
	ret = sigaction(SIGUSR1, &sa, NULL);
	if (ret < 0) {
		perror("sigaction");
		return ret;
	}

	printf_verbose("Signal handler set for SIGUSR1\n");

	return ret;
}

static void show_usage(int argc, char **argv)
{
1134 printf("Usage : %s <OPTIONS>\n",
1135 argv[0]);
1136 printf("OPTIONS:\n");
1137 printf(" [-1 loops] Number of loops for delay injection 1\n");
1138 printf(" [-2 loops] Number of loops for delay injection 2\n");
1139 printf(" [-3 loops] Number of loops for delay injection 3\n");
1140 printf(" [-4 loops] Number of loops for delay injection 4\n");
1141 printf(" [-5 loops] Number of loops for delay injection 5\n");
1142 printf(" [-6 loops] Number of loops for delay injection 6\n");
1143 printf(" [-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
1144 printf(" [-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
1145 printf(" [-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
1146 printf(" [-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
1147 printf(" [-y] Yield\n");
1148 printf(" [-k] Kill thread with signal\n");
1149 printf(" [-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
1150 printf(" [-t N] Number of threads (default 200)\n");
1151 printf(" [-r N] Number of repetitions per thread (default 5000)\n");
1152 printf(" [-d] Disable rseq system call (no initialization)\n");
1153 printf(" [-D M] Disable rseq for each M threads\n");
1154 printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement\n");
1155 printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n");
1156 printf(" [-v] Verbose output.\n");
1157 printf(" [-h] Show this help.\n");
1158 printf("\n");
1159 }
1160
main(int argc,char ** argv)1161 int main(int argc, char **argv)
1162 {
1163 int i;
1164
1165 for (i = 1; i < argc; i++) {
1166 if (argv[i][0] != '-')
1167 continue;
1168 switch (argv[i][1]) {
1169 case '1':
1170 case '2':
1171 case '3':
1172 case '4':
1173 case '5':
1174 case '6':
1175 case '7':
1176 case '8':
1177 case '9':
1178 if (argc < i + 2) {
1179 show_usage(argc, argv);
1180 goto error;
1181 }
1182 loop_cnt[argv[i][1] - '0'] = atol(argv[i + 1]);
1183 i++;
1184 break;
1185 case 'm':
1186 if (argc < i + 2) {
1187 show_usage(argc, argv);
1188 goto error;
1189 }
1190 opt_modulo = atol(argv[i + 1]);
1191 if (opt_modulo < 0) {
1192 show_usage(argc, argv);
1193 goto error;
1194 }
1195 i++;
1196 break;
1197 case 's':
1198 if (argc < i + 2) {
1199 show_usage(argc, argv);
1200 goto error;
1201 }
1202 opt_sleep = atol(argv[i + 1]);
1203 if (opt_sleep < 0) {
1204 show_usage(argc, argv);
1205 goto error;
1206 }
1207 i++;
1208 break;
1209 case 'y':
1210 opt_yield = 1;
1211 break;
1212 case 'k':
1213 opt_signal = 1;
1214 break;
1215 case 'd':
1216 opt_disable_rseq = 1;
1217 break;
1218 case 'D':
1219 if (argc < i + 2) {
1220 show_usage(argc, argv);
1221 goto error;
1222 }
1223 opt_disable_mod = atol(argv[i + 1]);
1224 if (opt_disable_mod < 0) {
1225 show_usage(argc, argv);
1226 goto error;
1227 }
1228 i++;
1229 break;
1230 case 't':
1231 if (argc < i + 2) {
1232 show_usage(argc, argv);
1233 goto error;
1234 }
1235 opt_threads = atol(argv[i + 1]);
1236 if (opt_threads < 0) {
1237 show_usage(argc, argv);
1238 goto error;
1239 }
1240 i++;
1241 break;
1242 case 'r':
1243 if (argc < i + 2) {
1244 show_usage(argc, argv);
1245 goto error;
1246 }
1247 opt_reps = atoll(argv[i + 1]);
1248 if (opt_reps < 0) {
1249 show_usage(argc, argv);
1250 goto error;
1251 }
1252 i++;
1253 break;
1254 case 'h':
1255 show_usage(argc, argv);
1256 goto end;
1257 case 'T':
1258 if (argc < i + 2) {
1259 show_usage(argc, argv);
1260 goto error;
1261 }
1262 opt_test = *argv[i + 1];
1263 switch (opt_test) {
1264 case 's':
1265 case 'l':
1266 case 'i':
1267 case 'b':
1268 case 'm':
1269 break;
1270 default:
1271 show_usage(argc, argv);
1272 goto error;
1273 }
1274 i++;
1275 break;
1276 case 'v':
1277 verbose = 1;
1278 break;
1279 case 'M':
1280 opt_mb = 1;
1281 break;
1282 default:
1283 show_usage(argc, argv);
1284 goto error;
1285 }
1286 }
1287
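	/*
	 * Mirror the C-level loop_cnt[] values into the asm_loop_cnt_*
	 * aliases referenced by name from the x86 delay-injection
	 * assembly above.
	 */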
	loop_cnt_1 = loop_cnt[1];
	loop_cnt_2 = loop_cnt[2];
	loop_cnt_3 = loop_cnt[3];
	loop_cnt_4 = loop_cnt[4];
	loop_cnt_5 = loop_cnt[5];
	loop_cnt_6 = loop_cnt[6];

	if (set_signal_handler())
		goto error;

	if (!opt_disable_rseq && rseq_register_current_thread())
		goto error;
	switch (opt_test) {
	case 's':
		printf_verbose("spinlock\n");
		test_percpu_spinlock();
		break;
	case 'l':
		printf_verbose("linked list\n");
		test_percpu_list();
		break;
	case 'b':
		printf_verbose("buffer\n");
		test_percpu_buffer();
		break;
	case 'm':
		printf_verbose("memcpy buffer\n");
		test_percpu_memcpy_buffer();
		break;
	case 'i':
		printf_verbose("counter increment\n");
		test_percpu_inc();
		break;
	}
	if (!opt_disable_rseq && rseq_unregister_current_thread())
		abort();
end:
	return 0;

error:
	return -1;
}