1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * Stress userfaultfd syscall.
4 *
5 * Copyright (C) 2015 Red Hat, Inc.
6 *
7 * This test allocates two virtual areas and bounces the physical
8 * memory across the two virtual areas (from area_src to area_dst)
9 * using userfaultfd.
10 *
11 * There are three threads running per CPU:
12 *
13 * 1) one per-CPU thread takes a per-page pthread_mutex in a random
14 * page of the area_dst (while the physical page may still be in
15 * area_src), and increments a per-page counter in the same page,
16 * and checks its value against a verification region.
17 *
18 * 2) another per-CPU thread handles the userfaults generated by
19 * thread 1 above. userfaultfd blocking reads and poll() modes are
20 * exercised in alternation across bounces.
21 *
22 * 3) one last per-CPU thread transfers the memory in the background
23 * at maximum bandwidth (if not already transferred by thread
24 * 2). Each CPU thread takes care of transferring a portion of the
25 * area.
26 *
27 * When all threads of type 3 have completed the transfer, one bounce is
28 * complete. area_src and area_dst are then swapped. All threads are
29 * respawned and so the bounce is immediately restarted in the
30 * opposite direction.
31 *
32 * The type-1 per-CPU threads, by triggering userfaults inside
33 * pthread_mutex_lock, also verify the atomicity of the memory
34 * transfer (UFFDIO_COPY).
35 */
36
37 #define _GNU_SOURCE
38 #include <stdio.h>
39 #include <errno.h>
40 #include <unistd.h>
41 #include <stdlib.h>
42 #include <sys/types.h>
43 #include <sys/stat.h>
44 #include <fcntl.h>
45 #include <time.h>
46 #include <signal.h>
47 #include <poll.h>
48 #include <string.h>
49 #include <sys/mman.h>
50 #include <sys/syscall.h>
51 #include <sys/ioctl.h>
52 #include <sys/wait.h>
53 #include <pthread.h>
54 #include <linux/userfaultfd.h>
55 #include <setjmp.h>
56 #include <stdbool.h>
57 #include <assert.h>
58
59 #include "../kselftest.h"
60
61 #ifdef __NR_userfaultfd
62
63 static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size;
64
65 #define BOUNCE_RANDOM (1<<0)
66 #define BOUNCE_RACINGFAULTS (1<<1)
67 #define BOUNCE_VERIFY (1<<2)
68 #define BOUNCE_POLL (1<<3)
69 static int bounces;
70
71 #define TEST_ANON 1
72 #define TEST_HUGETLB 2
73 #define TEST_SHMEM 3
74 static int test_type;
75
76 /* exercise the test_uffdio_*_eexist every ALARM_INTERVAL_SECS */
77 #define ALARM_INTERVAL_SECS 10
78 static volatile bool test_uffdio_copy_eexist = true;
79 static volatile bool test_uffdio_zeropage_eexist = true;
80 /* Whether to test uffd write-protection */
81 static bool test_uffdio_wp = false;
82
83 static bool map_shared;
84 static int huge_fd;
85 static char *huge_fd_off0;
86 static unsigned long long *count_verify;
87 static int uffd, uffd_flags, finished, *pipefd;
88 static char *area_src, *area_src_alias, *area_dst, *area_dst_alias;
89 static char *zeropage;
90 pthread_attr_t attr;
91
92 /* Userfaultfd test statistics */
93 struct uffd_stats {
94 int cpu;
95 unsigned long missing_faults;
96 unsigned long wp_faults;
97 };
98
99 /* pthread_mutex_t starts at page offset 0 */
100 #define area_mutex(___area, ___nr) \
101 ((pthread_mutex_t *) ((___area) + (___nr)*page_size))
102 /*
103 * count is placed in the page after pthread_mutex_t naturally aligned
104 * to avoid alignment faults on non-x86 archs.
105 */
106 #define area_count(___area, ___nr) \
107 ((volatile unsigned long long *) ((unsigned long) \
108 ((___area) + (___nr)*page_size + \
109 sizeof(pthread_mutex_t) + \
110 sizeof(unsigned long long) - 1) & \
111 ~(unsigned long)(sizeof(unsigned long long) \
112 - 1)))
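
/*
 * Illustrative layout only (assuming 4KiB pages, which is not a
 * requirement of the test): in page ___nr of an area, area_mutex() is
 * the pthread_mutex_t at offset ___nr * 4096 and area_count() is the
 * 64-bit counter placed right after it, rounded up to an 8-byte
 * boundary so that no misaligned accesses are ever generated.
 */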
113
114 const char *examples =
115 "# Run anonymous memory test on 100MiB region with 99999 bounces:\n"
116 "./userfaultfd anon 100 99999\n\n"
117 "# Run share memory test on 1GiB region with 99 bounces:\n"
118 "./userfaultfd shmem 1000 99\n\n"
119 "# Run hugetlb memory test on 256MiB region with 50 bounces (using /dev/hugepages/hugefile):\n"
120 "./userfaultfd hugetlb 256 50 /dev/hugepages/hugefile\n\n"
121 "# Run the same hugetlb test but using shmem:\n"
122 "./userfaultfd hugetlb_shared 256 50 /dev/hugepages/hugefile\n\n"
123 "# 10MiB-~6GiB 999 bounces anonymous test, "
124 "continue forever unless an error triggers\n"
125 "while ./userfaultfd anon $[RANDOM % 6000 + 10] 999; do true; done\n\n";
126
127 static void usage(void)
128 {
129 fprintf(stderr, "\nUsage: ./userfaultfd <test type> <MiB> <bounces> "
130 "[hugetlbfs_file]\n\n");
131 fprintf(stderr, "Supported <test type>: anon, hugetlb, "
132 "hugetlb_shared, shmem\n\n");
133 fprintf(stderr, "Examples:\n\n");
134 fprintf(stderr, "%s", examples);
135 exit(1);
136 }
137
138 static void uffd_stats_reset(struct uffd_stats *uffd_stats,
139 unsigned long n_cpus)
140 {
141 int i;
142
143 for (i = 0; i < n_cpus; i++) {
144 uffd_stats[i].cpu = i;
145 uffd_stats[i].missing_faults = 0;
146 uffd_stats[i].wp_faults = 0;
147 }
148 }
149
150 static void uffd_stats_report(struct uffd_stats *stats, int n_cpus)
151 {
152 int i;
153 unsigned long long miss_total = 0, wp_total = 0;
154
155 for (i = 0; i < n_cpus; i++) {
156 miss_total += stats[i].missing_faults;
157 wp_total += stats[i].wp_faults;
158 }
159
160 printf("userfaults: %llu missing (", miss_total);
161 for (i = 0; i < n_cpus; i++)
162 printf("%lu+", stats[i].missing_faults);
163 printf("\b), %llu wp (", wp_total);
164 for (i = 0; i < n_cpus; i++)
165 printf("%lu+", stats[i].wp_faults);
166 printf("\b)\n");
167 }
168
169 static int anon_release_pages(char *rel_area)
170 {
171 int ret = 0;
172
173 if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED)) {
174 perror("madvise");
175 ret = 1;
176 }
177
178 return ret;
179 }
180
181 static void anon_allocate_area(void **alloc_area)
182 {
183 *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
184 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
185 if (*alloc_area == MAP_FAILED) {
186 fprintf(stderr, "mmap of anonymous memory failed");
187 *alloc_area = NULL;
188 }
189 }
190
191 static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset)
192 {
193 }
194
195 /* HugeTLB memory */
196 static int hugetlb_release_pages(char *rel_area)
197 {
198 int ret = 0;
199
200 if (fallocate(huge_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
201 rel_area == huge_fd_off0 ? 0 :
202 nr_pages * page_size,
203 nr_pages * page_size)) {
204 perror("fallocate");
205 ret = 1;
206 }
207
208 return ret;
209 }
210
211 static void hugetlb_allocate_area(void **alloc_area)
212 {
213 void *area_alias = NULL;
214 char **alloc_area_alias;
215
216 *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
217 (map_shared ? MAP_SHARED : MAP_PRIVATE) |
218 MAP_HUGETLB,
219 huge_fd, *alloc_area == area_src ? 0 :
220 nr_pages * page_size);
221 if (*alloc_area == MAP_FAILED) {
222 perror("mmap of hugetlbfs file failed");
223 goto fail;
224 }
225
226 if (map_shared) {
227 area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
228 MAP_SHARED | MAP_HUGETLB,
229 huge_fd, *alloc_area == area_src ? 0 :
230 nr_pages * page_size);
231 if (area_alias == MAP_FAILED) {
232 perror("mmap of hugetlb file alias failed");
233 goto fail_munmap;
234 }
235 }
236
237 if (*alloc_area == area_src) {
238 huge_fd_off0 = *alloc_area;
239 alloc_area_alias = &area_src_alias;
240 } else {
241 alloc_area_alias = &area_dst_alias;
242 }
243 if (area_alias)
244 *alloc_area_alias = area_alias;
245
246 return;
247
248 fail_munmap:
249 if (munmap(*alloc_area, nr_pages * page_size) < 0) {
250 perror("hugetlb munmap");
251 exit(1);
252 }
253 fail:
254 *alloc_area = NULL;
255 }
256
257 static void hugetlb_alias_mapping(__u64 *start, size_t len, unsigned long offset)
258 {
259 if (!map_shared)
260 return;
261 /*
262 * We can't zap just the pagetable with hugetlbfs because
263 * MADV_DONTNEED won't work. So exercise -EEXIST on an alias
264 * mapping where the pagetables are not established initially;
265 * this way we'll exercise the -EEXIST at the fs level.
266 */
267 *start = (unsigned long) area_dst_alias + offset;
268 }
269
270 /* Shared memory */
271 static int shmem_release_pages(char *rel_area)
272 {
273 int ret = 0;
274
275 if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE)) {
276 perror("madvise");
277 ret = 1;
278 }
279
280 return ret;
281 }
282
283 static void shmem_allocate_area(void **alloc_area)
284 {
285 *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
286 MAP_ANONYMOUS | MAP_SHARED, -1, 0);
287 if (*alloc_area == MAP_FAILED) {
288 fprintf(stderr, "shared memory mmap failed\n");
289 *alloc_area = NULL;
290 }
291 }
292
293 struct uffd_test_ops {
294 unsigned long expected_ioctls;
295 void (*allocate_area)(void **alloc_area);
296 int (*release_pages)(char *rel_area);
297 void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset);
298 };
299
300 #define SHMEM_EXPECTED_IOCTLS ((1 << _UFFDIO_WAKE) | \
301 (1 << _UFFDIO_COPY) | \
302 (1 << _UFFDIO_ZEROPAGE))
303
304 #define ANON_EXPECTED_IOCTLS ((1 << _UFFDIO_WAKE) | \
305 (1 << _UFFDIO_COPY) | \
306 (1 << _UFFDIO_ZEROPAGE) | \
307 (1 << _UFFDIO_WRITEPROTECT))
308
309 static struct uffd_test_ops anon_uffd_test_ops = {
310 .expected_ioctls = ANON_EXPECTED_IOCTLS,
311 .allocate_area = anon_allocate_area,
312 .release_pages = anon_release_pages,
313 .alias_mapping = noop_alias_mapping,
314 };
315
316 static struct uffd_test_ops shmem_uffd_test_ops = {
317 .expected_ioctls = SHMEM_EXPECTED_IOCTLS,
318 .allocate_area = shmem_allocate_area,
319 .release_pages = shmem_release_pages,
320 .alias_mapping = noop_alias_mapping,
321 };
322
323 static struct uffd_test_ops hugetlb_uffd_test_ops = {
324 .expected_ioctls = UFFD_API_RANGE_IOCTLS_BASIC,
325 .allocate_area = hugetlb_allocate_area,
326 .release_pages = hugetlb_release_pages,
327 .alias_mapping = hugetlb_alias_mapping,
328 };
329
330 static struct uffd_test_ops *uffd_test_ops;
331
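/*
 * Compare one byte at a time and return 1 at the first difference, so
 * unlike vectorized bcmp()/memcmp() this never gives a false "equal"
 * result on memory that is changing concurrently (see the comment in
 * locking_thread()).
 */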
332 static int my_bcmp(char *str1, char *str2, size_t n)
333 {
334 unsigned long i;
335 for (i = 0; i < n; i++)
336 if (str1[i] != str2[i])
337 return 1;
338 return 0;
339 }
340
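/*
 * Set (wp == true) or clear (wp == false) write-protection on
 * [start, start + len) with UFFDIO_WRITEPROTECT; clearing it also
 * wakes up any thread blocked on a write-protect fault in the range.
 */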
341 static void wp_range(int ufd, __u64 start, __u64 len, bool wp)
342 {
343 struct uffdio_writeprotect prms = { 0 };
344
345 /* Write protection page faults */
346 prms.range.start = start;
347 prms.range.len = len;
348 /* Set write-protect, or undo it; undoing also wakes up any waiters */
349 prms.mode = wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0;
350
351 if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms)) {
352 fprintf(stderr, "clear WP failed for address 0x%Lx\n", start);
353 exit(1);
354 }
355 }
356
357 static void *locking_thread(void *arg)
358 {
359 unsigned long cpu = (unsigned long) arg;
360 struct random_data rand;
361 unsigned long page_nr = *(&(page_nr)); /* silence "uninitialized" warning */
362 int32_t rand_nr;
363 unsigned long long count;
364 char randstate[64];
365 unsigned int seed;
366 time_t start;
367
368 if (bounces & BOUNCE_RANDOM) {
369 seed = (unsigned int) time(NULL) - bounces;
370 if (!(bounces & BOUNCE_RACINGFAULTS))
371 seed += cpu;
372 bzero(&rand, sizeof(rand));
373 bzero(&randstate, sizeof(randstate));
374 if (initstate_r(seed, randstate, sizeof(randstate), &rand)) {
375 fprintf(stderr, "srandom_r error\n");
376 exit(1);
377 }
378 } else {
379 page_nr = -bounces;
380 if (!(bounces & BOUNCE_RACINGFAULTS))
381 page_nr += cpu * nr_pages_per_cpu;
382 }
383
384 while (!finished) {
385 if (bounces & BOUNCE_RANDOM) {
386 if (random_r(&rand, &rand_nr)) {
387 fprintf(stderr, "random_r 1 error\n");
388 exit(1);
389 }
390 page_nr = rand_nr;
391 if (sizeof(page_nr) > sizeof(rand_nr)) {
392 if (random_r(&rand, &rand_nr)) {
393 fprintf(stderr, "random_r 2 error\n");
394 exit(1);
395 }
396 page_nr |= (((unsigned long) rand_nr) << 16) <<
397 16;
398 }
399 } else
400 page_nr += 1;
401 page_nr %= nr_pages;
402
403 start = time(NULL);
404 if (bounces & BOUNCE_VERIFY) {
405 count = *area_count(area_dst, page_nr);
406 if (!count) {
407 fprintf(stderr,
408 "page_nr %lu wrong count %Lu %Lu\n",
409 page_nr, count,
410 count_verify[page_nr]);
411 exit(1);
412 }
413
414
415 /*
416 * We can't use bcmp (or memcmp) because that
417 * returns 0 erroneously if the memory is
418 * changing under it (even if the end of the
419 * page is never changing and always
420 * different).
421 */
422 #if 1
423 if (!my_bcmp(area_dst + page_nr * page_size, zeropage,
424 page_size)) {
425 fprintf(stderr,
426 "my_bcmp page_nr %lu wrong count %Lu %Lu\n",
427 page_nr, count, count_verify[page_nr]);
428 exit(1);
429 }
430 #else
431 unsigned long loops;
432
433 loops = 0;
434 /* uncomment the below line to test with mutex */
435 /* pthread_mutex_lock(area_mutex(area_dst, page_nr)); */
436 while (!bcmp(area_dst + page_nr * page_size, zeropage,
437 page_size)) {
438 loops += 1;
439 if (loops > 10)
440 break;
441 }
442 /* uncomment below line to test with mutex */
443 /* pthread_mutex_unlock(area_mutex(area_dst, page_nr)); */
444 if (loops) {
445 fprintf(stderr,
446 "page_nr %lu all zero thread %lu %p %lu\n",
447 page_nr, cpu, area_dst + page_nr * page_size,
448 loops);
449 if (loops > 10)
450 exit(1);
451 }
452 #endif
453 }
454
455 pthread_mutex_lock(area_mutex(area_dst, page_nr));
456 count = *area_count(area_dst, page_nr);
457 if (count != count_verify[page_nr]) {
458 fprintf(stderr,
459 "page_nr %lu memory corruption %Lu %Lu\n",
460 page_nr, count,
461 count_verify[page_nr]); exit(1);
462 }
463 count++;
464 *area_count(area_dst, page_nr) = count_verify[page_nr] = count;
465 pthread_mutex_unlock(area_mutex(area_dst, page_nr));
466
467 if (time(NULL) - start > 1)
468 fprintf(stderr,
469 "userfault too slow %ld "
470 "possible false positive with overcommit\n",
471 time(NULL) - start);
472 }
473
474 return NULL;
475 }
476
477 static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy,
478 unsigned long offset)
479 {
480 uffd_test_ops->alias_mapping(&uffdio_copy->dst,
481 uffdio_copy->len,
482 offset);
483 if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) {
484 /* real retval in uffdio_copy.copy */
485 if (uffdio_copy->copy != -EEXIST) {
486 fprintf(stderr, "UFFDIO_COPY retry error %Ld\n",
487 uffdio_copy->copy);
488 exit(1);
489 }
490 } else {
491 fprintf(stderr, "UFFDIO_COPY retry unexpected %Ld\n",
492 uffdio_copy->copy); exit(1);
493 }
494 }
495
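/*
 * Resolve a missing fault at area_dst + offset with UFFDIO_COPY,
 * copying the page from area_src.  Returns 1 if this call filled the
 * page, 0 if someone else did (-EEXIST).  When "retry" is set the copy
 * is repeated once to exercise the -EEXIST path.
 */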
496 static int __copy_page(int ufd, unsigned long offset, bool retry)
497 {
498 struct uffdio_copy uffdio_copy;
499
500 if (offset >= nr_pages * page_size) {
501 fprintf(stderr, "unexpected offset %lu\n", offset);
502 exit(1);
503 }
504 uffdio_copy.dst = (unsigned long) area_dst + offset;
505 uffdio_copy.src = (unsigned long) area_src + offset;
506 uffdio_copy.len = page_size;
507 if (test_uffdio_wp)
508 uffdio_copy.mode = UFFDIO_COPY_MODE_WP;
509 else
510 uffdio_copy.mode = 0;
511 uffdio_copy.copy = 0;
512 if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) {
513 /* real retval in uffdio_copy.copy */
514 if (uffdio_copy.copy != -EEXIST) {
515 fprintf(stderr, "UFFDIO_COPY error %Ld\n",
516 uffdio_copy.copy);
517 exit(1);
518 }
519 } else if (uffdio_copy.copy != page_size) {
520 fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n",
521 uffdio_copy.copy); exit(1);
522 } else {
523 if (test_uffdio_copy_eexist && retry) {
524 test_uffdio_copy_eexist = false;
525 retry_copy_page(ufd, &uffdio_copy, offset);
526 }
527 return 1;
528 }
529 return 0;
530 }
531
532 static int copy_page_retry(int ufd, unsigned long offset)
533 {
534 return __copy_page(ufd, offset, true);
535 }
536
537 static int copy_page(int ufd, unsigned long offset)
538 {
539 return __copy_page(ufd, offset, false);
540 }
541
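/*
 * Read one event from the userfaultfd.  Returns 0 on success, 1 if a
 * non-blocking read found no event (EAGAIN); any other failure or a
 * short read aborts the test.
 */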
542 static int uffd_read_msg(int ufd, struct uffd_msg *msg)
543 {
544 int ret = read(uffd, msg, sizeof(*msg));
545
546 if (ret != sizeof(*msg)) {
547 if (ret < 0) {
548 if (errno == EAGAIN)
549 return 1;
550 perror("blocking read error");
551 } else {
552 fprintf(stderr, "short read\n");
553 }
554 exit(1);
555 }
556
557 return 0;
558 }
559
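/*
 * Resolve a single pagefault event: write-protect faults are cleared
 * with UFFDIO_WRITEPROTECT, missing faults are filled with UFFDIO_COPY.
 */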
560 static void uffd_handle_page_fault(struct uffd_msg *msg,
561 struct uffd_stats *stats)
562 {
563 unsigned long offset;
564
565 if (msg->event != UFFD_EVENT_PAGEFAULT) {
566 fprintf(stderr, "unexpected msg event %u\n", msg->event);
567 exit(1);
568 }
569
570 if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP) {
571 wp_range(uffd, msg->arg.pagefault.address, page_size, false);
572 stats->wp_faults++;
573 } else {
574 /* Missing page faults */
575 if (bounces & BOUNCE_VERIFY &&
576 msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) {
577 fprintf(stderr, "unexpected write fault\n");
578 exit(1);
579 }
580
581 offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst;
582 offset &= ~(page_size-1);
583
584 if (copy_page(uffd, offset))
585 stats->missing_faults++;
586 }
587 }
588
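/*
 * Per-CPU fault handler using poll(): services pagefault, fork, remove
 * and remap events until the main thread writes to its pipe to tell it
 * to quit.
 */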
589 static void *uffd_poll_thread(void *arg)
590 {
591 struct uffd_stats *stats = (struct uffd_stats *)arg;
592 unsigned long cpu = stats->cpu;
593 struct pollfd pollfd[2];
594 struct uffd_msg msg;
595 struct uffdio_register uffd_reg;
596 int ret;
597 char tmp_chr;
598
599 pollfd[0].fd = uffd;
600 pollfd[0].events = POLLIN;
601 pollfd[1].fd = pipefd[cpu*2];
602 pollfd[1].events = POLLIN;
603
604 for (;;) {
605 ret = poll(pollfd, 2, -1);
606 if (!ret) {
607 fprintf(stderr, "poll error %d\n", ret);
608 exit(1);
609 }
610 if (ret < 0) {
611 perror("poll");
612 exit(1);
613 }
614 if (pollfd[1].revents & POLLIN) {
615 if (read(pollfd[1].fd, &tmp_chr, 1) != 1) {
616 fprintf(stderr, "read pipefd error\n");
617 exit(1);
618 }
619 break;
620 }
621 if (!(pollfd[0].revents & POLLIN)) {
622 fprintf(stderr, "pollfd[0].revents %d\n",
623 pollfd[0].revents);
624 exit(1);
625 }
626 if (uffd_read_msg(uffd, &msg))
627 continue;
628 switch (msg.event) {
629 default:
630 fprintf(stderr, "unexpected msg event %u\n",
631 msg.event); exit(1);
632 break;
633 case UFFD_EVENT_PAGEFAULT:
634 uffd_handle_page_fault(&msg, stats);
635 break;
636 case UFFD_EVENT_FORK:
637 close(uffd);
638 uffd = msg.arg.fork.ufd;
639 pollfd[0].fd = uffd;
640 break;
641 case UFFD_EVENT_REMOVE:
642 uffd_reg.range.start = msg.arg.remove.start;
643 uffd_reg.range.len = msg.arg.remove.end -
644 msg.arg.remove.start;
645 if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range)) {
646 fprintf(stderr, "remove failure\n");
647 exit(1);
648 }
649 break;
650 case UFFD_EVENT_REMAP:
651 area_dst = (char *)(unsigned long)msg.arg.remap.to;
652 break;
653 }
654 }
655
656 return NULL;
657 }
658
659 pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER;
660
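/*
 * Per-CPU fault handler using blocking reads; it resolves pagefault
 * events until it is cancelled by stress().
 */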
661 static void *uffd_read_thread(void *arg)
662 {
663 struct uffd_stats *stats = (struct uffd_stats *)arg;
664 struct uffd_msg msg;
665
666 pthread_mutex_unlock(&uffd_read_mutex);
667 /* from here cancellation is ok */
668
669 for (;;) {
670 if (uffd_read_msg(uffd, &msg))
671 continue;
672 uffd_handle_page_fault(&msg, stats);
673 }
674
675 return NULL;
676 }
677
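/*
 * Per-CPU background copier: transfers this CPU's slice of the area
 * with UFFDIO_COPY; when uffd-wp is being tested it also
 * write-protects the slice once the first half has been mapped.
 */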
678 static void *background_thread(void *arg)
679 {
680 unsigned long cpu = (unsigned long) arg;
681 unsigned long page_nr, start_nr, mid_nr, end_nr;
682
683 start_nr = cpu * nr_pages_per_cpu;
684 end_nr = (cpu+1) * nr_pages_per_cpu;
685 mid_nr = (start_nr + end_nr) / 2;
686
687 /* Copy the first half of the pages */
688 for (page_nr = start_nr; page_nr < mid_nr; page_nr++)
689 copy_page_retry(uffd, page_nr * page_size);
690
691 /*
692 * If we need to test uffd-wp, set it up now. Then we'll have
693 * at least the first half of the pages mapped already which
694 * can be write-protected for testing
695 */
696 if (test_uffdio_wp)
697 wp_range(uffd, (unsigned long)area_dst + start_nr * page_size,
698 nr_pages_per_cpu * page_size, true);
699
700 /*
701 * Continue the 2nd half of the page copying, handling write
702 * protection faults if any
703 */
704 for (page_nr = mid_nr; page_nr < end_nr; page_nr++)
705 copy_page_retry(uffd, page_nr * page_size);
706
707 return NULL;
708 }
709
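/*
 * Run one bounce: spawn the locking, fault-handling and background
 * threads, wait for the background copy to finish, zap area_src, then
 * tear everything down.  Returns 0 on success.
 */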
710 static int stress(struct uffd_stats *uffd_stats)
711 {
712 unsigned long cpu;
713 pthread_t locking_threads[nr_cpus];
714 pthread_t uffd_threads[nr_cpus];
715 pthread_t background_threads[nr_cpus];
716
717 finished = 0;
718 for (cpu = 0; cpu < nr_cpus; cpu++) {
719 if (pthread_create(&locking_threads[cpu], &attr,
720 locking_thread, (void *)cpu))
721 return 1;
722 if (bounces & BOUNCE_POLL) {
723 if (pthread_create(&uffd_threads[cpu], &attr,
724 uffd_poll_thread,
725 (void *)&uffd_stats[cpu]))
726 return 1;
727 } else {
728 if (pthread_create(&uffd_threads[cpu], &attr,
729 uffd_read_thread,
730 (void *)&uffd_stats[cpu]))
731 return 1;
732 pthread_mutex_lock(&uffd_read_mutex);
733 }
734 if (pthread_create(&background_threads[cpu], &attr,
735 background_thread, (void *)cpu))
736 return 1;
737 }
738 for (cpu = 0; cpu < nr_cpus; cpu++)
739 if (pthread_join(background_threads[cpu], NULL))
740 return 1;
741
742 /*
743 * Be strict and immediately zap area_src, the whole area has
744 * been transferred already by the background threads. The
745 * area_src could then be faulted in again, in a racy way, by the
746 * still running uffd threads reading zeropages after we zapped
747 * area_src (but they're guaranteed to get -EEXIST from
748 * UFFDIO_COPY without writing zero pages into area_dst
749 * because the background threads already completed).
750 */
751 if (uffd_test_ops->release_pages(area_src))
752 return 1;
753
754
755 finished = 1;
756 for (cpu = 0; cpu < nr_cpus; cpu++)
757 if (pthread_join(locking_threads[cpu], NULL))
758 return 1;
759
760 for (cpu = 0; cpu < nr_cpus; cpu++) {
761 char c;
762 if (bounces & BOUNCE_POLL) {
763 if (write(pipefd[cpu*2+1], &c, 1) != 1) {
764 fprintf(stderr, "pipefd write error\n");
765 return 1;
766 }
767 if (pthread_join(uffd_threads[cpu],
768 (void *)&uffd_stats[cpu]))
769 return 1;
770 } else {
771 if (pthread_cancel(uffd_threads[cpu]))
772 return 1;
773 if (pthread_join(uffd_threads[cpu], NULL))
774 return 1;
775 }
776 }
777
778 return 0;
779 }
780
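/*
 * Create the userfaultfd and negotiate the API/features with the
 * kernel.  A minimal sketch of the same handshake (no extra features
 * requested):
 *
 *	int fd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
 *	struct uffdio_api api = { .api = UFFD_API, .features = 0 };
 *	if (fd < 0 || ioctl(fd, UFFDIO_API, &api))
 *		... bail out ...
 */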
781 static int userfaultfd_open(int features)
782 {
783 struct uffdio_api uffdio_api;
784
785 uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
786 if (uffd < 0) {
787 fprintf(stderr,
788 "userfaultfd syscall not available in this kernel\n");
789 return 1;
790 }
791 uffd_flags = fcntl(uffd, F_GETFD, NULL);
792
793 uffdio_api.api = UFFD_API;
794 uffdio_api.features = features;
795 if (ioctl(uffd, UFFDIO_API, &uffdio_api)) {
796 fprintf(stderr, "UFFDIO_API\n");
797 return 1;
798 }
799 if (uffdio_api.api != UFFD_API) {
800 fprintf(stderr, "UFFDIO_API error %Lu\n", uffdio_api.api);
801 return 1;
802 }
803
804 return 0;
805 }
806
807 sigjmp_buf jbuf, *sigbuf;
808
809 static void sighndl(int sig, siginfo_t *siginfo, void *ptr)
810 {
811 if (sig == SIGBUS) {
812 if (sigbuf)
813 siglongjmp(*sigbuf, 1);
814 abort();
815 }
816 }
817
818 /*
819 * For non-cooperative userfaultfd test we fork() a process that will
820 * generate pagefaults, will mremap the area monitored by the
821 * userfaultfd and at last this process will release the monitored
822 * area.
823 * For the anonymous and shared memory the area is divided into two
824 * parts, the first part is accessed before mremap, and the second
825 * part is accessed after mremap. Since hugetlbfs does not support
826 * mremap, the entire monitored area is accessed in a single pass for
827 * HUGETLB_TEST.
828 * The release of the pages currently generates events for shmem and
829 * anonymous memory (UFFD_EVENT_REMOVE), hence it is not checked
830 * for hugetlb.
831 * For the signal test (UFFD_FEATURE_SIGBUS), signal_test = 1, we register
832 * the monitored area, generate pagefaults and test that the signal is
833 * delivered; UFFDIO_COPY is used to allocate the missing page and retry.
834 * For signal_test = 2 we test the robustness use case: we release the
835 * monitored area and fork a process that will generate pagefaults, then
836 * verify the signal is generated. This also tests UFFD_FEATURE_EVENT_FORK
837 * along with SIGBUS; a monitor thread verifies no userfault events occur.
838 */
839 static int faulting_process(int signal_test)
840 {
841 unsigned long nr;
842 unsigned long long count;
843 unsigned long split_nr_pages;
844 unsigned long lastnr;
845 struct sigaction act;
846 unsigned long signalled = 0;
847
848 if (test_type != TEST_HUGETLB)
849 split_nr_pages = (nr_pages + 1) / 2;
850 else
851 split_nr_pages = nr_pages;
852
853 if (signal_test) {
854 sigbuf = &jbuf;
855 memset(&act, 0, sizeof(act));
856 act.sa_sigaction = sighndl;
857 act.sa_flags = SA_SIGINFO;
858 if (sigaction(SIGBUS, &act, 0)) {
859 perror("sigaction");
860 return 1;
861 }
862 lastnr = (unsigned long)-1;
863 }
864
865 for (nr = 0; nr < split_nr_pages; nr++) {
866 int steps = 1;
867 unsigned long offset = nr * page_size;
868
869 if (signal_test) {
870 if (sigsetjmp(*sigbuf, 1) != 0) {
871 if (steps == 1 && nr == lastnr) {
872 fprintf(stderr, "Signal repeated\n");
873 return 1;
874 }
875
876 lastnr = nr;
877 if (signal_test == 1) {
878 if (steps == 1) {
879 /* This is a MISSING request */
880 steps++;
881 if (copy_page(uffd, offset))
882 signalled++;
883 } else {
884 /* This is a WP request */
885 assert(steps == 2);
886 wp_range(uffd,
887 (__u64)area_dst +
888 offset,
889 page_size, false);
890 }
891 } else {
892 signalled++;
893 continue;
894 }
895 }
896 }
897
898 count = *area_count(area_dst, nr);
899 if (count != count_verify[nr]) {
900 fprintf(stderr,
901 "nr %lu memory corruption %Lu %Lu\n",
902 nr, count,
903 count_verify[nr]); exit(1);
904 }
905 /*
906 * Trigger the write protection fault, if any, by writing
907 * the same value back.
908 */
909 *area_count(area_dst, nr) = count;
910 }
911
912 if (signal_test)
913 return signalled != split_nr_pages;
914
915 if (test_type == TEST_HUGETLB)
916 return 0;
917
918 area_dst = mremap(area_dst, nr_pages * page_size, nr_pages * page_size,
919 MREMAP_MAYMOVE | MREMAP_FIXED, area_src);
920 if (area_dst == MAP_FAILED) {
921 perror("mremap");
922 exit(1);
923 }
924
925 for (; nr < nr_pages; nr++) {
926 count = *area_count(area_dst, nr);
927 if (count != count_verify[nr]) {
928 fprintf(stderr,
929 "nr %lu memory corruption %Lu %Lu\n",
930 nr, count,
931 count_verify[nr]); exit(1);
932 }
933 /*
934 * Trigger the write protection fault, if any, by writing
935 * the same value back.
936 */
937 *area_count(area_dst, nr) = count;
938 }
939
940 if (uffd_test_ops->release_pages(area_dst))
941 return 1;
942
943 for (nr = 0; nr < nr_pages; nr++) {
944 if (my_bcmp(area_dst + nr * page_size, zeropage, page_size)) {
945 fprintf(stderr, "nr %lu is not zero\n", nr);
946 exit(1);
947 }
948 }
949
950 return 0;
951 }
952
953 static void retry_uffdio_zeropage(int ufd,
954 struct uffdio_zeropage *uffdio_zeropage,
955 unsigned long offset)
956 {
957 uffd_test_ops->alias_mapping(&uffdio_zeropage->range.start,
958 uffdio_zeropage->range.len,
959 offset);
960 if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) {
961 if (uffdio_zeropage->zeropage != -EEXIST) {
962 fprintf(stderr, "UFFDIO_ZEROPAGE retry error %Ld\n",
963 uffdio_zeropage->zeropage);
964 exit(1);
965 }
966 } else {
967 fprintf(stderr, "UFFDIO_ZEROPAGE retry unexpected %Ld\n",
968 uffdio_zeropage->zeropage); exit(1);
969 }
970 }
971
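/*
 * Fill the page at area_dst + offset with UFFDIO_ZEROPAGE.  For memory
 * types that don't support it (hugetlbfs) the ioctl is expected to
 * fail with -EINVAL; otherwise this returns 1 if the call zeroed the
 * page.
 */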
972 static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
973 {
974 struct uffdio_zeropage uffdio_zeropage;
975 int ret;
976 unsigned long has_zeropage;
977
978 has_zeropage = uffd_test_ops->expected_ioctls & (1 << _UFFDIO_ZEROPAGE);
979
980 if (offset >= nr_pages * page_size) {
981 fprintf(stderr, "unexpected offset %lu\n", offset);
982 exit(1);
983 }
984 uffdio_zeropage.range.start = (unsigned long) area_dst + offset;
985 uffdio_zeropage.range.len = page_size;
986 uffdio_zeropage.mode = 0;
987 ret = ioctl(ufd, UFFDIO_ZEROPAGE, &uffdio_zeropage);
988 if (ret) {
989 /* real retval in uffdio_zeropage.zeropage */
990 if (has_zeropage) {
991 if (uffdio_zeropage.zeropage == -EEXIST) {
992 fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n");
993 exit(1);
994 } else {
995 fprintf(stderr, "UFFDIO_ZEROPAGE error %Ld\n",
996 uffdio_zeropage.zeropage);
997 exit(1);
998 }
999 } else {
1000 if (uffdio_zeropage.zeropage != -EINVAL) {
1001 fprintf(stderr,
1002 "UFFDIO_ZEROPAGE not -EINVAL %Ld\n",
1003 uffdio_zeropage.zeropage);
1004 exit(1);
1005 }
1006 }
1007 } else if (has_zeropage) {
1008 if (uffdio_zeropage.zeropage != page_size) {
1009 fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n",
1010 uffdio_zeropage.zeropage); exit(1);
1011 } else {
1012 if (test_uffdio_zeropage_eexist && retry) {
1013 test_uffdio_zeropage_eexist = false;
1014 retry_uffdio_zeropage(ufd, &uffdio_zeropage,
1015 offset);
1016 }
1017 return 1;
1018 }
1019 } else {
1020 fprintf(stderr,
1021 "UFFDIO_ZEROPAGE succeeded %Ld\n",
1022 uffdio_zeropage.zeropage); exit(1);
1023 }
1024
1025 return 0;
1026 }
1027
1028 static int uffdio_zeropage(int ufd, unsigned long offset)
1029 {
1030 return __uffdio_zeropage(ufd, offset, false);
1031 }
1032
1033 /* exercise UFFDIO_ZEROPAGE */
1034 static int userfaultfd_zeropage_test(void)
1035 {
1036 struct uffdio_register uffdio_register;
1037 unsigned long expected_ioctls;
1038
1039 printf("testing UFFDIO_ZEROPAGE: ");
1040 fflush(stdout);
1041
1042 if (uffd_test_ops->release_pages(area_dst))
1043 return 1;
1044
1045 if (userfaultfd_open(0) < 0)
1046 return 1;
1047 uffdio_register.range.start = (unsigned long) area_dst;
1048 uffdio_register.range.len = nr_pages * page_size;
1049 uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
1050 if (test_uffdio_wp)
1051 uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
1052 if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
1053 fprintf(stderr, "register failure\n");
1054 exit(1);
1055 }
1056
1057 expected_ioctls = uffd_test_ops->expected_ioctls;
1058 if ((uffdio_register.ioctls & expected_ioctls) !=
1059 expected_ioctls) {
1060 fprintf(stderr,
1061 "unexpected missing ioctl for anon memory\n");
1062 exit(1);
1063 }
1064
1065 if (uffdio_zeropage(uffd, 0)) {
1066 if (my_bcmp(area_dst, zeropage, page_size)) {
1067 fprintf(stderr, "zeropage is not zero\n");
1068 exit(1);
1069 }
1070 }
1071
1072 close(uffd);
1073 printf("done.\n");
1074 return 0;
1075 }
1076
1077 static int userfaultfd_events_test(void)
1078 {
1079 struct uffdio_register uffdio_register;
1080 unsigned long expected_ioctls;
1081 pthread_t uffd_mon;
1082 int err, features;
1083 pid_t pid;
1084 char c;
1085 struct uffd_stats stats = { 0 };
1086
1087 printf("testing events (fork, remap, remove): ");
1088 fflush(stdout);
1089
1090 if (uffd_test_ops->release_pages(area_dst))
1091 return 1;
1092
1093 features = UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_EVENT_REMAP |
1094 UFFD_FEATURE_EVENT_REMOVE;
1095 if (userfaultfd_open(features) < 0)
1096 return 1;
1097 fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
1098
1099 uffdio_register.range.start = (unsigned long) area_dst;
1100 uffdio_register.range.len = nr_pages * page_size;
1101 uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
1102 if (test_uffdio_wp)
1103 uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
1104 if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
1105 fprintf(stderr, "register failure\n");
1106 exit(1);
1107 }
1108
1109 expected_ioctls = uffd_test_ops->expected_ioctls;
1110 if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) {
1111 fprintf(stderr, "unexpected missing ioctl for anon memory\n");
1112 exit(1);
1113 }
1114
1115 if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) {
1116 perror("uffd_poll_thread create");
1117 exit(1);
1118 }
1119
1120 pid = fork();
1121 if (pid < 0) {
1122 perror("fork");
1123 exit(1);
1124 }
1125
1126 if (!pid)
1127 return faulting_process(0);
1128
1129 waitpid(pid, &err, 0);
1130 if (err) {
1131 fprintf(stderr, "faulting process failed\n");
1132 exit(1);
1133 }
1134
1135 if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) {
1136 perror("pipe write");
1137 exit(1);
1138 }
1139 if (pthread_join(uffd_mon, NULL))
1140 return 1;
1141
1142 close(uffd);
1143
1144 uffd_stats_report(&stats, 1);
1145
1146 return stats.missing_faults != nr_pages;
1147 }
1148
1149 static int userfaultfd_sig_test(void)
1150 {
1151 struct uffdio_register uffdio_register;
1152 unsigned long expected_ioctls;
1153 unsigned long userfaults;
1154 pthread_t uffd_mon;
1155 int err, features;
1156 pid_t pid;
1157 char c;
1158 struct uffd_stats stats = { 0 };
1159
1160 printf("testing signal delivery: ");
1161 fflush(stdout);
1162
1163 if (uffd_test_ops->release_pages(area_dst))
1164 return 1;
1165
1166 features = UFFD_FEATURE_EVENT_FORK|UFFD_FEATURE_SIGBUS;
1167 if (userfaultfd_open(features) < 0)
1168 return 1;
1169 fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
1170
1171 uffdio_register.range.start = (unsigned long) area_dst;
1172 uffdio_register.range.len = nr_pages * page_size;
1173 uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
1174 if (test_uffdio_wp)
1175 uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
1176 if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
1177 fprintf(stderr, "register failure\n");
1178 exit(1);
1179 }
1180
1181 expected_ioctls = uffd_test_ops->expected_ioctls;
1182 if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) {
1183 fprintf(stderr, "unexpected missing ioctl for anon memory\n");
1184 exit(1);
1185 }
1186
1187 if (faulting_process(1)) {
1188 fprintf(stderr, "faulting process failed\n");
1189 exit(1);
1190 }
1191
1192 if (uffd_test_ops->release_pages(area_dst))
1193 return 1;
1194
1195 if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) {
1196 perror("uffd_poll_thread create");
1197 exit(1);
1198 }
1199
1200 pid = fork();
1201 if (pid < 0) {
1202 perror("fork");
1203 exit(1);
1204 }
1205
1206 if (!pid)
1207 exit(faulting_process(2));
1208
1209 waitpid(pid, &err, 0);
1210 if (err) {
1211 fprintf(stderr, "faulting process failed\n");
1212 exit(1);
1213 }
1214
1215 if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) {
1216 perror("pipe write");
1217 exit(1);
1218 }
1219 if (pthread_join(uffd_mon, NULL))
1220 return 1;
1221
1222 printf("done.\n");
1223 if ((userfaults = stats.missing_faults) != 0)
1224 fprintf(stderr, "Signal test failed, userfaults: %ld\n",
1225 userfaults);
1226 close(uffd);
1227 return userfaults != 0;
1228 }
1229
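/*
 * Main stress loop: allocate the two areas, then for every bounce
 * register area_dst, run one stress() pass, verify the counters and
 * swap the areas.  Finishes with the zeropage, signal and events
 * tests.
 */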
1230 static int userfaultfd_stress(void)
1231 {
1232 void *area;
1233 char *tmp_area;
1234 unsigned long nr;
1235 struct uffdio_register uffdio_register;
1236 unsigned long cpu;
1237 int err;
1238 struct uffd_stats uffd_stats[nr_cpus];
1239
1240 uffd_test_ops->allocate_area((void **)&area_src);
1241 if (!area_src)
1242 return 1;
1243 uffd_test_ops->allocate_area((void **)&area_dst);
1244 if (!area_dst)
1245 return 1;
1246
1247 if (userfaultfd_open(0) < 0)
1248 return 1;
1249
1250 count_verify = malloc(nr_pages * sizeof(unsigned long long));
1251 if (!count_verify) {
1252 perror("count_verify");
1253 return 1;
1254 }
1255
1256 for (nr = 0; nr < nr_pages; nr++) {
1257 *area_mutex(area_src, nr) = (pthread_mutex_t)
1258 PTHREAD_MUTEX_INITIALIZER;
1259 count_verify[nr] = *area_count(area_src, nr) = 1;
1260 /*
1261 * In the transition from 255 to 256, powerpc will
1262 * read out of order in my_bcmp and see both bytes as
1263 * zero, so leave a placeholder below always non-zero
1264 * after the count, to avoid my_bcmp triggering false
1265 * positives.
1266 */
1267 *(area_count(area_src, nr) + 1) = 1;
1268 }
1269
1270 pipefd = malloc(sizeof(int) * nr_cpus * 2);
1271 if (!pipefd) {
1272 perror("pipefd");
1273 return 1;
1274 }
1275 for (cpu = 0; cpu < nr_cpus; cpu++) {
1276 if (pipe2(&pipefd[cpu*2], O_CLOEXEC | O_NONBLOCK)) {
1277 perror("pipe");
1278 return 1;
1279 }
1280 }
1281
1282 if (posix_memalign(&area, page_size, page_size)) {
1283 fprintf(stderr, "out of memory\n");
1284 return 1;
1285 }
1286 zeropage = area;
1287 bzero(zeropage, page_size);
1288
1289 pthread_mutex_lock(&uffd_read_mutex);
1290
1291 pthread_attr_init(&attr);
1292 pthread_attr_setstacksize(&attr, 16*1024*1024);
1293
1294 err = 0;
1295 while (bounces--) {
1296 unsigned long expected_ioctls;
1297
1298 printf("bounces: %d, mode:", bounces);
1299 if (bounces & BOUNCE_RANDOM)
1300 printf(" rnd");
1301 if (bounces & BOUNCE_RACINGFAULTS)
1302 printf(" racing");
1303 if (bounces & BOUNCE_VERIFY)
1304 printf(" ver");
1305 if (bounces & BOUNCE_POLL)
1306 printf(" poll");
1307 printf(", ");
1308 fflush(stdout);
1309
1310 if (bounces & BOUNCE_POLL)
1311 fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
1312 else
1313 fcntl(uffd, F_SETFL, uffd_flags & ~O_NONBLOCK);
1314
1315 /* register */
1316 uffdio_register.range.start = (unsigned long) area_dst;
1317 uffdio_register.range.len = nr_pages * page_size;
1318 uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
1319 if (test_uffdio_wp)
1320 uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
1321 if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
1322 fprintf(stderr, "register failure\n");
1323 return 1;
1324 }
1325 expected_ioctls = uffd_test_ops->expected_ioctls;
1326 if ((uffdio_register.ioctls & expected_ioctls) !=
1327 expected_ioctls) {
1328 fprintf(stderr,
1329 "unexpected missing ioctl for anon memory\n");
1330 return 1;
1331 }
1332
1333 if (area_dst_alias) {
1334 uffdio_register.range.start = (unsigned long)
1335 area_dst_alias;
1336 if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
1337 fprintf(stderr, "register failure alias\n");
1338 return 1;
1339 }
1340 }
1341
1342 /*
1343 * The madvise done previously isn't enough: some
1344 * uffd_thread could have read userfaults (one of
1345 * those already resolved by the background thread)
1346 * and it may be in the process of calling
1347 * UFFDIO_COPY. UFFDIO_COPY will read the zapped
1348 * area_src and it would map a zero page in it (of
1349 * course such a UFFDIO_COPY is perfectly safe as it'd
1350 * return -EEXIST). The problem comes at the next
1351 * bounce though: that racing UFFDIO_COPY would
1352 * generate zeropages in the area_src, so invalidating
1353 * the previous MADV_DONTNEED. Without this additional
1354 * MADV_DONTNEED those zeropage leftovers in the
1355 * area_src would lead to -EEXIST failure during the
1356 * next bounce, effectively leaving a zeropage in the
1357 * area_dst.
1358 *
1359 * Try commenting out this madvise to see the memory
1360 * corruption being caught pretty quickly.
1361 *
1362 * khugepaged is inhibited from collapsing THP after
1363 * MADV_DONTNEED only once UFFDIO_REGISTER has taken effect,
1364 * so the MADV_DONTNEED must be done here.
1365 */
1366 if (uffd_test_ops->release_pages(area_dst))
1367 return 1;
1368
1369 uffd_stats_reset(uffd_stats, nr_cpus);
1370
1371 /* bounce pass */
1372 if (stress(uffd_stats))
1373 return 1;
1374
1375 /* Clear all the write protections if there is any */
1376 if (test_uffdio_wp)
1377 wp_range(uffd, (unsigned long)area_dst,
1378 nr_pages * page_size, false);
1379
1380 /* unregister */
1381 if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range)) {
1382 fprintf(stderr, "unregister failure\n");
1383 return 1;
1384 }
1385 if (area_dst_alias) {
1386 uffdio_register.range.start = (unsigned long) area_dst;
1387 if (ioctl(uffd, UFFDIO_UNREGISTER,
1388 &uffdio_register.range)) {
1389 fprintf(stderr, "unregister failure alias\n");
1390 return 1;
1391 }
1392 }
1393
1394 /* verification */
1395 if (bounces & BOUNCE_VERIFY) {
1396 for (nr = 0; nr < nr_pages; nr++) {
1397 if (*area_count(area_dst, nr) != count_verify[nr]) {
1398 fprintf(stderr,
1399 "error area_count %Lu %Lu %lu\n",
1400 *area_count(area_dst, nr),
1401 count_verify[nr],
1402 nr);
1403 err = 1;
1404 bounces = 0;
1405 }
1406 }
1407 }
1408
1409 /* prepare next bounce */
1410 tmp_area = area_src;
1411 area_src = area_dst;
1412 area_dst = tmp_area;
1413
1414 tmp_area = area_src_alias;
1415 area_src_alias = area_dst_alias;
1416 area_dst_alias = tmp_area;
1417
1418 uffd_stats_report(uffd_stats, nr_cpus);
1419 }
1420
1421 if (err)
1422 return err;
1423
1424 close(uffd);
1425 return userfaultfd_zeropage_test() || userfaultfd_sig_test()
1426 || userfaultfd_events_test();
1427 }
1428
1429 /*
1430 * Copied from mlock2-tests.c
1431 */
1432 unsigned long default_huge_page_size(void)
1433 {
1434 unsigned long hps = 0;
1435 char *line = NULL;
1436 size_t linelen = 0;
1437 FILE *f = fopen("/proc/meminfo", "r");
1438
1439 if (!f)
1440 return 0;
1441 while (getline(&line, &linelen, f) > 0) {
1442 if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) {
1443 hps <<= 10;
1444 break;
1445 }
1446 }
1447
1448 free(line);
1449 fclose(f);
1450 return hps;
1451 }
1452
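/*
 * Parse the <test type> argument and select the per-memory-type
 * operations and page size accordingly.
 */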
1453 static void set_test_type(const char *type)
1454 {
1455 if (!strcmp(type, "anon")) {
1456 test_type = TEST_ANON;
1457 uffd_test_ops = &anon_uffd_test_ops;
1458 /* Only enable write-protect test for anonymous test */
1459 test_uffdio_wp = true;
1460 } else if (!strcmp(type, "hugetlb")) {
1461 test_type = TEST_HUGETLB;
1462 uffd_test_ops = &hugetlb_uffd_test_ops;
1463 } else if (!strcmp(type, "hugetlb_shared")) {
1464 map_shared = true;
1465 test_type = TEST_HUGETLB;
1466 uffd_test_ops = &hugetlb_uffd_test_ops;
1467 } else if (!strcmp(type, "shmem")) {
1468 map_shared = true;
1469 test_type = TEST_SHMEM;
1470 uffd_test_ops = &shmem_uffd_test_ops;
1471 } else {
1472 fprintf(stderr, "Unknown test type: %s\n", type); exit(1);
1473 }
1474
1475 if (test_type == TEST_HUGETLB)
1476 page_size = default_huge_page_size();
1477 else
1478 page_size = sysconf(_SC_PAGE_SIZE);
1479
1480 if (!page_size) {
1481 fprintf(stderr, "Unable to determine page size\n");
1482 exit(2);
1483 }
1484 if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2
1485 > page_size) {
1486 fprintf(stderr, "Impossible to run this test\n");
1487 exit(2);
1488 }
1489 }
1490
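/*
 * SIGALRM handler: periodically re-arm the -EEXIST retry paths so the
 * UFFDIO_COPY/UFFDIO_ZEROPAGE retry code keeps being exercised (see
 * ALARM_INTERVAL_SECS).
 */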
1491 static void sigalrm(int sig)
1492 {
1493 if (sig != SIGALRM)
1494 abort();
1495 test_uffdio_copy_eexist = true;
1496 test_uffdio_zeropage_eexist = true;
1497 alarm(ALARM_INTERVAL_SECS);
1498 }
1499
1500 int main(int argc, char **argv)
1501 {
1502 if (argc < 4)
1503 usage();
1504
1505 if (signal(SIGALRM, sigalrm) == SIG_ERR) {
1506 fprintf(stderr, "failed to arm SIGALRM");
1507 exit(1);
1508 }
1509 alarm(ALARM_INTERVAL_SECS);
1510
1511 set_test_type(argv[1]);
1512
1513 nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
1514 nr_pages_per_cpu = atol(argv[2]) * 1024*1024 / page_size /
1515 nr_cpus;
1516 if (!nr_pages_per_cpu) {
1517 fprintf(stderr, "invalid MiB\n");
1518 usage();
1519 }
1520
1521 bounces = atoi(argv[3]);
1522 if (bounces <= 0) {
1523 fprintf(stderr, "invalid bounces\n");
1524 usage();
1525 }
1526 nr_pages = nr_pages_per_cpu * nr_cpus;
1527
1528 if (test_type == TEST_HUGETLB) {
1529 if (argc < 5)
1530 usage();
1531 huge_fd = open(argv[4], O_CREAT | O_RDWR, 0755);
1532 if (huge_fd < 0) {
1533 fprintf(stderr, "Open of %s failed", argv[3]);
1534 perror("open");
1535 exit(1);
1536 }
1537 if (ftruncate(huge_fd, 0)) {
1538 fprintf(stderr, "ftruncate %s to size 0 failed", argv[3]);
1539 perror("ftruncate");
1540 exit(1);
1541 }
1542 }
1543 printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n",
1544 nr_pages, nr_pages_per_cpu);
1545 return userfaultfd_stress();
1546 }
1547
1548 #else /* __NR_userfaultfd */
1549
1550 #warning "missing __NR_userfaultfd definition"
1551
1552 int main(void)
1553 {
1554 printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n");
1555 return KSFT_SKIP;
1556 }
1557
1558 #endif /* __NR_userfaultfd */
1559