• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Stress userfaultfd syscall.
4  *
5  *  Copyright (C) 2015  Red Hat, Inc.
6  *
7  * This test allocates two virtual areas and bounces the physical
8  * memory across the two virtual areas (from area_src to area_dst)
9  * using userfaultfd.
10  *
11  * There are three threads running per CPU:
12  *
13  * 1) one per-CPU thread takes a per-page pthread_mutex in a random
14  *    page of the area_dst (while the physical page may still be in
15  *    area_src), and increments a per-page counter in the same page,
16  *    and checks its value against a verification region.
17  *
18  * 2) another per-CPU thread handles the userfaults generated by
19  *    thread 1 above. userfaultfd blocking reads or poll() modes are
20  *    exercised interleaved.
21  *
22  * 3) one last per-CPU thread transfers the memory in the background
23  *    at maximum bandwidth (if not already transferred by thread
24  *    2). Each cpu thread takes care of transferring a portion of the
25  *    area.
26  *
27  * When all threads of type 3 completed the transfer, one bounce is
28  * complete. area_src and area_dst are then swapped. All threads are
29  * respawned and so the bounce is immediately restarted in the
30  * opposite direction.
31  *
32  * The per-CPU threads of type 1, by triggering userfaults inside
33  * pthread_mutex_lock, will also verify the atomicity of the memory
34  * transfer (UFFDIO_COPY).
35  */
36 
37 #define _GNU_SOURCE
38 #include <stdio.h>
39 #include <errno.h>
40 #include <unistd.h>
41 #include <stdlib.h>
42 #include <sys/types.h>
43 #include <sys/stat.h>
44 #include <fcntl.h>
45 #include <time.h>
46 #include <signal.h>
47 #include <poll.h>
48 #include <string.h>
49 #include <sys/mman.h>
50 #include <sys/syscall.h>
51 #include <sys/ioctl.h>
52 #include <sys/wait.h>
53 #include <pthread.h>
54 #include <linux/userfaultfd.h>
55 #include <setjmp.h>
56 #include <stdbool.h>
57 
58 #include "../kselftest.h"
59 
#ifdef __NR_userfaultfd

/* Test geometry; set up outside this chunk before the tests run. */
static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size;

/* Bounce-mode flags; several may be combined in a single bounce. */
#define BOUNCE_RANDOM		(1<<0)	/* pick faulting pages at random */
#define BOUNCE_RACINGFAULTS	(1<<1)	/* don't partition pages per cpu */
#define BOUNCE_VERIFY		(1<<2)	/* verify page contents on access */
#define BOUNCE_POLL		(1<<3)	/* poll() handlers instead of blocking reads */
static int bounces;

/* Memory backing type under test. */
#define TEST_ANON	1
#define TEST_HUGETLB	2
#define TEST_SHMEM	3
static int test_type;

/* exercise the test_uffdio_*_eexist every ALARM_INTERVAL_SECS */
#define ALARM_INTERVAL_SECS 10
static volatile bool test_uffdio_copy_eexist = true;
static volatile bool test_uffdio_zeropage_eexist = true;

static bool map_shared;		/* shared mapping (hugetlb_shared/shmem) */
static int huge_fd;		/* hugetlbfs backing file */
static char *huge_fd_off0;	/* mapping of huge_fd at file offset 0 (area_src) */
static unsigned long long *count_verify;	/* expected per-page counters */
static int uffd, uffd_flags, finished, *pipefd;
static volatile bool ready_for_fork;	/* set by uffd_poll_thread once polling */
static char *area_src, *area_src_alias, *area_dst, *area_dst_alias;
static char *zeropage;		/* page of zeroes used for comparisons */
pthread_attr_t attr;
pthread_key_t long_jmp_key;	/* per-thread jmp_buf used by sigusr1_handler */

/* pthread_mutex_t starts at page offset 0 */
#define area_mutex(___area, ___nr)					\
	((pthread_mutex_t *) ((___area) + (___nr)*page_size))
/*
 * count is placed in the page after pthread_mutex_t naturally aligned
 * to avoid non alignment faults on non-x86 archs.
 */
#define area_count(___area, ___nr)					\
	((volatile unsigned long long *) ((unsigned long)		\
				 ((___area) + (___nr)*page_size +	\
				  sizeof(pthread_mutex_t) +		\
				  sizeof(unsigned long long) - 1) &	\
				 ~(unsigned long)(sizeof(unsigned long long) \
						  -  1)))
105 
/* Usage examples printed by usage(). */
const char *examples =
    "# Run anonymous memory test on 100MiB region with 99999 bounces:\n"
    "./userfaultfd anon 100 99999\n\n"
    "# Run share memory test on 1GiB region with 99 bounces:\n"
    "./userfaultfd shmem 1000 99\n\n"
    "# Run hugetlb memory test on 256MiB region with 50 bounces (using /dev/hugepages/hugefile):\n"
    "./userfaultfd hugetlb 256 50 /dev/hugepages/hugefile\n\n"
    "# Run the same hugetlb test but using shmem:\n"
    "./userfaultfd hugetlb_shared 256 50 /dev/hugepages/hugefile\n\n"
    "# 10MiB-~6GiB 999 bounces anonymous test, "
    "continue forever unless an error triggers\n"
    "while ./userfaultfd anon $[RANDOM % 6000 + 10] 999; do true; done\n\n";
118 
usage(void)119 static void usage(void)
120 {
121 	fprintf(stderr, "\nUsage: ./userfaultfd <test type> <MiB> <bounces> "
122 		"[hugetlbfs_file]\n\n");
123 	fprintf(stderr, "Supported <test type>: anon, hugetlb, "
124 		"hugetlb_shared, shmem\n\n");
125 	fprintf(stderr, "Examples:\n\n");
126 	fprintf(stderr, "%s", examples);
127 	exit(1);
128 }
129 
anon_release_pages(char * rel_area)130 static int anon_release_pages(char *rel_area)
131 {
132 	int ret = 0;
133 
134 	if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED)) {
135 		perror("madvise");
136 		ret = 1;
137 	}
138 
139 	return ret;
140 }
141 
anon_allocate_area(void ** alloc_area)142 static void anon_allocate_area(void **alloc_area)
143 {
144 	// We can't use posix_memalign due to pointer-tagging used in bionic.
145 	*alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
146 			   MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
147 	if (*alloc_area == MAP_FAILED) {
148 		fprintf(stderr, "anon memory mmap failed\n");
149 		*alloc_area = NULL;
150 	}
151 }
152 
/*
 * Alias-mapping hook for backings (anon, shmem) that have no separate
 * alias mapping: deliberately does nothing.
 */
static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset)
{
}
156 
157 /* HugeTLB memory */
hugetlb_release_pages(char * rel_area)158 static int hugetlb_release_pages(char *rel_area)
159 {
160 	int ret = 0;
161 
162 	if (fallocate(huge_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
163 				rel_area == huge_fd_off0 ? 0 :
164 				nr_pages * page_size,
165 				nr_pages * page_size)) {
166 		perror("fallocate");
167 		ret = 1;
168 	}
169 
170 	return ret;
171 }
172 
173 
hugetlb_allocate_area(void ** alloc_area)174 static void hugetlb_allocate_area(void **alloc_area)
175 {
176 	void *area_alias = NULL;
177 	char **alloc_area_alias;
178 	*alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
179 			   (map_shared ? MAP_SHARED : MAP_PRIVATE) |
180 			   MAP_HUGETLB,
181 			   huge_fd, *alloc_area == area_src ? 0 :
182 			   nr_pages * page_size);
183 	if (*alloc_area == MAP_FAILED) {
184 		fprintf(stderr, "mmap of hugetlbfs file failed\n");
185 		*alloc_area = NULL;
186 	}
187 
188 	if (map_shared) {
189 		area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
190 				  MAP_SHARED | MAP_HUGETLB,
191 				  huge_fd, *alloc_area == area_src ? 0 :
192 				  nr_pages * page_size);
193 		if (area_alias == MAP_FAILED) {
194 			if (munmap(*alloc_area, nr_pages * page_size) < 0)
195 				perror("hugetlb munmap"), exit(1);
196 			*alloc_area = NULL;
197 			return;
198 		}
199 	}
200 	if (*alloc_area == area_src) {
201 		huge_fd_off0 = *alloc_area;
202 		alloc_area_alias = &area_src_alias;
203 	} else {
204 		alloc_area_alias = &area_dst_alias;
205 	}
206 	if (area_alias)
207 		*alloc_area_alias = area_alias;
208 }
209 
/*
 * Redirect a uffdio destination address into the alias mapping of
 * area_dst (shared hugetlb only), so that the retry ioctl faults the
 * page through a second virtual address.
 */
static void hugetlb_alias_mapping(__u64 *start, size_t len, unsigned long offset)
{
	if (!map_shared)
		return;
	/*
	 * We can't zap just the pagetable with hugetlbfs because
	 * MADV_DONTNEED won't work. So exercise -EEXIST on an alias
	 * mapping where the pagetables are not established initially,
	 * this way we'll exercise the -EEXIST at the fs level.
	 */
	*start = (unsigned long) area_dst_alias + offset;
}
222 
223 /* Shared memory */
shmem_release_pages(char * rel_area)224 static int shmem_release_pages(char *rel_area)
225 {
226 	int ret = 0;
227 
228 	if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE)) {
229 		perror("madvise");
230 		ret = 1;
231 	}
232 
233 	return ret;
234 }
235 
shmem_allocate_area(void ** alloc_area)236 static void shmem_allocate_area(void **alloc_area)
237 {
238 	*alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
239 			   MAP_ANONYMOUS | MAP_SHARED, -1, 0);
240 	if (*alloc_area == MAP_FAILED) {
241 		fprintf(stderr, "shared memory mmap failed\n");
242 		*alloc_area = NULL;
243 	}
244 }
245 
/*
 * Per-backing-type operations: the UFFDIO range-ioctl mask the kernel
 * must advertise at register time, plus hooks to allocate/release the
 * test areas and to redirect an address into an alias mapping (no-op
 * where not applicable).
 */
struct uffd_test_ops {
	unsigned long expected_ioctls;
	void (*allocate_area)(void **alloc_area);
	int (*release_pages)(char *rel_area);
	void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset);
};
252 
/* Range ioctls the anon and shmem backings must advertise. */
#define ANON_EXPECTED_IOCTLS		((1 << _UFFDIO_WAKE) | \
					 (1 << _UFFDIO_COPY) | \
					 (1 << _UFFDIO_ZEROPAGE))

static struct uffd_test_ops anon_uffd_test_ops = {
	.expected_ioctls = ANON_EXPECTED_IOCTLS,
	.allocate_area	= anon_allocate_area,
	.release_pages	= anon_release_pages,
	.alias_mapping = noop_alias_mapping,
};

static struct uffd_test_ops shmem_uffd_test_ops = {
	.expected_ioctls = ANON_EXPECTED_IOCTLS,
	.allocate_area	= shmem_allocate_area,
	.release_pages	= shmem_release_pages,
	.alias_mapping = noop_alias_mapping,
};

/* Only the BASIC range ioctls: no _UFFDIO_ZEROPAGE bit for hugetlb. */
static struct uffd_test_ops hugetlb_uffd_test_ops = {
	.expected_ioctls = UFFD_API_RANGE_IOCTLS_BASIC,
	.allocate_area	= hugetlb_allocate_area,
	.release_pages	= hugetlb_release_pages,
	.alias_mapping = hugetlb_alias_mapping,
};

/* Ops for the backing type selected on the command line. */
static struct uffd_test_ops *uffd_test_ops;
279 
/*
 * memcmp()-style comparison guaranteed to scan byte by byte, in
 * order, so it stays reliable on memory that may be changing under
 * it.  Returns 0 when the first n bytes match, 1 otherwise.
 */
static int my_bcmp(char *str1, char *str2, size_t n)
{
	size_t pos = 0;

	while (pos < n) {
		if (str1[pos] != str2[pos])
			return 1;
		pos++;
	}
	return 0;
}
288 
/*
 * Per-cpu thread of type 1: repeatedly pick a page of area_dst
 * (randomly with BOUNCE_RANDOM, otherwise walking this cpu's slice,
 * or all pages when BOUNCE_RACINGFAULTS), take the in-page mutex and
 * bump the in-page counter, checking it against count_verify[].
 * Touching the mutex/counter through userfaultfd exercises the
 * atomicity of UFFDIO_COPY.
 */
static void *locking_thread(void *arg)
{
	unsigned long cpu = (unsigned long) arg;
	unsigned long page_nr = *(&(page_nr)); /* uninitialized warning */
	unsigned long long count;
	time_t start;

	if (!(bounces & BOUNCE_RANDOM)) {
		/* Sequential scan; the starting point varies with the
		 * bounce count, and the modulo below keeps it in range. */
		page_nr = -bounces;
		if (!(bounces & BOUNCE_RACINGFAULTS))
			/* No racing between cpus: each walks its own slice. */
			page_nr += cpu * nr_pages_per_cpu;
	}

	while (!finished) {
		if (bounces & BOUNCE_RANDOM) {
			page_nr = random();
			if (sizeof(page_nr) > sizeof(uint32_t)) {
				/* random() yields 31 bits; widen with a
				 * second call shifted up by 32. */
				page_nr |= (((unsigned long) random()) << 16) <<
					   16;
			}
		} else
			page_nr += 1;
		page_nr %= nr_pages;

		start = time(NULL);
		if (bounces & BOUNCE_VERIFY) {
			count = *area_count(area_dst, page_nr);
			if (!count)
				fprintf(stderr,
					"page_nr %lu wrong count %Lu %Lu\n",
					page_nr, count,
					count_verify[page_nr]), exit(1);


			/*
			 * We can't use bcmp (or memcmp) because that
			 * returns 0 erroneously if the memory is
			 * changing under it (even if the end of the
			 * page is never changing and always
			 * different).
			 */
#if 1
			if (!my_bcmp(area_dst + page_nr * page_size, zeropage,
				     page_size))
				fprintf(stderr,
					"my_bcmp page_nr %lu wrong count %Lu %Lu\n",
					page_nr, count,
					count_verify[page_nr]), exit(1);
#else
			unsigned long loops;

			loops = 0;
			/* uncomment the below line to test with mutex */
			/* pthread_mutex_lock(area_mutex(area_dst, page_nr)); */
			while (!bcmp(area_dst + page_nr * page_size, zeropage,
				     page_size)) {
				loops += 1;
				if (loops > 10)
					break;
			}
			/* uncomment below line to test with mutex */
			/* pthread_mutex_unlock(area_mutex(area_dst, page_nr)); */
			if (loops) {
				fprintf(stderr,
					"page_nr %lu all zero thread %lu %p %lu\n",
					page_nr, cpu, area_dst + page_nr * page_size,
					loops);
				if (loops > 10)
					exit(1);
			}
#endif
		}

		/* Locking the in-page mutex may itself trigger the
		 * userfault; the counter check below verifies the page
		 * arrived atomically and exactly once. */
		pthread_mutex_lock(area_mutex(area_dst, page_nr));
		count = *area_count(area_dst, page_nr);
		if (count != count_verify[page_nr]) {
			fprintf(stderr,
				"page_nr %lu memory corruption %Lu %Lu\n",
				page_nr, count,
				count_verify[page_nr]), exit(1);
		}
		count++;
		*area_count(area_dst, page_nr) = count_verify[page_nr] = count;
		pthread_mutex_unlock(area_mutex(area_dst, page_nr));

		if (time(NULL) - start > 1)
			fprintf(stderr,
				"userfault too slow %ld "
				"possible false positive with overcommit\n",
				time(NULL) - start);
	}

	return NULL;
}
383 
/*
 * Re-issue a UFFDIO_COPY that already succeeded, after redirecting
 * the destination through the backing type's alias mapping.  The
 * retry must fail with -EEXIST; any other outcome is fatal.
 */
static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy,
			    unsigned long offset)
{
	uffd_test_ops->alias_mapping(&uffdio_copy->dst,
				     uffdio_copy->len,
				     offset);
	if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) {
		/* real retval in ufdio_copy.copy */
		if (uffdio_copy->copy != -EEXIST)
			fprintf(stderr, "UFFDIO_COPY retry error %Ld\n",
				uffdio_copy->copy), exit(1);
	} else {
		/* Succeeding a second time means the page was missing. */
		fprintf(stderr,	"UFFDIO_COPY retry unexpected %Ld\n",
			uffdio_copy->copy), exit(1);
	}
}
400 
/*
 * Resolve the page at area_dst+offset by UFFDIO_COPY from area_src.
 * Returns 1 if this call performed the copy, 0 if the page was
 * already in place (-EEXIST, e.g. copied by a racing thread).  With
 * retry set, a successful copy is occasionally re-issued through the
 * alias mapping to exercise the -EEXIST path.
 */
static int __copy_page(int ufd, unsigned long offset, bool retry)
{
	struct uffdio_copy uffdio_copy;

	if (offset >= nr_pages * page_size)
		fprintf(stderr, "unexpected offset %lu\n",
			offset), exit(1);
	uffdio_copy.dst = (unsigned long) area_dst + offset;
	uffdio_copy.src = (unsigned long) area_src + offset;
	uffdio_copy.len = page_size;
	uffdio_copy.mode = 0;
	uffdio_copy.copy = 0;
	if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) {
		/* real retval in ufdio_copy.copy */
		if (uffdio_copy.copy != -EEXIST)
			fprintf(stderr, "UFFDIO_COPY error %Ld\n",
				uffdio_copy.copy), exit(1);
	} else if (uffdio_copy.copy != page_size) {
		fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n",
			uffdio_copy.copy), exit(1);
	} else {
		if (test_uffdio_copy_eexist && retry) {
			/* One retry per interval; presumably re-armed by
			 * an alarm handler outside this chunk — confirm. */
			test_uffdio_copy_eexist = false;
			retry_copy_page(ufd, &uffdio_copy, offset);
		}
		return 1;
	}
	return 0;
}
430 
/* copy_page() variant that also exercises the -EEXIST retry path. */
static int copy_page_retry(int ufd, unsigned long offset)
{
	return __copy_page(ufd, offset, true);
}
435 
/* Resolve one page via UFFDIO_COPY without the retry test. */
static int copy_page(int ufd, unsigned long offset)
{
	return __copy_page(ufd, offset, false);
}
440 
/*
 * Read one event from the userfaultfd.  Returns 0 when a message was
 * read into *msg, 1 when the fd is non-blocking and no message is
 * pending (EAGAIN).  Any other error or a short read is fatal.
 */
static int uffd_read_msg(int ufd, struct uffd_msg *msg)
{
	/*
	 * Read from the fd the caller passed, not the global uffd:
	 * the original ignored its ufd parameter, which only worked
	 * because every caller happened to pass the global.
	 */
	int ret = read(ufd, msg, sizeof(*msg));

	if (ret != sizeof(*msg)) {
		if (ret < 0) {
			if (errno == EAGAIN)
				return 1;
			else
				perror("blocking read error"), exit(1);
		} else {
			/* Kernel delivers whole messages; a short read
			 * means something is badly wrong. */
			fprintf(stderr, "short read\n"), exit(1);
		}
	}

	return 0;
}
458 
/* Return 1 if page fault handled by us; otherwise 0 */
static int uffd_handle_page_fault(struct uffd_msg *msg)
{
	unsigned long offset;

	if (msg->event != UFFD_EVENT_PAGEFAULT)
		fprintf(stderr, "unexpected msg event %u\n",
			msg->event), exit(1);

	/* Only missing faults are expected; a write-protect fault
	 * during a verify run indicates a bug. */
	if (bounces & BOUNCE_VERIFY &&
	    msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
		fprintf(stderr, "unexpected write fault\n"), exit(1);

	/* Translate the faulting address into a page-aligned offset
	 * within area_dst. */
	offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst;
	offset &= ~(page_size-1);

	return copy_page(uffd, offset);
}
477 
/*
 * Per-cpu userfault handler in poll mode: service the userfaultfd
 * until a wakeup byte arrives on this cpu's pipe, then return the
 * number of page faults resolved (cast to void *).  FORK, REMAP and
 * REMOVE events from the non-cooperative tests are handled inline.
 */
static void *uffd_poll_thread(void *arg)
{
	unsigned long cpu = (unsigned long) arg;
	struct pollfd pollfd[2];
	struct uffd_msg msg;
	struct uffdio_register uffd_reg;
	int ret;
	char tmp_chr;
	unsigned long userfaults = 0;

	pollfd[0].fd = uffd;
	pollfd[0].events = POLLIN;
	pollfd[1].fd = pipefd[cpu*2];
	pollfd[1].events = POLLIN;

	// Notify the main thread that it can now fork.
	ready_for_fork = true;

	for (;;) {
		ret = poll(pollfd, 2, -1);
		if (!ret)
			/* Infinite timeout: returning 0 is impossible
			 * unless something is broken. */
			fprintf(stderr, "poll error %d\n", ret), exit(1);
		if (ret < 0)
			perror("poll"), exit(1);
		if (pollfd[1].revents & POLLIN) {
			/* Wakeup byte: the test is over. */
			if (read(pollfd[1].fd, &tmp_chr, 1) != 1)
				fprintf(stderr, "read pipefd error\n"),
					exit(1);
			break;
		}
		if (!(pollfd[0].revents & POLLIN))
			fprintf(stderr, "pollfd[0].revents %d\n",
				pollfd[0].revents), exit(1);
		if (uffd_read_msg(uffd, &msg))
			continue;	/* EAGAIN: spurious wakeup */
		switch (msg.event) {
		default:
			fprintf(stderr, "unexpected msg event %u\n",
				msg.event), exit(1);
			break;
		case UFFD_EVENT_PAGEFAULT:
			userfaults += uffd_handle_page_fault(&msg);
			break;
		case UFFD_EVENT_FORK:
			/* Switch to monitoring the forked child's uffd. */
			close(uffd);
			uffd = msg.arg.fork.ufd;
			pollfd[0].fd = uffd;
			break;
		case UFFD_EVENT_REMOVE:
			/* Stop monitoring the removed range. */
			uffd_reg.range.start = msg.arg.remove.start;
			uffd_reg.range.len = msg.arg.remove.end -
				msg.arg.remove.start;
			if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range))
				fprintf(stderr, "remove failure\n"), exit(1);
			break;
		case UFFD_EVENT_REMAP:
			/* Follow area_dst across mremap() in the
			 * faulting process. */
			area_dst = (char *)(unsigned long)msg.arg.remap.to;
			break;
		}
	}
	return (void *)userfaults;
}
540 
/*
 * Startup handshake for read-mode handlers: stress() locks this after
 * spawning each uffd_read_thread; the thread unlocks it once it is up
 * and about to enter its read loop.
 */
pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER;
542 
/*
 * SIGUSR1 handler used to break uffd_read_thread out of its blocking
 * read loop: jump back to the jmp_buf this thread stashed under
 * long_jmp_key.
 * NOTE(review): plain longjmp() out of a signal handler — relies on
 * the target setjmp() frame still being live on this thread; confirm
 * siglongjmp/sigsetjmp aren't needed for the signal mask here.
 */
static void sigusr1_handler(int signum, siginfo_t *siginfo, void *ptr)
{
	jmp_buf *env;
	env = pthread_getspecific(long_jmp_key);
	longjmp(*env, 1);
}
549 
/*
 * Per-cpu userfault handler in blocking-read mode: loop on blocking
 * reads of the userfaultfd, resolving each fault with UFFDIO_COPY,
 * until SIGUSR1 longjmp()s us out of the loop (see sigusr1_handler).
 * arg points at this cpu's userfault counter.
 */
static void *uffd_read_thread(void *arg)
{
	unsigned long *this_cpu_userfaults;
	struct uffd_msg msg;
	jmp_buf env;
	int setjmp_ret;

	/* Publish our jmp_buf so the SIGUSR1 handler can find it. */
	pthread_setspecific(long_jmp_key, &env);

	this_cpu_userfaults = (unsigned long *) arg;
	*this_cpu_userfaults = 0;

	/* Tell stress() we are up and running. */
	pthread_mutex_unlock(&uffd_read_mutex);

	// On the first return setjmp() returns 0. On the second (fake)
	// return from longjmp() it returns the provided value, 1 here.
	setjmp_ret = setjmp(env);
	while (!setjmp_ret) {
		if (uffd_read_msg(uffd, &msg))
			continue;	/* EAGAIN */
		(*this_cpu_userfaults) += uffd_handle_page_fault(&msg);
	}
	return (void *)NULL;
}
574 
background_thread(void * arg)575 static void *background_thread(void *arg)
576 {
577 	unsigned long cpu = (unsigned long) arg;
578 	unsigned long page_nr;
579 
580 	for (page_nr = cpu * nr_pages_per_cpu;
581 	     page_nr < (cpu+1) * nr_pages_per_cpu;
582 	     page_nr++)
583 		copy_page_retry(uffd, page_nr * page_size);
584 
585 	return NULL;
586 }
587 
/*
 * Run one bounce: spawn the three per-cpu thread types (locking,
 * userfault handler in poll or read mode, background copier), wait
 * for the background transfer to finish, zap area_src, then tear the
 * threads down.  In poll mode userfaults[] receives the per-cpu
 * fault counts.  Returns 0 on success, 1 on any failure.
 */
static int stress(unsigned long *userfaults)
{
	unsigned long cpu;
	pthread_t locking_threads[nr_cpus];
	pthread_t uffd_threads[nr_cpus];
	pthread_t background_threads[nr_cpus];
	void **_userfaults = (void **) userfaults;

	finished = 0;
	for (cpu = 0; cpu < nr_cpus; cpu++) {
		if (pthread_create(&locking_threads[cpu], &attr,
				   locking_thread, (void *)cpu))
			return 1;
		if (bounces & BOUNCE_POLL) {
			if (pthread_create(&uffd_threads[cpu], &attr,
					   uffd_poll_thread, (void *)cpu))
				return 1;
		} else {
			if (pthread_create(&uffd_threads[cpu], &attr,
					   uffd_read_thread,
					   &_userfaults[cpu]))
				return 1;
			/* Block until the read thread unlocks the mutex,
			 * i.e. it is up and servicing faults. */
			pthread_mutex_lock(&uffd_read_mutex);
		}
		if (pthread_create(&background_threads[cpu], &attr,
				   background_thread, (void *)cpu))
			return 1;
	}

	for (cpu = 0; cpu < nr_cpus; cpu++)
		if (pthread_join(background_threads[cpu], NULL))
			return 1;

	/*
	 * Be strict and immediately zap area_src, the whole area has
	 * been transferred already by the background threads. The
	 * area_src could then be faulted in in a racy way by still
	 * running uffdio_threads reading zeropages after we zapped
	 * area_src (but they're guaranteed to get -EEXIST from
	 * UFFDIO_COPY without writing zero pages into area_dst
	 * because the background threads already completed).
	 */
	if (uffd_test_ops->release_pages(area_src))
		return 1;


	finished = 1;
	for (cpu = 0; cpu < nr_cpus; cpu++)
		if (pthread_join(locking_threads[cpu], NULL))
			return 1;

	/* Stop the handlers: poll mode gets a wakeup byte on its pipe,
	 * read mode gets SIGUSR1 to longjmp out of the blocking read. */
	for (cpu = 0; cpu < nr_cpus; cpu++) {
		char c;
		if (bounces & BOUNCE_POLL) {
			if (write(pipefd[cpu*2+1], &c, 1) != 1) {
				fprintf(stderr, "pipefd write error\n");
				return 1;
			}
			if (pthread_join(uffd_threads[cpu], &_userfaults[cpu]))
				return 1;
		} else {
			if (pthread_kill(uffd_threads[cpu], SIGUSR1))
				return 1;
			if (pthread_join(uffd_threads[cpu], NULL))
				return 1;
		}
	}

	return 0;
}
658 
/*
 * Open the global userfaultfd with the requested feature bits and
 * perform the UFFDIO_API handshake.  Returns 0 on success, 1 on
 * failure; exits with KSFT_SKIP if the kernel lacks the syscall.
 */
static int userfaultfd_open(int features)
{
	struct uffdio_api uffdio_api;

	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
	if (uffd < 0) {
		if (errno == ENOSYS) {
			fprintf(stderr,
				"userfaultfd syscall not available in this kernel\n");
			exit(KSFT_SKIP);
		}
		fprintf(stderr,
			"userfaultfd syscall failed with errno: %d\n", errno);
		return 1;
	}
	/* NOTE(review): F_GETFD returns fd flags (FD_CLOEXEC), yet the
	 * saved value is later used with F_SETFL by the tests; F_GETFL
	 * looks intended — confirm before changing. */
	uffd_flags = fcntl(uffd, F_GETFD, NULL);

	uffdio_api.api = UFFD_API;
	uffdio_api.features = features;
	if (ioctl(uffd, UFFDIO_API, &uffdio_api)) {
		fprintf(stderr, "UFFDIO_API\n");
		return 1;
	}
	if (uffdio_api.api != UFFD_API) {
		fprintf(stderr, "UFFDIO_API error %Lu\n", uffdio_api.api);
		return 1;
	}

	return 0;
}
689 
/* SIGBUS recovery point for faulting_process(); sigbuf is non-NULL
 * only while a signal test has armed it. */
sigjmp_buf jbuf, *sigbuf;
691 
/*
 * SIGBUS handler for the UFFD_FEATURE_SIGBUS test: jump back to the
 * armed sigsetjmp point, or abort if none was armed.  Other signals
 * are ignored.
 */
static void sighndl(int sig, siginfo_t *siginfo, void *ptr)
{
	if (sig != SIGBUS)
		return;

	if (sigbuf)
		siglongjmp(*sigbuf, 1);
	abort();
}
700 
701 /*
702  * For non-cooperative userfaultfd test we fork() a process that will
703  * generate pagefaults, will mremap the area monitored by the
704  * userfaultfd and at last this process will release the monitored
705  * area.
706  * For the anonymous and shared memory the area is divided into two
707  * parts, the first part is accessed before mremap, and the second
708  * part is accessed after mremap. Since hugetlbfs does not support
709  * mremap, the entire monitored area is accessed in a single pass for
710  * HUGETLB_TEST.
711  * The release of the pages currently generates event for shmem and
712  * anonymous memory (UFFD_EVENT_REMOVE), hence it is not checked
713  * for hugetlb.
714  * For signal test(UFFD_FEATURE_SIGBUS), signal_test = 1, we register
715  * monitored area, generate pagefaults and test that signal is delivered.
716  * Use UFFDIO_COPY to allocate missing page and retry. For signal_test = 2
717  * test robustness use case - we release monitored area, fork a process
718  * that will generate pagefaults and verify signal is generated.
719  * This also tests UFFD_FEATURE_EVENT_FORK event along with the signal
720  * feature. Using monitor thread, verify no userfault events are generated.
721  */
static int faulting_process(int signal_test)
{
	unsigned long nr;
	unsigned long long count;
	unsigned long split_nr_pages;
	unsigned long lastnr;
	struct sigaction act;
	unsigned long signalled = 0;

	/* hugetlb can't mremap, so it gets a single full pass. */
	if (test_type != TEST_HUGETLB)
		split_nr_pages = (nr_pages + 1) / 2;
	else
		split_nr_pages = nr_pages;

	if (signal_test) {
		/* Arm the SIGBUS handler: sighndl() jumps back to jbuf. */
		sigbuf = &jbuf;
		memset(&act, 0, sizeof(act));
		act.sa_sigaction = sighndl;
		act.sa_flags = SA_SIGINFO;
		if (sigaction(SIGBUS, &act, 0)) {
			perror("sigaction");
			return 1;
		}
		lastnr = (unsigned long)-1;
	}

	for (nr = 0; nr < split_nr_pages; nr++) {
		if (signal_test) {
			if (sigsetjmp(*sigbuf, 1) != 0) {
				/* We got here via SIGBUS on page nr. */
				if (nr == lastnr) {
					/* Two signals for the same page:
					 * the copy didn't take. */
					fprintf(stderr, "Signal repeated\n");
					return 1;
				}

				lastnr = nr;
				if (signal_test == 1) {
					/* Fill the page in and fall through
					 * to retry the access; only count
					 * when we performed the copy. */
					if (copy_page(uffd, nr * page_size))
						signalled++;
				} else {
					/* signal_test == 2: just count the
					 * signal and skip the page. */
					signalled++;
					continue;
				}
			}
		}

		count = *area_count(area_dst, nr);
		if (count != count_verify[nr]) {
			fprintf(stderr,
				"nr %lu memory corruption %Lu %Lu\n",
				nr, count,
				count_verify[nr]), exit(1);
		}
	}

	/* Signal tests succeed iff every page delivered a signal. */
	if (signal_test)
		return signalled != split_nr_pages;

	if (test_type == TEST_HUGETLB)
		return 0;

	/* Move the monitored area on top of area_src to generate
	 * UFFD_EVENT_REMAP, then touch the second half at the new
	 * address. */
	area_dst = mremap(area_dst, nr_pages * page_size,  nr_pages * page_size,
			  MREMAP_MAYMOVE | MREMAP_FIXED, area_src);
	if (area_dst == MAP_FAILED)
		perror("mremap"), exit(1);

	for (; nr < nr_pages; nr++) {
		count = *area_count(area_dst, nr);
		if (count != count_verify[nr]) {
			fprintf(stderr,
				"nr %lu memory corruption %Lu %Lu\n",
				nr, count,
				count_verify[nr]), exit(1);
		}
	}

	/* Release the area (generates UFFD_EVENT_REMOVE) ... */
	if (uffd_test_ops->release_pages(area_dst))
		return 1;

	/* ... and verify every page now reads back as zeroes. */
	for (nr = 0; nr < nr_pages; nr++) {
		if (my_bcmp(area_dst + nr * page_size, zeropage, page_size))
			fprintf(stderr, "nr %lu is not zero\n", nr), exit(1);
	}

	return 0;
}
807 
/*
 * Re-issue a UFFDIO_ZEROPAGE that already succeeded, through the
 * alias mapping.  The retry must fail with -EEXIST; any other outcome
 * is fatal.
 */
static void retry_uffdio_zeropage(int ufd,
				  struct uffdio_zeropage *uffdio_zeropage,
				  unsigned long offset)
{
	uffd_test_ops->alias_mapping(&uffdio_zeropage->range.start,
				     uffdio_zeropage->range.len,
				     offset);
	if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) {
		if (uffdio_zeropage->zeropage != -EEXIST)
			fprintf(stderr, "UFFDIO_ZEROPAGE retry error %Ld\n",
				uffdio_zeropage->zeropage), exit(1);
	} else {
		fprintf(stderr, "UFFDIO_ZEROPAGE retry unexpected %Ld\n",
			uffdio_zeropage->zeropage), exit(1);
	}
}
824 
/*
 * Install the zeropage at area_dst+offset.  Returns 1 when the
 * zeropage was installed, 0 otherwise.  For backings whose
 * expected_ioctls lack _UFFDIO_ZEROPAGE the ioctl must fail with
 * -EINVAL; otherwise only -EEXIST is tolerated.  With retry set, a
 * success is occasionally re-issued via the alias mapping.
 */
static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
{
	struct uffdio_zeropage uffdio_zeropage;
	int ret;
	unsigned long has_zeropage;

	has_zeropage = uffd_test_ops->expected_ioctls & (1 << _UFFDIO_ZEROPAGE);

	if (offset >= nr_pages * page_size)
		fprintf(stderr, "unexpected offset %lu\n",
			offset), exit(1);
	uffdio_zeropage.range.start = (unsigned long) area_dst + offset;
	uffdio_zeropage.range.len = page_size;
	uffdio_zeropage.mode = 0;
	ret = ioctl(ufd, UFFDIO_ZEROPAGE, &uffdio_zeropage);
	if (ret) {
		/* real retval in ufdio_zeropage.zeropage */
		if (has_zeropage) {
			/* Nothing else populates this page, so even
			 * -EEXIST is a failure here. */
			if (uffdio_zeropage.zeropage == -EEXIST)
				fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n"),
					exit(1);
			else
				fprintf(stderr, "UFFDIO_ZEROPAGE error %Ld\n",
					uffdio_zeropage.zeropage), exit(1);
		} else {
			/* Unsupported backing: only -EINVAL is valid. */
			if (uffdio_zeropage.zeropage != -EINVAL)
				fprintf(stderr,
					"UFFDIO_ZEROPAGE not -EINVAL %Ld\n",
					uffdio_zeropage.zeropage), exit(1);
		}
	} else if (has_zeropage) {
		if (uffdio_zeropage.zeropage != page_size) {
			fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n",
				uffdio_zeropage.zeropage), exit(1);
		} else {
			if (test_uffdio_zeropage_eexist && retry) {
				test_uffdio_zeropage_eexist = false;
				retry_uffdio_zeropage(ufd, &uffdio_zeropage,
						      offset);
			}
			return 1;
		}
	} else {
		/* Success on a backing that shouldn't support it. */
		fprintf(stderr,
			"UFFDIO_ZEROPAGE succeeded %Ld\n",
			uffdio_zeropage.zeropage), exit(1);
	}

	return 0;
}
875 
/* Install the zeropage at area_dst+offset without the retry test. */
static int uffdio_zeropage(int ufd, unsigned long offset)
{
	return __uffdio_zeropage(ufd, offset, false);
}
880 
881 /* exercise UFFDIO_ZEROPAGE */
static int userfaultfd_zeropage_test(void)
{
	struct uffdio_register uffdio_register;
	unsigned long expected_ioctls;

	printf("testing UFFDIO_ZEROPAGE: ");
	fflush(stdout);

	/* Start from an empty destination area. */
	if (uffd_test_ops->release_pages(area_dst))
		return 1;

	if (userfaultfd_open(0) < 0)
		return 1;
	uffdio_register.range.start = (unsigned long) area_dst;
	uffdio_register.range.len = nr_pages * page_size;
	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
		fprintf(stderr, "register failure\n"), exit(1);

	/* The kernel must advertise at least the ioctls this backing
	 * type is expected to support. */
	expected_ioctls = uffd_test_ops->expected_ioctls;
	if ((uffdio_register.ioctls & expected_ioctls) !=
	    expected_ioctls)
		fprintf(stderr,
			"unexpected missing ioctl for anon memory\n"),
			exit(1);

	/* If the zeropage was installed, the first page must read back
	 * as all zeroes. */
	if (uffdio_zeropage(uffd, 0)) {
		if (my_bcmp(area_dst, zeropage, page_size))
			fprintf(stderr, "zeropage is not zero\n"), exit(1);
	}

	close(uffd);
	printf("done.\n");
	return 0;
}
917 
/*
 * Exercise the non-cooperative events: fork a child that touches
 * every page (triggering FORK/REMAP/REMOVE events plus one fault per
 * page) while a single uffd_poll_thread services them.  Returns 0
 * when the monitor resolved exactly nr_pages faults.
 */
static int userfaultfd_events_test(void)
{
	struct uffdio_register uffdio_register;
	unsigned long expected_ioctls;
	unsigned long userfaults;
	pthread_t uffd_mon;
	int err, features;
	pid_t pid;
	char c;

	// All the syscalls below up to pthread_create will ensure that
	// this write is completed before the uffd thread sets it to true.
	ready_for_fork = false;

	printf("testing events (fork, remap, remove): ");
	fflush(stdout);

	/* Start from an empty destination area. */
	if (uffd_test_ops->release_pages(area_dst))
		return 1;

	features = UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_EVENT_REMAP |
		UFFD_FEATURE_EVENT_REMOVE;
	if (userfaultfd_open(features) < 0)
		return 1;
	fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);

	uffdio_register.range.start = (unsigned long) area_dst;
	uffdio_register.range.len = nr_pages * page_size;
	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
		fprintf(stderr, "register failure\n"), exit(1);

	expected_ioctls = uffd_test_ops->expected_ioctls;
	if ((uffdio_register.ioctls & expected_ioctls) !=
	    expected_ioctls)
		fprintf(stderr,
			"unexpected missing ioctl for anon memory\n"),
			exit(1);

	/* Single monitor thread (cpu 0) services all events. */
	if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, NULL))
		perror("uffd_poll_thread create"), exit(1);

	// Wait for the poll_thread to start executing before forking. This is
	// required to avoid a deadlock, which can happen if poll_thread doesn't
	// start getting executed by the time fork is invoked.
	while (!ready_for_fork);

	pid = fork();
	if (pid < 0)
		perror("fork"), exit(1);

	/* Child: touch every page of area_dst. */
	if (!pid)
		return faulting_process(0);

	waitpid(pid, &err, 0);
	if (err)
		fprintf(stderr, "faulting process failed\n"), exit(1);

	/* Wake the monitor through its pipe and collect its count. */
	if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
		perror("pipe write"), exit(1);
	if (pthread_join(uffd_mon, (void **)&userfaults))
		return 1;

	close(uffd);
	printf("userfaults: %ld\n", userfaults);

	/* Every page must have faulted exactly once. */
	return userfaults != nr_pages;
}
986 
/*
 * Test UFFD_FEATURE_SIGBUS: when this feature is enabled, a fault on a
 * missing registered page must raise SIGBUS in the faulting process
 * instead of being queued as a userfault message.  The poll thread
 * therefore must observe zero userfaults for the test to pass
 * (checked by the "return userfaults != 0" at the bottom).
 */
static int userfaultfd_sig_test(void)
{
	struct uffdio_register uffdio_register;
	unsigned long expected_ioctls;
	unsigned long userfaults;	/* count returned by uffd_poll_thread */
	pthread_t uffd_mon;		/* thread draining the uffd via poll() */
	int err, features;
	pid_t pid;
	char c;				/* dummy byte to wake the poll thread */

	// All the syscalls below up to pthread_create will ensure that this
	// write is completed before, the uffd_thread sets it to true.
	ready_for_fork = false;

	printf("testing signal delivery: ");
	fflush(stdout);

	/* Zap area_dst so the accesses below fault on every page. */
	if (uffd_test_ops->release_pages(area_dst))
		return 1;

	features = UFFD_FEATURE_EVENT_FORK|UFFD_FEATURE_SIGBUS;
	if (userfaultfd_open(features) < 0)
		return 1;
	/* Non-blocking mode so uffd_poll_thread can drive the fd via poll(). */
	fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);

	/* Register the whole destination area in missing-page mode. */
	uffdio_register.range.start = (unsigned long) area_dst;
	uffdio_register.range.len = nr_pages * page_size;
	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
		fprintf(stderr, "register failure\n"), exit(1);

	/* The kernel must advertise every ioctl this memory type needs. */
	expected_ioctls = uffd_test_ops->expected_ioctls;
	if ((uffdio_register.ioctls & expected_ioctls) !=
	    expected_ioctls)
		fprintf(stderr,
			"unexpected missing ioctl for anon memory\n"),
			exit(1);

	/*
	 * First faulting pass runs in this process.  faulting_process()
	 * is defined earlier in this file; presumably mode 1 faults the
	 * area while catching the SIGBUS deliveries — TODO(review):
	 * confirm the mode semantics against its definition.
	 */
	if (faulting_process(1))
		fprintf(stderr, "faulting process failed\n"), exit(1);

	/* Zap again so the forked child below faults on every page too. */
	if (uffd_test_ops->release_pages(area_dst))
		return 1;

	if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, NULL))
		perror("uffd_poll_thread create"), exit(1);

	// Wait for the poll_thread to start executing before forking. This is
	// required to avoid a deadlock, which can happen if poll_thread doesn't
	// start getting executed by the time fork is invoked.
	// NOTE(review): this busy-wait relies on ready_for_fork being
	// visible across threads — confirm it is declared volatile/atomic.
	while (!ready_for_fork);

	pid = fork();
	if (pid < 0)
		perror("fork"), exit(1);

	/* Child: second faulting pass (mode 2), exit status is its result. */
	if (!pid)
		exit(faulting_process(2));

	waitpid(pid, &err, 0);	/* err holds the raw wait status */
	if (err)
		fprintf(stderr, "faulting process failed\n"), exit(1);

	/* Write into the pipe to make uffd_poll_thread exit its loop. */
	if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
		perror("pipe write"), exit(1);
	/* The poll thread returns its userfault count as the thread result. */
	if (pthread_join(uffd_mon, (void **)&userfaults))
		return 1;

	printf("done.\n");
	if (userfaults)
		fprintf(stderr, "Signal test failed, userfaults: %ld\n",
			userfaults);
	close(uffd);
	/* SIGBUS mode means no userfault message may ever have been queued. */
	return userfaults != 0;
}
/*
 * Core stress test: bounce the physical memory between area_src and
 * area_dst "bounces" times through userfaultfd (see the comment at the
 * top of the file for the three thread roles), verifying the per-page
 * counters after each bounce, then chain into the zeropage, signal and
 * events subtests.  Returns 0 on success, non-zero on failure.
 */
static int userfaultfd_stress(void)
{
	void *area;
	char *tmp_area;
	unsigned long nr;
	struct uffdio_register uffdio_register;
	struct sigaction act;
	unsigned long cpu;
	int err;
	/* one userfault-count slot per CPU, filled in by stress() (VLA) */
	unsigned long userfaults[nr_cpus];

	uffd_test_ops->allocate_area((void **)&area_src);
	if (!area_src)
		return 1;
	uffd_test_ops->allocate_area((void **)&area_dst);
	if (!area_dst)
		return 1;

	if (userfaultfd_open(0) < 0)
		return 1;

	/* expected value of each per-page counter, one slot per page */
	count_verify = malloc(nr_pages * sizeof(unsigned long long));
	if (!count_verify) {
		perror("count_verify");
		return 1;
	}

	/* Seed every page: an unlocked mutex and a counter starting at 1. */
	for (nr = 0; nr < nr_pages; nr++) {
		*area_mutex(area_src, nr) = (pthread_mutex_t)
			PTHREAD_MUTEX_INITIALIZER;
		count_verify[nr] = *area_count(area_src, nr) = 1;
		/*
		 * In the transition between 255 to 256, powerpc will
		 * read out of order in my_bcmp and see both bytes as
		 * zero, so leave a placeholder below always non-zero
		 * after the count, to avoid my_bcmp to trigger false
		 * positives.
		 */
		*(area_count(area_src, nr) + 1) = 1;
	}

	/* one pipe pair per CPU, used to tell the poll threads to exit */
	pipefd = malloc(sizeof(int) * nr_cpus * 2);
	if (!pipefd) {
		perror("pipefd");
		return 1;
	}
	for (cpu = 0; cpu < nr_cpus; cpu++) {
		if (pipe2(&pipefd[cpu*2], O_CLOEXEC | O_NONBLOCK)) {
			perror("pipe");
			return 1;
		}
	}

	/* page-aligned all-zero scratch page, kept in the global zeropage */
	if (posix_memalign(&area, page_size, page_size)) {
		fprintf(stderr, "out of memory\n");
		return 1;
	}
	zeropage = area;
	bzero(zeropage, page_size);

	pthread_mutex_lock(&uffd_read_mutex);

	/* Large stacks for the worker threads spawned by stress(). */
	pthread_attr_init(&attr);
	pthread_attr_setstacksize(&attr, 16*1024*1024);

	// For handling thread termination of read thread in the absense of
	// pthread_cancel().
	pthread_key_create(&long_jmp_key, NULL);
	memset(&act, 0, sizeof(act));
	act.sa_sigaction = sigusr1_handler;
	act.sa_flags = SA_SIGINFO;
	if (sigaction(SIGUSR1, &act, 0)) {
		perror("sigaction");
		return 1;
	}

	err = 0;
	while (bounces--) {
		unsigned long expected_ioctls;

		/* Announce which modes this bounce exercises. */
		printf("bounces: %d, mode:", bounces);
		if (bounces & BOUNCE_RANDOM)
			printf(" rnd");
		if (bounces & BOUNCE_RACINGFAULTS)
			printf(" racing");
		if (bounces & BOUNCE_VERIFY)
			printf(" ver");
		if (bounces & BOUNCE_POLL)
			printf(" poll");
		printf(", ");
		fflush(stdout);

		/* Alternate between poll() mode and blocking-read mode. */
		if (bounces & BOUNCE_POLL)
			fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
		else
			fcntl(uffd, F_SETFL, uffd_flags & ~O_NONBLOCK);

		/* register */
		uffdio_register.range.start = (unsigned long) area_dst;
		uffdio_register.range.len = nr_pages * page_size;
		uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
		if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
			fprintf(stderr, "register failure\n");
			return 1;
		}
		expected_ioctls = uffd_test_ops->expected_ioctls;
		if ((uffdio_register.ioctls & expected_ioctls) !=
		    expected_ioctls) {
			fprintf(stderr,
				"unexpected missing ioctl for anon memory\n");
			return 1;
		}

		/* Shared-memory tests also register the alias mapping. */
		if (area_dst_alias) {
			uffdio_register.range.start = (unsigned long)
				area_dst_alias;
			if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
				fprintf(stderr, "register failure alias\n");
				return 1;
			}
		}

		/*
		 * The madvise done previously isn't enough: some
		 * uffd_thread could have read userfaults (one of
		 * those already resolved by the background thread)
		 * and it may be in the process of calling
		 * UFFDIO_COPY. UFFDIO_COPY will read the zapped
		 * area_src and it would map a zero page in it (of
		 * course such a UFFDIO_COPY is perfectly safe as it'd
		 * return -EEXIST). The problem comes at the next
		 * bounce though: that racing UFFDIO_COPY would
		 * generate zeropages in the area_src, so invalidating
		 * the previous MADV_DONTNEED. Without this additional
		 * MADV_DONTNEED those zeropages leftovers in the
		 * area_src would lead to -EEXIST failure during the
		 * next bounce, effectively leaving a zeropage in the
		 * area_dst.
		 *
		 * Try to comment this out madvise to see the memory
		 * corruption being caught pretty quick.
		 *
		 * khugepaged is also inhibited to collapse THP after
		 * MADV_DONTNEED only after the UFFDIO_REGISTER, so it's
		 * required to MADV_DONTNEED here.
		 */
		if (uffd_test_ops->release_pages(area_dst))
			return 1;

		/* bounce pass */
		if (stress(userfaults))
			return 1;

		/* unregister */
		if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range)) {
			fprintf(stderr, "unregister failure\n");
			return 1;
		}
		if (area_dst_alias) {
			uffdio_register.range.start = (unsigned long) area_dst;
			if (ioctl(uffd, UFFDIO_UNREGISTER,
				  &uffdio_register.range)) {
				fprintf(stderr, "unregister failure alias\n");
				return 1;
			}
		}

		/* verification: every per-page counter must match count_verify */
		if (bounces & BOUNCE_VERIFY) {
			for (nr = 0; nr < nr_pages; nr++) {
				if (*area_count(area_dst, nr) != count_verify[nr]) {
					fprintf(stderr,
						"error area_count %Lu %Lu %lu\n",
						*area_count(area_src, nr),
						count_verify[nr],
						nr);
					err = 1;
					/* abort remaining bounces on corruption */
					bounces = 0;
				}
			}
		}

		/* prepare next bounce: swap src and dst (and their aliases) */
		tmp_area = area_src;
		area_src = area_dst;
		area_dst = tmp_area;

		tmp_area = area_src_alias;
		area_src_alias = area_dst_alias;
		area_dst_alias = tmp_area;

		printf("userfaults:");
		for (cpu = 0; cpu < nr_cpus; cpu++)
			printf(" %lu", userfaults[cpu]);
		printf("\n");
	}

	pthread_key_delete(long_jmp_key);

	if (err)
		return err;

	/* Stress passed; run the remaining feature subtests. */
	close(uffd);
	return userfaultfd_zeropage_test() || userfaultfd_sig_test()
		|| userfaultfd_events_test();
}
1268 
1269 /*
1270  * Copied from mlock2-tests.c
1271  */
/*
 * Return the system's default huge page size in bytes, parsed from
 * the "Hugepagesize:" line of /proc/meminfo, or 0 if it cannot be
 * determined.
 */
unsigned long default_huge_page_size(void)
{
	unsigned long bytes = 0;
	char *buf = NULL;
	size_t buflen = 0;
	FILE *meminfo = fopen("/proc/meminfo", "r");

	if (!meminfo)
		return 0;

	while (getline(&buf, &buflen, meminfo) > 0) {
		unsigned long kb;

		if (sscanf(buf, "Hugepagesize:       %lu kB", &kb) != 1)
			continue;
		bytes = kb << 10;	/* kB -> bytes */
		break;
	}

	free(buf);
	fclose(meminfo);
	return bytes;
}
1292 
set_test_type(const char * type)1293 static void set_test_type(const char *type)
1294 {
1295 	if (!strcmp(type, "anon")) {
1296 		test_type = TEST_ANON;
1297 		uffd_test_ops = &anon_uffd_test_ops;
1298 	} else if (!strcmp(type, "hugetlb")) {
1299 		test_type = TEST_HUGETLB;
1300 		uffd_test_ops = &hugetlb_uffd_test_ops;
1301 	} else if (!strcmp(type, "hugetlb_shared")) {
1302 		map_shared = true;
1303 		test_type = TEST_HUGETLB;
1304 		uffd_test_ops = &hugetlb_uffd_test_ops;
1305 	} else if (!strcmp(type, "shmem")) {
1306 		map_shared = true;
1307 		test_type = TEST_SHMEM;
1308 		uffd_test_ops = &shmem_uffd_test_ops;
1309 	} else {
1310 		fprintf(stderr, "Unknown test type: %s\n", type), exit(1);
1311 	}
1312 
1313 	if (test_type == TEST_HUGETLB)
1314 		page_size = default_huge_page_size();
1315 	else
1316 		page_size = sysconf(_SC_PAGE_SIZE);
1317 
1318 	if (!page_size)
1319 		fprintf(stderr, "Unable to determine page size\n"),
1320 				exit(2);
1321 	if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2
1322 	    > page_size)
1323 		fprintf(stderr, "Impossible to run this test\n"), exit(2);
1324 }
1325 
sigalrm(int sig)1326 static void sigalrm(int sig)
1327 {
1328 	if (sig != SIGALRM)
1329 		abort();
1330 	test_uffdio_copy_eexist = true;
1331 	test_uffdio_zeropage_eexist = true;
1332 	alarm(ALARM_INTERVAL_SECS);
1333 }
1334 
main(int argc,char ** argv)1335 int main(int argc, char **argv)
1336 {
1337 	char randstate[64];
1338 	unsigned int seed;
1339 
1340 	if (argc < 4)
1341 		usage();
1342 
1343 	if (signal(SIGALRM, sigalrm) == SIG_ERR)
1344 		fprintf(stderr, "failed to arm SIGALRM"), exit(1);
1345 	alarm(ALARM_INTERVAL_SECS);
1346 
1347 	set_test_type(argv[1]);
1348 
1349 	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
1350 	nr_pages_per_cpu = atol(argv[2]) * 1024*1024 / page_size /
1351 		nr_cpus;
1352 	if (!nr_pages_per_cpu) {
1353 		fprintf(stderr, "invalid MiB\n");
1354 		usage();
1355 	}
1356 
1357 	bounces = atoi(argv[3]);
1358 	if (bounces <= 0) {
1359 		fprintf(stderr, "invalid bounces\n");
1360 		usage();
1361 	}
1362 	nr_pages = nr_pages_per_cpu * nr_cpus;
1363 
1364 	if (test_type == TEST_HUGETLB) {
1365 		if (argc < 5)
1366 			usage();
1367 		huge_fd = open(argv[4], O_CREAT | O_RDWR, 0755);
1368 		if (huge_fd < 0) {
1369 			fprintf(stderr, "Open of %s failed", argv[3]);
1370 			perror("open");
1371 			exit(1);
1372 		}
1373 		if (ftruncate(huge_fd, 0)) {
1374 			fprintf(stderr, "ftruncate %s to size 0 failed", argv[3]);
1375 			perror("ftruncate");
1376 			exit(1);
1377 		}
1378 	}
1379 	printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n",
1380 	       nr_pages, nr_pages_per_cpu);
1381 
1382 	seed = (unsigned int) time(NULL);
1383 	bzero(&randstate, sizeof(randstate));
1384 	if (!initstate(seed, randstate, sizeof(randstate)))
1385 		fprintf(stderr, "srandom_r error\n"), exit(1);
1386 
1387 	return userfaultfd_stress();
1388 }
1389 
1390 #else /* __NR_userfaultfd */
1391 
1392 #warning "missing __NR_userfaultfd definition"
1393 
/* Fallback when the kernel headers lack __NR_userfaultfd: report a
 * kselftest skip instead of failing the test run.
 */
int main(void)
{
	printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n");
	return KSFT_SKIP;
}
1399 
1400 #endif /* __NR_userfaultfd */
1401