1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Stress userfaultfd syscall.
4  *
5  *  Copyright (C) 2015  Red Hat, Inc.
6  *
7  * This test allocates two virtual areas and bounces the physical
8  * memory across the two virtual areas (from area_src to area_dst)
9  * using userfaultfd.
10  *
11  * There are three threads running per CPU:
12  *
13  * 1) one per-CPU thread takes a per-page pthread_mutex in a random
14  *    page of the area_dst (while the physical page may still be in
15  *    area_src), and increments a per-page counter in the same page,
16  *    and checks its value against a verification region.
17  *
18  * 2) another per-CPU thread handles the userfaults generated by
 19  *    thread 1 above. The userfaultfd blocking-read and poll() modes
 20  *    are exercised in alternation.
21  *
22  * 3) one last per-CPU thread transfers the memory in the background
23  *    at maximum bandwidth (if not already transferred by thread
 24  * 2). Each CPU thread takes care of transferring a portion of the
25  *    area.
26  *
 27  * When all threads of type 3 have completed the transfer, one bounce is
28  * complete. area_src and area_dst are then swapped. All threads are
29  * respawned and so the bounce is immediately restarted in the
30  * opposite direction.
31  *
 32  * The per-CPU threads of type 1, by triggering userfaults inside
 33  * pthread_mutex_lock, also verify the atomicity of the memory
 34  * transfer (UFFDIO_COPY).
35  */
36 
37 #define _GNU_SOURCE
38 #include <stdio.h>
39 #include <errno.h>
40 #include <unistd.h>
41 #include <stdlib.h>
42 #include <sys/types.h>
43 #include <sys/stat.h>
44 #include <fcntl.h>
45 #include <time.h>
46 #include <signal.h>
47 #include <poll.h>
48 #include <string.h>
49 #include <linux/mman.h>
50 #include <sys/mman.h>
51 #include <sys/syscall.h>
52 #include <sys/ioctl.h>
53 #include <sys/wait.h>
54 #include <pthread.h>
55 #include <linux/userfaultfd.h>
56 #include <setjmp.h>
57 #include <stdbool.h>
58 
59 #include "../kselftest.h"
60 
61 #ifdef __NR_userfaultfd
62 
63 static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size;
64 
65 #define BOUNCE_RANDOM		(1<<0)
66 #define BOUNCE_RACINGFAULTS	(1<<1)
67 #define BOUNCE_VERIFY		(1<<2)
68 #define BOUNCE_POLL		(1<<3)
69 static int bounces;
70 
71 #define TEST_ANON	1
72 #define TEST_HUGETLB	2
73 #define TEST_SHMEM	3
74 static int test_type;
75 
 76 /* exercise the test_uffdio_*_eexist every ALARM_INTERVAL_SECS seconds */
77 #define ALARM_INTERVAL_SECS 10
78 static volatile bool test_uffdio_copy_eexist = true;
79 static volatile bool test_uffdio_zeropage_eexist = true;
80 
81 static bool map_shared;
82 static int huge_fd;
83 static char *huge_fd_off0;
84 static unsigned long long *count_verify;
85 static int uffd, uffd_flags, finished, *pipefd;
86 static char *area_src, *area_src_alias, *area_dst, *area_dst_alias;
87 static char *zeropage;
88 pthread_attr_t attr;
89 
90 /* pthread_mutex_t starts at page offset 0 */
91 #define area_mutex(___area, ___nr)					\
92 	((pthread_mutex_t *) ((___area) + (___nr)*page_size))
93 /*
 94  * count is placed in the page after the pthread_mutex_t, naturally
 95  * aligned to avoid alignment faults on non-x86 archs.
96  */
97 #define area_count(___area, ___nr)					\
98 	((volatile unsigned long long *) ((unsigned long)		\
99 				 ((___area) + (___nr)*page_size +	\
100 				  sizeof(pthread_mutex_t) +		\
101 				  sizeof(unsigned long long) - 1) &	\
102 				 ~(unsigned long)(sizeof(unsigned long long) \
103 						  -  1)))
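/*
 * Resulting per-page layout (sketch): the pthread_mutex_t sits at page
 * offset 0, the 64-bit count follows at the next naturally aligned
 * offset, and userfaultfd_stress() keeps the 64 bits after the count
 * non-zero as a placeholder (see the powerpc note there).
 */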
104 
105 const char *examples =
106     "# Run anonymous memory test on 100MiB region with 99999 bounces:\n"
107     "./userfaultfd anon 100 99999\n\n"
 108     "# Run shared memory test on 1GiB region with 99 bounces:\n"
109     "./userfaultfd shmem 1000 99\n\n"
110     "# Run hugetlb memory test on 256MiB region with 50 bounces (using /dev/hugepages/hugefile):\n"
111     "./userfaultfd hugetlb 256 50 /dev/hugepages/hugefile\n\n"
112     "# Run the same hugetlb test but using shmem:\n"
113     "./userfaultfd hugetlb_shared 256 50 /dev/hugepages/hugefile\n\n"
114     "# 10MiB-~6GiB 999 bounces anonymous test, "
 115     "continues forever unless an error triggers\n"
116     "while ./userfaultfd anon $[RANDOM % 6000 + 10] 999; do true; done\n\n";
117 
 118 static void usage(void)
119 {
120 	fprintf(stderr, "\nUsage: ./userfaultfd <test type> <MiB> <bounces> "
121 		"[hugetlbfs_file]\n\n");
122 	fprintf(stderr, "Supported <test type>: anon, hugetlb, "
123 		"hugetlb_shared, shmem\n\n");
124 	fprintf(stderr, "Examples:\n\n");
125 	fprintf(stderr, "%s", examples);
126 	exit(1);
127 }
128 
 129 static int anon_release_pages(char *rel_area)
130 {
131 	int ret = 0;
132 
133 	if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED)) {
134 		perror("madvise");
135 		ret = 1;
136 	}
137 
138 	return ret;
139 }
140 
 141 static void anon_allocate_area(void **alloc_area)
142 {
143 	*alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
144 			   MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
145 	if (*alloc_area == MAP_FAILED) {
 146 		fprintf(stderr, "mmap of anonymous memory failed\n");
147 		*alloc_area = NULL;
148 	}
149 }
150 
 151 static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset)
152 {
153 }
154 
155 /* HugeTLB memory */
 156 static int hugetlb_release_pages(char *rel_area)
157 {
158 	int ret = 0;
159 
160 	if (fallocate(huge_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
161 				rel_area == huge_fd_off0 ? 0 :
162 				nr_pages * page_size,
163 				nr_pages * page_size)) {
164 		perror("fallocate");
165 		ret = 1;
166 	}
167 
168 	return ret;
169 }
170 
171 
 172 static void hugetlb_allocate_area(void **alloc_area)
173 {
174 	void *area_alias = NULL;
175 	char **alloc_area_alias;
176 	*alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
177 			   (map_shared ? MAP_SHARED : MAP_PRIVATE) |
178 			   MAP_HUGETLB,
179 			   huge_fd, *alloc_area == area_src ? 0 :
180 			   nr_pages * page_size);
181 	if (*alloc_area == MAP_FAILED) {
182 		fprintf(stderr, "mmap of hugetlbfs file failed\n");
183 		*alloc_area = NULL;
184 	}
185 
186 	if (map_shared) {
187 		area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
188 				  MAP_SHARED | MAP_HUGETLB,
189 				  huge_fd, *alloc_area == area_src ? 0 :
190 				  nr_pages * page_size);
191 		if (area_alias == MAP_FAILED) {
192 			if (munmap(*alloc_area, nr_pages * page_size) < 0)
193 				perror("hugetlb munmap"), exit(1);
194 			*alloc_area = NULL;
195 			return;
196 		}
197 	}
198 	if (*alloc_area == area_src) {
199 		huge_fd_off0 = *alloc_area;
200 		alloc_area_alias = &area_src_alias;
201 	} else {
202 		alloc_area_alias = &area_dst_alias;
203 	}
204 	if (area_alias)
205 		*alloc_area_alias = area_alias;
206 }
207 
 208 static void hugetlb_alias_mapping(__u64 *start, size_t len, unsigned long offset)
209 {
210 	if (!map_shared)
211 		return;
212 	/*
213 	 * We can't zap just the pagetable with hugetlbfs because
 214 	 * MADV_DONTNEED won't work. So exercise -EEXIST on an alias
 215 	 * mapping where the pagetables are not established initially;
 216 	 * this way we'll exercise the -EEXIST check at the fs level.
217 	 */
218 	*start = (unsigned long) area_dst_alias + offset;
219 }
220 
221 /* Shared memory */
 222 static int shmem_release_pages(char *rel_area)
223 {
224 	int ret = 0;
225 
226 	if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE)) {
227 		perror("madvise");
228 		ret = 1;
229 	}
230 
231 	return ret;
232 }
233 
 234 static void shmem_allocate_area(void **alloc_area)
235 {
236 	*alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
237 			   MAP_ANONYMOUS | MAP_SHARED, -1, 0);
238 	if (*alloc_area == MAP_FAILED) {
239 		fprintf(stderr, "shared memory mmap failed\n");
240 		*alloc_area = NULL;
241 	}
242 }
243 
244 struct uffd_test_ops {
245 	unsigned long expected_ioctls;
246 	void (*allocate_area)(void **alloc_area);
247 	int (*release_pages)(char *rel_area);
248 	void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset);
249 };
250 
251 #define ANON_EXPECTED_IOCTLS		((1 << _UFFDIO_WAKE) | \
252 					 (1 << _UFFDIO_COPY) | \
253 					 (1 << _UFFDIO_ZEROPAGE))
254 
255 static struct uffd_test_ops anon_uffd_test_ops = {
256 	.expected_ioctls = ANON_EXPECTED_IOCTLS,
257 	.allocate_area	= anon_allocate_area,
258 	.release_pages	= anon_release_pages,
259 	.alias_mapping = noop_alias_mapping,
260 };
261 
262 static struct uffd_test_ops shmem_uffd_test_ops = {
263 	.expected_ioctls = ANON_EXPECTED_IOCTLS,
264 	.allocate_area	= shmem_allocate_area,
265 	.release_pages	= shmem_release_pages,
266 	.alias_mapping = noop_alias_mapping,
267 };
268 
269 static struct uffd_test_ops hugetlb_uffd_test_ops = {
270 	.expected_ioctls = UFFD_API_RANGE_IOCTLS_BASIC,
271 	.allocate_area	= hugetlb_allocate_area,
272 	.release_pages	= hugetlb_release_pages,
273 	.alias_mapping = hugetlb_alias_mapping,
274 };
275 
276 static struct uffd_test_ops *uffd_test_ops;
277 
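/*
 * Byte-by-byte comparison that stays reliable while the compared memory
 * is being modified concurrently (unlike bcmp/memcmp, see the comment
 * in locking_thread()).
 */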
 278 static int my_bcmp(char *str1, char *str2, size_t n)
279 {
280 	unsigned long i;
281 	for (i = 0; i < n; i++)
282 		if (str1[i] != str2[i])
283 			return 1;
284 	return 0;
285 }
286 
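/*
 * Thread type 1: pick a page of area_dst (randomly with BOUNCE_RANDOM),
 * take its per-page mutex, then verify and increment its counter; taking
 * the lock triggers a userfault whenever the page has not been copied
 * from area_src yet.
 */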
 287 static void *locking_thread(void *arg)
288 {
289 	unsigned long cpu = (unsigned long) arg;
290 	struct random_data rand;
291 	unsigned long page_nr = *(&(page_nr)); /* uninitialized warning */
292 	int32_t rand_nr;
293 	unsigned long long count;
294 	char randstate[64];
295 	unsigned int seed;
296 	time_t start;
297 
298 	if (bounces & BOUNCE_RANDOM) {
299 		seed = (unsigned int) time(NULL) - bounces;
300 		if (!(bounces & BOUNCE_RACINGFAULTS))
301 			seed += cpu;
302 		bzero(&rand, sizeof(rand));
303 		bzero(&randstate, sizeof(randstate));
304 		if (initstate_r(seed, randstate, sizeof(randstate), &rand))
305 			fprintf(stderr, "srandom_r error\n"), exit(1);
306 	} else {
307 		page_nr = -bounces;
308 		if (!(bounces & BOUNCE_RACINGFAULTS))
309 			page_nr += cpu * nr_pages_per_cpu;
310 	}
311 
312 	while (!finished) {
313 		if (bounces & BOUNCE_RANDOM) {
314 			if (random_r(&rand, &rand_nr))
315 				fprintf(stderr, "random_r 1 error\n"), exit(1);
316 			page_nr = rand_nr;
317 			if (sizeof(page_nr) > sizeof(rand_nr)) {
318 				if (random_r(&rand, &rand_nr))
319 					fprintf(stderr, "random_r 2 error\n"), exit(1);
320 				page_nr |= (((unsigned long) rand_nr) << 16) <<
321 					   16;
322 			}
323 		} else
324 			page_nr += 1;
325 		page_nr %= nr_pages;
326 
327 		start = time(NULL);
328 		if (bounces & BOUNCE_VERIFY) {
329 			count = *area_count(area_dst, page_nr);
330 			if (!count)
331 				fprintf(stderr,
332 					"page_nr %lu wrong count %Lu %Lu\n",
333 					page_nr, count,
334 					count_verify[page_nr]), exit(1);
335 
336 
337 			/*
338 			 * We can't use bcmp (or memcmp) because that
339 			 * returns 0 erroneously if the memory is
340 			 * changing under it (even if the end of the
341 			 * page is never changing and always
342 			 * different).
343 			 */
344 #if 1
345 			if (!my_bcmp(area_dst + page_nr * page_size, zeropage,
346 				     page_size))
347 				fprintf(stderr,
348 					"my_bcmp page_nr %lu wrong count %Lu %Lu\n",
349 					page_nr, count,
350 					count_verify[page_nr]), exit(1);
351 #else
352 			unsigned long loops;
353 
354 			loops = 0;
355 			/* uncomment the below line to test with mutex */
356 			/* pthread_mutex_lock(area_mutex(area_dst, page_nr)); */
357 			while (!bcmp(area_dst + page_nr * page_size, zeropage,
358 				     page_size)) {
359 				loops += 1;
360 				if (loops > 10)
361 					break;
362 			}
363 			/* uncomment below line to test with mutex */
364 			/* pthread_mutex_unlock(area_mutex(area_dst, page_nr)); */
365 			if (loops) {
366 				fprintf(stderr,
367 					"page_nr %lu all zero thread %lu %p %lu\n",
368 					page_nr, cpu, area_dst + page_nr * page_size,
369 					loops);
370 				if (loops > 10)
371 					exit(1);
372 			}
373 #endif
374 		}
375 
376 		pthread_mutex_lock(area_mutex(area_dst, page_nr));
377 		count = *area_count(area_dst, page_nr);
378 		if (count != count_verify[page_nr]) {
379 			fprintf(stderr,
380 				"page_nr %lu memory corruption %Lu %Lu\n",
381 				page_nr, count,
382 				count_verify[page_nr]), exit(1);
383 		}
384 		count++;
385 		*area_count(area_dst, page_nr) = count_verify[page_nr] = count;
386 		pthread_mutex_unlock(area_mutex(area_dst, page_nr));
387 
388 		if (time(NULL) - start > 1)
389 			fprintf(stderr,
390 				"userfault too slow %ld "
391 				"possible false positive with overcommit\n",
392 				time(NULL) - start);
393 	}
394 
395 	return NULL;
396 }
397 
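/*
 * Retry the UFFDIO_COPY through the alias mapping: the page has already
 * been copied, so the ioctl must fail with -EEXIST, exercising the
 * EEXIST path without corrupting area_dst.
 */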
 398 static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy,
399 			    unsigned long offset)
400 {
401 	uffd_test_ops->alias_mapping(&uffdio_copy->dst,
402 				     uffdio_copy->len,
403 				     offset);
404 	if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) {
 405 		/* real retval in uffdio_copy.copy */
406 		if (uffdio_copy->copy != -EEXIST)
407 			fprintf(stderr, "UFFDIO_COPY retry error %Ld\n",
408 				uffdio_copy->copy), exit(1);
409 	} else {
410 		fprintf(stderr,	"UFFDIO_COPY retry unexpected %Ld\n",
411 			uffdio_copy->copy), exit(1);
412 	}
413 }
414 
 415 static int __copy_page(int ufd, unsigned long offset, bool retry)
416 {
417 	struct uffdio_copy uffdio_copy;
418 
419 	if (offset >= nr_pages * page_size)
420 		fprintf(stderr, "unexpected offset %lu\n",
421 			offset), exit(1);
422 	uffdio_copy.dst = (unsigned long) area_dst + offset;
423 	uffdio_copy.src = (unsigned long) area_src + offset;
424 	uffdio_copy.len = page_size;
425 	uffdio_copy.mode = 0;
426 	uffdio_copy.copy = 0;
427 	if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) {
 428 		/* real retval in uffdio_copy.copy */
429 		if (uffdio_copy.copy != -EEXIST)
430 			fprintf(stderr, "UFFDIO_COPY error %Ld\n",
431 				uffdio_copy.copy), exit(1);
432 	} else if (uffdio_copy.copy != page_size) {
433 		fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n",
434 			uffdio_copy.copy), exit(1);
435 	} else {
436 		if (test_uffdio_copy_eexist && retry) {
437 			test_uffdio_copy_eexist = false;
438 			retry_copy_page(ufd, &uffdio_copy, offset);
439 		}
440 		return 1;
441 	}
442 	return 0;
443 }
444 
 445 static int copy_page_retry(int ufd, unsigned long offset)
446 {
447 	return __copy_page(ufd, offset, true);
448 }
449 
 450 static int copy_page(int ufd, unsigned long offset)
451 {
452 	return __copy_page(ufd, offset, false);
453 }
454 
 455 static int uffd_read_msg(int ufd, struct uffd_msg *msg)
456 {
 457 	int ret = read(ufd, msg, sizeof(*msg));
458 
459 	if (ret != sizeof(*msg)) {
460 		if (ret < 0) {
461 			if (errno == EAGAIN)
462 				return 1;
463 			else
464 				perror("blocking read error"), exit(1);
465 		} else {
466 			fprintf(stderr, "short read\n"), exit(1);
467 		}
468 	}
469 
470 	return 0;
471 }
472 
473 /* Return 1 if page fault handled by us; otherwise 0 */
 474 static int uffd_handle_page_fault(struct uffd_msg *msg)
475 {
476 	unsigned long offset;
477 
478 	if (msg->event != UFFD_EVENT_PAGEFAULT)
479 		fprintf(stderr, "unexpected msg event %u\n",
480 			msg->event), exit(1);
481 
482 	if (bounces & BOUNCE_VERIFY &&
483 	    msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
484 		fprintf(stderr, "unexpected write fault\n"), exit(1);
485 
486 	offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst;
487 	offset &= ~(page_size-1);
488 
489 	return copy_page(uffd, offset);
490 }
491 
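/*
 * Thread type 2 (poll mode): wait for events on the userfaultfd with
 * poll() and resolve pagefaults via UFFDIO_COPY; also handles the FORK,
 * REMOVE and REMAP events used by the non-cooperative tests.
 */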
 492 static void *uffd_poll_thread(void *arg)
493 {
494 	unsigned long cpu = (unsigned long) arg;
495 	struct pollfd pollfd[2];
496 	struct uffd_msg msg;
497 	struct uffdio_register uffd_reg;
498 	int ret;
499 	char tmp_chr;
500 	unsigned long userfaults = 0;
501 
502 	pollfd[0].fd = uffd;
503 	pollfd[0].events = POLLIN;
504 	pollfd[1].fd = pipefd[cpu*2];
505 	pollfd[1].events = POLLIN;
506 
507 	for (;;) {
508 		ret = poll(pollfd, 2, -1);
509 		if (!ret)
510 			fprintf(stderr, "poll error %d\n", ret), exit(1);
511 		if (ret < 0)
512 			perror("poll"), exit(1);
513 		if (pollfd[1].revents & POLLIN) {
514 			if (read(pollfd[1].fd, &tmp_chr, 1) != 1)
515 				fprintf(stderr, "read pipefd error\n"),
516 					exit(1);
517 			break;
518 		}
519 		if (!(pollfd[0].revents & POLLIN))
520 			fprintf(stderr, "pollfd[0].revents %d\n",
521 				pollfd[0].revents), exit(1);
522 		if (uffd_read_msg(uffd, &msg))
523 			continue;
524 		switch (msg.event) {
525 		default:
526 			fprintf(stderr, "unexpected msg event %u\n",
527 				msg.event), exit(1);
528 			break;
529 		case UFFD_EVENT_PAGEFAULT:
530 			userfaults += uffd_handle_page_fault(&msg);
531 			break;
532 		case UFFD_EVENT_FORK:
533 			close(uffd);
534 			uffd = msg.arg.fork.ufd;
535 			pollfd[0].fd = uffd;
536 			break;
537 		case UFFD_EVENT_REMOVE:
538 			uffd_reg.range.start = msg.arg.remove.start;
539 			uffd_reg.range.len = msg.arg.remove.end -
540 				msg.arg.remove.start;
541 			if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range))
542 				fprintf(stderr, "remove failure\n"), exit(1);
543 			break;
544 		case UFFD_EVENT_REMAP:
545 			area_dst = (char *)(unsigned long)msg.arg.remap.to;
546 			break;
547 		}
548 	}
549 	return (void *)userfaults;
550 }
551 
552 pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER;
553 
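/*
 * Thread type 2 (blocking-read mode): resolve pagefaults delivered
 * through blocking read()s on the userfaultfd until cancelled by
 * stress().
 */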
 554 static void *uffd_read_thread(void *arg)
555 {
556 	unsigned long *this_cpu_userfaults;
557 	struct uffd_msg msg;
558 
559 	this_cpu_userfaults = (unsigned long *) arg;
560 	*this_cpu_userfaults = 0;
561 
562 	pthread_mutex_unlock(&uffd_read_mutex);
563 	/* from here cancellation is ok */
564 
565 	for (;;) {
566 		if (uffd_read_msg(uffd, &msg))
567 			continue;
568 		(*this_cpu_userfaults) += uffd_handle_page_fault(&msg);
569 	}
570 	return (void *)NULL;
571 }
572 
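/*
 * Thread type 3: copy this CPU's portion of the area from area_src to
 * area_dst at full speed, racing with the userfault handlers.
 */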
 573 static void *background_thread(void *arg)
574 {
575 	unsigned long cpu = (unsigned long) arg;
576 	unsigned long page_nr;
577 
578 	for (page_nr = cpu * nr_pages_per_cpu;
579 	     page_nr < (cpu+1) * nr_pages_per_cpu;
580 	     page_nr++)
581 		copy_page_retry(uffd, page_nr * page_size);
582 
583 	return NULL;
584 }
585 
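/*
 * Run one bounce: spawn the locking, uffd and background threads, wait
 * for the background copy to complete, zap area_src, then stop and join
 * the remaining threads while collecting the per-CPU userfault counts.
 */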
 586 static int stress(unsigned long *userfaults)
587 {
588 	unsigned long cpu;
589 	pthread_t locking_threads[nr_cpus];
590 	pthread_t uffd_threads[nr_cpus];
591 	pthread_t background_threads[nr_cpus];
592 	void **_userfaults = (void **) userfaults;
593 
594 	finished = 0;
595 	for (cpu = 0; cpu < nr_cpus; cpu++) {
596 		if (pthread_create(&locking_threads[cpu], &attr,
597 				   locking_thread, (void *)cpu))
598 			return 1;
599 		if (bounces & BOUNCE_POLL) {
600 			if (pthread_create(&uffd_threads[cpu], &attr,
601 					   uffd_poll_thread, (void *)cpu))
602 				return 1;
603 		} else {
604 			if (pthread_create(&uffd_threads[cpu], &attr,
605 					   uffd_read_thread,
606 					   &_userfaults[cpu]))
607 				return 1;
608 			pthread_mutex_lock(&uffd_read_mutex);
609 		}
610 		if (pthread_create(&background_threads[cpu], &attr,
611 				   background_thread, (void *)cpu))
612 			return 1;
613 	}
614 	for (cpu = 0; cpu < nr_cpus; cpu++)
615 		if (pthread_join(background_threads[cpu], NULL))
616 			return 1;
617 
618 	/*
619 	 * Be strict and immediately zap area_src, the whole area has
 620 	 * been transferred already by the background threads. The
621 	 * area_src could then be faulted in in a racy way by still
622 	 * running uffdio_threads reading zeropages after we zapped
623 	 * area_src (but they're guaranteed to get -EEXIST from
624 	 * UFFDIO_COPY without writing zero pages into area_dst
625 	 * because the background threads already completed).
626 	 */
627 	if (uffd_test_ops->release_pages(area_src))
628 		return 1;
629 
630 
631 	finished = 1;
632 	for (cpu = 0; cpu < nr_cpus; cpu++)
633 		if (pthread_join(locking_threads[cpu], NULL))
634 			return 1;
635 
636 	for (cpu = 0; cpu < nr_cpus; cpu++) {
637 		char c;
638 		if (bounces & BOUNCE_POLL) {
639 			if (write(pipefd[cpu*2+1], &c, 1) != 1) {
640 				fprintf(stderr, "pipefd write error\n");
641 				return 1;
642 			}
643 			if (pthread_join(uffd_threads[cpu], &_userfaults[cpu]))
644 				return 1;
645 		} else {
646 			if (pthread_cancel(uffd_threads[cpu]))
647 				return 1;
648 			if (pthread_join(uffd_threads[cpu], NULL))
649 				return 1;
650 		}
651 	}
652 
653 	return 0;
654 }
655 
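/*
 * Create the userfaultfd and perform the UFFDIO_API handshake with the
 * requested feature bits; returns 0 on success, 1 on failure.
 */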
 656 static int userfaultfd_open(int features)
657 {
658 	struct uffdio_api uffdio_api;
659 
660 	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
661 	if (uffd < 0) {
662 		fprintf(stderr,
663 			"userfaultfd syscall not available in this kernel\n");
664 		return 1;
665 	}
666 	uffd_flags = fcntl(uffd, F_GETFD, NULL);
667 
668 	uffdio_api.api = UFFD_API;
669 	uffdio_api.features = features;
670 	if (ioctl(uffd, UFFDIO_API, &uffdio_api)) {
671 		fprintf(stderr, "UFFDIO_API\n");
672 		return 1;
673 	}
674 	if (uffdio_api.api != UFFD_API) {
675 		fprintf(stderr, "UFFDIO_API error %Lu\n", uffdio_api.api);
676 		return 1;
677 	}
678 
679 	return 0;
680 }
681 
682 sigjmp_buf jbuf, *sigbuf;
683 
 684 static void sighndl(int sig, siginfo_t *siginfo, void *ptr)
685 {
686 	if (sig == SIGBUS) {
687 		if (sigbuf)
688 			siglongjmp(*sigbuf, 1);
689 		abort();
690 	}
691 }
692 
693 /*
694  * For non-cooperative userfaultfd test we fork() a process that will
695  * generate pagefaults, will mremap the area monitored by the
696  * userfaultfd and at last this process will release the monitored
697  * area.
698  * For the anonymous and shared memory the area is divided into two
699  * parts, the first part is accessed before mremap, and the second
700  * part is accessed after mremap. Since hugetlbfs does not support
701  * mremap, the entire monitored area is accessed in a single pass for
702  * HUGETLB_TEST.
703  * The release of the pages currently generates event for shmem and
704  * anonymous memory (UFFD_EVENT_REMOVE), hence it is not checked
705  * for hugetlb.
 706  * For the signal test (UFFD_FEATURE_SIGBUS) with signal_test = 1, we
 707  * register the monitored area, generate pagefaults and check that the
 708  * signal is delivered, using UFFDIO_COPY to allocate the missing page
 709  * before retrying. With signal_test = 2 we test the robustness case:
 710  * the monitored area is released and a forked process generates
 711  * pagefaults while we verify the signal is delivered. This also tests
 712  * UFFD_FEATURE_EVENT_FORK; a monitor thread verifies no userfaults occur.
713  */
 714 static int faulting_process(int signal_test)
715 {
716 	unsigned long nr;
717 	unsigned long long count;
718 	unsigned long split_nr_pages;
719 	unsigned long lastnr;
720 	struct sigaction act;
721 	unsigned long signalled = 0;
722 
723 	if (test_type != TEST_HUGETLB)
724 		split_nr_pages = (nr_pages + 1) / 2;
725 	else
726 		split_nr_pages = nr_pages;
727 
728 	if (signal_test) {
729 		sigbuf = &jbuf;
730 		memset(&act, 0, sizeof(act));
731 		act.sa_sigaction = sighndl;
732 		act.sa_flags = SA_SIGINFO;
733 		if (sigaction(SIGBUS, &act, 0)) {
734 			perror("sigaction");
735 			return 1;
736 		}
737 		lastnr = (unsigned long)-1;
738 	}
739 
740 	for (nr = 0; nr < split_nr_pages; nr++) {
741 		if (signal_test) {
742 			if (sigsetjmp(*sigbuf, 1) != 0) {
743 				if (nr == lastnr) {
744 					fprintf(stderr, "Signal repeated\n");
745 					return 1;
746 				}
747 
748 				lastnr = nr;
749 				if (signal_test == 1) {
750 					if (copy_page(uffd, nr * page_size))
751 						signalled++;
752 				} else {
753 					signalled++;
754 					continue;
755 				}
756 			}
757 		}
758 
759 		count = *area_count(area_dst, nr);
760 		if (count != count_verify[nr]) {
761 			fprintf(stderr,
762 				"nr %lu memory corruption %Lu %Lu\n",
763 				nr, count,
764 				count_verify[nr]), exit(1);
765 		}
766 	}
767 
768 	if (signal_test)
769 		return signalled != split_nr_pages;
770 
771 	if (test_type == TEST_HUGETLB)
772 		return 0;
773 
774 	area_dst = mremap(area_dst, nr_pages * page_size,  nr_pages * page_size,
775 			  MREMAP_MAYMOVE | MREMAP_FIXED, area_src);
776 	if (area_dst == MAP_FAILED)
777 		perror("mremap"), exit(1);
778 
779 	for (; nr < nr_pages; nr++) {
780 		count = *area_count(area_dst, nr);
781 		if (count != count_verify[nr]) {
782 			fprintf(stderr,
783 				"nr %lu memory corruption %Lu %Lu\n",
784 				nr, count,
785 				count_verify[nr]), exit(1);
786 		}
787 	}
788 
789 	if (uffd_test_ops->release_pages(area_dst))
790 		return 1;
791 
792 	for (nr = 0; nr < nr_pages; nr++) {
793 		if (my_bcmp(area_dst + nr * page_size, zeropage, page_size))
794 			fprintf(stderr, "nr %lu is not zero\n", nr), exit(1);
795 	}
796 
797 	return 0;
798 }
799 
 800 static void retry_uffdio_zeropage(int ufd,
801 				  struct uffdio_zeropage *uffdio_zeropage,
802 				  unsigned long offset)
803 {
804 	uffd_test_ops->alias_mapping(&uffdio_zeropage->range.start,
805 				     uffdio_zeropage->range.len,
806 				     offset);
807 	if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) {
808 		if (uffdio_zeropage->zeropage != -EEXIST)
809 			fprintf(stderr, "UFFDIO_ZEROPAGE retry error %Ld\n",
810 				uffdio_zeropage->zeropage), exit(1);
811 	} else {
812 		fprintf(stderr, "UFFDIO_ZEROPAGE retry unexpected %Ld\n",
813 			uffdio_zeropage->zeropage), exit(1);
814 	}
815 }
816 
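/*
 * Ask the kernel to map the zero page at the given offset: returns 1 if
 * a zeropage was installed, 0 if the test type does not support
 * UFFDIO_ZEROPAGE (hugetlbfs, where -EINVAL is expected).
 */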
 817 static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
818 {
819 	struct uffdio_zeropage uffdio_zeropage;
820 	int ret;
821 	unsigned long has_zeropage;
822 
823 	has_zeropage = uffd_test_ops->expected_ioctls & (1 << _UFFDIO_ZEROPAGE);
824 
825 	if (offset >= nr_pages * page_size)
826 		fprintf(stderr, "unexpected offset %lu\n",
827 			offset), exit(1);
828 	uffdio_zeropage.range.start = (unsigned long) area_dst + offset;
829 	uffdio_zeropage.range.len = page_size;
830 	uffdio_zeropage.mode = 0;
831 	ret = ioctl(ufd, UFFDIO_ZEROPAGE, &uffdio_zeropage);
832 	if (ret) {
 833 		/* real retval in uffdio_zeropage.zeropage */
834 		if (has_zeropage) {
835 			if (uffdio_zeropage.zeropage == -EEXIST)
836 				fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n"),
837 					exit(1);
838 			else
839 				fprintf(stderr, "UFFDIO_ZEROPAGE error %Ld\n",
840 					uffdio_zeropage.zeropage), exit(1);
841 		} else {
842 			if (uffdio_zeropage.zeropage != -EINVAL)
843 				fprintf(stderr,
844 					"UFFDIO_ZEROPAGE not -EINVAL %Ld\n",
845 					uffdio_zeropage.zeropage), exit(1);
846 		}
847 	} else if (has_zeropage) {
848 		if (uffdio_zeropage.zeropage != page_size) {
849 			fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n",
850 				uffdio_zeropage.zeropage), exit(1);
851 		} else {
852 			if (test_uffdio_zeropage_eexist && retry) {
853 				test_uffdio_zeropage_eexist = false;
854 				retry_uffdio_zeropage(ufd, &uffdio_zeropage,
855 						      offset);
856 			}
857 			return 1;
858 		}
859 	} else {
860 		fprintf(stderr,
861 			"UFFDIO_ZEROPAGE succeeded %Ld\n",
862 			uffdio_zeropage.zeropage), exit(1);
863 	}
864 
865 	return 0;
866 }
867 
 868 static int uffdio_zeropage(int ufd, unsigned long offset)
869 {
870 	return __uffdio_zeropage(ufd, offset, false);
871 }
872 
873 /* exercise UFFDIO_ZEROPAGE */
 874 static int userfaultfd_zeropage_test(void)
875 {
876 	struct uffdio_register uffdio_register;
877 	unsigned long expected_ioctls;
878 
879 	printf("testing UFFDIO_ZEROPAGE: ");
880 	fflush(stdout);
881 
882 	if (uffd_test_ops->release_pages(area_dst))
883 		return 1;
884 
 885 	if (userfaultfd_open(0))
886 		return 1;
887 	uffdio_register.range.start = (unsigned long) area_dst;
888 	uffdio_register.range.len = nr_pages * page_size;
889 	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
890 	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
891 		fprintf(stderr, "register failure\n"), exit(1);
892 
893 	expected_ioctls = uffd_test_ops->expected_ioctls;
894 	if ((uffdio_register.ioctls & expected_ioctls) !=
895 	    expected_ioctls)
896 		fprintf(stderr,
897 			"unexpected missing ioctl for anon memory\n"),
898 			exit(1);
899 
900 	if (uffdio_zeropage(uffd, 0)) {
901 		if (my_bcmp(area_dst, zeropage, page_size))
902 			fprintf(stderr, "zeropage is not zero\n"), exit(1);
903 	}
904 
905 	close(uffd);
906 	printf("done.\n");
907 	return 0;
908 }
909 
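/*
 * Exercise the non-cooperative events (UFFD_EVENT_FORK, REMAP, REMOVE)
 * with a forked faulting process while uffd_poll_thread monitors the
 * userfaultfd.
 */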
 910 static int userfaultfd_events_test(void)
911 {
912 	struct uffdio_register uffdio_register;
913 	unsigned long expected_ioctls;
914 	unsigned long userfaults;
915 	pthread_t uffd_mon;
916 	int err, features;
917 	pid_t pid;
918 	char c;
919 
920 	printf("testing events (fork, remap, remove): ");
921 	fflush(stdout);
922 
923 	if (uffd_test_ops->release_pages(area_dst))
924 		return 1;
925 
926 	features = UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_EVENT_REMAP |
927 		UFFD_FEATURE_EVENT_REMOVE;
 928 	if (userfaultfd_open(features))
929 		return 1;
930 	fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
931 
932 	uffdio_register.range.start = (unsigned long) area_dst;
933 	uffdio_register.range.len = nr_pages * page_size;
934 	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
935 	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
936 		fprintf(stderr, "register failure\n"), exit(1);
937 
938 	expected_ioctls = uffd_test_ops->expected_ioctls;
939 	if ((uffdio_register.ioctls & expected_ioctls) !=
940 	    expected_ioctls)
941 		fprintf(stderr,
942 			"unexpected missing ioctl for anon memory\n"),
943 			exit(1);
944 
945 	if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, NULL))
946 		perror("uffd_poll_thread create"), exit(1);
947 
948 	pid = fork();
949 	if (pid < 0)
950 		perror("fork"), exit(1);
951 
952 	if (!pid)
953 		return faulting_process(0);
954 
955 	waitpid(pid, &err, 0);
956 	if (err)
957 		fprintf(stderr, "faulting process failed\n"), exit(1);
958 
959 	if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
960 		perror("pipe write"), exit(1);
961 	if (pthread_join(uffd_mon, (void **)&userfaults))
962 		return 1;
963 
964 	close(uffd);
965 	printf("userfaults: %ld\n", userfaults);
966 
967 	return userfaults != nr_pages;
968 }
969 
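/*
 * Exercise UFFD_FEATURE_SIGBUS: faults on the registered area must
 * raise SIGBUS instead of being reported as userfault events.
 */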
 970 static int userfaultfd_sig_test(void)
971 {
972 	struct uffdio_register uffdio_register;
973 	unsigned long expected_ioctls;
974 	unsigned long userfaults;
975 	pthread_t uffd_mon;
976 	int err, features;
977 	pid_t pid;
978 	char c;
979 
980 	printf("testing signal delivery: ");
981 	fflush(stdout);
982 
983 	if (uffd_test_ops->release_pages(area_dst))
984 		return 1;
985 
986 	features = UFFD_FEATURE_EVENT_FORK|UFFD_FEATURE_SIGBUS;
 987 	if (userfaultfd_open(features))
988 		return 1;
989 	fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
990 
991 	uffdio_register.range.start = (unsigned long) area_dst;
992 	uffdio_register.range.len = nr_pages * page_size;
993 	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
994 	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
995 		fprintf(stderr, "register failure\n"), exit(1);
996 
997 	expected_ioctls = uffd_test_ops->expected_ioctls;
998 	if ((uffdio_register.ioctls & expected_ioctls) !=
999 	    expected_ioctls)
1000 		fprintf(stderr,
1001 			"unexpected missing ioctl for anon memory\n"),
1002 			exit(1);
1003 
1004 	if (faulting_process(1))
1005 		fprintf(stderr, "faulting process failed\n"), exit(1);
1006 
1007 	if (uffd_test_ops->release_pages(area_dst))
1008 		return 1;
1009 
1010 	if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, NULL))
1011 		perror("uffd_poll_thread create"), exit(1);
1012 
1013 	pid = fork();
1014 	if (pid < 0)
1015 		perror("fork"), exit(1);
1016 
1017 	if (!pid)
1018 		exit(faulting_process(2));
1019 
1020 	waitpid(pid, &err, 0);
1021 	if (err)
1022 		fprintf(stderr, "faulting process failed\n"), exit(1);
1023 
1024 	if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
1025 		perror("pipe write"), exit(1);
1026 	if (pthread_join(uffd_mon, (void **)&userfaults))
1027 		return 1;
1028 
1029 	printf("done.\n");
1030 	if (userfaults)
1031 		fprintf(stderr, "Signal test failed, userfaults: %ld\n",
1032 			userfaults);
1033 	close(uffd);
1034 	return userfaults != 0;
1035 }
 1036 static int userfaultfd_stress(void)
1037 {
1038 	void *area;
1039 	char *tmp_area;
1040 	unsigned long nr;
1041 	struct uffdio_register uffdio_register;
1042 	unsigned long cpu;
1043 	int err;
1044 	unsigned long userfaults[nr_cpus];
1045 
1046 	uffd_test_ops->allocate_area((void **)&area_src);
1047 	if (!area_src)
1048 		return 1;
1049 	uffd_test_ops->allocate_area((void **)&area_dst);
1050 	if (!area_dst)
1051 		return 1;
1052 
 1053 	if (userfaultfd_open(0))
1054 		return 1;
1055 
1056 	count_verify = malloc(nr_pages * sizeof(unsigned long long));
1057 	if (!count_verify) {
1058 		perror("count_verify");
1059 		return 1;
1060 	}
1061 
1062 	for (nr = 0; nr < nr_pages; nr++) {
1063 		*area_mutex(area_src, nr) = (pthread_mutex_t)
1064 			PTHREAD_MUTEX_INITIALIZER;
1065 		count_verify[nr] = *area_count(area_src, nr) = 1;
1066 		/*
 1067 		 * In the transition from 255 to 256, powerpc can read out
 1068 		 * of order in my_bcmp and see both bytes as zero, so
 1069 		 * leave a placeholder that is always non-zero right after
 1070 		 * the count, to avoid triggering false positives in
 1071 		 * my_bcmp.
1072 		 */
1073 		*(area_count(area_src, nr) + 1) = 1;
1074 	}
1075 
1076 	pipefd = malloc(sizeof(int) * nr_cpus * 2);
1077 	if (!pipefd) {
1078 		perror("pipefd");
1079 		return 1;
1080 	}
1081 	for (cpu = 0; cpu < nr_cpus; cpu++) {
1082 		if (pipe2(&pipefd[cpu*2], O_CLOEXEC | O_NONBLOCK)) {
1083 			perror("pipe");
1084 			return 1;
1085 		}
1086 	}
1087 
1088 	if (posix_memalign(&area, page_size, page_size)) {
1089 		fprintf(stderr, "out of memory\n");
1090 		return 1;
1091 	}
1092 	zeropage = area;
1093 	bzero(zeropage, page_size);
1094 
1095 	pthread_mutex_lock(&uffd_read_mutex);
1096 
1097 	pthread_attr_init(&attr);
1098 	pthread_attr_setstacksize(&attr, 16*1024*1024);
1099 
1100 	err = 0;
1101 	while (bounces--) {
1102 		unsigned long expected_ioctls;
1103 
1104 		printf("bounces: %d, mode:", bounces);
1105 		if (bounces & BOUNCE_RANDOM)
1106 			printf(" rnd");
1107 		if (bounces & BOUNCE_RACINGFAULTS)
1108 			printf(" racing");
1109 		if (bounces & BOUNCE_VERIFY)
1110 			printf(" ver");
1111 		if (bounces & BOUNCE_POLL)
1112 			printf(" poll");
1113 		printf(", ");
1114 		fflush(stdout);
1115 
1116 		if (bounces & BOUNCE_POLL)
1117 			fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
1118 		else
1119 			fcntl(uffd, F_SETFL, uffd_flags & ~O_NONBLOCK);
1120 
1121 		/* register */
1122 		uffdio_register.range.start = (unsigned long) area_dst;
1123 		uffdio_register.range.len = nr_pages * page_size;
1124 		uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
1125 		if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
1126 			fprintf(stderr, "register failure\n");
1127 			return 1;
1128 		}
1129 		expected_ioctls = uffd_test_ops->expected_ioctls;
1130 		if ((uffdio_register.ioctls & expected_ioctls) !=
1131 		    expected_ioctls) {
1132 			fprintf(stderr,
1133 				"unexpected missing ioctl for anon memory\n");
1134 			return 1;
1135 		}
1136 
1137 		if (area_dst_alias) {
1138 			uffdio_register.range.start = (unsigned long)
1139 				area_dst_alias;
1140 			if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
1141 				fprintf(stderr, "register failure alias\n");
1142 				return 1;
1143 			}
1144 		}
1145 
1146 		/*
1147 		 * The madvise done previously isn't enough: some
1148 		 * uffd_thread could have read userfaults (one of
1149 		 * those already resolved by the background thread)
1150 		 * and it may be in the process of calling
1151 		 * UFFDIO_COPY. UFFDIO_COPY will read the zapped
1152 		 * area_src and it would map a zero page in it (of
1153 		 * course such a UFFDIO_COPY is perfectly safe as it'd
1154 		 * return -EEXIST). The problem comes at the next
1155 		 * bounce though: that racing UFFDIO_COPY would
1156 		 * generate zeropages in the area_src, so invalidating
1157 		 * the previous MADV_DONTNEED. Without this additional
1158 		 * MADV_DONTNEED those zeropages leftovers in the
1159 		 * area_src would lead to -EEXIST failure during the
1160 		 * next bounce, effectively leaving a zeropage in the
1161 		 * area_dst.
1162 		 *
 1163 		 * Try commenting out this madvise to see the memory
 1164 		 * corruption being caught pretty quickly.
 1165 		 *
 1166 		 * khugepaged is only inhibited from collapsing THPs after
 1167 		 * MADV_DONTNEED once the UFFDIO_REGISTER is done, so the
 1168 		 * MADV_DONTNEED must be issued here, after registering.
1169 		 */
1170 		if (uffd_test_ops->release_pages(area_dst))
1171 			return 1;
1172 
1173 		/* bounce pass */
1174 		if (stress(userfaults))
1175 			return 1;
1176 
1177 		/* unregister */
1178 		if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range)) {
1179 			fprintf(stderr, "unregister failure\n");
1180 			return 1;
1181 		}
1182 		if (area_dst_alias) {
1183 			uffdio_register.range.start = (unsigned long) area_dst;
1184 			if (ioctl(uffd, UFFDIO_UNREGISTER,
1185 				  &uffdio_register.range)) {
1186 				fprintf(stderr, "unregister failure alias\n");
1187 				return 1;
1188 			}
1189 		}
1190 
1191 		/* verification */
1192 		if (bounces & BOUNCE_VERIFY) {
1193 			for (nr = 0; nr < nr_pages; nr++) {
1194 				if (*area_count(area_dst, nr) != count_verify[nr]) {
1195 					fprintf(stderr,
1196 						"error area_count %Lu %Lu %lu\n",
1197 						*area_count(area_src, nr),
1198 						count_verify[nr],
1199 						nr);
1200 					err = 1;
1201 					bounces = 0;
1202 				}
1203 			}
1204 		}
1205 
1206 		/* prepare next bounce */
1207 		tmp_area = area_src;
1208 		area_src = area_dst;
1209 		area_dst = tmp_area;
1210 
1211 		tmp_area = area_src_alias;
1212 		area_src_alias = area_dst_alias;
1213 		area_dst_alias = tmp_area;
1214 
1215 		printf("userfaults:");
1216 		for (cpu = 0; cpu < nr_cpus; cpu++)
1217 			printf(" %lu", userfaults[cpu]);
1218 		printf("\n");
1219 	}
1220 
1221 	if (err)
1222 		return err;
1223 
1224 	close(uffd);
1225 	return userfaultfd_zeropage_test() || userfaultfd_sig_test()
1226 		|| userfaultfd_events_test();
1227 }
1228 
1229 /*
1230  * Copied from mlock2-tests.c
1231  */
 1232 unsigned long default_huge_page_size(void)
1233 {
1234 	unsigned long hps = 0;
1235 	char *line = NULL;
1236 	size_t linelen = 0;
1237 	FILE *f = fopen("/proc/meminfo", "r");
1238 
1239 	if (!f)
1240 		return 0;
1241 	while (getline(&line, &linelen, f) > 0) {
1242 		if (sscanf(line, "Hugepagesize:       %lu kB", &hps) == 1) {
1243 			hps <<= 10;
1244 			break;
1245 		}
1246 	}
1247 
1248 	free(line);
1249 	fclose(f);
1250 	return hps;
1251 }
1252 
 1253 static void set_test_type(const char *type)
1254 {
1255 	if (!strcmp(type, "anon")) {
1256 		test_type = TEST_ANON;
1257 		uffd_test_ops = &anon_uffd_test_ops;
1258 	} else if (!strcmp(type, "hugetlb")) {
1259 		test_type = TEST_HUGETLB;
1260 		uffd_test_ops = &hugetlb_uffd_test_ops;
1261 	} else if (!strcmp(type, "hugetlb_shared")) {
1262 		map_shared = true;
1263 		test_type = TEST_HUGETLB;
1264 		uffd_test_ops = &hugetlb_uffd_test_ops;
1265 	} else if (!strcmp(type, "shmem")) {
1266 		map_shared = true;
1267 		test_type = TEST_SHMEM;
1268 		uffd_test_ops = &shmem_uffd_test_ops;
1269 	} else {
1270 		fprintf(stderr, "Unknown test type: %s\n", type), exit(1);
1271 	}
1272 
1273 	if (test_type == TEST_HUGETLB)
1274 		page_size = default_huge_page_size();
1275 	else
1276 		page_size = sysconf(_SC_PAGE_SIZE);
1277 
1278 	if (!page_size)
1279 		fprintf(stderr, "Unable to determine page size\n"),
1280 				exit(2);
1281 	if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2
1282 	    > page_size)
1283 		fprintf(stderr, "Impossible to run this test\n"), exit(2);
1284 }
1285 
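/*
 * Periodically re-enable the one-shot -EEXIST retry tests so they keep
 * being exercised throughout a long run (see ALARM_INTERVAL_SECS).
 */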
 1286 static void sigalrm(int sig)
1287 {
1288 	if (sig != SIGALRM)
1289 		abort();
1290 	test_uffdio_copy_eexist = true;
1291 	test_uffdio_zeropage_eexist = true;
1292 	alarm(ALARM_INTERVAL_SECS);
1293 }
1294 
 1295 int main(int argc, char **argv)
1296 {
1297 	if (argc < 4)
1298 		usage();
1299 
1300 	if (signal(SIGALRM, sigalrm) == SIG_ERR)
1301 		fprintf(stderr, "failed to arm SIGALRM"), exit(1);
1302 	alarm(ALARM_INTERVAL_SECS);
1303 
1304 	set_test_type(argv[1]);
1305 
1306 	nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
1307 	nr_pages_per_cpu = atol(argv[2]) * 1024*1024 / page_size /
1308 		nr_cpus;
1309 	if (!nr_pages_per_cpu) {
1310 		fprintf(stderr, "invalid MiB\n");
1311 		usage();
1312 	}
1313 
1314 	bounces = atoi(argv[3]);
1315 	if (bounces <= 0) {
1316 		fprintf(stderr, "invalid bounces\n");
1317 		usage();
1318 	}
1319 	nr_pages = nr_pages_per_cpu * nr_cpus;
1320 
1321 	if (test_type == TEST_HUGETLB) {
1322 		if (argc < 5)
1323 			usage();
1324 		huge_fd = open(argv[4], O_CREAT | O_RDWR, 0755);
1325 		if (huge_fd < 0) {
 1326 			fprintf(stderr, "Open of %s failed", argv[4]);
1327 			perror("open");
1328 			exit(1);
1329 		}
1330 		if (ftruncate(huge_fd, 0)) {
 1331 			fprintf(stderr, "ftruncate %s to size 0 failed", argv[4]);
1332 			perror("ftruncate");
1333 			exit(1);
1334 		}
1335 	}
1336 	printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n",
1337 	       nr_pages, nr_pages_per_cpu);
1338 	return userfaultfd_stress();
1339 }
1340 
1341 #else /* __NR_userfaultfd */
1342 
1343 #warning "missing __NR_userfaultfd definition"
1344 
 1345 int main(void)
1346 {
1347 	printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n");
1348 	return KSFT_SKIP;
1349 }
1350 
1351 #endif /* __NR_userfaultfd */
1352