/* SPDX-License-Identifier: MIT */
/*
 * io_uring_register.c
 *
 * Description: Unit tests for the io_uring_register system call.
 *
 * Copyright 2019, Red Hat, Inc.
 * Author: Jeff Moyer <jmoyer@redhat.com>
 */
#include <stdio.h>
#include <fcntl.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <sys/sysinfo.h>
#include <poll.h>
#include <assert.h>
#include <sys/uio.h>
#include <sys/mman.h>
#include <linux/mman.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <limits.h>

#include "helpers.h"
#include "liburing.h"
#include "../src/syscall.h"

static int pagesize;
static rlim_t mlock_limit;
static int devnull;

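/*
 * Call io_uring_register() and expect it to fail with the given error
 * (or error2, when that is nonzero).  If the call unexpectedly succeeds,
 * undo any buffer/file registration so later tests start from a clean
 * state.
 */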
static int expect_fail(int fd, unsigned int opcode, void *arg,
		       unsigned int nr_args, int error, int error2)
{
	int ret;

	ret = io_uring_register(fd, opcode, arg, nr_args);
	if (ret >= 0) {
		int ret2 = 0;

		fprintf(stderr, "expected %s, but call succeeded\n", strerror(-error));
		if (opcode == IORING_REGISTER_BUFFERS) {
			ret2 = io_uring_register(fd, IORING_UNREGISTER_BUFFERS,
						 0, 0);
		} else if (opcode == IORING_REGISTER_FILES) {
			ret2 = io_uring_register(fd, IORING_UNREGISTER_FILES, 0,
						 0);
		}
		if (ret2) {
			fprintf(stderr, "internal error: failed to unregister\n");
			exit(1);
		}
		return 1;
	}

	if (ret != error && (!error2 || ret != error2)) {
		fprintf(stderr, "expected %d/%d, got %d\n", error, error2, ret);
		return 1;
	}
	return 0;
}

static int new_io_uring(int entries, struct io_uring_params *p)
{
	int fd;

	fd = io_uring_setup(entries, p);
	if (fd < 0) {
		perror("io_uring_setup");
		exit(1);
	}
	return fd;
}

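/*
 * Size in bytes of an fd array large enough to hold the most descriptors
 * a single register call can name (nr_args is an unsigned int).
 */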
#define MAXFDS (UINT_MAX * sizeof(int))

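/*
 * Return a shared mapping backed by an unlinked temporary file of the
 * given size, or NULL on failure.  Used to verify that file-backed
 * memory cannot be registered as an I/O buffer.
 */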
static void *map_filebacked(size_t size)
{
	int fd, ret;
	void *addr;
	char template[32] = "io_uring_register-test-XXXXXXXX";

	fd = mkstemp(template);
	if (fd < 0) {
		perror("mkstemp");
		return NULL;
	}
	unlink(template);

	ret = ftruncate(fd, size);
	if (ret < 0) {
		perror("ftruncate");
		close(fd);
		return NULL;
	}

	addr = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
	if (addr == MAP_FAILED) {
		perror("mmap");
		close(fd);
		return NULL;
	}

	close(fd);
	return addr;
}

/*
 * NOTE: this is now limited by SCM_MAX_FD (253).  Keep the code for now,
 * but it should probably be augmented to test 253 and 254 specifically.
 */
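/*
 * (SCM_MAX_FD is the kernel's per-SCM_RIGHTS-message fd limit, which is
 * presumably where the 253 above comes from.)
 */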
static int test_max_fds(int uring_fd)
{
	int status = 1;
	int ret;
	void *fd_as; /* file descriptor address space */
	int fdtable_fd; /* fd for the file that will be mapped over and over */
	int io_fd; /* the valid fd for I/O -- /dev/null */
	int *fds; /* used to map the file into the address space */
	char template[32] = "io_uring_register-test-XXXXXXXX";
	unsigned long long i, nr_maps, nr_fds;

	/*
	 * First, mmap the full size anonymously.  That guarantees the
	 * mapping will fit in the memory area selected by mmap.  Then,
	 * overwrite that mapping with a file-backed mapping, 128MiB at
	 * a time, using MAP_FIXED.
	 */
	fd_as = mmap(NULL, MAXFDS, PROT_READ|PROT_WRITE,
		     MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	if (fd_as == MAP_FAILED) {
		if (errno == ENOMEM)
			return 0;
		perror("mmap fd_as");
		exit(1);
	}

	fdtable_fd = mkstemp(template);
	if (fdtable_fd < 0) {
		perror("mkstemp");
		exit(1);
	}
	unlink(template);
	ret = ftruncate(fdtable_fd, 128*1024*1024);
	if (ret < 0) {
		perror("ftruncate");
		exit(1);
	}

	io_fd = open("/dev/null", O_RDWR);
	if (io_fd < 0) {
		perror("open /dev/null");
		exit(1);
	}
	fds = mmap(fd_as, 128*1024*1024, PROT_READ|PROT_WRITE,
		   MAP_SHARED|MAP_FIXED, fdtable_fd, 0);
	if (fds == MAP_FAILED) {
		perror("mmap fdtable");
		exit(1);
	}

	/* fill the fd table */
	nr_fds = 128*1024*1024 / sizeof(int);
	for (i = 0; i < nr_fds; i++)
		fds[i] = io_fd;

	/* map the file through the rest of the address space */
	nr_maps = MAXFDS / (128*1024*1024);
	for (i = 0; i < nr_maps; i++) {
		fds = &fds[nr_fds]; /* advance fds by 128MiB */
		fds = mmap(fds, 128*1024*1024, PROT_READ|PROT_WRITE,
			   MAP_SHARED|MAP_FIXED, fdtable_fd, 0);
		if (fds == MAP_FAILED) {
			fprintf(stderr, "mmap failed at offset %llu\n",
				(i + 1) * (128ULL * 1024 * 1024));
			exit(1);
		}
	}

	/* Now fd_as points to the file descriptor array. */
	/*
	 * We may not be able to map all of these files.  Let's back off
	 * until success.
	 */
	nr_fds = UINT_MAX;
	while (nr_fds) {
		ret = io_uring_register(uring_fd, IORING_REGISTER_FILES, fd_as,
					nr_fds);
		if (ret != 0) {
			nr_fds /= 2;
			continue;
		}
		status = 0;
		ret = io_uring_register(uring_fd, IORING_UNREGISTER_FILES, 0, 0);
		if (ret < 0) {
			errno = -ret;
			perror("io_uring_register UNREGISTER_FILES");
			exit(1);
		}
		break;
	}

	close(io_fd);
	close(fdtable_fd);
	ret = munmap(fd_as, MAXFDS);
	if (ret != 0) {
		fprintf(stderr, "munmap(%zu) failed\n", MAXFDS);
		exit(1);
	}

	return status;
}

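/*
 * Attempt to register a buffer larger than RLIMIT_MEMLOCK, halving the
 * size on ENOMEM until registration succeeds or fails in some other way.
 */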
static int test_memlock_exceeded(int fd)
{
	int ret;
	void *buf;
	struct iovec iov;

	/* if the limit is larger than 2GB, just skip this test */
	if (mlock_limit >= 2 * 1024 * 1024 * 1024ULL)
		return 0;

	iov.iov_len = mlock_limit * 2;
	buf = t_malloc(iov.iov_len);
	iov.iov_base = buf;

	while (iov.iov_len) {
		ret = io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1);
		if (ret == -ENOMEM) {
			iov.iov_len /= 2;
			continue;
		} else if (ret == -EFAULT) {
			free(buf);
			return 0;
		} else if (ret) {
			fprintf(stderr, "expected success or EFAULT, got %d\n", ret);
			free(buf);
			return 1;
		}
		ret = io_uring_register(fd, IORING_UNREGISTER_BUFFERS, NULL, 0);
		if (ret != 0) {
			fprintf(stderr, "error: unregister failed with %d\n", ret);
			free(buf);
			return 1;
		}
		break;
	}
	if (!iov.iov_len)
		printf("Unable to register buffers.  Check memlock rlimit.\n");

	free(buf);
	return 0;
}

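/*
 * Registering more iovecs than UIO_MAXIOV should fail with EINVAL;
 * registering exactly UIO_MAXIOV of them should succeed (memlock
 * limits permitting).
 */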
static int test_iovec_nr(int fd)
{
	int i, ret, status = 0;
	unsigned int nr = 1000000;
	struct iovec *iovs;
	void *buf;

	iovs = malloc(nr * sizeof(struct iovec));
	if (!iovs) {
		fprintf(stdout, "can't allocate iovecs, skip\n");
		return 0;
	}
	buf = t_malloc(pagesize);

	for (i = 0; i < nr; i++) {
		iovs[i].iov_base = buf;
		iovs[i].iov_len = pagesize;
	}

	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, iovs, nr, -EINVAL, 0);

	/* reduce to UIO_MAXIOV */
	nr = UIO_MAXIOV;
	ret = io_uring_register(fd, IORING_REGISTER_BUFFERS, iovs, nr);
	if ((ret == -ENOMEM || ret == -EPERM) && geteuid()) {
		fprintf(stderr, "can't register large iovec for regular users, skip\n");
	} else if (ret != 0) {
		fprintf(stderr, "expected success, got %d\n", ret);
		status = 1;
	} else {
		io_uring_register(fd, IORING_UNREGISTER_BUFFERS, 0, 0);
	}
	free(buf);
	free(iovs);
	return status;
}

/*
 * io_uring limit is 1GiB per buffer.  iov_len limit is ~0UL, I think.
 */
static int test_iovec_size(int fd)
{
	unsigned int status = 0;
	int ret;
	struct iovec iov;
	void *buf;

	/* NULL pointer for base */
	iov.iov_base = 0;
	iov.iov_len = 4096;
	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, 0);

	/* valid base, 0 length */
	iov.iov_base = &buf;
	iov.iov_len = 0;
	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, 0);

	/* valid base, length exceeds size */
	/* this requires an unmapped page directly after buf */
	buf = mmap(NULL, 2 * pagesize, PROT_READ|PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	assert(buf != MAP_FAILED);
	ret = munmap(buf + pagesize, pagesize);
	assert(ret == 0);
	iov.iov_base = buf;
	iov.iov_len = 2 * pagesize;
	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, 0);
	munmap(buf, pagesize);

	/* huge page */
	buf = mmap(NULL, 2*1024*1024, PROT_READ|PROT_WRITE,
		   MAP_PRIVATE | MAP_HUGETLB | MAP_HUGE_2MB | MAP_ANONYMOUS,
		   -1, 0);
	if (buf == MAP_FAILED) {
		printf("Unable to map a huge page.  Try increasing "
		       "/proc/sys/vm/nr_hugepages by at least 1.\n");
		printf("Skipping the hugepage test\n");
	} else {
		/*
		 * This should succeed, so long as RLIMIT_MEMLOCK is
		 * not exceeded
		 */
		iov.iov_base = buf;
		iov.iov_len = 2*1024*1024;
		ret = io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1);
		if (ret < 0) {
			if (ret == -ENOMEM)
				printf("Unable to test registering of a huge "
				       "page.  Try increasing the "
				       "RLIMIT_MEMLOCK resource limit by at "
				       "least 2MB.\n");
			else {
				fprintf(stderr, "expected success, got %d\n", ret);
				status = 1;
			}
		} else {
			ret = io_uring_register(fd, IORING_UNREGISTER_BUFFERS,
						0, 0);
			if (ret < 0) {
				fprintf(stderr, "io_uring_unregister: %s\n",
					strerror(-ret));
				status = 1;
			}
		}
	}
	ret = munmap(iov.iov_base, iov.iov_len);
	assert(ret == 0);

	/* file-backed buffers -- not supported */
	buf = map_filebacked(2*1024*1024);
	if (!buf)
		status = 1;
	iov.iov_base = buf;
	iov.iov_len = 2*1024*1024;
	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, -EOPNOTSUPP);
	munmap(buf, 2*1024*1024);

	/*
	 * Bump up against the soft limit and make sure we get EFAULT
	 * or whatever we're supposed to get.  NOTE: this requires
	 * running the test as non-root.
	 */
	if (getuid() != 0)
		status |= test_memlock_exceeded(fd);

	return status;
}

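/*
 * Submit a single POLL_ADD for fd (optionally as a fixed file) and check
 * that the completion carries POLLOUT.
 */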
static int ioring_poll(struct io_uring *ring, int fd, int fixed)
{
	int ret;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;

	sqe = io_uring_get_sqe(ring);
	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode = IORING_OP_POLL_ADD;
	if (fixed)
		sqe->flags = IOSQE_FIXED_FILE;
	sqe->fd = fd;
	sqe->poll_events = POLLIN|POLLOUT;

	ret = io_uring_submit(ring);
	if (ret != 1) {
		fprintf(stderr, "failed to submit poll sqe: %d.\n", ret);
		return 1;
	}

	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret < 0) {
		fprintf(stderr, "io_uring_wait_cqe failed with %d\n", ret);
		return 1;
	}
	ret = 0;
	if (!(cqe->res & POLLOUT)) {
		fprintf(stderr, "io_uring_wait_cqe: expected 0x%.8x, got 0x%.8x\n",
		       POLLOUT, cqe->res);
		ret = 1;
	}

	io_uring_cqe_seen(ring, cqe);
	return ret;
}

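/*
 * Polling the ring fd itself should work; registering it as a fixed
 * file should not.
 */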
static int test_poll_ringfd(void)
{
	int status = 0;
	int ret;
	int fd;
	struct io_uring ring;

	ret = io_uring_queue_init(1, &ring, 0);
	if (ret) {
		perror("io_uring_queue_init");
		return 1;
	}
	fd = ring.ring_fd;

	/* try polling the ring fd */
	status = ioring_poll(&ring, fd, 0);

	/*
	 * now try to register the ring fd.  This should fail, because
	 * the kernel does not allow registering of the ring fd.
	 */
	status |= expect_fail(fd, IORING_REGISTER_FILES, &fd, 1, -EBADF, 0);

	/* tear down queue */
	io_uring_queue_exit(&ring);

	return status;
}

int main(int argc, char **argv)
{
	int fd, ret;
	unsigned int status = 0;
	struct io_uring_params p;
	struct rlimit rlim;

	if (argc > 1)
		return T_EXIT_SKIP;

	/* setup globals */
	pagesize = getpagesize();
	ret = getrlimit(RLIMIT_MEMLOCK, &rlim);
	if (ret < 0) {
		perror("getrlimit");
		return T_EXIT_PASS;
	}
	mlock_limit = rlim.rlim_cur;
	devnull = open("/dev/null", O_RDWR);
	if (devnull < 0) {
		perror("open /dev/null");
		exit(T_EXIT_FAIL);
	}

	/* invalid fd */
	status |= expect_fail(-1, 0, NULL, 0, -EBADF, 0);
	/* valid fd that is not an io_uring fd */
	status |= expect_fail(devnull, 0, NULL, 0, -EOPNOTSUPP, 0);

	/* invalid opcode */
	memset(&p, 0, sizeof(p));
	fd = new_io_uring(1, &p);
	ret = expect_fail(fd, ~0U, NULL, 0, -EINVAL, 0);
	if (ret) {
		/* if this succeeds, tear down the io_uring instance
		 * and start clean for the next test. */
		close(fd);
		fd = new_io_uring(1, &p);
	}

	/* IORING_REGISTER_BUFFERS */
	status |= test_iovec_size(fd);
	status |= test_iovec_nr(fd);
	/* IORING_REGISTER_FILES */
	status |= test_max_fds(fd);
	close(fd);
	/* uring poll on the uring fd */
	status |= test_poll_ringfd();

	if (status)
		fprintf(stderr, "FAIL\n");

	return status;
}