1 /* SPDX-License-Identifier: MIT */
2 /*
3 * io_uring_register.c
4 *
5 * Description: Unit tests for the io_uring_register system call.
6 *
7 * Copyright 2019, Red Hat, Inc.
8 * Author: Jeff Moyer <jmoyer@redhat.com>
9 */
10 #include <stdio.h>
11 #include <fcntl.h>
12 #include <string.h>
13 #include <stdlib.h>
14 #include <unistd.h>
15 #include <errno.h>
16 #include <sys/sysinfo.h>
17 #include <poll.h>
18 #include <assert.h>
19 #include <sys/uio.h>
20 #include <sys/mman.h>
21 #include <linux/mman.h>
22 #include <sys/time.h>
23 #include <sys/resource.h>
24 #include <limits.h>
25
26 #include "helpers.h"
27 #include "liburing.h"
28 #include "../src/syscall.h"
29
static int pagesize;		/* system page size, set in main() via getpagesize() */
static rlim_t mlock_limit;	/* RLIMIT_MEMLOCK soft limit, set in main() */
static int devnull;		/* fd for /dev/null, opened read/write in main() */
33
expect_fail(int fd,unsigned int opcode,void * arg,unsigned int nr_args,int error,int error2)34 static int expect_fail(int fd, unsigned int opcode, void *arg,
35 unsigned int nr_args, int error, int error2)
36 {
37 int ret;
38
39 ret = io_uring_register(fd, opcode, arg, nr_args);
40 if (ret >= 0) {
41 int ret2 = 0;
42
43 fprintf(stderr, "expected %s, but call succeeded\n", strerror(error));
44 if (opcode == IORING_REGISTER_BUFFERS) {
45 ret2 = io_uring_register(fd, IORING_UNREGISTER_BUFFERS,
46 0, 0);
47 } else if (opcode == IORING_REGISTER_FILES) {
48 ret2 = io_uring_register(fd, IORING_UNREGISTER_FILES, 0,
49 0);
50 }
51 if (ret2) {
52 fprintf(stderr, "internal error: failed to unregister\n");
53 exit(1);
54 }
55 return 1;
56 }
57
58 if (ret != error && (error2 && ret != error2)) {
59 fprintf(stderr, "expected %d/%d, got %d\n", error, error2, ret);
60 return 1;
61 }
62 return 0;
63 }
64
/*
 * Create an io_uring instance with the given entry count and params.
 * Any setup failure aborts the whole test run; on success the ring fd
 * is returned.
 */
static int new_io_uring(int entries, struct io_uring_params *p)
{
	int ring_fd = io_uring_setup(entries, p);

	if (ring_fd < 0) {
		perror("io_uring_setup");
		exit(1);
	}
	return ring_fd;
}
76
77 #define MAXFDS (UINT_MAX * sizeof(int))
78
/*
 * Create an anonymous temporary file of 'size' bytes and map it shared
 * and read/write.  The backing file is unlinked and its descriptor
 * closed before returning; the mapping keeps the file alive.
 *
 * Returns the mapping address, or NULL on any failure.
 */
static void *map_filebacked(size_t size)
{
	char path[32] = "io_uring_register-test-XXXXXXXX";
	void *mem = NULL;
	int fd;

	fd = mkstemp(path);
	if (fd < 0) {
		perror("mkstemp");
		return NULL;
	}
	unlink(path);

	if (ftruncate(fd, size) < 0) {
		perror("ftruncate");
	} else {
		mem = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
		if (mem == MAP_FAILED) {
			perror("mmap");
			mem = NULL;
		}
	}

	close(fd);
	return mem;
}
109
110 /*
111 * NOTE: this is now limited by SCM_MAX_FD (253). Keep the code for now,
112 * but probably should augment it to test 253 and 254, specifically.
113 */
/*
 * Try to register a huge file descriptor table (up to UINT_MAX fds,
 * all referring to /dev/null), halving the count until the kernel
 * accepts it, then unregister again.
 *
 * Returns 0 on success, or when the initial address-space reservation
 * fails with ENOMEM (the test simply cannot run); returns 1 if no fd
 * count could be registered at all.  Internal errors exit(1).
 */
static int test_max_fds(int uring_fd)
{
	int status = 1;
	int ret;
	void *fd_as; /* file descriptor address space */
	int fdtable_fd; /* fd for the file that will be mapped over and over */
	int io_fd; /* the valid fd for I/O -- /dev/null */
	int *fds; /* used to map the file into the address space */
	char template[32] = "io_uring_register-test-XXXXXXXX";
	unsigned long long i, nr_maps, nr_fds;

	/*
	 * First, mmap anonymous the full size. That will guarantee the
	 * mapping will fit in the memory area selected by mmap. Then,
	 * over-write that mapping using a file-backed mapping, 128MiB at
	 * a time using MAP_FIXED.
	 */
	fd_as = mmap(NULL, UINT_MAX * sizeof(int), PROT_READ|PROT_WRITE,
		     MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
	if (fd_as == MAP_FAILED) {
		/* not a failure of the code under test -- just skip */
		if (errno == ENOMEM)
			return 0;
		perror("mmap fd_as");
		exit(1);
	}

	fdtable_fd = mkstemp(template);
	if (fdtable_fd < 0) {
		perror("mkstemp");
		exit(1);
	}
	unlink(template);
	ret = ftruncate(fdtable_fd, 128*1024*1024);
	if (ret < 0) {
		perror("ftruncate");
		exit(1);
	}

	io_fd = open("/dev/null", O_RDWR);
	if (io_fd < 0) {
		perror("open /dev/null");
		exit(1);
	}
	/* first 128MiB window of the reservation, backed by the temp file */
	fds = mmap(fd_as, 128*1024*1024, PROT_READ|PROT_WRITE,
		   MAP_SHARED|MAP_FIXED, fdtable_fd, 0);
	if (fds == MAP_FAILED) {
		perror("mmap fdtable");
		exit(1);
	}

	/* fill the fd table */
	nr_fds = 128*1024*1024 / sizeof(int);
	for (i = 0; i < nr_fds; i++)
		fds[i] = io_fd;

	/*
	 * map the file through the rest of the address space; every window
	 * shares the same backing file, so the whole range reads as one
	 * big table of valid fds
	 */
	nr_maps = (UINT_MAX * sizeof(int)) / (128*1024*1024);
	for (i = 0; i < nr_maps; i++) {
		fds = &fds[nr_fds]; /* advance fds by 128MiB */
		fds = mmap(fds, 128*1024*1024, PROT_READ|PROT_WRITE,
			   MAP_SHARED|MAP_FIXED, fdtable_fd, 0);
		if (fds == MAP_FAILED) {
			/* NOTE(review): fds == MAP_FAILED here, so this
			 * offset arithmetic is bogus; diagnostic only */
			fprintf(stderr, "mmap failed at offset %lu\n",
				(unsigned long)((char *)fd_as - (char *)fds));
			exit(1);
		}
	}

	/* Now fd_as points to the file descriptor array. */
	/*
	 * We may not be able to map all of these files. Let's back off
	 * until success.
	 */
	nr_fds = UINT_MAX;
	while (nr_fds) {
		ret = io_uring_register(uring_fd, IORING_REGISTER_FILES, fd_as,
					nr_fds);
		if (ret != 0) {
			nr_fds /= 2; /* too many -- halve and retry */
			continue;
		}
		status = 0; /* at least one registration size worked */
		ret = io_uring_register(uring_fd, IORING_UNREGISTER_FILES, 0, 0);
		if (ret < 0) {
			errno = -ret;
			perror("io_uring_register UNREGISTER_FILES");
			exit(1);
		}
		break;
	}

	close(io_fd);
	close(fdtable_fd);
	ret = munmap(fd_as, UINT_MAX * sizeof(int));
	if (ret != 0) {
		fprintf(stderr, "munmap(%zu) failed\n", UINT_MAX * sizeof(int));
		exit(1);
	}

	return status;
}
215
test_memlock_exceeded(int fd)216 static int test_memlock_exceeded(int fd)
217 {
218 int ret;
219 void *buf;
220 struct iovec iov;
221
222 /* if limit is larger than 2gb, just skip this test */
223 if (mlock_limit >= 2 * 1024 * 1024 * 1024ULL)
224 return 0;
225
226 iov.iov_len = mlock_limit * 2;
227 buf = t_malloc(iov.iov_len);
228 iov.iov_base = buf;
229
230 while (iov.iov_len) {
231 ret = io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1);
232 if (ret == -ENOMEM) {
233 iov.iov_len /= 2;
234 continue;
235 } else if (ret == -EFAULT) {
236 free(buf);
237 return 0;
238 } else if (ret) {
239 fprintf(stderr, "expected success or EFAULT, got %d\n", ret);
240 free(buf);
241 return 1;
242 }
243 ret = io_uring_register(fd, IORING_UNREGISTER_BUFFERS, NULL, 0);
244 if (ret != 0) {
245 fprintf(stderr, "error: unregister failed with %d\n", ret);
246 free(buf);
247 return 1;
248 }
249 break;
250 }
251 if (!iov.iov_len)
252 printf("Unable to register buffers. Check memlock rlimit.\n");
253
254 free(buf);
255 return 0;
256 }
257
test_iovec_nr(int fd)258 static int test_iovec_nr(int fd)
259 {
260 int i, ret, status = 0;
261 unsigned int nr = 1000000;
262 struct iovec *iovs;
263 void *buf;
264
265 iovs = malloc(nr * sizeof(struct iovec));
266 if (!iovs) {
267 fprintf(stdout, "can't allocate iovecs, skip\n");
268 return 0;
269 }
270 buf = t_malloc(pagesize);
271
272 for (i = 0; i < nr; i++) {
273 iovs[i].iov_base = buf;
274 iovs[i].iov_len = pagesize;
275 }
276
277 status |= expect_fail(fd, IORING_REGISTER_BUFFERS, iovs, nr, -EINVAL, 0);
278
279 /* reduce to UIO_MAXIOV */
280 nr = UIO_MAXIOV;
281 ret = io_uring_register(fd, IORING_REGISTER_BUFFERS, iovs, nr);
282 if ((ret == -ENOMEM || ret == -EPERM) && geteuid()) {
283 fprintf(stderr, "can't register large iovec for regular users, skip\n");
284 } else if (ret != 0) {
285 fprintf(stderr, "expected success, got %d\n", ret);
286 status = 1;
287 } else {
288 io_uring_register(fd, IORING_UNREGISTER_BUFFERS, 0, 0);
289 }
290 free(buf);
291 free(iovs);
292 return status;
293 }
294
/*
 * io_uring limit is 1G. iov_len limit is ~0UL, I think
 */
/*
 * Exercise iovec validation in IORING_REGISTER_BUFFERS: a NULL base,
 * a zero length, a length that runs into an unmapped page, a 2MB huge
 * page (which should succeed), and a file-backed mapping (which is
 * not supported).  Finally, when running as non-root, probe the
 * RLIMIT_MEMLOCK accounting via test_memlock_exceeded().
 *
 * Returns 0 on success, non-zero on any failure.
 */
static int test_iovec_size(int fd)
{
	unsigned int status = 0;
	int ret;
	struct iovec iov;
	void *buf;

	/* NULL pointer for base */
	iov.iov_base = 0;
	iov.iov_len = 4096;
	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, 0);

	/* valid base, 0 length */
	iov.iov_base = &buf;
	iov.iov_len = 0;
	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, 0);

	/* valid base, length exceeds size */
	/* this requires an unmapped page directly after buf */
	buf = mmap(NULL, 2 * pagesize, PROT_READ|PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	assert(buf != MAP_FAILED);
	ret = munmap(buf + pagesize, pagesize);
	assert(ret == 0);
	iov.iov_base = buf;
	iov.iov_len = 2 * pagesize;
	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, 0);
	munmap(buf, pagesize);

	/* huge page */
	buf = mmap(NULL, 2*1024*1024, PROT_READ|PROT_WRITE,
		   MAP_PRIVATE | MAP_HUGETLB | MAP_HUGE_2MB | MAP_ANONYMOUS,
		   -1, 0);
	if (buf == MAP_FAILED) {
		printf("Unable to map a huge page. Try increasing "
		       "/proc/sys/vm/nr_hugepages by at least 1.\n");
		printf("Skipping the hugepage test\n");
	} else {
		/*
		 * This should succeed, so long as RLIMIT_MEMLOCK is
		 * not exceeded
		 */
		iov.iov_base = buf;
		iov.iov_len = 2*1024*1024;
		ret = io_uring_register(fd, IORING_REGISTER_BUFFERS, &iov, 1);
		if (ret < 0) {
			if (ret == -ENOMEM)
				printf("Unable to test registering of a huge "
				       "page. Try increasing the "
				       "RLIMIT_MEMLOCK resource limit by at "
				       "least 2MB.");
			else {
				fprintf(stderr, "expected success, got %d\n", ret);
				status = 1;
			}
		} else {
			ret = io_uring_register(fd, IORING_UNREGISTER_BUFFERS,
						0, 0);
			if (ret < 0) {
				fprintf(stderr, "io_uring_unregister: %s\n",
					strerror(-ret));
				status = 1;
			}
		}
	}
	/*
	 * NOTE(review): if the hugepage mmap failed, iov still refers to
	 * the region torn down by the previous test; munmap() of an
	 * already-unmapped, page-aligned range returns 0 on Linux, so the
	 * assert still holds -- confirm if porting.
	 */
	ret = munmap(iov.iov_base, iov.iov_len);
	assert(ret == 0);

	/* file-backed buffers -- not supported */
	buf = map_filebacked(2*1024*1024);
	if (!buf)
		status = 1;
	iov.iov_base = buf;
	iov.iov_len = 2*1024*1024;
	status |= expect_fail(fd, IORING_REGISTER_BUFFERS, &iov, 1, -EFAULT, -EOPNOTSUPP);
	munmap(buf, 2*1024*1024);

	/* bump up against the soft limit and make sure we get EFAULT
	 * or whatever we're supposed to get. NOTE: this requires
	 * running the test as non-root. */
	if (getuid() != 0)
		status |= test_memlock_exceeded(fd);

	return status;
}
383
ioring_poll(struct io_uring * ring,int fd,int fixed)384 static int ioring_poll(struct io_uring *ring, int fd, int fixed)
385 {
386 int ret;
387 struct io_uring_sqe *sqe;
388 struct io_uring_cqe *cqe;
389
390 sqe = io_uring_get_sqe(ring);
391 memset(sqe, 0, sizeof(*sqe));
392 sqe->opcode = IORING_OP_POLL_ADD;
393 if (fixed)
394 sqe->flags = IOSQE_FIXED_FILE;
395 sqe->fd = fd;
396 sqe->poll_events = POLLIN|POLLOUT;
397
398 ret = io_uring_submit(ring);
399 if (ret != 1) {
400 fprintf(stderr, "failed to submit poll sqe: %d.\n", ret);
401 return 1;
402 }
403
404 ret = io_uring_wait_cqe(ring, &cqe);
405 if (ret < 0) {
406 fprintf(stderr, "io_uring_wait_cqe failed with %d\n", ret);
407 return 1;
408 }
409 ret = 0;
410 if (!(cqe->res & POLLOUT)) {
411 fprintf(stderr, "io_uring_wait_cqe: expected 0x%.8x, got 0x%.8x\n",
412 POLLOUT, cqe->res);
413 ret = 1;
414 }
415
416 io_uring_cqe_seen(ring, cqe);
417 return ret;
418 }
419
test_poll_ringfd(void)420 static int test_poll_ringfd(void)
421 {
422 int status = 0;
423 int ret;
424 int fd;
425 struct io_uring ring;
426
427 ret = io_uring_queue_init(1, &ring, 0);
428 if (ret) {
429 perror("io_uring_queue_init");
430 return 1;
431 }
432 fd = ring.ring_fd;
433
434 /* try polling the ring fd */
435 status = ioring_poll(&ring, fd, 0);
436
437 /*
438 * now register the ring fd, and try the poll again. This should
439 * fail, because the kernel does not allow registering of the
440 * ring_fd.
441 */
442 status |= expect_fail(fd, IORING_REGISTER_FILES, &fd, 1, -EBADF, 0);
443
444 /* tear down queue */
445 io_uring_queue_exit(&ring);
446
447 return status;
448 }
449
main(int argc,char ** argv)450 int main(int argc, char **argv)
451 {
452 int fd, ret;
453 unsigned int status = 0;
454 struct io_uring_params p;
455 struct rlimit rlim;
456
457 if (argc > 1)
458 return T_EXIT_SKIP;
459
460 /* setup globals */
461 pagesize = getpagesize();
462 ret = getrlimit(RLIMIT_MEMLOCK, &rlim);
463 if (ret < 0) {
464 perror("getrlimit");
465 return T_EXIT_PASS;
466 }
467 mlock_limit = rlim.rlim_cur;
468 devnull = open("/dev/null", O_RDWR);
469 if (devnull < 0) {
470 perror("open /dev/null");
471 exit(T_EXIT_FAIL);
472 }
473
474 /* invalid fd */
475 status |= expect_fail(-1, 0, NULL, 0, -EBADF, 0);
476 /* valid fd that is not an io_uring fd */
477 status |= expect_fail(devnull, 0, NULL, 0, -EOPNOTSUPP, 0);
478
479 /* invalid opcode */
480 memset(&p, 0, sizeof(p));
481 fd = new_io_uring(1, &p);
482 ret = expect_fail(fd, ~0U, NULL, 0, -EINVAL, 0);
483 if (ret) {
484 /* if this succeeds, tear down the io_uring instance
485 * and start clean for the next test. */
486 close(fd);
487 fd = new_io_uring(1, &p);
488 }
489
490 /* IORING_REGISTER_BUFFERS */
491 status |= test_iovec_size(fd);
492 status |= test_iovec_nr(fd);
493 /* IORING_REGISTER_FILES */
494 status |= test_max_fds(fd);
495 close(fd);
496 /* uring poll on the uring fd */
497 status |= test_poll_ringfd();
498
499 if (status)
500 fprintf(stderr, "FAIL\n");
501
502 return status;
503 }
504