1 /* SPDX-License-Identifier: MIT */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdbool.h>
#include <string.h>
#include <time.h>

#include <arpa/inet.h>
#include <linux/if_packet.h>
#include <linux/ipv6.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <netinet/ip.h>
#include <netinet/in.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/un.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <linux/mman.h>
37
38 #include "liburing.h"
39 #include "helpers.h"
40
41 #define MAX_MSG 128
42
43 #define HOST "127.0.0.1"
44 #define HOSTV6 "::1"
45
46 #define MAX_IOV 32
47 #define CORK_REQS 5
48 #define RX_TAG 10000
49 #define BUFFER_OFFSET 41
50
51 #ifndef ARRAY_SIZE
52 #define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
53 #endif
54
/* Buffer flavours exercised by the tests; used as indices into buffers_iov[]. */
enum {
	BUF_T_NORMAL,		/* one page, page-aligned */
	BUF_T_SMALL,		/* tiny 137-byte buffer */
	BUF_T_NONALIGNED,	/* deliberately misaligned start and length */
	BUF_T_LARGE,		/* 32MB buffer, large enough to force short sends */
	BUF_T_HUGETLB,		/* backed by a hugetlb mapping, if available */

	__BUF_NR,
};

/* 32MB, should be enough to trigger a short send */
#define LARGE_BUF_SIZE (1U << 25)

/* system page size, queried once in main() */
static size_t page_sz;
/* shared TX/RX payload buffers used by all tests */
static char *tx_buffer, *rx_buffer;
/* per-flavour views into tx_buffer; iov_base == NULL means "unavailable" */
static struct iovec buffers_iov[__BUF_NR];

/* kernel capabilities discovered by probe_zc_support() */
static bool has_sendzc;
static bool has_sendmsg;
/* set once -ENOMEM is hit (locked-memory limit); parts are then skipped */
static bool hit_enomem;
75
probe_zc_support(void)76 static int probe_zc_support(void)
77 {
78 struct io_uring ring;
79 struct io_uring_probe *p;
80 int ret;
81
82 has_sendzc = has_sendmsg = false;
83
84 ret = io_uring_queue_init(1, &ring, 0);
85 if (ret)
86 return -1;
87
88 p = t_calloc(1, sizeof(*p) + 256 * sizeof(struct io_uring_probe_op));
89 if (!p)
90 return -1;
91
92 ret = io_uring_register_probe(&ring, p, 256);
93 if (ret)
94 return -1;
95
96 has_sendzc = p->ops_len > IORING_OP_SEND_ZC;
97 has_sendmsg = p->ops_len > IORING_OP_SENDMSG_ZC;
98 io_uring_queue_exit(&ring);
99 free(p);
100 return 0;
101 }
102
check_cq_empty(struct io_uring * ring)103 static bool check_cq_empty(struct io_uring *ring)
104 {
105 struct io_uring_cqe *cqe = NULL;
106 int ret;
107
108 ret = io_uring_peek_cqe(ring, &cqe); /* nothing should be there */
109 return ret == -EAGAIN;
110 }
111
test_basic_send(struct io_uring * ring,int sock_tx,int sock_rx)112 static int test_basic_send(struct io_uring *ring, int sock_tx, int sock_rx)
113 {
114 struct io_uring_sqe *sqe;
115 struct io_uring_cqe *cqe;
116 int msg_flags = 0;
117 unsigned zc_flags = 0;
118 int payload_size = 100;
119 int ret;
120
121 sqe = io_uring_get_sqe(ring);
122 io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, payload_size,
123 msg_flags, zc_flags);
124 sqe->user_data = 1;
125
126 ret = io_uring_submit(ring);
127 assert(ret == 1);
128
129 ret = io_uring_wait_cqe(ring, &cqe);
130 assert(!ret && cqe->user_data == 1);
131 if (cqe->res != payload_size) {
132 fprintf(stderr, "send failed %i\n", cqe->res);
133 return T_EXIT_FAIL;
134 }
135
136 assert(cqe->flags & IORING_CQE_F_MORE);
137 io_uring_cqe_seen(ring, cqe);
138
139 ret = io_uring_wait_cqe(ring, &cqe);
140 assert(!ret);
141 assert(cqe->user_data == 1);
142 assert(cqe->flags & IORING_CQE_F_NOTIF);
143 assert(!(cqe->flags & IORING_CQE_F_MORE));
144 io_uring_cqe_seen(ring, cqe);
145 assert(check_cq_empty(ring));
146
147 ret = recv(sock_rx, rx_buffer, payload_size, MSG_TRUNC);
148 assert(ret == payload_size);
149 return T_EXIT_PASS;
150 }
151
/*
 * Reap completions for a single failed zerocopy send (submitted with
 * user_data == 1) and verify the failure semantics: the main completion
 * must carry @expected as its result and may be followed by at most one
 * notification CQE; the notification must never arrive first.
 * Returns 0 on success, -1 on any mismatch.
 */
static int test_send_faults_check(struct io_uring *ring, int expected)
{
	struct io_uring_cqe *cqe;
	int ret, nr_cqes = 0;
	bool more = true;

	while (more) {
		nr_cqes++;
		ret = io_uring_wait_cqe(ring, &cqe);
		assert(!ret);
		assert(cqe->user_data == 1);

		/* the notification is only valid after the send completion */
		if (nr_cqes == 1 && (cqe->flags & IORING_CQE_F_NOTIF)) {
			fprintf(stderr, "test_send_faults_check notif came first\n");
			return -1;
		}

		if (!(cqe->flags & IORING_CQE_F_NOTIF)) {
			/* main completion: must report the expected error */
			if (cqe->res != expected) {
				fprintf(stderr, "invalid cqe res %i vs expected %i, "
					"user_data %i\n",
					cqe->res, expected, (int)cqe->user_data);
				return -1;
			}
		} else {
			/* notification: zero result, F_NOTIF must be the only flag */
			if (cqe->res != 0 || cqe->flags != IORING_CQE_F_NOTIF) {
				fprintf(stderr, "invalid notif cqe %i %i\n",
					cqe->res, cqe->flags);
				return -1;
			}
		}

		/* F_MORE means another CQE for this request is coming */
		more = cqe->flags & IORING_CQE_F_MORE;
		io_uring_cqe_seen(ring, cqe);
	}

	/* at most one completion plus one notification */
	if (nr_cqes > 2) {
		fprintf(stderr, "test_send_faults_check() too many CQEs %i\n",
			nr_cqes);
		return -1;
	}
	assert(check_cq_empty(ring));
	return 0;
}
196
test_send_faults(int sock_tx,int sock_rx)197 static int test_send_faults(int sock_tx, int sock_rx)
198 {
199 struct io_uring_sqe *sqe;
200 int msg_flags = 0;
201 unsigned zc_flags = 0;
202 int ret, payload_size = 100;
203 struct io_uring ring;
204
205 ret = io_uring_queue_init(32, &ring, 0);
206 if (ret) {
207 fprintf(stderr, "queue init failed: %d\n", ret);
208 return -1;
209 }
210
211 /* invalid buffer */
212 sqe = io_uring_get_sqe(&ring);
213 io_uring_prep_send_zc(sqe, sock_tx, (void *)1UL, payload_size,
214 msg_flags, zc_flags);
215 sqe->user_data = 1;
216 ret = io_uring_submit(&ring);
217 assert(ret == 1);
218
219 ret = test_send_faults_check(&ring, -EFAULT);
220 if (ret) {
221 fprintf(stderr, "test_send_faults with invalid buf failed\n");
222 return -1;
223 }
224
225 /* invalid address */
226 sqe = io_uring_get_sqe(&ring);
227 io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, payload_size,
228 msg_flags, zc_flags);
229 io_uring_prep_send_set_addr(sqe, (const struct sockaddr *)1UL,
230 sizeof(struct sockaddr_in6));
231 sqe->user_data = 1;
232 ret = io_uring_submit(&ring);
233 assert(ret == 1);
234
235 ret = test_send_faults_check(&ring, -EFAULT);
236 if (ret) {
237 fprintf(stderr, "test_send_faults with invalid addr failed\n");
238 return -1;
239 }
240
241 /* invalid send/recv flags */
242 sqe = io_uring_get_sqe(&ring);
243 io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, payload_size,
244 msg_flags, ~0U);
245 sqe->user_data = 1;
246 ret = io_uring_submit(&ring);
247 assert(ret == 1);
248
249 ret = test_send_faults_check(&ring, -EINVAL);
250 if (ret) {
251 fprintf(stderr, "test_send_faults with invalid flags failed\n");
252 return -1;
253 }
254
255 return T_EXIT_PASS;
256 }
257
create_socketpair_ip(struct sockaddr_storage * addr,int * sock_client,int * sock_server,bool ipv6,bool client_connect,bool msg_zc,bool tcp)258 static int create_socketpair_ip(struct sockaddr_storage *addr,
259 int *sock_client, int *sock_server,
260 bool ipv6, bool client_connect,
261 bool msg_zc, bool tcp)
262 {
263 socklen_t addr_size;
264 int family, sock, listen_sock = -1;
265 int ret;
266
267 memset(addr, 0, sizeof(*addr));
268 if (ipv6) {
269 struct sockaddr_in6 *saddr = (struct sockaddr_in6 *)addr;
270
271 family = AF_INET6;
272 saddr->sin6_family = family;
273 saddr->sin6_port = htons(0);
274 addr_size = sizeof(*saddr);
275 } else {
276 struct sockaddr_in *saddr = (struct sockaddr_in *)addr;
277
278 family = AF_INET;
279 saddr->sin_family = family;
280 saddr->sin_port = htons(0);
281 saddr->sin_addr.s_addr = htonl(INADDR_ANY);
282 addr_size = sizeof(*saddr);
283 }
284
285 /* server sock setup */
286 if (tcp) {
287 sock = listen_sock = socket(family, SOCK_STREAM, IPPROTO_TCP);
288 } else {
289 sock = *sock_server = socket(family, SOCK_DGRAM, 0);
290 }
291 if (sock < 0) {
292 perror("socket");
293 return 1;
294 }
295
296 ret = bind(sock, (struct sockaddr *)addr, addr_size);
297 if (ret < 0) {
298 perror("bind");
299 return 1;
300 }
301
302 ret = getsockname(sock, (struct sockaddr *)addr, &addr_size);
303 if (ret < 0) {
304 fprintf(stderr, "getsockname failed %i\n", errno);
305 return 1;
306 }
307
308 if (tcp) {
309 ret = listen(sock, 128);
310 assert(ret != -1);
311 }
312
313 if (ipv6) {
314 struct sockaddr_in6 *saddr = (struct sockaddr_in6 *)addr;
315
316 inet_pton(AF_INET6, HOSTV6, &(saddr->sin6_addr));
317 } else {
318 struct sockaddr_in *saddr = (struct sockaddr_in *)addr;
319
320 inet_pton(AF_INET, HOST, &saddr->sin_addr);
321 }
322
323 /* client sock setup */
324 if (tcp) {
325 *sock_client = socket(family, SOCK_STREAM, IPPROTO_TCP);
326 assert(client_connect);
327 } else {
328 *sock_client = socket(family, SOCK_DGRAM, 0);
329 }
330 if (*sock_client < 0) {
331 perror("socket");
332 return 1;
333 }
334 if (client_connect) {
335 ret = connect(*sock_client, (struct sockaddr *)addr, addr_size);
336 if (ret < 0) {
337 perror("connect");
338 return 1;
339 }
340 }
341 if (msg_zc) {
342 #ifdef SO_ZEROCOPY
343 int val = 1;
344
345 /*
346 * NOTE: apps must not set SO_ZEROCOPY when using io_uring zc.
347 * It's only here to test interactions with MSG_ZEROCOPY.
348 */
349 if (setsockopt(*sock_client, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) {
350 perror("setsockopt zc");
351 return 1;
352 }
353 #else
354 fprintf(stderr, "no SO_ZEROCOPY\n");
355 return 1;
356 #endif
357 }
358 if (tcp) {
359 *sock_server = accept(listen_sock, NULL, NULL);
360 if (!*sock_server) {
361 fprintf(stderr, "can't accept\n");
362 return 1;
363 }
364 close(listen_sock);
365 }
366 return 0;
367 }
368
/* Configuration for one do_test_inet_send() run. */
struct send_conf {
	bool fixed_buf;		/* send from a registered (fixed) buffer */
	bool mix_register;	/* randomly mix fixed/normal buffers per request */
	bool cork;		/* split into CORK_REQS linked MSG_MORE requests */
	bool force_async;	/* set IOSQE_ASYNC on every request */
	bool use_sendmsg;	/* sendmsg(_zc) instead of send(_zc) */
	bool tcp;		/* socket pair is TCP rather than UDP */
	bool zc;		/* use the zerocopy request variants */
	bool iovec;		/* multi-entry iovec payload (sendmsg only) */
	bool long_iovec;	/* MAX_IOV iovec entries instead of 4 */
	bool poll_first;	/* set IORING_RECVSEND_POLL_FIRST */
	int buf_index;		/* index into buffers_iov[] to send from */
	struct sockaddr_storage *addr;	/* per-request destination, or NULL */
};
383
do_test_inet_send(struct io_uring * ring,int sock_client,int sock_server,struct send_conf * conf)384 static int do_test_inet_send(struct io_uring *ring, int sock_client, int sock_server,
385 struct send_conf *conf)
386 {
387 struct iovec iov[MAX_IOV];
388 struct msghdr msghdr[CORK_REQS];
389 const unsigned zc_flags = 0;
390 struct io_uring_sqe *sqe;
391 struct io_uring_cqe *cqe;
392 int nr_reqs = conf->cork ? CORK_REQS : 1;
393 int i, ret, nr_cqes, addr_len = 0;
394 size_t send_size = buffers_iov[conf->buf_index].iov_len;
395 size_t chunk_size = send_size / nr_reqs;
396 size_t chunk_size_last = send_size - chunk_size * (nr_reqs - 1);
397 char *buf = buffers_iov[conf->buf_index].iov_base;
398
399 assert(MAX_IOV >= CORK_REQS);
400
401 if (conf->addr) {
402 sa_family_t fam = ((struct sockaddr_in *)conf->addr)->sin_family;
403
404 addr_len = (fam == AF_INET) ? sizeof(struct sockaddr_in) :
405 sizeof(struct sockaddr_in6);
406 }
407
408 memset(rx_buffer, 0, send_size);
409
410 for (i = 0; i < nr_reqs; i++) {
411 bool real_fixed_buf = conf->fixed_buf;
412 size_t cur_size = chunk_size;
413 int msg_flags = MSG_WAITALL;
414
415 if (conf->mix_register)
416 real_fixed_buf = rand() & 1;
417
418 if (i != nr_reqs - 1)
419 msg_flags |= MSG_MORE;
420 else
421 cur_size = chunk_size_last;
422
423 sqe = io_uring_get_sqe(ring);
424
425 if (!conf->use_sendmsg) {
426 if (conf->zc) {
427 io_uring_prep_send_zc(sqe, sock_client, buf + i * chunk_size,
428 cur_size, msg_flags, zc_flags);
429 } else {
430 io_uring_prep_send(sqe, sock_client, buf + i * chunk_size,
431 cur_size, msg_flags);
432 }
433
434 if (real_fixed_buf) {
435 sqe->ioprio |= IORING_RECVSEND_FIXED_BUF;
436 sqe->buf_index = conf->buf_index;
437 }
438 if (conf->addr)
439 io_uring_prep_send_set_addr(sqe, (const struct sockaddr *)conf->addr,
440 addr_len);
441 } else {
442 struct iovec *io;
443 int iov_len;
444
445 if (conf->zc)
446 io_uring_prep_sendmsg_zc(sqe, sock_client, &msghdr[i], msg_flags);
447 else
448 io_uring_prep_sendmsg(sqe, sock_client, &msghdr[i], msg_flags);
449
450 if (!conf->iovec) {
451 io = &iov[i];
452 iov_len = 1;
453 iov[i].iov_len = cur_size;
454 iov[i].iov_base = buf + i * chunk_size;
455 } else {
456 char *it = buf;
457 int j;
458
459 assert(nr_reqs == 1);
460 iov_len = conf->long_iovec ? MAX_IOV : 4;
461 io = iov;
462
463 for (j = 0; j < iov_len; j++)
464 io[j].iov_len = 1;
465 /* first want to be easily advanced */
466 io[0].iov_base = it;
467 it += io[0].iov_len;
468 /* this should cause retry */
469 io[1].iov_len = chunk_size - iov_len + 1;
470 io[1].iov_base = it;
471 it += io[1].iov_len;
472 /* fill the rest */
473 for (j = 2; j < iov_len; j++) {
474 io[j].iov_base = it;
475 it += io[j].iov_len;
476 }
477 }
478
479 memset(&msghdr[i], 0, sizeof(msghdr[i]));
480 msghdr[i].msg_iov = io;
481 msghdr[i].msg_iovlen = iov_len;
482 if (conf->addr) {
483 msghdr[i].msg_name = conf->addr;
484 msghdr[i].msg_namelen = addr_len;
485 }
486 }
487 sqe->user_data = i;
488 if (conf->force_async)
489 sqe->flags |= IOSQE_ASYNC;
490 if (conf->poll_first)
491 sqe->ioprio |= IORING_RECVSEND_POLL_FIRST;
492 if (i != nr_reqs - 1)
493 sqe->flags |= IOSQE_IO_LINK;
494 }
495
496 sqe = io_uring_get_sqe(ring);
497 io_uring_prep_recv(sqe, sock_server, rx_buffer, send_size, MSG_WAITALL);
498 sqe->user_data = RX_TAG;
499
500 ret = io_uring_submit(ring);
501 if (ret != nr_reqs + 1) {
502 fprintf(stderr, "submit failed, got %i expected %i\n", ret, nr_reqs);
503 return 1;
504 }
505
506 nr_cqes = nr_reqs + 1;
507 for (i = 0; i < nr_cqes; i++) {
508 int expected = chunk_size;
509
510 ret = io_uring_wait_cqe(ring, &cqe);
511 if (ret) {
512 fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
513 return 1;
514 }
515 if (cqe->user_data == RX_TAG) {
516 if (cqe->res != send_size) {
517 fprintf(stderr, "rx failed res: %i, expected %i\n",
518 cqe->res, (int)send_size);
519 return 1;
520 }
521 io_uring_cqe_seen(ring, cqe);
522 continue;
523 }
524 if ((cqe->flags & IORING_CQE_F_MORE) && (cqe->flags & IORING_CQE_F_NOTIF)) {
525 fprintf(stderr, "unexpected cflags %i res %i\n",
526 cqe->flags, cqe->res);
527 return 1;
528 }
529 if (cqe->user_data >= nr_reqs) {
530 fprintf(stderr, "invalid user_data %lu\n",
531 (unsigned long)cqe->user_data);
532 return 1;
533 }
534 if (!(cqe->flags & IORING_CQE_F_NOTIF)) {
535 if (cqe->flags & IORING_CQE_F_MORE)
536 nr_cqes++;
537 if (cqe->user_data == nr_reqs - 1)
538 expected = chunk_size_last;
539 if (cqe->res != expected) {
540 if (cqe->res == -ENOMEM) {
541 if (!hit_enomem) {
542 fprintf(stderr, "Hit -ENOMEM. "
543 "Increase ulimit -l "
544 "limit for a complete "
545 "test run. Skipping "
546 "parts.\n");
547 hit_enomem = 1;
548 }
549 return 0;
550 }
551 fprintf(stderr, "invalid cqe->res %d expected %d\n",
552 cqe->res, expected);
553 return 1;
554 }
555 }
556 io_uring_cqe_seen(ring, cqe);
557 }
558
559 for (i = 0; i < send_size; i++) {
560 if (buf[i] != rx_buffer[i]) {
561 fprintf(stderr, "botched data, first mismated byte %i, "
562 "%u vs %u\n", i, buf[i], rx_buffer[i]);
563 return 1;
564 }
565 }
566 return 0;
567 }
568
/*
 * Matrix-test io_uring sends: iterate over socket configurations
 * (IPv4/IPv6, connected or not, SO_ZEROCOPY, TCP/UDP, swapped sides) and,
 * for each, over request configurations (sendmsg vs send, poll-first,
 * fixed buffers, explicit address, corking, async, zerocopy, iovecs) and
 * every available buffer flavour, skipping invalid or unsupported combos.
 * Returns 0 on success, 1 on the first failure.
 */
static int test_inet_send(struct io_uring *ring)
{
	struct send_conf conf;
	struct sockaddr_storage addr;
	int sock_client = -1, sock_server = -1;
	int ret, j, i;
	int buf_index;

	for (j = 0; j < 32; j++) {
		bool ipv6 = j & 1;
		bool client_connect = j & 2;
		bool msg_zc_set = j & 4;
		bool tcp = j & 8;
		bool swap_sockets = j & 16;

		/* TCP requires a connected client; swapping only makes sense for TCP */
		if (tcp && !client_connect)
			continue;
		if (swap_sockets && !tcp)
			continue;
#ifndef SO_ZEROCOPY
		if (msg_zc_set)
			continue;
#endif
		ret = create_socketpair_ip(&addr, &sock_client, &sock_server, ipv6,
					   client_connect, msg_zc_set, tcp);
		if (ret) {
			fprintf(stderr, "sock prep failed %d\n", ret);
			return 1;
		}
		/* for TCP also exercise sending from the accepted side */
		if (swap_sockets) {
			int tmp_sock = sock_client;

			sock_client = sock_server;
			sock_server = tmp_sock;
		}

		for (i = 0; i < 1024; i++) {
			bool regbuf;

			conf.use_sendmsg = i & 1;
			conf.poll_first = i & 2;
			conf.fixed_buf = i & 4;
			conf.addr = (i & 8) ? &addr : NULL;
			conf.cork = i & 16;
			conf.mix_register = i & 32;
			conf.force_async = i & 64;
			conf.zc = i & 128;
			conf.iovec = i & 256;
			conf.long_iovec = i & 512;
			conf.tcp = tcp;
			regbuf = conf.mix_register || conf.fixed_buf;

			/* iovec payloads only work with plain (uncorked) sendmsg */
			if (conf.iovec && (!conf.use_sendmsg || regbuf || conf.cork))
				continue;
			if (!conf.zc) {
				/* fixed buffers are a zerocopy-only feature */
				if (regbuf)
					continue;
				/*
				 * Non zerocopy send w/ addr was added together with sendmsg_zc,
				 * skip if the kernel doesn't support it.
				 */
				if (conf.addr && !has_sendmsg)
					continue;
			}
			if (tcp && (conf.cork || conf.addr))
				continue;
			if (conf.mix_register && (!conf.cork || conf.fixed_buf))
				continue;
			/* unconnected sockets need an explicit destination */
			if (!client_connect && conf.addr == NULL)
				continue;
			if (conf.use_sendmsg && (regbuf || !has_sendmsg))
				continue;
			if (msg_zc_set && !conf.zc)
				continue;

			for (buf_index = 0; buf_index < ARRAY_SIZE(buffers_iov); buf_index++) {
				size_t len = buffers_iov[buf_index].iov_len;

				if (!buffers_iov[buf_index].iov_base)
					continue;
				/* UDP IPv4 max datagram size is under 64K */
				if (!tcp && len > (1U << 15))
					continue;

				conf.buf_index = buf_index;
				ret = do_test_inet_send(ring, sock_client, sock_server, &conf);
				if (ret) {
					fprintf(stderr, "send failed fixed buf %i, "
						"conn %i, addr %i, cork %i\n",
						conf.fixed_buf, client_connect,
						!!conf.addr, conf.cork);
					return 1;
				}
			}
		}

		close(sock_client);
		close(sock_server);
	}
	return 0;
}
670
/*
 * Verify the destination address set via io_uring_prep_send_set_addr()
 * survives async execution: the send is linked behind a 1s timeout and the
 * on-stack address is wiped right after submit, so the send only works if
 * the kernel took its own copy of the address.
 * Returns 0 on success, 1 on failure.
 */
static int test_async_addr(struct io_uring *ring)
{
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	struct sockaddr_storage addr;
	int sock_tx = -1, sock_rx = -1;
	struct __kernel_timespec ts;
	int ret;

	ts.tv_sec = 1;
	ts.tv_nsec = 0;
	/* unconnected UDP over IPv6: the address must come from the request */
	ret = create_socketpair_ip(&addr, &sock_tx, &sock_rx, true, false, false, false);
	if (ret) {
		fprintf(stderr, "sock prep failed %d\n", ret);
		return 1;
	}

	/* timeout first; the linked send may only start after it fires */
	sqe = io_uring_get_sqe(ring);
	io_uring_prep_timeout(sqe, &ts, 0, IORING_TIMEOUT_ETIME_SUCCESS);
	sqe->user_data = 1;
	sqe->flags |= IOSQE_IO_LINK;

	sqe = io_uring_get_sqe(ring);
	io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, 1, 0, 0);
	sqe->user_data = 2;
	io_uring_prep_send_set_addr(sqe, (const struct sockaddr *)&addr,
				    sizeof(struct sockaddr_in6));

	ret = io_uring_submit(ring);
	assert(ret == 2);
	/* deliberately clobber the stack copy; the kernel must not read it again */
	memset(&addr, 0, sizeof(addr));

	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret) {
		fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
		return 1;
	}
	if (cqe->user_data != 1 || cqe->res != -ETIME) {
		fprintf(stderr, "invalid timeout res %i %i\n",
			(int)cqe->user_data, cqe->res);
		return 1;
	}
	io_uring_cqe_seen(ring, cqe);

	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret) {
		fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
		return 1;
	}
	if (cqe->user_data != 2 || cqe->res != 1) {
		fprintf(stderr, "invalid send %i %i\n",
			(int)cqe->user_data, cqe->res);
		return 1;
	}
	io_uring_cqe_seen(ring, cqe);
	ret = recv(sock_rx, rx_buffer, 1, MSG_TRUNC);
	assert(ret == 1);

	/* finally reap the zerocopy notification */
	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret) {
		fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
		return 1;
	}
	assert(cqe->flags & IORING_CQE_F_NOTIF);
	io_uring_cqe_seen(ring, cqe);

	close(sock_tx);
	close(sock_rx);
	return 0;
}
741
test_sendzc_report(struct io_uring * ring)742 static int test_sendzc_report(struct io_uring *ring)
743 {
744 struct io_uring_sqe *sqe;
745 struct io_uring_cqe *cqe;
746 struct sockaddr_storage addr;
747 int sock_tx, sock_rx;
748 int ret;
749
750 ret = create_socketpair_ip(&addr, &sock_tx, &sock_rx, true, true, false, true);
751 if (ret) {
752 fprintf(stderr, "sock prep failed %d\n", ret);
753 return 1;
754 }
755
756 sqe = io_uring_get_sqe(ring);
757 io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, 1, 0,
758 IORING_SEND_ZC_REPORT_USAGE);
759 ret = io_uring_submit(ring);
760 if (ret != 1) {
761 fprintf(stderr, "io_uring_submit failed %i\n", ret);
762 return 1;
763 }
764 ret = io_uring_wait_cqe(ring, &cqe);
765 if (ret) {
766 fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
767 return 1;
768 }
769 if (cqe->res != 1 && cqe->res != -EINVAL) {
770 fprintf(stderr, "sendzc report failed %u\n", cqe->res);
771 return 1;
772 }
773 if (!(cqe->flags & IORING_CQE_F_MORE)) {
774 fprintf(stderr, "expected notification %i\n", cqe->res);
775 return 1;
776 }
777 io_uring_cqe_seen(ring, cqe);
778
779 ret = io_uring_wait_cqe(ring, &cqe);
780 if (ret) {
781 fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
782 return 1;
783 }
784 if (cqe->flags & IORING_CQE_F_MORE) {
785 fprintf(stderr, "F_MORE after notification\n");
786 return 1;
787 }
788 io_uring_cqe_seen(ring, cqe);
789
790 close(sock_tx);
791 close(sock_rx);
792 return 0;
793 }
794
795 /* see also send_recv.c:test_invalid */
test_invalid_zc(int fds[2])796 static int test_invalid_zc(int fds[2])
797 {
798 struct io_uring ring;
799 int ret;
800 struct io_uring_cqe *cqe;
801 struct io_uring_sqe *sqe;
802 bool notif = false;
803
804 if (!has_sendmsg)
805 return 0;
806
807 ret = t_create_ring(8, &ring, 0);
808 if (ret)
809 return ret;
810
811 sqe = io_uring_get_sqe(&ring);
812 io_uring_prep_sendmsg(sqe, fds[0], NULL, MSG_WAITALL);
813 sqe->opcode = IORING_OP_SENDMSG_ZC;
814 sqe->flags |= IOSQE_ASYNC;
815
816 ret = io_uring_submit(&ring);
817 if (ret != 1) {
818 fprintf(stderr, "submit failed %i\n", ret);
819 return ret;
820 }
821 ret = io_uring_wait_cqe(&ring, &cqe);
822 if (ret)
823 return 1;
824 if (cqe->flags & IORING_CQE_F_MORE)
825 notif = true;
826 io_uring_cqe_seen(&ring, cqe);
827
828 if (notif) {
829 ret = io_uring_wait_cqe(&ring, &cqe);
830 if (ret)
831 return 1;
832 io_uring_cqe_seen(&ring, cqe);
833 }
834 io_uring_queue_exit(&ring);
835 return 0;
836 }
837
run_basic_tests(void)838 static int run_basic_tests(void)
839 {
840 struct sockaddr_storage addr;
841 int ret, i, sp[2];
842
843 /* create TCP IPv6 pair */
844 ret = create_socketpair_ip(&addr, &sp[0], &sp[1], true, true, false, true);
845 if (ret) {
846 fprintf(stderr, "sock prep failed %d\n", ret);
847 return -1;
848 }
849
850 for (i = 0; i < 2; i++) {
851 struct io_uring ring;
852 unsigned ring_flags = 0;
853
854 if (i & 1)
855 ring_flags |= IORING_SETUP_DEFER_TASKRUN;
856
857 ret = io_uring_queue_init(32, &ring, ring_flags);
858 if (ret) {
859 if (ret == -EINVAL)
860 continue;
861 fprintf(stderr, "queue init failed: %d\n", ret);
862 return -1;
863 }
864
865 ret = test_basic_send(&ring, sp[0], sp[1]);
866 if (ret) {
867 fprintf(stderr, "test_basic_send() failed\n");
868 return -1;
869 }
870
871 ret = test_send_faults(sp[0], sp[1]);
872 if (ret) {
873 fprintf(stderr, "test_send_faults() failed\n");
874 return -1;
875 }
876
877 ret = test_invalid_zc(sp);
878 if (ret) {
879 fprintf(stderr, "test_invalid_zc() failed\n");
880 return -1;
881 }
882
883 ret = test_async_addr(&ring);
884 if (ret) {
885 fprintf(stderr, "test_async_addr() failed\n");
886 return T_EXIT_FAIL;
887 }
888
889 ret = test_sendzc_report(&ring);
890 if (ret) {
891 fprintf(stderr, "test_sendzc_report() failed\n");
892 return T_EXIT_FAIL;
893 }
894
895 io_uring_queue_exit(&ring);
896 }
897
898 close(sp[0]);
899 close(sp[1]);
900 return 0;
901 }
902
/*
 * Entry point: probe kernel support, allocate the test buffers (falling
 * back to two pages if the 32MB allocation fails), run the basic tests,
 * then the full send matrix with registered buffers, with and without
 * SINGLE_ISSUER|DEFER_TASKRUN.
 */
int main(int argc, char *argv[])
{
	size_t len;
	int ret, i;

	if (argc > 1)
		return T_EXIT_SKIP;

	ret = probe_zc_support();
	if (ret) {
		printf("probe failed\n");
		return T_EXIT_FAIL;
	}
	if (!has_sendzc) {
		printf("no IORING_OP_SEND_ZC support, skip\n");
		return T_EXIT_SKIP;
	}

	page_sz = sysconf(_SC_PAGESIZE);

	/* prefer the 32MB buffers; fall back to 2 pages if we can't get them */
	len = LARGE_BUF_SIZE;
	tx_buffer = aligned_alloc(page_sz, len);
	rx_buffer = aligned_alloc(page_sz, len);
	if (tx_buffer && rx_buffer) {
		buffers_iov[BUF_T_LARGE].iov_base = tx_buffer;
		buffers_iov[BUF_T_LARGE].iov_len = len;
	} else {
		if (tx_buffer)
			free(tx_buffer);
		if (rx_buffer)
			free(rx_buffer);

		printf("skip large buffer tests, can't alloc\n");

		len = 2 * page_sz;
		tx_buffer = aligned_alloc(page_sz, len);
		rx_buffer = aligned_alloc(page_sz, len);
	}
	if (!tx_buffer || !rx_buffer) {
		fprintf(stderr, "can't allocate buffers\n");
		return T_EXIT_FAIL;
	}

	srand((unsigned)time(NULL));
	/* fill TX with a recognizable pattern so RX can be verified */
	for (i = 0; i < len; i++)
		tx_buffer[i] = i;
	memset(rx_buffer, 0, len);

	/* carve the buffer flavours out of tx_buffer */
	buffers_iov[BUF_T_NORMAL].iov_base = tx_buffer + page_sz;
	buffers_iov[BUF_T_NORMAL].iov_len = page_sz;
	buffers_iov[BUF_T_SMALL].iov_base = tx_buffer;
	buffers_iov[BUF_T_SMALL].iov_len = 137;
	buffers_iov[BUF_T_NONALIGNED].iov_base = tx_buffer + BUFFER_OFFSET;
	buffers_iov[BUF_T_NONALIGNED].iov_len = 2 * page_sz - BUFFER_OFFSET - 13;

	/* only attempt hugetlb when the large allocation succeeded */
	if (len == LARGE_BUF_SIZE) {
		void *huge_page;
		int off = page_sz + 27;

		/* 4MB of 2MB huge pages, used at a misaligned offset */
		len = 1U << 22;
		huge_page = mmap(NULL, len, PROT_READ|PROT_WRITE,
				 MAP_PRIVATE | MAP_HUGETLB | MAP_HUGE_2MB | MAP_ANONYMOUS,
				 -1, 0);
		if (huge_page != MAP_FAILED) {
			buffers_iov[BUF_T_HUGETLB].iov_base = huge_page + off;
			buffers_iov[BUF_T_HUGETLB].iov_len = len - off;
		}
	}

	ret = run_basic_tests();
	if (ret)
		return T_EXIT_FAIL;

	for (i = 0; i < 2; i++) {
		struct io_uring ring;
		unsigned ring_flags = 0;

		if (i & 1)
			ring_flags |= IORING_SETUP_SINGLE_ISSUER |
				      IORING_SETUP_DEFER_TASKRUN;

		ret = io_uring_queue_init(32, &ring, ring_flags);
		if (ret) {
			/* DEFER_TASKRUN may be unsupported on older kernels */
			if (ret == -EINVAL)
				continue;
			fprintf(stderr, "queue init failed: %d\n", ret);
			return -1;
		}

		ret = t_register_buffers(&ring, buffers_iov, ARRAY_SIZE(buffers_iov));
		if (ret == T_SETUP_SKIP) {
			fprintf(stderr, "can't register bufs, skip\n");
			goto out;
		} else if (ret != T_SETUP_OK) {
			fprintf(stderr, "buffer registration failed %i\n", ret);
			return T_EXIT_FAIL;
		}

		/*
		 * Shift the large/hugetlb views by a small slack so sends run
		 * at an offset into the registered buffer; undone after the
		 * run so re-registration sees the original extents.
		 */
		if (buffers_iov[BUF_T_HUGETLB].iov_base) {
			buffers_iov[BUF_T_HUGETLB].iov_base += 13;
			buffers_iov[BUF_T_HUGETLB].iov_len -= 26;
		}
		if (buffers_iov[BUF_T_LARGE].iov_base) {
			buffers_iov[BUF_T_LARGE].iov_base += 13;
			buffers_iov[BUF_T_LARGE].iov_len -= 26;
		}

		ret = test_inet_send(&ring);
		if (ret) {
			fprintf(stderr, "test_inet_send() failed (defer_taskrun %i)\n",
				ring_flags & IORING_SETUP_DEFER_TASKRUN);
			return T_EXIT_FAIL;
		}

		if (buffers_iov[BUF_T_HUGETLB].iov_base) {
			buffers_iov[BUF_T_HUGETLB].iov_base -= 13;
			buffers_iov[BUF_T_HUGETLB].iov_len += 26;
		}
		if (buffers_iov[BUF_T_LARGE].iov_base) {
			buffers_iov[BUF_T_LARGE].iov_base -= 13;
			buffers_iov[BUF_T_LARGE].iov_len += 26;
		}
out:
		io_uring_queue_exit(&ring);
	}

	return T_EXIT_PASS;
}
1031