• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/* SPDX-License-Identifier: MIT */
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include <arpa/inet.h>
#include <linux/if_packet.h>
#include <linux/ipv6.h>
#include <linux/mman.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/un.h>
#include <sys/wait.h>

#include "liburing.h"
#include "helpers.h"
40 
#define MAX_MSG	128

#define HOST	"127.0.0.1"
#define HOSTV6	"::1"

#define MAX_IOV 32
#define CORK_REQS 5
#define RX_TAG 10000
#define BUFFER_OFFSET 41

#ifndef ARRAY_SIZE
	#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
#endif

/* Buffer variants exercised by the send tests; indexes into buffers_iov. */
enum {
	BUF_T_NORMAL,		/* page-aligned, one page long */
	BUF_T_SMALL,		/* tiny 137-byte buffer */
	BUF_T_NONALIGNED,	/* deliberately misaligned buffer */
	BUF_T_LARGE,		/* LARGE_BUF_SIZE buffer, may be unavailable */
	BUF_T_HUGETLB,		/* 2MB huge-page backed, may be unavailable */

	__BUF_NR,
};

/* 32MB, should be enough to trigger a short send */
#define LARGE_BUF_SIZE		(1U << 25)

static size_t page_sz;
static char *tx_buffer, *rx_buffer;
/* one entry per BUF_T_* variant; iov_base == NULL means "not set up" */
static struct iovec buffers_iov[__BUF_NR];

static bool has_sendzc;		/* kernel supports IORING_OP_SEND_ZC */
static bool has_sendmsg;	/* kernel supports IORING_OP_SENDMSG_ZC */
static bool hit_enomem;		/* warn about ulimit -l only once */
75 
probe_zc_support(void)76 static int probe_zc_support(void)
77 {
78 	struct io_uring ring;
79 	struct io_uring_probe *p;
80 	int ret;
81 
82 	has_sendzc = has_sendmsg = false;
83 
84 	ret = io_uring_queue_init(1, &ring, 0);
85 	if (ret)
86 		return -1;
87 
88 	p = t_calloc(1, sizeof(*p) + 256 * sizeof(struct io_uring_probe_op));
89 	if (!p)
90 		return -1;
91 
92 	ret = io_uring_register_probe(&ring, p, 256);
93 	if (ret)
94 		return -1;
95 
96 	has_sendzc = p->ops_len > IORING_OP_SEND_ZC;
97 	has_sendmsg = p->ops_len > IORING_OP_SENDMSG_ZC;
98 	io_uring_queue_exit(&ring);
99 	free(p);
100 	return 0;
101 }
102 
check_cq_empty(struct io_uring * ring)103 static bool check_cq_empty(struct io_uring *ring)
104 {
105 	struct io_uring_cqe *cqe = NULL;
106 	int ret;
107 
108 	ret = io_uring_peek_cqe(ring, &cqe); /* nothing should be there */
109 	return ret == -EAGAIN;
110 }
111 
/*
 * Single zerocopy send over a connected socket pair: expects one completion
 * CQE carrying the byte count with IORING_CQE_F_MORE set, followed by a
 * notification CQE with IORING_CQE_F_NOTIF, then verifies the data arrived.
 *
 * Returns T_EXIT_PASS on success, T_EXIT_FAIL on a short/failed send.
 */
static int test_basic_send(struct io_uring *ring, int sock_tx, int sock_rx)
{
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int msg_flags = 0;
	unsigned zc_flags = 0;
	int payload_size = 100;
	int ret;

	sqe = io_uring_get_sqe(ring);
	io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, payload_size,
			      msg_flags, zc_flags);
	sqe->user_data = 1;

	ret = io_uring_submit(ring);
	assert(ret == 1);

	/* first CQE: the send result */
	ret = io_uring_wait_cqe(ring, &cqe);
	assert(!ret && cqe->user_data == 1);
	if (cqe->res != payload_size) {
		fprintf(stderr, "send failed %i\n", cqe->res);
		return T_EXIT_FAIL;
	}

	/* F_MORE announces the upcoming notification CQE */
	assert(cqe->flags & IORING_CQE_F_MORE);
	io_uring_cqe_seen(ring, cqe);

	/* second CQE: the zerocopy buffer-release notification */
	ret = io_uring_wait_cqe(ring, &cqe);
	assert(!ret);
	assert(cqe->user_data == 1);
	assert(cqe->flags & IORING_CQE_F_NOTIF);
	assert(!(cqe->flags & IORING_CQE_F_MORE));
	io_uring_cqe_seen(ring, cqe);
	assert(check_cq_empty(ring));

	/* drain the receiver; MSG_TRUNC reports the full datagram length */
	ret = recv(sock_rx, rx_buffer, payload_size, MSG_TRUNC);
	assert(ret == payload_size);
	return T_EXIT_PASS;
}
151 
test_send_faults_check(struct io_uring * ring,int expected)152 static int test_send_faults_check(struct io_uring *ring, int expected)
153 {
154 	struct io_uring_cqe *cqe;
155 	int ret, nr_cqes = 0;
156 	bool more = true;
157 
158 	while (more) {
159 		nr_cqes++;
160 		ret = io_uring_wait_cqe(ring, &cqe);
161 		assert(!ret);
162 		assert(cqe->user_data == 1);
163 
164 		if (nr_cqes == 1 && (cqe->flags & IORING_CQE_F_NOTIF)) {
165 			fprintf(stderr, "test_send_faults_check notif came first\n");
166 			return -1;
167 		}
168 
169 		if (!(cqe->flags & IORING_CQE_F_NOTIF)) {
170 			if (cqe->res != expected) {
171 				fprintf(stderr, "invalid cqe res %i vs expected %i, "
172 					"user_data %i\n",
173 					cqe->res, expected, (int)cqe->user_data);
174 				return -1;
175 			}
176 		} else {
177 			if (cqe->res != 0 || cqe->flags != IORING_CQE_F_NOTIF) {
178 				fprintf(stderr, "invalid notif cqe %i %i\n",
179 					cqe->res, cqe->flags);
180 				return -1;
181 			}
182 		}
183 
184 		more = cqe->flags & IORING_CQE_F_MORE;
185 		io_uring_cqe_seen(ring, cqe);
186 	}
187 
188 	if (nr_cqes > 2) {
189 		fprintf(stderr, "test_send_faults_check() too many CQEs %i\n",
190 				nr_cqes);
191 		return -1;
192 	}
193 	assert(check_cq_empty(ring));
194 	return 0;
195 }
196 
test_send_faults(int sock_tx,int sock_rx)197 static int test_send_faults(int sock_tx, int sock_rx)
198 {
199 	struct io_uring_sqe *sqe;
200 	int msg_flags = 0;
201 	unsigned zc_flags = 0;
202 	int ret, payload_size = 100;
203 	struct io_uring ring;
204 
205 	ret = io_uring_queue_init(32, &ring, 0);
206 	if (ret) {
207 		fprintf(stderr, "queue init failed: %d\n", ret);
208 		return -1;
209 	}
210 
211 	/* invalid buffer */
212 	sqe = io_uring_get_sqe(&ring);
213 	io_uring_prep_send_zc(sqe, sock_tx, (void *)1UL, payload_size,
214 			      msg_flags, zc_flags);
215 	sqe->user_data = 1;
216 	ret = io_uring_submit(&ring);
217 	assert(ret == 1);
218 
219 	ret = test_send_faults_check(&ring, -EFAULT);
220 	if (ret) {
221 		fprintf(stderr, "test_send_faults with invalid buf failed\n");
222 		return -1;
223 	}
224 
225 	/* invalid address */
226 	sqe = io_uring_get_sqe(&ring);
227 	io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, payload_size,
228 			      msg_flags, zc_flags);
229 	io_uring_prep_send_set_addr(sqe, (const struct sockaddr *)1UL,
230 				    sizeof(struct sockaddr_in6));
231 	sqe->user_data = 1;
232 	ret = io_uring_submit(&ring);
233 	assert(ret == 1);
234 
235 	ret = test_send_faults_check(&ring, -EFAULT);
236 	if (ret) {
237 		fprintf(stderr, "test_send_faults with invalid addr failed\n");
238 		return -1;
239 	}
240 
241 	/* invalid send/recv flags */
242 	sqe = io_uring_get_sqe(&ring);
243 	io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, payload_size,
244 			      msg_flags, ~0U);
245 	sqe->user_data = 1;
246 	ret = io_uring_submit(&ring);
247 	assert(ret == 1);
248 
249 	ret = test_send_faults_check(&ring, -EINVAL);
250 	if (ret) {
251 		fprintf(stderr, "test_send_faults with invalid flags failed\n");
252 		return -1;
253 	}
254 
255 	return T_EXIT_PASS;
256 }
257 
create_socketpair_ip(struct sockaddr_storage * addr,int * sock_client,int * sock_server,bool ipv6,bool client_connect,bool msg_zc,bool tcp)258 static int create_socketpair_ip(struct sockaddr_storage *addr,
259 				int *sock_client, int *sock_server,
260 				bool ipv6, bool client_connect,
261 				bool msg_zc, bool tcp)
262 {
263 	socklen_t addr_size;
264 	int family, sock, listen_sock = -1;
265 	int ret;
266 
267 	memset(addr, 0, sizeof(*addr));
268 	if (ipv6) {
269 		struct sockaddr_in6 *saddr = (struct sockaddr_in6 *)addr;
270 
271 		family = AF_INET6;
272 		saddr->sin6_family = family;
273 		saddr->sin6_port = htons(0);
274 		addr_size = sizeof(*saddr);
275 	} else {
276 		struct sockaddr_in *saddr = (struct sockaddr_in *)addr;
277 
278 		family = AF_INET;
279 		saddr->sin_family = family;
280 		saddr->sin_port = htons(0);
281 		saddr->sin_addr.s_addr = htonl(INADDR_ANY);
282 		addr_size = sizeof(*saddr);
283 	}
284 
285 	/* server sock setup */
286 	if (tcp) {
287 		sock = listen_sock = socket(family, SOCK_STREAM, IPPROTO_TCP);
288 	} else {
289 		sock = *sock_server = socket(family, SOCK_DGRAM, 0);
290 	}
291 	if (sock < 0) {
292 		perror("socket");
293 		return 1;
294 	}
295 
296 	ret = bind(sock, (struct sockaddr *)addr, addr_size);
297 	if (ret < 0) {
298 		perror("bind");
299 		return 1;
300 	}
301 
302 	ret = getsockname(sock, (struct sockaddr *)addr, &addr_size);
303 	if (ret < 0) {
304 		fprintf(stderr, "getsockname failed %i\n", errno);
305 		return 1;
306 	}
307 
308 	if (tcp) {
309 		ret = listen(sock, 128);
310 		assert(ret != -1);
311 	}
312 
313 	if (ipv6) {
314 		struct sockaddr_in6 *saddr = (struct sockaddr_in6 *)addr;
315 
316 		inet_pton(AF_INET6, HOSTV6, &(saddr->sin6_addr));
317 	} else {
318 		struct sockaddr_in *saddr = (struct sockaddr_in *)addr;
319 
320 		inet_pton(AF_INET, HOST, &saddr->sin_addr);
321 	}
322 
323 	/* client sock setup */
324 	if (tcp) {
325 		*sock_client = socket(family, SOCK_STREAM, IPPROTO_TCP);
326 		assert(client_connect);
327 	} else {
328 		*sock_client = socket(family, SOCK_DGRAM, 0);
329 	}
330 	if (*sock_client < 0) {
331 		perror("socket");
332 		return 1;
333 	}
334 	if (client_connect) {
335 		ret = connect(*sock_client, (struct sockaddr *)addr, addr_size);
336 		if (ret < 0) {
337 			perror("connect");
338 			return 1;
339 		}
340 	}
341 	if (msg_zc) {
342 #ifdef SO_ZEROCOPY
343 		int val = 1;
344 
345 		/*
346 		 * NOTE: apps must not set SO_ZEROCOPY when using io_uring zc.
347 		 * It's only here to test interactions with MSG_ZEROCOPY.
348 		 */
349 		if (setsockopt(*sock_client, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) {
350 			perror("setsockopt zc");
351 			return 1;
352 		}
353 #else
354 		fprintf(stderr, "no SO_ZEROCOPY\n");
355 		return 1;
356 #endif
357 	}
358 	if (tcp) {
359 		*sock_server = accept(listen_sock, NULL, NULL);
360 		if (!*sock_server) {
361 			fprintf(stderr, "can't accept\n");
362 			return 1;
363 		}
364 		close(listen_sock);
365 	}
366 	return 0;
367 }
368 
/* One combination of options for do_test_inet_send(). */
struct send_conf {
	bool fixed_buf;		/* use a registered (fixed) buffer */
	bool mix_register;	/* randomly mix fixed and normal buffers */
	bool cork;		/* split the send into CORK_REQS linked chunks */
	bool force_async;	/* set IOSQE_ASYNC on each send */
	bool use_sendmsg;	/* sendmsg(_zc) variant instead of send(_zc) */
	bool tcp;		/* sockets are TCP (else UDP) */
	bool zc;		/* zerocopy opcode (else plain send) */
	bool iovec;		/* multi-entry iovec (sendmsg only) */
	bool long_iovec;	/* use MAX_IOV entries instead of 4 */
	bool poll_first;	/* set IORING_RECVSEND_POLL_FIRST */
	int buf_index;		/* which buffers_iov[] entry to send */
	struct sockaddr_storage *addr;	/* destination, NULL if connected */
};
383 
/*
 * Run one send/recv round for a given configuration: issue nr_reqs linked
 * send requests (CORK_REQS chunks when conf->cork, otherwise one) plus a
 * single MSG_WAITALL recv, reap all CQEs including any zerocopy
 * notifications, and verify the received bytes match the transmit buffer.
 *
 * Returns 0 on success, 1 on failure. Returns 0 early on -ENOMEM (locked
 * memory limit too low), warning once and skipping that part.
 */
static int do_test_inet_send(struct io_uring *ring, int sock_client, int sock_server,
			     struct send_conf *conf)
{
	struct iovec iov[MAX_IOV];
	struct msghdr msghdr[CORK_REQS];
	const unsigned zc_flags = 0;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int nr_reqs = conf->cork ? CORK_REQS : 1;
	int i, ret, nr_cqes, addr_len = 0;
	size_t send_size = buffers_iov[conf->buf_index].iov_len;
	size_t chunk_size = send_size / nr_reqs;
	/* last chunk absorbs the remainder of the division */
	size_t chunk_size_last = send_size - chunk_size * (nr_reqs - 1);
	char *buf = buffers_iov[conf->buf_index].iov_base;

	assert(MAX_IOV >= CORK_REQS);

	if (conf->addr) {
		/* sin_family sits at the same offset for v4 and v6 */
		sa_family_t fam = ((struct sockaddr_in *)conf->addr)->sin_family;

		addr_len = (fam == AF_INET) ? sizeof(struct sockaddr_in) :
					      sizeof(struct sockaddr_in6);
	}

	memset(rx_buffer, 0, send_size);

	for (i = 0; i < nr_reqs; i++) {
		bool real_fixed_buf = conf->fixed_buf;
		size_t cur_size = chunk_size;
		int msg_flags = MSG_WAITALL;

		/* randomly alternate fixed and normal buffers */
		if (conf->mix_register)
			real_fixed_buf = rand() & 1;

		/* every chunk but the last gets MSG_MORE */
		if (i != nr_reqs - 1)
			msg_flags |= MSG_MORE;
		else
			cur_size = chunk_size_last;

		sqe = io_uring_get_sqe(ring);

		if (!conf->use_sendmsg) {
			if (conf->zc) {
				io_uring_prep_send_zc(sqe, sock_client, buf + i * chunk_size,
						      cur_size, msg_flags, zc_flags);
			} else {
				io_uring_prep_send(sqe, sock_client, buf + i * chunk_size,
						      cur_size, msg_flags);
			}

			if (real_fixed_buf) {
				sqe->ioprio |= IORING_RECVSEND_FIXED_BUF;
				sqe->buf_index = conf->buf_index;
			}
			if (conf->addr)
				io_uring_prep_send_set_addr(sqe, (const struct sockaddr *)conf->addr,
							    addr_len);
		} else {
			struct iovec *io;
			int iov_len;

			if (conf->zc)
				io_uring_prep_sendmsg_zc(sqe, sock_client, &msghdr[i], msg_flags);
			else
				io_uring_prep_sendmsg(sqe, sock_client, &msghdr[i], msg_flags);

			if (!conf->iovec) {
				/* single iovec covering this chunk */
				io = &iov[i];
				iov_len = 1;
				iov[i].iov_len = cur_size;
				iov[i].iov_base = buf + i * chunk_size;
			} else {
				/* craft an iovec layout that forces partial
				 * progress and retries in the kernel */
				char *it = buf;
				int j;

				assert(nr_reqs == 1);
				iov_len = conf->long_iovec ? MAX_IOV : 4;
				io = iov;

				for (j = 0; j < iov_len; j++)
					io[j].iov_len = 1;
				/* first want to be easily advanced */
				io[0].iov_base = it;
				it += io[0].iov_len;
				/* this should cause retry */
				io[1].iov_len = chunk_size - iov_len + 1;
				io[1].iov_base = it;
				it += io[1].iov_len;
				/* fill the rest */
				for (j = 2; j < iov_len; j++) {
					io[j].iov_base = it;
					it += io[j].iov_len;
				}
			}

			memset(&msghdr[i], 0, sizeof(msghdr[i]));
			msghdr[i].msg_iov = io;
			msghdr[i].msg_iovlen = iov_len;
			if (conf->addr) {
				msghdr[i].msg_name = conf->addr;
				msghdr[i].msg_namelen = addr_len;
			}
		}
		sqe->user_data = i;
		if (conf->force_async)
			sqe->flags |= IOSQE_ASYNC;
		if (conf->poll_first)
			sqe->ioprio |= IORING_RECVSEND_POLL_FIRST;
		/* link the sends so they execute in submission order */
		if (i != nr_reqs - 1)
			sqe->flags |= IOSQE_IO_LINK;
	}

	sqe = io_uring_get_sqe(ring);
	io_uring_prep_recv(sqe, sock_server, rx_buffer, send_size, MSG_WAITALL);
	sqe->user_data = RX_TAG;

	ret = io_uring_submit(ring);
	if (ret != nr_reqs + 1) {
		fprintf(stderr, "submit failed, got %i expected %i\n", ret, nr_reqs);
		return 1;
	}

	/* one CQE per request plus the recv; zc notifications are counted
	 * in as F_MORE completions arrive below */
	nr_cqes = nr_reqs + 1;
	for (i = 0; i < nr_cqes; i++) {
		int expected = chunk_size;

		ret = io_uring_wait_cqe(ring, &cqe);
		if (ret) {
			fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
			return 1;
		}
		if (cqe->user_data == RX_TAG) {
			if (cqe->res != send_size) {
				fprintf(stderr, "rx failed res: %i, expected %i\n",
						cqe->res, (int)send_size);
				return 1;
			}
			io_uring_cqe_seen(ring, cqe);
			continue;
		}
		/* F_MORE and F_NOTIF must never appear together */
		if ((cqe->flags & IORING_CQE_F_MORE) && (cqe->flags & IORING_CQE_F_NOTIF)) {
			fprintf(stderr, "unexpected cflags %i res %i\n",
					cqe->flags, cqe->res);
			return 1;
		}
		if (cqe->user_data >= nr_reqs) {
			fprintf(stderr, "invalid user_data %lu\n",
					(unsigned long)cqe->user_data);
			return 1;
		}
		if (!(cqe->flags & IORING_CQE_F_NOTIF)) {
			/* a zc send announces a follow-up notification CQE */
			if (cqe->flags & IORING_CQE_F_MORE)
				nr_cqes++;
			if (cqe->user_data == nr_reqs - 1)
				expected = chunk_size_last;
			if (cqe->res != expected) {
				if (cqe->res == -ENOMEM) {
					if (!hit_enomem) {
						fprintf(stderr, "Hit -ENOMEM. "
							"Increase ulimit -l "
							"limit for a complete "
							"test run. Skipping "
							"parts.\n");
						hit_enomem = 1;
					}
					return 0;
				}
				fprintf(stderr, "invalid cqe->res %d expected %d\n",
						 cqe->res, expected);
				return 1;
			}
		}
		io_uring_cqe_seen(ring, cqe);
	}

	/* byte-for-byte payload verification */
	for (i = 0; i < send_size; i++) {
		if (buf[i] != rx_buffer[i]) {
			fprintf(stderr, "botched data, first mismated byte %i, "
				"%u vs %u\n", i, buf[i], rx_buffer[i]);
			return 1;
		}
	}
	return 0;
}
568 
/*
 * Exhaustively sweep socket setups (IPv4/IPv6, connected or not,
 * SO_ZEROCOPY, TCP/UDP, swapped endpoints) against all send_conf option
 * combinations and all available buffer types, skipping combinations
 * that are invalid or unsupported. Returns 0 on success, 1 on failure.
 */
static int test_inet_send(struct io_uring *ring)
{
	struct send_conf conf;
	struct sockaddr_storage addr;
	int sock_client = -1, sock_server = -1;
	int ret, j, i;
	int buf_index;

	for (j = 0; j < 32; j++) {
		/* each bit of j selects one socket-setup option */
		bool ipv6 = j & 1;
		bool client_connect = j & 2;
		bool msg_zc_set = j & 4;
		bool tcp = j & 8;
		bool swap_sockets = j & 16;

		if (tcp && !client_connect)
			continue;
		if (swap_sockets && !tcp)
			continue;
#ifndef SO_ZEROCOPY
		if (msg_zc_set)
			continue;
#endif
		ret = create_socketpair_ip(&addr, &sock_client, &sock_server, ipv6,
				 client_connect, msg_zc_set, tcp);
		if (ret) {
			fprintf(stderr, "sock prep failed %d\n", ret);
			return 1;
		}
		/* send from the accepted (server) side instead */
		if (swap_sockets) {
			int tmp_sock = sock_client;

			sock_client = sock_server;
			sock_server = tmp_sock;
		}

		for (i = 0; i < 1024; i++) {
			bool regbuf;

			/* each bit of i selects one send option */
			conf.use_sendmsg = i & 1;
			conf.poll_first = i & 2;
			conf.fixed_buf = i & 4;
			conf.addr = (i & 8) ? &addr : NULL;
			conf.cork = i & 16;
			conf.mix_register = i & 32;
			conf.force_async = i & 64;
			conf.zc = i & 128;
			conf.iovec = i & 256;
			conf.long_iovec = i & 512;
			conf.tcp = tcp;
			regbuf = conf.mix_register || conf.fixed_buf;

			/* skip invalid / unsupported combinations */
			if (conf.iovec && (!conf.use_sendmsg || regbuf || conf.cork))
				continue;
			if (!conf.zc) {
				if (regbuf)
					continue;
				/*
				* Non zerocopy send w/ addr was added together with sendmsg_zc,
				* skip if the kernel doesn't support it.
				*/
				if (conf.addr && !has_sendmsg)
					continue;
			}
			if (tcp && (conf.cork || conf.addr))
				continue;
			if (conf.mix_register && (!conf.cork || conf.fixed_buf))
				continue;
			if (!client_connect && conf.addr == NULL)
				continue;
			if (conf.use_sendmsg && (regbuf || !has_sendmsg))
				continue;
			if (msg_zc_set && !conf.zc)
				continue;

			for (buf_index = 0; buf_index < ARRAY_SIZE(buffers_iov); buf_index++) {
				size_t len = buffers_iov[buf_index].iov_len;

				/* buffer type unavailable on this system */
				if (!buffers_iov[buf_index].iov_base)
					continue;
				/* UDP datagrams can't carry huge payloads */
				if (!tcp && len > 4 * page_sz)
					continue;

				conf.buf_index = buf_index;
				ret = do_test_inet_send(ring, sock_client, sock_server, &conf);
				if (ret) {
					fprintf(stderr, "send failed fixed buf %i, "
							"conn %i, addr %i, cork %i\n",
						conf.fixed_buf, client_connect,
						!!conf.addr, conf.cork);
					return 1;
				}
			}
		}

		close(sock_client);
		close(sock_server);
	}
	return 0;
}
669 
/*
 * Verify that a zc send started asynchronously (behind a linked timeout)
 * still works after the caller's sockaddr storage is clobbered: the kernel
 * must have copied the address at prep/submit time. Expects the timeout
 * CQE (-ETIME), the send CQE (res 1), then the zc notification.
 * Returns 0 on success, 1 on failure.
 */
static int test_async_addr(struct io_uring *ring)
{
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	struct sockaddr_storage addr;
	int sock_tx = -1, sock_rx = -1;
	struct __kernel_timespec ts;
	int ret;

	ts.tv_sec = 1;
	ts.tv_nsec = 0;
	/* unconnected IPv6 UDP pair, address passed explicitly */
	ret = create_socketpair_ip(&addr, &sock_tx, &sock_rx, true, false, false, false);
	if (ret) {
		fprintf(stderr, "sock prep failed %d\n", ret);
		return 1;
	}

	/* timeout linked ahead of the send delays its execution */
	sqe = io_uring_get_sqe(ring);
	io_uring_prep_timeout(sqe, &ts, 0, IORING_TIMEOUT_ETIME_SUCCESS);
	sqe->user_data = 1;
	sqe->flags |= IOSQE_IO_LINK;

	sqe = io_uring_get_sqe(ring);
	io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, 1, 0, 0);
	sqe->user_data = 2;
	io_uring_prep_send_set_addr(sqe, (const struct sockaddr *)&addr,
				    sizeof(struct sockaddr_in6));

	ret = io_uring_submit(ring);
	assert(ret == 2);
	/* clobber the addr; the send must not depend on it anymore */
	memset(&addr, 0, sizeof(addr));

	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret) {
		fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
		return 1;
	}
	if (cqe->user_data != 1 || cqe->res != -ETIME) {
		fprintf(stderr, "invalid timeout res %i %i\n",
			(int)cqe->user_data, cqe->res);
		return 1;
	}
	io_uring_cqe_seen(ring, cqe);

	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret) {
		fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
		return 1;
	}
	if (cqe->user_data != 2 || cqe->res != 1) {
		fprintf(stderr, "invalid send %i %i\n",
			(int)cqe->user_data, cqe->res);
		return 1;
	}
	io_uring_cqe_seen(ring, cqe);
	ret = recv(sock_rx, rx_buffer, 1, MSG_TRUNC);
	assert(ret == 1);

	/* finally reap the zc notification */
	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret) {
		fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
		return 1;
	}
	assert(cqe->flags & IORING_CQE_F_NOTIF);
	io_uring_cqe_seen(ring, cqe);

	close(sock_tx);
	close(sock_rx);
	return 0;
}
740 
741 /* see also send_recv.c:test_invalid */
test_invalid_zc(int fds[2])742 static int test_invalid_zc(int fds[2])
743 {
744 	struct io_uring ring;
745 	int ret;
746 	struct io_uring_cqe *cqe;
747 	struct io_uring_sqe *sqe;
748 	bool notif = false;
749 
750 	if (!has_sendmsg)
751 		return 0;
752 
753 	ret = t_create_ring(8, &ring, 0);
754 	if (ret)
755 		return ret;
756 
757 	sqe = io_uring_get_sqe(&ring);
758 	io_uring_prep_sendmsg(sqe, fds[0], NULL, MSG_WAITALL);
759 	sqe->opcode = IORING_OP_SENDMSG_ZC;
760 	sqe->flags |= IOSQE_ASYNC;
761 
762 	ret = io_uring_submit(&ring);
763 	if (ret != 1) {
764 		fprintf(stderr, "submit failed %i\n", ret);
765 		return ret;
766 	}
767 	ret = io_uring_wait_cqe(&ring, &cqe);
768 	if (ret)
769 		return 1;
770 	if (cqe->flags & IORING_CQE_F_MORE)
771 		notif = true;
772 	io_uring_cqe_seen(&ring, cqe);
773 
774 	if (notif) {
775 		ret = io_uring_wait_cqe(&ring, &cqe);
776 		if (ret)
777 			return 1;
778 		io_uring_cqe_seen(&ring, cqe);
779 	}
780 	io_uring_queue_exit(&ring);
781 	return 0;
782 }
783 
run_basic_tests(void)784 static int run_basic_tests(void)
785 {
786 	struct sockaddr_storage addr;
787 	int ret, i, sp[2];
788 
789 	/* create TCP IPv6 pair */
790 	ret = create_socketpair_ip(&addr, &sp[0], &sp[1], true, true, false, true);
791 	if (ret) {
792 		fprintf(stderr, "sock prep failed %d\n", ret);
793 		return -1;
794 	}
795 
796 	for (i = 0; i < 2; i++) {
797 		struct io_uring ring;
798 		unsigned ring_flags = 0;
799 
800 		if (i & 1)
801 			ring_flags |= IORING_SETUP_DEFER_TASKRUN;
802 
803 		ret = io_uring_queue_init(32, &ring, ring_flags);
804 		if (ret) {
805 			if (ret == -EINVAL)
806 				continue;
807 			fprintf(stderr, "queue init failed: %d\n", ret);
808 			return -1;
809 		}
810 
811 		ret = test_basic_send(&ring, sp[0], sp[1]);
812 		if (ret) {
813 			fprintf(stderr, "test_basic_send() failed\n");
814 			return -1;
815 		}
816 
817 		ret = test_send_faults(sp[0], sp[1]);
818 		if (ret) {
819 			fprintf(stderr, "test_send_faults() failed\n");
820 			return -1;
821 		}
822 
823 		ret = test_invalid_zc(sp);
824 		if (ret) {
825 			fprintf(stderr, "test_invalid_zc() failed\n");
826 			return -1;
827 		}
828 
829 		ret = test_async_addr(&ring);
830 		if (ret) {
831 			fprintf(stderr, "test_async_addr() failed\n");
832 			return T_EXIT_FAIL;
833 		}
834 
835 		io_uring_queue_exit(&ring);
836 	}
837 
838 	close(sp[0]);
839 	close(sp[1]);
840 	return 0;
841 }
842 
main(int argc,char * argv[])843 int main(int argc, char *argv[])
844 {
845 	size_t len;
846 	int ret, i;
847 
848 	if (argc > 1)
849 		return T_EXIT_SKIP;
850 
851 	ret = probe_zc_support();
852 	if (ret) {
853 		printf("probe failed\n");
854 		return T_EXIT_FAIL;
855 	}
856 	if (!has_sendzc) {
857 		printf("no IORING_OP_SEND_ZC support, skip\n");
858 		return T_EXIT_SKIP;
859 	}
860 
861 	page_sz = sysconf(_SC_PAGESIZE);
862 
863 	len = LARGE_BUF_SIZE;
864 	tx_buffer = aligned_alloc(page_sz, len);
865 	rx_buffer = aligned_alloc(page_sz, len);
866 	if (tx_buffer && rx_buffer) {
867 		buffers_iov[BUF_T_LARGE].iov_base = tx_buffer;
868 		buffers_iov[BUF_T_LARGE].iov_len = len;
869 	} else {
870 		if (tx_buffer)
871 			free(tx_buffer);
872 		if (rx_buffer)
873 			free(rx_buffer);
874 
875 		printf("skip large buffer tests, can't alloc\n");
876 
877 		len = 2 * page_sz;
878 		tx_buffer = aligned_alloc(page_sz, len);
879 		rx_buffer = aligned_alloc(page_sz, len);
880 	}
881 	if (!tx_buffer || !rx_buffer) {
882 		fprintf(stderr, "can't allocate buffers\n");
883 		return T_EXIT_FAIL;
884 	}
885 
886 	srand((unsigned)time(NULL));
887 	for (i = 0; i < len; i++)
888 		tx_buffer[i] = i;
889 	memset(rx_buffer, 0, len);
890 
891 	buffers_iov[BUF_T_NORMAL].iov_base = tx_buffer + page_sz;
892 	buffers_iov[BUF_T_NORMAL].iov_len = page_sz;
893 	buffers_iov[BUF_T_SMALL].iov_base = tx_buffer;
894 	buffers_iov[BUF_T_SMALL].iov_len = 137;
895 	buffers_iov[BUF_T_NONALIGNED].iov_base = tx_buffer + BUFFER_OFFSET;
896 	buffers_iov[BUF_T_NONALIGNED].iov_len = 2 * page_sz - BUFFER_OFFSET - 13;
897 
898 	if (len == LARGE_BUF_SIZE) {
899 		void *huge_page;
900 		int off = page_sz + 27;
901 
902 		len = 1U << 22;
903 		huge_page = mmap(NULL, len, PROT_READ|PROT_WRITE,
904 				 MAP_PRIVATE | MAP_HUGETLB | MAP_HUGE_2MB | MAP_ANONYMOUS,
905 				 -1, 0);
906 		if (huge_page != MAP_FAILED) {
907 			buffers_iov[BUF_T_HUGETLB].iov_base = huge_page + off;
908 			buffers_iov[BUF_T_HUGETLB].iov_len = len - off;
909 		}
910 	}
911 
912 	ret = run_basic_tests();
913 	if (ret)
914 		return T_EXIT_FAIL;
915 
916 	for (i = 0; i < 2; i++) {
917 		struct io_uring ring;
918 		unsigned ring_flags = 0;
919 
920 		if (i & 1)
921 			ring_flags |= IORING_SETUP_SINGLE_ISSUER |
922 				      IORING_SETUP_DEFER_TASKRUN;
923 
924 		ret = io_uring_queue_init(32, &ring, ring_flags);
925 		if (ret) {
926 			if (ret == -EINVAL)
927 				continue;
928 			fprintf(stderr, "queue init failed: %d\n", ret);
929 			return -1;
930 		}
931 
932 		ret = t_register_buffers(&ring, buffers_iov, ARRAY_SIZE(buffers_iov));
933 		if (ret == T_SETUP_SKIP) {
934 			fprintf(stderr, "can't register bufs, skip\n");
935 			goto out;
936 		} else if (ret != T_SETUP_OK) {
937 			fprintf(stderr, "buffer registration failed %i\n", ret);
938 			return T_EXIT_FAIL;
939 		}
940 
941 		if (buffers_iov[BUF_T_HUGETLB].iov_base) {
942 			buffers_iov[BUF_T_HUGETLB].iov_base += 13;
943 			buffers_iov[BUF_T_HUGETLB].iov_len -= 26;
944 		}
945 		if (buffers_iov[BUF_T_LARGE].iov_base) {
946 			buffers_iov[BUF_T_LARGE].iov_base += 13;
947 			buffers_iov[BUF_T_LARGE].iov_len -= 26;
948 		}
949 
950 		ret = test_inet_send(&ring);
951 		if (ret) {
952 			fprintf(stderr, "test_inet_send() failed (defer_taskrun %i)\n",
953 					 ring_flags & IORING_SETUP_DEFER_TASKRUN);
954 			return T_EXIT_FAIL;
955 		}
956 
957 		if (buffers_iov[BUF_T_HUGETLB].iov_base) {
958 			buffers_iov[BUF_T_HUGETLB].iov_base -= 13;
959 			buffers_iov[BUF_T_HUGETLB].iov_len += 26;
960 		}
961 		if (buffers_iov[BUF_T_LARGE].iov_base) {
962 			buffers_iov[BUF_T_LARGE].iov_base -= 13;
963 			buffers_iov[BUF_T_LARGE].iov_len += 26;
964 		}
965 out:
966 		io_uring_queue_exit(&ring);
967 	}
968 
969 	return T_EXIT_PASS;
970 }
971