/* SPDX-License-Identifier: MIT */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdbool.h>
#include <string.h>
#include <time.h>

#include <arpa/inet.h>
#include <linux/if_packet.h>
#include <linux/ipv6.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <netinet/ip.h>
#include <netinet/in.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/udp.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/un.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <linux/mman.h>

#include "liburing.h"
#include "helpers.h"
40 
#define MAX_MSG	128

#define HOST	"127.0.0.1"
#define HOSTV6	"::1"

#define MAX_IOV 32		/* max iovec entries for sendmsg tests */
#define CORK_REQS 5		/* number of linked sends in corked mode */
#define RX_TAG 10000		/* user_data tag marking the receive request */
#define BUFFER_OFFSET 41	/* misalignment used for BUF_T_NONALIGNED */

#ifndef ARRAY_SIZE
	#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0]))
#endif

/* buffer flavours exercised by the send tests; indexes into buffers_iov[] */
enum {
	BUF_T_NORMAL,
	BUF_T_SMALL,
	BUF_T_NONALIGNED,
	BUF_T_LARGE,
	BUF_T_HUGETLB,

	__BUF_NR,
};

/* 32MB, should be enough to trigger a short send */
#define LARGE_BUF_SIZE		(1U << 25)

static size_t page_sz;
static char *tx_buffer, *rx_buffer;		/* shared tx/rx backing store */
static struct iovec buffers_iov[__BUF_NR];	/* per-flavour views into the buffers */

static bool has_sendzc;		/* kernel supports IORING_OP_SEND_ZC */
static bool has_sendmsg;	/* kernel supports IORING_OP_SENDMSG_ZC */
static bool hit_enomem;		/* warn only once about a too-low ulimit -l */
75 
probe_zc_support(void)76 static int probe_zc_support(void)
77 {
78 	struct io_uring ring;
79 	struct io_uring_probe *p;
80 	int ret;
81 
82 	has_sendzc = has_sendmsg = false;
83 
84 	ret = io_uring_queue_init(1, &ring, 0);
85 	if (ret)
86 		return -1;
87 
88 	p = t_calloc(1, sizeof(*p) + 256 * sizeof(struct io_uring_probe_op));
89 	if (!p)
90 		return -1;
91 
92 	ret = io_uring_register_probe(&ring, p, 256);
93 	if (ret)
94 		return -1;
95 
96 	has_sendzc = p->ops_len > IORING_OP_SEND_ZC;
97 	has_sendmsg = p->ops_len > IORING_OP_SENDMSG_ZC;
98 	io_uring_queue_exit(&ring);
99 	free(p);
100 	return 0;
101 }
102 
/* Returns true iff the ring's completion queue holds no pending CQEs. */
static bool check_cq_empty(struct io_uring *ring)
{
	struct io_uring_cqe *cqe = NULL;

	/* peek must report "would block" when nothing is queued */
	return io_uring_peek_cqe(ring, &cqe) == -EAGAIN;
}
111 
/*
 * Single zerocopy send over a connected socket pair.  Verifies the two-CQE
 * protocol: first a completion CQE carrying the byte count with F_MORE set,
 * then a terminal notification CQE with F_NOTIF set.
 */
static int test_basic_send(struct io_uring *ring, int sock_tx, int sock_rx)
{
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int msg_flags = 0;
	unsigned zc_flags = 0;
	int payload_size = 100;
	int ret;

	sqe = io_uring_get_sqe(ring);
	io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, payload_size,
			      msg_flags, zc_flags);
	sqe->user_data = 1;

	ret = io_uring_submit(ring);
	assert(ret == 1);

	/* first CQE carries the send result */
	ret = io_uring_wait_cqe(ring, &cqe);
	assert(!ret && cqe->user_data == 1);
	if (cqe->res != payload_size) {
		fprintf(stderr, "send failed %i\n", cqe->res);
		return T_EXIT_FAIL;
	}

	/* F_MORE promises a second (notification) CQE for this request */
	assert(cqe->flags & IORING_CQE_F_MORE);
	io_uring_cqe_seen(ring, cqe);

	/* second CQE is the buffer-release notification and must be terminal */
	ret = io_uring_wait_cqe(ring, &cqe);
	assert(!ret);
	assert(cqe->user_data == 1);
	assert(cqe->flags & IORING_CQE_F_NOTIF);
	assert(!(cqe->flags & IORING_CQE_F_MORE));
	io_uring_cqe_seen(ring, cqe);
	assert(check_cq_empty(ring));

	/* drain the payload on the receive side */
	ret = recv(sock_rx, rx_buffer, payload_size, MSG_TRUNC);
	assert(ret == payload_size);
	return T_EXIT_PASS;
}
151 
/*
 * Drain the CQEs produced by one (intentionally failing) zc send with
 * user_data == 1 and validate the protocol: the completion CQE must come
 * first and carry res == expected; an optional notification CQE (F_NOTIF,
 * res == 0) may follow; at most two CQEs in total, CQ empty afterwards.
 *
 * Returns 0 on success, -1 on protocol violation.
 */
static int test_send_faults_check(struct io_uring *ring, int expected)
{
	struct io_uring_cqe *cqe;
	int ret, nr_cqes = 0;
	bool more = true;

	while (more) {
		nr_cqes++;
		ret = io_uring_wait_cqe(ring, &cqe);
		assert(!ret);
		assert(cqe->user_data == 1);

		/* the notification must never precede the completion */
		if (nr_cqes == 1 && (cqe->flags & IORING_CQE_F_NOTIF)) {
			fprintf(stderr, "test_send_faults_check notif came first\n");
			return -1;
		}

		if (!(cqe->flags & IORING_CQE_F_NOTIF)) {
			/* completion CQE: must carry the expected error code */
			if (cqe->res != expected) {
				fprintf(stderr, "invalid cqe res %i vs expected %i, "
					"user_data %i\n",
					cqe->res, expected, (int)cqe->user_data);
				return -1;
			}
		} else {
			/* notification CQE: res 0 and no flags besides F_NOTIF */
			if (cqe->res != 0 || cqe->flags != IORING_CQE_F_NOTIF) {
				fprintf(stderr, "invalid notif cqe %i %i\n",
					cqe->res, cqe->flags);
				return -1;
			}
		}

		more = cqe->flags & IORING_CQE_F_MORE;
		io_uring_cqe_seen(ring, cqe);
	}

	if (nr_cqes > 2) {
		fprintf(stderr, "test_send_faults_check() too many CQEs %i\n",
				nr_cqes);
		return -1;
	}
	assert(check_cq_empty(ring));
	return 0;
}
196 
test_send_faults(int sock_tx,int sock_rx)197 static int test_send_faults(int sock_tx, int sock_rx)
198 {
199 	struct io_uring_sqe *sqe;
200 	int msg_flags = 0;
201 	unsigned zc_flags = 0;
202 	int ret, payload_size = 100;
203 	struct io_uring ring;
204 
205 	ret = io_uring_queue_init(32, &ring, 0);
206 	if (ret) {
207 		fprintf(stderr, "queue init failed: %d\n", ret);
208 		return -1;
209 	}
210 
211 	/* invalid buffer */
212 	sqe = io_uring_get_sqe(&ring);
213 	io_uring_prep_send_zc(sqe, sock_tx, (void *)1UL, payload_size,
214 			      msg_flags, zc_flags);
215 	sqe->user_data = 1;
216 	ret = io_uring_submit(&ring);
217 	assert(ret == 1);
218 
219 	ret = test_send_faults_check(&ring, -EFAULT);
220 	if (ret) {
221 		fprintf(stderr, "test_send_faults with invalid buf failed\n");
222 		return -1;
223 	}
224 
225 	/* invalid address */
226 	sqe = io_uring_get_sqe(&ring);
227 	io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, payload_size,
228 			      msg_flags, zc_flags);
229 	io_uring_prep_send_set_addr(sqe, (const struct sockaddr *)1UL,
230 				    sizeof(struct sockaddr_in6));
231 	sqe->user_data = 1;
232 	ret = io_uring_submit(&ring);
233 	assert(ret == 1);
234 
235 	ret = test_send_faults_check(&ring, -EFAULT);
236 	if (ret) {
237 		fprintf(stderr, "test_send_faults with invalid addr failed\n");
238 		return -1;
239 	}
240 
241 	/* invalid send/recv flags */
242 	sqe = io_uring_get_sqe(&ring);
243 	io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, payload_size,
244 			      msg_flags, ~0U);
245 	sqe->user_data = 1;
246 	ret = io_uring_submit(&ring);
247 	assert(ret == 1);
248 
249 	ret = test_send_faults_check(&ring, -EINVAL);
250 	if (ret) {
251 		fprintf(stderr, "test_send_faults with invalid flags failed\n");
252 		return -1;
253 	}
254 
255 	return T_EXIT_PASS;
256 }
257 
create_socketpair_ip(struct sockaddr_storage * addr,int * sock_client,int * sock_server,bool ipv6,bool client_connect,bool msg_zc,bool tcp)258 static int create_socketpair_ip(struct sockaddr_storage *addr,
259 				int *sock_client, int *sock_server,
260 				bool ipv6, bool client_connect,
261 				bool msg_zc, bool tcp)
262 {
263 	socklen_t addr_size;
264 	int family, sock, listen_sock = -1;
265 	int ret;
266 
267 	memset(addr, 0, sizeof(*addr));
268 	if (ipv6) {
269 		struct sockaddr_in6 *saddr = (struct sockaddr_in6 *)addr;
270 
271 		family = AF_INET6;
272 		saddr->sin6_family = family;
273 		saddr->sin6_port = htons(0);
274 		addr_size = sizeof(*saddr);
275 	} else {
276 		struct sockaddr_in *saddr = (struct sockaddr_in *)addr;
277 
278 		family = AF_INET;
279 		saddr->sin_family = family;
280 		saddr->sin_port = htons(0);
281 		saddr->sin_addr.s_addr = htonl(INADDR_ANY);
282 		addr_size = sizeof(*saddr);
283 	}
284 
285 	/* server sock setup */
286 	if (tcp) {
287 		sock = listen_sock = socket(family, SOCK_STREAM, IPPROTO_TCP);
288 	} else {
289 		sock = *sock_server = socket(family, SOCK_DGRAM, 0);
290 	}
291 	if (sock < 0) {
292 		perror("socket");
293 		return 1;
294 	}
295 
296 	ret = bind(sock, (struct sockaddr *)addr, addr_size);
297 	if (ret < 0) {
298 		perror("bind");
299 		return 1;
300 	}
301 
302 	ret = getsockname(sock, (struct sockaddr *)addr, &addr_size);
303 	if (ret < 0) {
304 		fprintf(stderr, "getsockname failed %i\n", errno);
305 		return 1;
306 	}
307 
308 	if (tcp) {
309 		ret = listen(sock, 128);
310 		assert(ret != -1);
311 	}
312 
313 	if (ipv6) {
314 		struct sockaddr_in6 *saddr = (struct sockaddr_in6 *)addr;
315 
316 		inet_pton(AF_INET6, HOSTV6, &(saddr->sin6_addr));
317 	} else {
318 		struct sockaddr_in *saddr = (struct sockaddr_in *)addr;
319 
320 		inet_pton(AF_INET, HOST, &saddr->sin_addr);
321 	}
322 
323 	/* client sock setup */
324 	if (tcp) {
325 		*sock_client = socket(family, SOCK_STREAM, IPPROTO_TCP);
326 		assert(client_connect);
327 	} else {
328 		*sock_client = socket(family, SOCK_DGRAM, 0);
329 	}
330 	if (*sock_client < 0) {
331 		perror("socket");
332 		return 1;
333 	}
334 	if (client_connect) {
335 		ret = connect(*sock_client, (struct sockaddr *)addr, addr_size);
336 		if (ret < 0) {
337 			perror("connect");
338 			return 1;
339 		}
340 	}
341 	if (msg_zc) {
342 #ifdef SO_ZEROCOPY
343 		int val = 1;
344 
345 		/*
346 		 * NOTE: apps must not set SO_ZEROCOPY when using io_uring zc.
347 		 * It's only here to test interactions with MSG_ZEROCOPY.
348 		 */
349 		if (setsockopt(*sock_client, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val))) {
350 			perror("setsockopt zc");
351 			return 1;
352 		}
353 #else
354 		fprintf(stderr, "no SO_ZEROCOPY\n");
355 		return 1;
356 #endif
357 	}
358 	if (tcp) {
359 		*sock_server = accept(listen_sock, NULL, NULL);
360 		if (!*sock_server) {
361 			fprintf(stderr, "can't accept\n");
362 			return 1;
363 		}
364 		close(listen_sock);
365 	}
366 	return 0;
367 }
368 
/*
 * Knobs for a single do_test_inet_send() run; test_inet_send() iterates
 * over (nearly) all combinations of these.
 */
struct send_conf {
	bool fixed_buf;		/* use a registered (fixed) buffer */
	bool mix_register;	/* randomly mix fixed and normal buffers per request */
	bool cork;		/* split the send into CORK_REQS linked requests */
	bool force_async;	/* set IOSQE_ASYNC on each request */
	bool use_sendmsg;	/* sendmsg(_zc) instead of send(_zc) */
	bool tcp;		/* TCP socket pair (vs UDP) */
	bool zc;		/* use the zerocopy opcode variants */
	bool iovec;		/* multi-entry iovec payload (sendmsg only) */
	bool long_iovec;	/* MAX_IOV iovec entries instead of 4 */
	bool poll_first;	/* set IORING_RECVSEND_POLL_FIRST */
	int buf_index;		/* index into buffers_iov[] */
	struct sockaddr_storage *addr;	/* explicit destination address, or NULL */
};
383 
/*
 * Core data-path check: send the buffer selected by conf->buf_index from
 * sock_client (optionally corked into CORK_REQS linked chunks), receive it
 * on sock_server, reap all completion/notification CQEs, and verify the
 * received bytes match the transmitted ones.  Returns 0 on success (or on
 * a tolerated -ENOMEM from a low ulimit -l), 1 on failure.
 */
static int do_test_inet_send(struct io_uring *ring, int sock_client, int sock_server,
			     struct send_conf *conf)
{
	struct iovec iov[MAX_IOV];
	struct msghdr msghdr[CORK_REQS];
	const unsigned zc_flags = 0;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int nr_reqs = conf->cork ? CORK_REQS : 1;
	int i, ret, nr_cqes, addr_len = 0;
	size_t send_size = buffers_iov[conf->buf_index].iov_len;
	size_t chunk_size = send_size / nr_reqs;
	/* last chunk absorbs the remainder when send_size % nr_reqs != 0 */
	size_t chunk_size_last = send_size - chunk_size * (nr_reqs - 1);
	char *buf = buffers_iov[conf->buf_index].iov_base;

	assert(MAX_IOV >= CORK_REQS);

	if (conf->addr) {
		sa_family_t fam = ((struct sockaddr_in *)conf->addr)->sin_family;

		addr_len = (fam == AF_INET) ? sizeof(struct sockaddr_in) :
					      sizeof(struct sockaddr_in6);
	}

	memset(rx_buffer, 0, send_size);

	/* queue nr_reqs linked send requests, one per chunk */
	for (i = 0; i < nr_reqs; i++) {
		bool real_fixed_buf = conf->fixed_buf;
		size_t cur_size = chunk_size;
		int msg_flags = MSG_WAITALL;

		if (conf->mix_register)
			real_fixed_buf = rand() & 1;

		/* MSG_MORE on all but the last chunk keeps the stream corked */
		if (i != nr_reqs - 1)
			msg_flags |= MSG_MORE;
		else
			cur_size = chunk_size_last;

		sqe = io_uring_get_sqe(ring);

		if (!conf->use_sendmsg) {
			if (conf->zc) {
				io_uring_prep_send_zc(sqe, sock_client, buf + i * chunk_size,
						      cur_size, msg_flags, zc_flags);
			} else {
				io_uring_prep_send(sqe, sock_client, buf + i * chunk_size,
						      cur_size, msg_flags);
			}

			if (real_fixed_buf) {
				sqe->ioprio |= IORING_RECVSEND_FIXED_BUF;
				sqe->buf_index = conf->buf_index;
			}
			if (conf->addr)
				io_uring_prep_send_set_addr(sqe, (const struct sockaddr *)conf->addr,
							    addr_len);
		} else {
			struct iovec *io;
			int iov_len;

			if (conf->zc)
				io_uring_prep_sendmsg_zc(sqe, sock_client, &msghdr[i], msg_flags);
			else
				io_uring_prep_sendmsg(sqe, sock_client, &msghdr[i], msg_flags);

			if (!conf->iovec) {
				/* single-entry iovec covering this chunk */
				io = &iov[i];
				iov_len = 1;
				iov[i].iov_len = cur_size;
				iov[i].iov_base = buf + i * chunk_size;
			} else {
				/* multi-entry iovec carving up one whole chunk */
				char *it = buf;
				int j;

				assert(nr_reqs == 1);
				iov_len = conf->long_iovec ? MAX_IOV : 4;
				io = iov;

				for (j = 0; j < iov_len; j++)
					io[j].iov_len = 1;
				/* first entry is tiny so it's easily advanced */
				io[0].iov_base = it;
				it += io[0].iov_len;
				/* oversized second entry should force a retry */
				io[1].iov_len = chunk_size - iov_len + 1;
				io[1].iov_base = it;
				it += io[1].iov_len;
				/* fill the rest with 1-byte entries */
				for (j = 2; j < iov_len; j++) {
					io[j].iov_base = it;
					it += io[j].iov_len;
				}
			}

			memset(&msghdr[i], 0, sizeof(msghdr[i]));
			msghdr[i].msg_iov = io;
			msghdr[i].msg_iovlen = iov_len;
			if (conf->addr) {
				msghdr[i].msg_name = conf->addr;
				msghdr[i].msg_namelen = addr_len;
			}
		}
		sqe->user_data = i;
		if (conf->force_async)
			sqe->flags |= IOSQE_ASYNC;
		if (conf->poll_first)
			sqe->ioprio |= IORING_RECVSEND_POLL_FIRST;
		if (i != nr_reqs - 1)
			sqe->flags |= IOSQE_IO_LINK;
	}

	/* one receive request for the whole payload, tagged RX_TAG */
	sqe = io_uring_get_sqe(ring);
	io_uring_prep_recv(sqe, sock_server, rx_buffer, send_size, MSG_WAITALL);
	sqe->user_data = RX_TAG;

	ret = io_uring_submit(ring);
	if (ret != nr_reqs + 1) {
		fprintf(stderr, "submit failed, got %i expected %i\n", ret, nr_reqs);
		return 1;
	}

	/* nr_cqes grows as F_MORE completions promise extra notification CQEs */
	nr_cqes = nr_reqs + 1;
	for (i = 0; i < nr_cqes; i++) {
		int expected = chunk_size;

		ret = io_uring_wait_cqe(ring, &cqe);
		if (ret) {
			fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
			return 1;
		}
		if (cqe->user_data == RX_TAG) {
			if (cqe->res != send_size) {
				fprintf(stderr, "rx failed res: %i, expected %i\n",
						cqe->res, (int)send_size);
				return 1;
			}
			io_uring_cqe_seen(ring, cqe);
			continue;
		}
		/* a CQE may be either a completion or a notification, not both */
		if ((cqe->flags & IORING_CQE_F_MORE) && (cqe->flags & IORING_CQE_F_NOTIF)) {
			fprintf(stderr, "unexpected cflags %i res %i\n",
					cqe->flags, cqe->res);
			return 1;
		}
		if (cqe->user_data >= nr_reqs) {
			fprintf(stderr, "invalid user_data %lu\n",
					(unsigned long)cqe->user_data);
			return 1;
		}
		if (!(cqe->flags & IORING_CQE_F_NOTIF)) {
			if (cqe->flags & IORING_CQE_F_MORE)
				nr_cqes++;
			if (cqe->user_data == nr_reqs - 1)
				expected = chunk_size_last;
			if (cqe->res != expected) {
				/* tolerate -ENOMEM once: locked-memory limit too low */
				if (cqe->res == -ENOMEM) {
					if (!hit_enomem) {
						fprintf(stderr, "Hit -ENOMEM. "
							"Increase ulimit -l "
							"limit for a complete "
							"test run. Skipping "
							"parts.\n");
						hit_enomem = 1;
					}
					return 0;
				}
				fprintf(stderr, "invalid cqe->res %d expected %d\n",
						 cqe->res, expected);
				return 1;
			}
		}
		io_uring_cqe_seen(ring, cqe);
	}

	/* byte-for-byte comparison of transmitted vs received data */
	for (i = 0; i < send_size; i++) {
		if (buf[i] != rx_buffer[i]) {
			fprintf(stderr, "botched data, first mismated byte %i, "
				"%u vs %u\n", i, buf[i], rx_buffer[i]);
			return 1;
		}
	}
	return 0;
}
568 
/*
 * Iterate the full test matrix: 32 socket-pair configurations (IPv4/IPv6,
 * connected/unconnected, SO_ZEROCOPY, TCP/UDP, swapped ends) times 1024
 * send_conf combinations times every usable buffer flavour, skipping
 * combinations that are invalid or unsupported.
 */
static int test_inet_send(struct io_uring *ring)
{
	struct send_conf conf;
	struct sockaddr_storage addr;
	int sock_client = -1, sock_server = -1;
	int ret, j, i;
	int buf_index;

	for (j = 0; j < 32; j++) {
		/* decode the socket-pair configuration from bits of j */
		bool ipv6 = j & 1;
		bool client_connect = j & 2;
		bool msg_zc_set = j & 4;
		bool tcp = j & 8;
		bool swap_sockets = j & 16;

		if (tcp && !client_connect)
			continue;
		if (swap_sockets && !tcp)
			continue;
#ifndef SO_ZEROCOPY
		if (msg_zc_set)
			continue;
#endif
		ret = create_socketpair_ip(&addr, &sock_client, &sock_server, ipv6,
				 client_connect, msg_zc_set, tcp);
		if (ret) {
			fprintf(stderr, "sock prep failed %d\n", ret);
			return 1;
		}
		/* optionally send from the accepted (server) end instead */
		if (swap_sockets) {
			int tmp_sock = sock_client;

			sock_client = sock_server;
			sock_server = tmp_sock;
		}

		for (i = 0; i < 1024; i++) {
			bool regbuf;

			/* decode the send configuration from bits of i */
			conf.use_sendmsg = i & 1;
			conf.poll_first = i & 2;
			conf.fixed_buf = i & 4;
			conf.addr = (i & 8) ? &addr : NULL;
			conf.cork = i & 16;
			conf.mix_register = i & 32;
			conf.force_async = i & 64;
			conf.zc = i & 128;
			conf.iovec = i & 256;
			conf.long_iovec = i & 512;
			conf.tcp = tcp;
			regbuf = conf.mix_register || conf.fixed_buf;

			/* skip meaningless / unsupported combinations */
			if (conf.iovec && (!conf.use_sendmsg || regbuf || conf.cork))
				continue;
			if (!conf.zc) {
				if (regbuf)
					continue;
				/*
				* Non zerocopy send w/ addr was added together with sendmsg_zc,
				* skip if the kernel doesn't support it.
				*/
				if (conf.addr && !has_sendmsg)
					continue;
			}
			if (tcp && (conf.cork || conf.addr))
				continue;
			if (conf.mix_register && (!conf.cork || conf.fixed_buf))
				continue;
			if (!client_connect && conf.addr == NULL)
				continue;
			if (conf.use_sendmsg && (regbuf || !has_sendmsg))
				continue;
			if (msg_zc_set && !conf.zc)
				continue;

			for (buf_index = 0; buf_index < ARRAY_SIZE(buffers_iov); buf_index++) {
				size_t len = buffers_iov[buf_index].iov_len;

				if (!buffers_iov[buf_index].iov_base)
					continue;
				/* UDP IPv4 max datagram size is under 64K */
				if (!tcp && len > (1U << 15))
					continue;

				conf.buf_index = buf_index;
				ret = do_test_inet_send(ring, sock_client, sock_server, &conf);
				if (ret) {
					fprintf(stderr, "send failed fixed buf %i, "
							"conn %i, addr %i, cork %i\n",
						conf.fixed_buf, client_connect,
						!!conf.addr, conf.cork);
					return 1;
				}
			}
		}

		close(sock_client);
		close(sock_server);
	}
	return 0;
}
670 
/*
 * Verify the destination address is stable for delayed execution: a linked
 * 1s timeout defers the zc send, and the caller's addr storage is zeroed
 * right after submission — the send can only succeed if the kernel took its
 * own copy of the address at prep/submit time.
 */
static int test_async_addr(struct io_uring *ring)
{
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	struct sockaddr_storage addr;
	int sock_tx = -1, sock_rx = -1;
	struct __kernel_timespec ts;
	int ret;

	ts.tv_sec = 1;
	ts.tv_nsec = 0;
	/* unconnected UDP/IPv6 pair: the send must use the explicit addr */
	ret = create_socketpair_ip(&addr, &sock_tx, &sock_rx, true, false, false, false);
	if (ret) {
		fprintf(stderr, "sock prep failed %d\n", ret);
		return 1;
	}

	/* timeout linked ahead of the send delays its execution */
	sqe = io_uring_get_sqe(ring);
	io_uring_prep_timeout(sqe, &ts, 0, IORING_TIMEOUT_ETIME_SUCCESS);
	sqe->user_data = 1;
	sqe->flags |= IOSQE_IO_LINK;

	sqe = io_uring_get_sqe(ring);
	io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, 1, 0, 0);
	sqe->user_data = 2;
	io_uring_prep_send_set_addr(sqe, (const struct sockaddr *)&addr,
				    sizeof(struct sockaddr_in6));

	ret = io_uring_submit(ring);
	assert(ret == 2);
	/* clobber the caller-side address while the send is still pending */
	memset(&addr, 0, sizeof(addr));

	/* first CQE: the timeout firing (counts as success via ETIME) */
	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret) {
		fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
		return 1;
	}
	if (cqe->user_data != 1 || cqe->res != -ETIME) {
		fprintf(stderr, "invalid timeout res %i %i\n",
			(int)cqe->user_data, cqe->res);
		return 1;
	}
	io_uring_cqe_seen(ring, cqe);

	/* second CQE: the send completion, one byte transferred */
	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret) {
		fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
		return 1;
	}
	if (cqe->user_data != 2 || cqe->res != 1) {
		fprintf(stderr, "invalid send %i %i\n",
			(int)cqe->user_data, cqe->res);
		return 1;
	}
	io_uring_cqe_seen(ring, cqe);
	ret = recv(sock_rx, rx_buffer, 1, MSG_TRUNC);
	assert(ret == 1);

	/* third CQE: the zc buffer-release notification */
	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret) {
		fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
		return 1;
	}
	assert(cqe->flags & IORING_CQE_F_NOTIF);
	io_uring_cqe_seen(ring, cqe);

	close(sock_tx);
	close(sock_rx);
	return 0;
}
741 
test_sendzc_report(struct io_uring * ring)742 static int test_sendzc_report(struct io_uring *ring)
743 {
744 	struct io_uring_sqe *sqe;
745 	struct io_uring_cqe *cqe;
746 	struct sockaddr_storage addr;
747 	int sock_tx, sock_rx;
748 	int ret;
749 
750 	ret = create_socketpair_ip(&addr, &sock_tx, &sock_rx, true, true, false, true);
751 	if (ret) {
752 		fprintf(stderr, "sock prep failed %d\n", ret);
753 		return 1;
754 	}
755 
756 	sqe = io_uring_get_sqe(ring);
757 	io_uring_prep_send_zc(sqe, sock_tx, tx_buffer, 1, 0,
758 				IORING_SEND_ZC_REPORT_USAGE);
759 	ret = io_uring_submit(ring);
760 	if (ret != 1) {
761 		fprintf(stderr, "io_uring_submit failed %i\n", ret);
762 		return 1;
763 	}
764 	ret = io_uring_wait_cqe(ring, &cqe);
765 	if (ret) {
766 		fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
767 		return 1;
768 	}
769 	if (cqe->res != 1 && cqe->res != -EINVAL) {
770 		fprintf(stderr, "sendzc report failed %u\n", cqe->res);
771 		return 1;
772 	}
773 	if (!(cqe->flags & IORING_CQE_F_MORE)) {
774 		fprintf(stderr, "expected notification %i\n", cqe->res);
775 		return 1;
776 	}
777 	io_uring_cqe_seen(ring, cqe);
778 
779 	ret = io_uring_wait_cqe(ring, &cqe);
780 	if (ret) {
781 		fprintf(stderr, "io_uring_wait_cqe failed %i\n", ret);
782 		return 1;
783 	}
784 	if (cqe->flags & IORING_CQE_F_MORE) {
785 		fprintf(stderr, "F_MORE after notification\n");
786 		return 1;
787 	}
788 	io_uring_cqe_seen(ring, cqe);
789 
790 	close(sock_tx);
791 	close(sock_rx);
792 	return 0;
793 }
794 
795 /* see also send_recv.c:test_invalid */
test_invalid_zc(int fds[2])796 static int test_invalid_zc(int fds[2])
797 {
798 	struct io_uring ring;
799 	int ret;
800 	struct io_uring_cqe *cqe;
801 	struct io_uring_sqe *sqe;
802 	bool notif = false;
803 
804 	if (!has_sendmsg)
805 		return 0;
806 
807 	ret = t_create_ring(8, &ring, 0);
808 	if (ret)
809 		return ret;
810 
811 	sqe = io_uring_get_sqe(&ring);
812 	io_uring_prep_sendmsg(sqe, fds[0], NULL, MSG_WAITALL);
813 	sqe->opcode = IORING_OP_SENDMSG_ZC;
814 	sqe->flags |= IOSQE_ASYNC;
815 
816 	ret = io_uring_submit(&ring);
817 	if (ret != 1) {
818 		fprintf(stderr, "submit failed %i\n", ret);
819 		return ret;
820 	}
821 	ret = io_uring_wait_cqe(&ring, &cqe);
822 	if (ret)
823 		return 1;
824 	if (cqe->flags & IORING_CQE_F_MORE)
825 		notif = true;
826 	io_uring_cqe_seen(&ring, cqe);
827 
828 	if (notif) {
829 		ret = io_uring_wait_cqe(&ring, &cqe);
830 		if (ret)
831 			return 1;
832 		io_uring_cqe_seen(&ring, cqe);
833 	}
834 	io_uring_queue_exit(&ring);
835 	return 0;
836 }
837 
/*
 * Run the non-matrix tests (basic send, fault injection, invalid zc,
 * async address, report usage) over one TCP/IPv6 pair, once on a plain
 * ring and once with IORING_SETUP_DEFER_TASKRUN.
 * Returns 0 on success, non-zero on failure.
 */
static int run_basic_tests(void)
{
	struct sockaddr_storage addr;
	int ret, i, sp[2];

	/* create TCP IPv6 pair */
	ret = create_socketpair_ip(&addr, &sp[0], &sp[1], true, true, false, true);
	if (ret) {
		fprintf(stderr, "sock prep failed %d\n", ret);
		return -1;
	}

	for (i = 0; i < 2; i++) {
		struct io_uring ring;
		unsigned ring_flags = 0;

		if (i & 1)
			ring_flags |= IORING_SETUP_DEFER_TASKRUN;

		ret = io_uring_queue_init(32, &ring, ring_flags);
		if (ret) {
			/* DEFER_TASKRUN may be unsupported on older kernels */
			if (ret == -EINVAL)
				continue;
			fprintf(stderr, "queue init failed: %d\n", ret);
			return -1;
		}

		ret = test_basic_send(&ring, sp[0], sp[1]);
		if (ret) {
			fprintf(stderr, "test_basic_send() failed\n");
			return -1;
		}

		ret = test_send_faults(sp[0], sp[1]);
		if (ret) {
			fprintf(stderr, "test_send_faults() failed\n");
			return -1;
		}

		ret = test_invalid_zc(sp);
		if (ret) {
			fprintf(stderr, "test_invalid_zc() failed\n");
			return -1;
		}

		ret = test_async_addr(&ring);
		if (ret) {
			fprintf(stderr, "test_async_addr() failed\n");
			return T_EXIT_FAIL;
		}

		ret = test_sendzc_report(&ring);
		if (ret) {
			fprintf(stderr, "test_sendzc_report() failed\n");
			return T_EXIT_FAIL;
		}

		io_uring_queue_exit(&ring);
	}

	close(sp[0]);
	close(sp[1]);
	return 0;
}
902 
/*
 * Entry point: probe for zc support, build the tx/rx buffers described by
 * buffers_iov[] (large, normal, small, non-aligned, hugetlb flavours),
 * run the basic tests, then the full test_inet_send() matrix with
 * registered buffers — once plain and once with SINGLE_ISSUER +
 * DEFER_TASKRUN.
 */
int main(int argc, char *argv[])
{
	size_t len;
	int ret, i;

	if (argc > 1)
		return T_EXIT_SKIP;

	ret = probe_zc_support();
	if (ret) {
		printf("probe failed\n");
		return T_EXIT_FAIL;
	}
	if (!has_sendzc) {
		printf("no IORING_OP_SEND_ZC support, skip\n");
		return T_EXIT_SKIP;
	}

	page_sz = sysconf(_SC_PAGESIZE);

	/* try the 32MB buffers first; fall back to 2 pages on alloc failure */
	len = LARGE_BUF_SIZE;
	tx_buffer = aligned_alloc(page_sz, len);
	rx_buffer = aligned_alloc(page_sz, len);
	if (tx_buffer && rx_buffer) {
		buffers_iov[BUF_T_LARGE].iov_base = tx_buffer;
		buffers_iov[BUF_T_LARGE].iov_len = len;
	} else {
		if (tx_buffer)
			free(tx_buffer);
		if (rx_buffer)
			free(rx_buffer);

		printf("skip large buffer tests, can't alloc\n");

		len = 2 * page_sz;
		tx_buffer = aligned_alloc(page_sz, len);
		rx_buffer = aligned_alloc(page_sz, len);
	}
	if (!tx_buffer || !rx_buffer) {
		fprintf(stderr, "can't allocate buffers\n");
		return T_EXIT_FAIL;
	}

	srand((unsigned)time(NULL));
	/* fill tx with a repeating 0..255 byte pattern for data verification */
	for (i = 0; i < len; i++)
		tx_buffer[i] = i;
	memset(rx_buffer, 0, len);

	/* carve the smaller buffer flavours out of tx_buffer */
	buffers_iov[BUF_T_NORMAL].iov_base = tx_buffer + page_sz;
	buffers_iov[BUF_T_NORMAL].iov_len = page_sz;
	buffers_iov[BUF_T_SMALL].iov_base = tx_buffer;
	buffers_iov[BUF_T_SMALL].iov_len = 137;
	buffers_iov[BUF_T_NONALIGNED].iov_base = tx_buffer + BUFFER_OFFSET;
	buffers_iov[BUF_T_NONALIGNED].iov_len = 2 * page_sz - BUFFER_OFFSET - 13;

	/* optional hugetlb flavour: only attempted when the large alloc worked */
	if (len == LARGE_BUF_SIZE) {
		void *huge_page;
		int off = page_sz + 27;

		len = 1U << 22;
		huge_page = mmap(NULL, len, PROT_READ|PROT_WRITE,
				 MAP_PRIVATE | MAP_HUGETLB | MAP_HUGE_2MB | MAP_ANONYMOUS,
				 -1, 0);
		if (huge_page != MAP_FAILED) {
			buffers_iov[BUF_T_HUGETLB].iov_base = huge_page + off;
			buffers_iov[BUF_T_HUGETLB].iov_len = len - off;
		}
	}

	ret = run_basic_tests();
	if (ret)
		return T_EXIT_FAIL;

	for (i = 0; i < 2; i++) {
		struct io_uring ring;
		unsigned ring_flags = 0;

		if (i & 1)
			ring_flags |= IORING_SETUP_SINGLE_ISSUER |
				      IORING_SETUP_DEFER_TASKRUN;

		ret = io_uring_queue_init(32, &ring, ring_flags);
		if (ret) {
			/* these setup flags may be unsupported on older kernels */
			if (ret == -EINVAL)
				continue;
			fprintf(stderr, "queue init failed: %d\n", ret);
			return -1;
		}

		ret = t_register_buffers(&ring, buffers_iov, ARRAY_SIZE(buffers_iov));
		if (ret == T_SETUP_SKIP) {
			fprintf(stderr, "can't register bufs, skip\n");
			goto out;
		} else if (ret != T_SETUP_OK) {
			fprintf(stderr, "buffer registration failed %i\n", ret);
			return T_EXIT_FAIL;
		}

		/* shrink the registered views so sends start mid-registration */
		if (buffers_iov[BUF_T_HUGETLB].iov_base) {
			buffers_iov[BUF_T_HUGETLB].iov_base += 13;
			buffers_iov[BUF_T_HUGETLB].iov_len -= 26;
		}
		if (buffers_iov[BUF_T_LARGE].iov_base) {
			buffers_iov[BUF_T_LARGE].iov_base += 13;
			buffers_iov[BUF_T_LARGE].iov_len -= 26;
		}

		ret = test_inet_send(&ring);
		if (ret) {
			fprintf(stderr, "test_inet_send() failed (defer_taskrun %i)\n",
					 ring_flags & IORING_SETUP_DEFER_TASKRUN);
			return T_EXIT_FAIL;
		}

		/* restore the views for the next iteration's registration */
		if (buffers_iov[BUF_T_HUGETLB].iov_base) {
			buffers_iov[BUF_T_HUGETLB].iov_base -= 13;
			buffers_iov[BUF_T_HUGETLB].iov_len += 26;
		}
		if (buffers_iov[BUF_T_LARGE].iov_base) {
			buffers_iov[BUF_T_LARGE].iov_base -= 13;
			buffers_iov[BUF_T_LARGE].iov_len += 26;
		}
out:
		io_uring_queue_exit(&ring);
	}

	return T_EXIT_PASS;
}
1031