• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/* SPDX-License-Identifier: MIT */
/*
 * Simple ping/pong client which can use the io_uring NAPI support.
 *
 * Run it as root if possible: it tries to switch to the SCHED_FIFO
 * scheduling class, which needs privileges. Without them that step only
 * prints an error message and the client still works.
 *
 * Example:
 *
 * sudo examples/napi-busy-poll-client -a 192.168.2.2 -n100000 -p4444 \
 *	-b -t10 -u
 *
 * sends and receives 100k packets, using NAPI.
 */
15 #include <ctype.h>
16 #include <errno.h>
17 #include <float.h>
18 #include <getopt.h>
19 #include <liburing.h>
20 #include <math.h>
21 #include <sched.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <sys/types.h>
26 #include <sys/socket.h>
27 #include <time.h>
28 #include <unistd.h>
29 #include <arpa/inet.h>
30 #include <netdb.h>
31 #include <netinet/in.h>
32 
/* Size in bytes of the ping payload buffer. */
#define MAXBUFLEN 100
/* Capacity (including the terminating NUL) of the port option string. */
#define PORTNOLEN 10
/* Capacity (including the terminating NUL) of the address option string. */
#define ADDRLEN   80
/* Number of entries requested when the ring is created. */
#define RINGSIZE  1024

/*
 * Yield ch itself when it is a printable character and '#' otherwise.
 *
 * The argument is parenthesized so that the unsigned char cast applies to
 * the whole argument expression rather than only to its first operand.
 * Note that ch is still evaluated twice, so it must have no side effects.
 */
#define printable(ch) (isprint((unsigned char)(ch)) ? (ch) : '#')
39 
/*
 * Kinds of request issued on the ring. The value is packed into the top
 * byte of an SQE's user_data so a completion can be matched to its request.
 */
enum {
	IOURING_RECV    = 0,
	IOURING_SEND    = 1,
	IOURING_RECVMSG = 2,
	IOURING_SENDMSG = 3,
};
46 
47 struct ctx
48 {
49 	struct io_uring ring;
50 	union {
51 		struct sockaddr_in6 saddr6;
52 		struct sockaddr_in saddr;
53 	};
54 
55 	int sockfd;
56 	int buffer_len;
57 	int num_pings;
58 	bool napi_check;
59 
60 	union {
61 		char buffer[MAXBUFLEN];
62 		struct timespec ts;
63 	};
64 
65 	int rtt_index;
66 	double *rtt;
67 };
68 
69 struct options
70 {
71 	int  num_pings;
72 	__u32  timeout;
73 
74 	bool sq_poll;
75 	bool defer_tw;
76 	bool busy_loop;
77 	bool prefer_busy_poll;
78 	bool ipv6;
79 
80 	char port[PORTNOLEN];
81 	char addr[ADDRLEN];
82 };
83 
/*
 * Long option table for getopt_long(). The argument requirement of each
 * entry matches how main() handles the corresponding short option:
 * "prefer" (-u) is a plain flag, while "sqpoll" (-s) is parsed with
 * atoi(optarg) and therefore must carry an argument.
 */
static struct option longopts[] =
{
	{"address"  , required_argument, NULL, 'a'},
	{"busy"     , no_argument      , NULL, 'b'},
	{"help"     , no_argument      , NULL, 'h'},
	{"num_pings", required_argument, NULL, 'n'},
	{"port"     , required_argument, NULL, 'p'},
	{"prefer"   , no_argument      , NULL, 'u'},
	{"sqpoll"   , required_argument, NULL, 's'},
	{"timeout"  , required_argument, NULL, 't'},
	{NULL       , 0                , NULL,  0 }
};
96 
/*
 * Print the command line synopsis and a description of every option to
 * stderr.
 *
 * name: program name shown in the synopsis (normally argv[0]).
 */
static void printUsage(const char *name)
{
	fprintf(stderr,
	"Usage: %s [-a|--address ip_address] [-p|--port port-no] [-s|--sqpoll 0|1]"
	" [-b|--busy] [-n|--num_pings pings] [-t|--timeout busy-poll-timeout]"
	" [-u|--prefer] [-d 0|1] [-6] [-h|--help]\n"
	"--address\n"
	"-a        : remote ip address (IPv6 when -6 is given)\n"
	"--busy\n"
	"-b        : busy poll io_uring instead of blocking.\n"
	"--num_pings\n"
	"-n        : number of pings\n"
	"--port\n"
	"-p        : port\n"
	"--sqpoll\n"
	"-s        : Configure io_uring to use SQPOLL thread (0 or 1)\n"
	"--timeout\n"
	"-t        : Configure NAPI busy poll timeout\n"
	"--prefer\n"
	"-u        : prefer NAPI busy poll\n"
	"-d        : Configure io_uring to defer task work (0 or 1)\n"
	"-6        : use IPV6\n"
	"--help\n"
	"-h        : Display this usage message\n\n",
	name);
}
121 
/*
 * Report a command line error for one option character on stderr.
 * Nothing is printed when msg is NULL or opt is 0; a non-printable option
 * character is shown as '#'.
 */
static void printError(const char *msg, int opt)
{
	if (!msg || !opt)
		return;

	fprintf(stderr, "%s (-%c)\n", msg, printable(opt));
}
127 
/*
 * Try to move the calling process to SCHED_FIFO at the highest priority
 * that class offers. A failure is only reported on stderr; the caller
 * carries on with the current scheduling policy.
 */
static void setProcessScheduler(void)
{
	struct sched_param fifoParam;
	int rc;

	fifoParam.sched_priority = sched_get_priority_max(SCHED_FIFO);

	rc = sched_setscheduler(0, SCHED_FIFO, &fifoParam);
	if (rc >= 0)
		return;

	fprintf(stderr, "sched_setscheduler() failed: (%d) %s\n",
		errno, strerror(errno));
}
137 
/*
 * Return time1 - time0 in seconds as a double. The result is negative
 * when time1 is earlier than time0.
 */
static double diffTimespec(const struct timespec *time1, const struct timespec *time0)
{
	const double wholeSeconds = time1->tv_sec - time0->tv_sec;
	const double fracSeconds  = (time1->tv_nsec - time0->tv_nsec) / 1000000000.0;

	return wholeSeconds + fracSeconds;
}
143 
/*
 * Pack an operation type and a file descriptor into one 64-bit user_data
 * value: the type goes into bits 56..63, the descriptor (as 32 unsigned
 * bits) into bits 0..31.
 */
static uint64_t encodeUserData(char type, int fd)
{
	const uint64_t typeBits = (uint64_t)type << 56;
	const uint64_t fdBits   = (uint32_t)fd;

	return fdBits | typeBits;
}
148 
/*
 * Split a user_data value into its parts: *type receives bits 56..63 and
 * *fd receives the low 32 bits.
 */
static void decodeUserData(uint64_t data, char *type, int *fd)
{
	const uint64_t lowWord = data & 0xffffffffU;
	const uint64_t topByte = data >> 56;

	*type = topByte;
	*fd   = lowWord;
}
154 
opTypeToStr(char type)155 static const char *opTypeToStr(char type)
156 {
157 	const char *res;
158 
159 	switch (type) {
160 	case IOURING_RECV:
161 		res = "IOURING_RECV";
162 		break;
163 	case IOURING_SEND:
164 		res = "IOURING_SEND";
165 		break;
166 	case IOURING_RECVMSG:
167 		res = "IOURING_RECVMSG";
168 		break;
169 	case IOURING_SENDMSG:
170 		res = "IOURING_SENDMSG";
171 		break;
172 	default:
173 		res = "Unknown";
174 	}
175 
176 	return res;
177 }
178 
reportNapi(struct ctx * ctx)179 static void reportNapi(struct ctx *ctx)
180 {
181 	unsigned int napi_id = 0;
182 	socklen_t len = sizeof(napi_id);
183 
184 	getsockopt(ctx->sockfd, SOL_SOCKET, SO_INCOMING_NAPI_ID, &napi_id, &len);
185 	if (napi_id)
186 		printf(" napi id: %d\n", napi_id);
187 	else
188 		printf(" unassigned napi id\n");
189 
190 	ctx->napi_check = true;
191 }
192 
sendPing(struct ctx * ctx)193 static void sendPing(struct ctx *ctx)
194 {
195 	struct io_uring_sqe *sqe = io_uring_get_sqe(&ctx->ring);
196 
197 	clock_gettime(CLOCK_REALTIME, (struct timespec *)ctx->buffer);
198 	io_uring_prep_send(sqe, ctx->sockfd, ctx->buffer, sizeof(struct timespec), 0);
199 	sqe->user_data = encodeUserData(IOURING_SEND, ctx->sockfd);
200 }
201 
receivePing(struct ctx * ctx)202 static void receivePing(struct ctx *ctx)
203 {
204 	struct io_uring_sqe *sqe = io_uring_get_sqe(&ctx->ring);
205 
206 	io_uring_prep_recv(sqe, ctx->sockfd, ctx->buffer, MAXBUFLEN, 0);
207 	sqe->user_data = encodeUserData(IOURING_RECV, ctx->sockfd);
208 }
209 
recordRTT(struct ctx * ctx)210 static void recordRTT(struct ctx *ctx)
211 {
212 	struct timespec startTs = ctx->ts;
213 
214 	// Send next ping.
215 	sendPing(ctx);
216 
217 	// Store round-trip time.
218 	ctx->rtt[ctx->rtt_index] = diffTimespec(&ctx->ts, &startTs);
219 	ctx->rtt_index++;
220 }
221 
printStats(struct ctx * ctx)222 static void printStats(struct ctx *ctx)
223 {
224 	double minRTT    = DBL_MAX;
225 	double maxRTT    = 0.0;
226 	double avgRTT    = 0.0;
227 	double stddevRTT = 0.0;
228 
229 	// Calculate min, max, avg.
230 	for (int i = 0; i < ctx->rtt_index; i++) {
231 		if (ctx->rtt[i] < minRTT)
232 			minRTT = ctx->rtt[i];
233 		if (ctx->rtt[i] > maxRTT)
234 			maxRTT = ctx->rtt[i];
235 
236 		avgRTT += ctx->rtt[i];
237 	}
238 	avgRTT /= ctx->rtt_index;
239 
240 	// Calculate stddev.
241 	for (int i = 0; i < ctx->rtt_index; i++)
242 		stddevRTT += fabs(ctx->rtt[i] - avgRTT);
243 	stddevRTT /= ctx->rtt_index;
244 
245 	fprintf(stdout, " rtt(us) min/avg/max/mdev = %.3f/%.3f/%.3f/%.3f\n",
246 		minRTT * 1000000, avgRTT * 1000000, maxRTT * 1000000, stddevRTT * 1000000);
247 }
248 
completion(struct ctx * ctx,struct io_uring_cqe * cqe)249 static int completion(struct ctx *ctx, struct io_uring_cqe *cqe)
250 {
251 	char type;
252 	int  fd;
253 	int  res = cqe->res;
254 
255 	decodeUserData(cqe->user_data, &type, &fd);
256 	if (res < 0) {
257 		fprintf(stderr, "unexpected %s failure: (%d) %s\n",
258 			opTypeToStr(type), -res, strerror(-res));
259 		return -1;
260 	}
261 
262 	switch (type) {
263 	case IOURING_SEND:
264 		receivePing(ctx);
265 		break;
266 	case IOURING_RECV:
267 		if (res != sizeof(struct timespec)) {
268 			fprintf(stderr, "unexpected ping reply len: %d\n", res);
269 			abort();
270 		}
271 
272 		if (!ctx->napi_check) {
273 			reportNapi(ctx);
274 			sendPing(ctx);
275 		} else {
276 			recordRTT(ctx);
277 		}
278 
279 		--ctx->num_pings;
280 		break;
281 
282 	default:
283 		fprintf(stderr, "unexpected %s completion\n",
284 			opTypeToStr(type));
285 		return -1;
286 		break;
287 	}
288 
289 	return 0;
290 }
291 
main(int argc,char * argv[])292 int main(int argc, char *argv[])
293 {
294 	struct ctx       ctx;
295 	struct options   opt;
296 	struct __kernel_timespec *tsPtr;
297 	struct __kernel_timespec ts;
298 	struct io_uring_params params;
299 	struct io_uring_napi napi;
300 	int flag, ret, af;
301 
302 	memset(&opt, 0, sizeof(struct options));
303 
304 	// Process flags.
305 	while ((flag = getopt_long(argc, argv, ":hs:bua:n:p:t:6d:", longopts, NULL)) != -1) {
306 		switch (flag) {
307 		case 'a':
308 			strcpy(opt.addr, optarg);
309 			break;
310 		case 'b':
311 			opt.busy_loop = true;
312 			break;
313 		case 'h':
314 			printUsage(argv[0]);
315 			exit(0);
316 			break;
317 		case 'n':
318 			opt.num_pings = atoi(optarg) + 1;
319 			break;
320 		case 'p':
321 			strcpy(opt.port, optarg);
322 			break;
323 		case 's':
324 			opt.sq_poll = !!atoi(optarg);
325 			break;
326 		case 't':
327 			opt.timeout = atoi(optarg);
328 			break;
329 		case 'u':
330 			opt.prefer_busy_poll = true;
331 			break;
332 		case '6':
333 			opt.ipv6 = true;
334 			break;
335 		case 'd':
336 			opt.defer_tw = !!atoi(optarg);
337 			break;
338 		case ':':
339 			printError("Missing argument", optopt);
340 			printUsage(argv[0]);
341 			exit(-1);
342 			break;
343 		case '?':
344 			printError("Unrecognized option", optopt);
345 			printUsage(argv[0]);
346 			exit(-1);
347 			break;
348 
349 		default:
350 			fprintf(stderr, "Fatal: Unexpected case in CmdLineProcessor switch()\n");
351 			exit(-1);
352 			break;
353 		}
354 	}
355 
356 	if (strlen(opt.addr) == 0) {
357 		fprintf(stderr, "address option is mandatory\n");
358 		printUsage(argv[0]);
359 		exit(1);
360 	}
361 
362 	if (opt.ipv6) {
363 		af = AF_INET6;
364 		ctx.saddr6.sin6_port   = htons(atoi(opt.port));
365 		ctx.saddr6.sin6_family = AF_INET6;
366 	} else {
367 		af = AF_INET;
368 		ctx.saddr.sin_port   = htons(atoi(opt.port));
369 		ctx.saddr.sin_family = AF_INET;
370 	}
371 
372 	if (opt.ipv6)
373 		ret = inet_pton(af, opt.addr, &ctx.saddr6.sin6_addr);
374 	else
375 		ret = inet_pton(af, opt.addr, &ctx.saddr.sin_addr);
376 	if (ret <= 0) {
377 		fprintf(stderr, "inet_pton error for %s\n", optarg);
378 		printUsage(argv[0]);
379 		exit(1);
380 	}
381 
382 	// Connect to server.
383 	fprintf(stdout, "Connecting to %s... (port=%s) to send %d pings\n", opt.addr, opt.port, opt.num_pings - 1);
384 
385 	if ((ctx.sockfd = socket(af, SOCK_DGRAM, 0)) < 0) {
386 		fprintf(stderr, "socket() failed: (%d) %s\n", errno, strerror(errno));
387 		exit(1);
388 	}
389 
390 	if (opt.ipv6)
391 		ret = connect(ctx.sockfd, (struct sockaddr *)&ctx.saddr6, sizeof(struct sockaddr_in6));
392 	else
393 		ret = connect(ctx.sockfd, (struct sockaddr *)&ctx.saddr, sizeof(struct sockaddr_in));
394 	if (ret < 0) {
395 		fprintf(stderr, "connect() failed: (%d) %s\n", errno, strerror(errno));
396 		exit(1);
397 	}
398 
399 	// Setup ring.
400 	memset(&params, 0, sizeof(params));
401 	memset(&ts, 0, sizeof(ts));
402 	memset(&napi, 0, sizeof(napi));
403 
404 	params.flags = IORING_SETUP_SINGLE_ISSUER;
405 	if (opt.defer_tw) {
406 		params.flags |= IORING_SETUP_DEFER_TASKRUN;
407 	} else if (opt.sq_poll) {
408 		params.flags = IORING_SETUP_SQPOLL;
409 		params.sq_thread_idle = 50;
410 	} else {
411 		params.flags |= IORING_SETUP_COOP_TASKRUN;
412 	}
413 
414 	ret = io_uring_queue_init_params(RINGSIZE, &ctx.ring, &params);
415 	if (ret) {
416 		fprintf(stderr, "io_uring_queue_init_params() failed: (%d) %s\n",
417 			ret, strerror(-ret));
418 		exit(1);
419 	}
420 
421 	if (opt.timeout || opt.prefer_busy_poll) {
422 		napi.prefer_busy_poll = opt.prefer_busy_poll;
423 		napi.busy_poll_to = opt.timeout;
424 
425 		ret = io_uring_register_napi(&ctx.ring, &napi);
426 		if (ret) {
427 			fprintf(stderr, "io_uring_register_napi: %d\n", ret);
428 			exit(1);
429 		}
430 	}
431 
432 	if (opt.busy_loop)
433 		tsPtr = &ts;
434 	else
435 		tsPtr = NULL;
436 
437 	// Use realtime scheduler.
438 	setProcessScheduler();
439 
440 	// Copy payload.
441 	clock_gettime(CLOCK_REALTIME, &ctx.ts);
442 
443 	// Setup context.
444 	ctx.napi_check = false;
445 	ctx.buffer_len = sizeof(struct timespec);
446 	ctx.num_pings  = opt.num_pings;
447 
448 	ctx.rtt_index = 0;
449 	ctx.rtt = (double *)malloc(sizeof(double) * opt.num_pings);
450 	if (!ctx.rtt) {
451 		fprintf(stderr, "Cannot allocate results array\n");
452 		exit(1);
453 	}
454 
455 	// Send initial message to get napi id.
456 	sendPing(&ctx);
457 
458 	while (ctx.num_pings != 0) {
459 		int res;
460 		unsigned num_completed = 0;
461 		unsigned head;
462 		struct io_uring_cqe *cqe;
463 
464 		do {
465 			res = io_uring_submit_and_wait_timeout(&ctx.ring, &cqe, 1, tsPtr, NULL);
466 			if (res >= 0)
467 				break;
468 			else if (res == -ETIME)
469 				continue;
470 			fprintf(stderr, "submit_and_wait: %d\n", res);
471 			exit(1);
472 		} while (1);
473 
474 		io_uring_for_each_cqe(&ctx.ring, head, cqe) {
475 			++num_completed;
476 			if (completion(&ctx, cqe))
477 				goto out;
478 		}
479 
480 		if (num_completed)
481 			io_uring_cq_advance(&ctx.ring, num_completed);
482 	}
483 
484 	printStats(&ctx);
485 
486 out:
487 	// Clean up.
488 	if (opt.timeout || opt.prefer_busy_poll) {
489 		ret = io_uring_unregister_napi(&ctx.ring, &napi);
490 		if (ret)
491 			fprintf(stderr, "io_uring_unregister_napi: %d\n", ret);
492 		if (opt.timeout          != napi.busy_poll_to ||
493 		    opt.prefer_busy_poll != napi.prefer_busy_poll) {
494 			fprintf(stderr, "Expected busy poll to = %d, got %d\n",
495 				opt.timeout, napi.busy_poll_to);
496 			fprintf(stderr, "Expected prefer busy poll = %d, got %d\n",
497 				opt.prefer_busy_poll, napi.prefer_busy_poll);
498 		}
499 	} else {
500 		ret = io_uring_unregister_napi(&ctx.ring, NULL);
501 		if (ret)
502 			fprintf(stderr, "io_uring_unregister_napi: %d\n", ret);
503 	}
504 
505 	io_uring_queue_exit(&ctx.ring);
506 	free(ctx.rtt);
507 	close(ctx.sockfd);
508 	return 0;
509 }
510