1 /* SPDX-License-Identifier: MIT */
2 /*
3 * Simple ping/pong client which can use the io_uring NAPI support.
4 *
5 * Needs to be run as root because it sets SCHED_FIFO scheduling class,
6 * but will work without that.
7 *
8 * Example:
9 *
10 * sudo examples/napi-busy-poll-client -a 192.168.2.2 -n100000 -p4444 \
11 * -b -t10 -u
12 *
13 * send and receive 100k packets, using NAPI.
14 */
15 #include <ctype.h>
16 #include <errno.h>
17 #include <float.h>
18 #include <getopt.h>
19 #include <liburing.h>
20 #include <math.h>
21 #include <sched.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <sys/types.h>
26 #include <sys/socket.h>
27 #include <time.h>
28 #include <unistd.h>
29 #include <arpa/inet.h>
30 #include <netdb.h>
31 #include <netinet/in.h>
32
/* Size in bytes of the payload buffer used for sends and receives. */
#define MAXBUFLEN 100
/* Capacity of the textual port option, terminating NUL included. */
#define PORTNOLEN 10
/* Capacity of the textual address option, terminating NUL included. */
#define ADDRLEN 80
/* Number of entries requested when the io_uring instance is created. */
#define RINGSIZE 1024

/*
 * Evaluate to ch when it is a printable character and to '#' otherwise.
 * Every use of the argument is parenthesized so that an expression such as
 * printable(a - b) is cast and tested as a whole. The argument is expanded
 * twice, so it must not have side effects.
 */
#define printable(ch) (isprint((unsigned char)(ch)) ? (ch) : '#')
39
/* Operation tags carried in the user_data of each submitted request. */
enum {
	IOURING_RECV    = 0,
	IOURING_SEND    = 1,
	IOURING_RECVMSG = 2,
	IOURING_SENDMSG = 3,
};
46
47 struct ctx
48 {
49 struct io_uring ring;
50 union {
51 struct sockaddr_in6 saddr6;
52 struct sockaddr_in saddr;
53 };
54
55 int sockfd;
56 int buffer_len;
57 int num_pings;
58 bool napi_check;
59
60 union {
61 char buffer[MAXBUFLEN];
62 struct timespec ts;
63 };
64
65 int rtt_index;
66 double *rtt;
67 };
68
69 struct options
70 {
71 int num_pings;
72 __u32 timeout;
73
74 bool sq_poll;
75 bool defer_tw;
76 bool busy_loop;
77 bool prefer_busy_poll;
78 bool ipv6;
79
80 char port[PORTNOLEN];
81 char addr[ADDRLEN];
82 };
83
/*
 * Long-option table for getopt_long().
 *
 * The has_arg column mirrors the short-option string used by main():
 * "--prefer" takes no argument (like -u), and "--sqpoll" requires one
 * (like -s, whose handler parses optarg).
 */
static struct option longopts[] =
{
	{"address"  , required_argument, NULL, 'a'},
	{"busy"     , no_argument      , NULL, 'b'},
	{"help"     , no_argument      , NULL, 'h'},
	{"num_pings", required_argument, NULL, 'n'},
	{"port"     , required_argument, NULL, 'p'},
	{"prefer"   , no_argument      , NULL, 'u'},
	{"sqpoll"   , required_argument, NULL, 's'},
	{"timeout"  , required_argument, NULL, 't'},
	{NULL       , 0                , NULL,  0 }
};
96
/*
 * Print the command-line synopsis and option descriptions to stderr.
 *
 * name: program name shown in the synopsis (main() passes argv[0]).
 */
static void printUsage(const char *name)
{
	fprintf(stderr,
		"Usage: %s [-a|--address ip_address] [-p|--port port-no] [-s|--sqpoll 0|1]"
		" [-b|--busy] [-n|--num_pings pings] [-t|--timeout busy-poll-timeout]"
		" [-u|--prefer] [-d 0|1] [-6] [-h|--help]\n"
		"--address\n"
		"-a : remote address (IPv4, or IPv6 together with -6)\n"
		"--busy\n"
		"-b : busy poll io_uring instead of blocking.\n"
		"--num_pings\n"
		"-n : number of pings\n"
		"--port\n"
		"-p : port\n"
		"--sqpoll\n"
		"-s : Configure io_uring to use SQPOLL thread (0 or 1)\n"
		"--timeout\n"
		"-t : Configure NAPI busy poll timeout\n"
		"--prefer\n"
		"-u : prefer NAPI busy poll\n"
		"-d : Configure io_uring to use DEFER_TASKRUN (0 or 1)\n"
		"-6 : use IPV6\n"
		"--help\n"
		"-h : Display this usage message\n\n",
		name);
}
121
/*
 * Report an option-parsing problem on stderr as "<msg> (-<opt>)".
 * Nothing is printed when msg is NULL or opt is 0; a non-printable option
 * character is shown as '#'.
 */
static void printError(const char *msg, int opt)
{
	if (!msg || !opt)
		return;

	fprintf(stderr, "%s (-%c)\n", msg, printable(opt));
}
127
/*
 * Switch the calling process to SCHED_FIFO at the maximum priority.
 * A failure is reported on stderr and otherwise ignored, so the caller
 * keeps running under whatever scheduling policy it already had.
 */
static void setProcessScheduler(void)
{
	struct sched_param param = {0};

	param.sched_priority = sched_get_priority_max(SCHED_FIFO);
	if (sched_setscheduler(0, SCHED_FIFO, &param) < 0) {
		/* Capture errno before any library call can overwrite it. */
		int err = errno;

		fprintf(stderr, "sched_setscheduler() failed: (%d) %s\n",
			err, strerror(err));
	}
}
137
/*
 * Return time1 - time0 in seconds as a double.
 * The nanosecond difference may be negative; it is scaled and added to the
 * whole-second difference.
 */
static double diffTimespec(const struct timespec *time1, const struct timespec *time0)
{
	time_t wholeSecs = time1->tv_sec - time0->tv_sec;
	long nanos = time1->tv_nsec - time0->tv_nsec;

	return wholeSecs + nanos / 1000000000.0;
}
143
/*
 * Pack an operation type and a file descriptor into one 64-bit value:
 * the type occupies the top byte, the descriptor the low 32 bits.
 */
static uint64_t encodeUserData(char type, int fd)
{
	uint64_t tagBits = (uint64_t)type << 56;
	uint64_t fdBits = (uint32_t)fd;

	return fdBits | tagBits;
}
148
/*
 * Split a value built by encodeUserData() back into its parts.
 * *type receives the top byte, *fd the low 32 bits.
 */
static void decodeUserData(uint64_t data, char *type, int *fd)
{
	const uint64_t fdMask = 0xffffffffU;

	*type = data >> 56;
	*fd = data & fdMask;
}
154
opTypeToStr(char type)155 static const char *opTypeToStr(char type)
156 {
157 const char *res;
158
159 switch (type) {
160 case IOURING_RECV:
161 res = "IOURING_RECV";
162 break;
163 case IOURING_SEND:
164 res = "IOURING_SEND";
165 break;
166 case IOURING_RECVMSG:
167 res = "IOURING_RECVMSG";
168 break;
169 case IOURING_SENDMSG:
170 res = "IOURING_SENDMSG";
171 break;
172 default:
173 res = "Unknown";
174 }
175
176 return res;
177 }
178
reportNapi(struct ctx * ctx)179 static void reportNapi(struct ctx *ctx)
180 {
181 unsigned int napi_id = 0;
182 socklen_t len = sizeof(napi_id);
183
184 getsockopt(ctx->sockfd, SOL_SOCKET, SO_INCOMING_NAPI_ID, &napi_id, &len);
185 if (napi_id)
186 printf(" napi id: %d\n", napi_id);
187 else
188 printf(" unassigned napi id\n");
189
190 ctx->napi_check = true;
191 }
192
sendPing(struct ctx * ctx)193 static void sendPing(struct ctx *ctx)
194 {
195 struct io_uring_sqe *sqe = io_uring_get_sqe(&ctx->ring);
196
197 clock_gettime(CLOCK_REALTIME, (struct timespec *)ctx->buffer);
198 io_uring_prep_send(sqe, ctx->sockfd, ctx->buffer, sizeof(struct timespec), 0);
199 sqe->user_data = encodeUserData(IOURING_SEND, ctx->sockfd);
200 }
201
receivePing(struct ctx * ctx)202 static void receivePing(struct ctx *ctx)
203 {
204 struct io_uring_sqe *sqe = io_uring_get_sqe(&ctx->ring);
205
206 io_uring_prep_recv(sqe, ctx->sockfd, ctx->buffer, MAXBUFLEN, 0);
207 sqe->user_data = encodeUserData(IOURING_RECV, ctx->sockfd);
208 }
209
recordRTT(struct ctx * ctx)210 static void recordRTT(struct ctx *ctx)
211 {
212 struct timespec startTs = ctx->ts;
213
214 // Send next ping.
215 sendPing(ctx);
216
217 // Store round-trip time.
218 ctx->rtt[ctx->rtt_index] = diffTimespec(&ctx->ts, &startTs);
219 ctx->rtt_index++;
220 }
221
printStats(struct ctx * ctx)222 static void printStats(struct ctx *ctx)
223 {
224 double minRTT = DBL_MAX;
225 double maxRTT = 0.0;
226 double avgRTT = 0.0;
227 double stddevRTT = 0.0;
228
229 // Calculate min, max, avg.
230 for (int i = 0; i < ctx->rtt_index; i++) {
231 if (ctx->rtt[i] < minRTT)
232 minRTT = ctx->rtt[i];
233 if (ctx->rtt[i] > maxRTT)
234 maxRTT = ctx->rtt[i];
235
236 avgRTT += ctx->rtt[i];
237 }
238 avgRTT /= ctx->rtt_index;
239
240 // Calculate stddev.
241 for (int i = 0; i < ctx->rtt_index; i++)
242 stddevRTT += fabs(ctx->rtt[i] - avgRTT);
243 stddevRTT /= ctx->rtt_index;
244
245 fprintf(stdout, " rtt(us) min/avg/max/mdev = %.3f/%.3f/%.3f/%.3f\n",
246 minRTT * 1000000, avgRTT * 1000000, maxRTT * 1000000, stddevRTT * 1000000);
247 }
248
completion(struct ctx * ctx,struct io_uring_cqe * cqe)249 static int completion(struct ctx *ctx, struct io_uring_cqe *cqe)
250 {
251 char type;
252 int fd;
253 int res = cqe->res;
254
255 decodeUserData(cqe->user_data, &type, &fd);
256 if (res < 0) {
257 fprintf(stderr, "unexpected %s failure: (%d) %s\n",
258 opTypeToStr(type), -res, strerror(-res));
259 return -1;
260 }
261
262 switch (type) {
263 case IOURING_SEND:
264 receivePing(ctx);
265 break;
266 case IOURING_RECV:
267 if (res != sizeof(struct timespec)) {
268 fprintf(stderr, "unexpected ping reply len: %d\n", res);
269 abort();
270 }
271
272 if (!ctx->napi_check) {
273 reportNapi(ctx);
274 sendPing(ctx);
275 } else {
276 recordRTT(ctx);
277 }
278
279 --ctx->num_pings;
280 break;
281
282 default:
283 fprintf(stderr, "unexpected %s completion\n",
284 opTypeToStr(type));
285 return -1;
286 break;
287 }
288
289 return 0;
290 }
291
main(int argc,char * argv[])292 int main(int argc, char *argv[])
293 {
294 struct ctx ctx;
295 struct options opt;
296 struct __kernel_timespec *tsPtr;
297 struct __kernel_timespec ts;
298 struct io_uring_params params;
299 struct io_uring_napi napi;
300 int flag, ret, af;
301
302 memset(&opt, 0, sizeof(struct options));
303
304 // Process flags.
305 while ((flag = getopt_long(argc, argv, ":hs:bua:n:p:t:6d:", longopts, NULL)) != -1) {
306 switch (flag) {
307 case 'a':
308 strcpy(opt.addr, optarg);
309 break;
310 case 'b':
311 opt.busy_loop = true;
312 break;
313 case 'h':
314 printUsage(argv[0]);
315 exit(0);
316 break;
317 case 'n':
318 opt.num_pings = atoi(optarg) + 1;
319 break;
320 case 'p':
321 strcpy(opt.port, optarg);
322 break;
323 case 's':
324 opt.sq_poll = !!atoi(optarg);
325 break;
326 case 't':
327 opt.timeout = atoi(optarg);
328 break;
329 case 'u':
330 opt.prefer_busy_poll = true;
331 break;
332 case '6':
333 opt.ipv6 = true;
334 break;
335 case 'd':
336 opt.defer_tw = !!atoi(optarg);
337 break;
338 case ':':
339 printError("Missing argument", optopt);
340 printUsage(argv[0]);
341 exit(-1);
342 break;
343 case '?':
344 printError("Unrecognized option", optopt);
345 printUsage(argv[0]);
346 exit(-1);
347 break;
348
349 default:
350 fprintf(stderr, "Fatal: Unexpected case in CmdLineProcessor switch()\n");
351 exit(-1);
352 break;
353 }
354 }
355
356 if (strlen(opt.addr) == 0) {
357 fprintf(stderr, "address option is mandatory\n");
358 printUsage(argv[0]);
359 exit(1);
360 }
361
362 if (opt.ipv6) {
363 af = AF_INET6;
364 ctx.saddr6.sin6_port = htons(atoi(opt.port));
365 ctx.saddr6.sin6_family = AF_INET6;
366 } else {
367 af = AF_INET;
368 ctx.saddr.sin_port = htons(atoi(opt.port));
369 ctx.saddr.sin_family = AF_INET;
370 }
371
372 if (opt.ipv6)
373 ret = inet_pton(af, opt.addr, &ctx.saddr6.sin6_addr);
374 else
375 ret = inet_pton(af, opt.addr, &ctx.saddr.sin_addr);
376 if (ret <= 0) {
377 fprintf(stderr, "inet_pton error for %s\n", optarg);
378 printUsage(argv[0]);
379 exit(1);
380 }
381
382 // Connect to server.
383 fprintf(stdout, "Connecting to %s... (port=%s) to send %d pings\n", opt.addr, opt.port, opt.num_pings - 1);
384
385 if ((ctx.sockfd = socket(af, SOCK_DGRAM, 0)) < 0) {
386 fprintf(stderr, "socket() failed: (%d) %s\n", errno, strerror(errno));
387 exit(1);
388 }
389
390 if (opt.ipv6)
391 ret = connect(ctx.sockfd, (struct sockaddr *)&ctx.saddr6, sizeof(struct sockaddr_in6));
392 else
393 ret = connect(ctx.sockfd, (struct sockaddr *)&ctx.saddr, sizeof(struct sockaddr_in));
394 if (ret < 0) {
395 fprintf(stderr, "connect() failed: (%d) %s\n", errno, strerror(errno));
396 exit(1);
397 }
398
399 // Setup ring.
400 memset(¶ms, 0, sizeof(params));
401 memset(&ts, 0, sizeof(ts));
402 memset(&napi, 0, sizeof(napi));
403
404 params.flags = IORING_SETUP_SINGLE_ISSUER;
405 if (opt.defer_tw) {
406 params.flags |= IORING_SETUP_DEFER_TASKRUN;
407 } else if (opt.sq_poll) {
408 params.flags = IORING_SETUP_SQPOLL;
409 params.sq_thread_idle = 50;
410 } else {
411 params.flags |= IORING_SETUP_COOP_TASKRUN;
412 }
413
414 ret = io_uring_queue_init_params(RINGSIZE, &ctx.ring, ¶ms);
415 if (ret) {
416 fprintf(stderr, "io_uring_queue_init_params() failed: (%d) %s\n",
417 ret, strerror(-ret));
418 exit(1);
419 }
420
421 if (opt.timeout || opt.prefer_busy_poll) {
422 napi.prefer_busy_poll = opt.prefer_busy_poll;
423 napi.busy_poll_to = opt.timeout;
424
425 ret = io_uring_register_napi(&ctx.ring, &napi);
426 if (ret) {
427 fprintf(stderr, "io_uring_register_napi: %d\n", ret);
428 exit(1);
429 }
430 }
431
432 if (opt.busy_loop)
433 tsPtr = &ts;
434 else
435 tsPtr = NULL;
436
437 // Use realtime scheduler.
438 setProcessScheduler();
439
440 // Copy payload.
441 clock_gettime(CLOCK_REALTIME, &ctx.ts);
442
443 // Setup context.
444 ctx.napi_check = false;
445 ctx.buffer_len = sizeof(struct timespec);
446 ctx.num_pings = opt.num_pings;
447
448 ctx.rtt_index = 0;
449 ctx.rtt = (double *)malloc(sizeof(double) * opt.num_pings);
450 if (!ctx.rtt) {
451 fprintf(stderr, "Cannot allocate results array\n");
452 exit(1);
453 }
454
455 // Send initial message to get napi id.
456 sendPing(&ctx);
457
458 while (ctx.num_pings != 0) {
459 int res;
460 unsigned num_completed = 0;
461 unsigned head;
462 struct io_uring_cqe *cqe;
463
464 do {
465 res = io_uring_submit_and_wait_timeout(&ctx.ring, &cqe, 1, tsPtr, NULL);
466 if (res >= 0)
467 break;
468 else if (res == -ETIME)
469 continue;
470 fprintf(stderr, "submit_and_wait: %d\n", res);
471 exit(1);
472 } while (1);
473
474 io_uring_for_each_cqe(&ctx.ring, head, cqe) {
475 ++num_completed;
476 if (completion(&ctx, cqe))
477 goto out;
478 }
479
480 if (num_completed)
481 io_uring_cq_advance(&ctx.ring, num_completed);
482 }
483
484 printStats(&ctx);
485
486 out:
487 // Clean up.
488 if (opt.timeout || opt.prefer_busy_poll) {
489 ret = io_uring_unregister_napi(&ctx.ring, &napi);
490 if (ret)
491 fprintf(stderr, "io_uring_unregister_napi: %d\n", ret);
492 if (opt.timeout != napi.busy_poll_to ||
493 opt.prefer_busy_poll != napi.prefer_busy_poll) {
494 fprintf(stderr, "Expected busy poll to = %d, got %d\n",
495 opt.timeout, napi.busy_poll_to);
496 fprintf(stderr, "Expected prefer busy poll = %d, got %d\n",
497 opt.prefer_busy_poll, napi.prefer_busy_poll);
498 }
499 } else {
500 ret = io_uring_unregister_napi(&ctx.ring, NULL);
501 if (ret)
502 fprintf(stderr, "io_uring_unregister_napi: %d\n", ret);
503 }
504
505 io_uring_queue_exit(&ctx.ring);
506 free(ctx.rtt);
507 close(ctx.sockfd);
508 return 0;
509 }
510