/* SPDX-License-Identifier: MIT */
/*
 * Description: run various CQ ring overflow tests
 *
 */
#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <assert.h>

#include "helpers.h"
#include "liburing.h"

#define FILE_SIZE (256 * 1024)
#define BS 4096
#define BUFFERS (FILE_SIZE / BS)

static struct iovec *vecs;

#define ENTRIES 8

/*
 * io_uring has rare cases where CQEs are lost.
 * This happens when there is no space in the CQ ring, and also there is no
 * GFP_ATOMIC memory available. In reality this probably means that the process
 * is about to be killed as many other things might start failing, but we still
 * want to test that liburing and the kernel deal with this properly. The fault
 * injection framework allows us to test this scenario. Unfortunately this
 * requires some system wide changes and so we do not enable this by default.
 * The tests in this file should work in both cases (where overflows are queued
 * and where they are dropped) on recent kernels.
 *
 * In order to test dropped CQEs you should enable fault injection in the kernel
 * config:
 *
 * CONFIG_FAULT_INJECTION=y
 * CONFIG_FAILSLAB=y
 * CONFIG_FAULT_INJECTION_DEBUG_FS=y
 *
 * and then run the test as follows:
 * echo Y > /sys/kernel/debug/failslab/task-filter
 * echo 100 > /sys/kernel/debug/failslab/probability
 * echo 0 > /sys/kernel/debug/failslab/verbose
 * echo 100000 > /sys/kernel/debug/failslab/times
 * bash -c "echo 1 > /proc/self/make-it-fail && exec ./cq-overflow.t"
 */

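/*
 * test_io: submit reads from 'file' in two batches separated by a 'usecs'
 * sleep, then reap everything. *drops is set if the kernel reported CQ ring
 * overflow. With 'fault' set, one iovec buffer is freed beforehand to provoke
 * an -EFAULT completion. On IORING_FEAT_NODROP kernels a lost CQE surfaces as
 * -EBADR from io_uring_wait_cqe(); the reap loop below follows roughly this
 * pattern:
 *
 *	ret = io_uring_wait_cqe(&ring, &cqe);
 *	if (ret == -EBADR)
 *		cqe_dropped = true;	(the kernel lost one or more CQEs)
 */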
static int test_io(const char *file, unsigned long usecs, unsigned *drops,
		   int fault)
{
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	struct io_uring_params p;
	unsigned reaped, total;
	struct io_uring ring;
	int nodrop, i, fd, ret;
	bool cqe_dropped = false;

	fd = open(file, O_RDONLY | O_DIRECT);
	if (fd < 0) {
		if (errno == EINVAL)
			return T_EXIT_SKIP;
		perror("file open");
		return T_EXIT_FAIL;
	}

	memset(&p, 0, sizeof(p));
	ret = io_uring_queue_init_params(ENTRIES, &ring, &p);
	if (ret) {
		close(fd);
		fprintf(stderr, "ring create failed: %d\n", ret);
		return T_EXIT_FAIL;
	}
	nodrop = 0;
	if (p.features & IORING_FEAT_NODROP)
		nodrop = 1;

	total = 0;
	for (i = 0; i < BUFFERS / 2; i++) {
		off_t offset;

		sqe = io_uring_get_sqe(&ring);
		if (!sqe) {
			fprintf(stderr, "sqe get failed\n");
			goto err;
		}
		offset = BS * (rand() % BUFFERS);
		if (fault && i == ENTRIES + 4) {
			free(vecs[i].iov_base);
			vecs[i].iov_base = NULL;
		}
		io_uring_prep_readv(sqe, fd, &vecs[i], 1, offset);

		ret = io_uring_submit(&ring);
		if (nodrop && ret == -EBUSY) {
			*drops = 1;
			total = i;
			break;
		} else if (ret != 1) {
			fprintf(stderr, "submit got %d, wanted %d\n", ret, 1);
			total = i;
			break;
		}
		total++;
	}

	if (*drops)
		goto reap_it;

	usleep(usecs);

	for (i = total; i < BUFFERS; i++) {
		off_t offset;

		sqe = io_uring_get_sqe(&ring);
		if (!sqe) {
			fprintf(stderr, "sqe get failed\n");
			goto err;
		}
		offset = BS * (rand() % BUFFERS);
		io_uring_prep_readv(sqe, fd, &vecs[i], 1, offset);

		ret = io_uring_submit(&ring);
		if (nodrop && ret == -EBUSY) {
			*drops = 1;
			break;
		} else if (ret != 1) {
			fprintf(stderr, "submit got %d, wanted %d\n", ret, 1);
			break;
		}
		total++;
	}

reap_it:
	reaped = 0;
	do {
		if (nodrop && !cqe_dropped) {
			/* nodrop should never lose events unless cqe_dropped */
			if (reaped == total)
				break;
		} else {
			if (reaped + *ring.cq.koverflow == total)
				break;
		}
		ret = io_uring_wait_cqe(&ring, &cqe);
		if (nodrop && ret == -EBADR) {
			cqe_dropped = true;
			continue;
		} else if (ret) {
			fprintf(stderr, "wait_cqe=%d\n", ret);
			goto err;
		}
		if (cqe->res != BS) {
			if (!(fault && cqe->res == -EFAULT)) {
				fprintf(stderr, "cqe res %d, wanted %d\n",
						cqe->res, BS);
				goto err;
			}
		}
		io_uring_cqe_seen(&ring, cqe);
		reaped++;
	} while (1);

	if (!io_uring_peek_cqe(&ring, &cqe)) {
		fprintf(stderr, "found unexpected completion\n");
		goto err;
	}

	if (!nodrop || cqe_dropped) {
		*drops = *ring.cq.koverflow;
	} else if (*ring.cq.koverflow) {
		fprintf(stderr, "Found %u overflows\n", *ring.cq.koverflow);
		goto err;
	}

	io_uring_queue_exit(&ring);
	close(fd);
	return T_EXIT_PASS;
err:
	if (fd != -1)
		close(fd);
	io_uring_queue_exit(&ring);
	return T_EXIT_SKIP;
}

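/*
 * Reap up to nr_events completions, waiting for each one if do_wait is set,
 * and verify that user_data values arrive in submission order. If a CQE was
 * dropped (-EBADR), skip over the lost entries and stop checking ordering.
 */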
static int reap_events(struct io_uring *ring, unsigned nr_events, int do_wait)
{
	struct io_uring_cqe *cqe;
	int i, ret = 0, seq = 0;
	unsigned int start_overflow = *ring->cq.koverflow;
	bool dropped = false;

	for (i = 0; i < nr_events; i++) {
		if (do_wait)
			ret = io_uring_wait_cqe(ring, &cqe);
		else
			ret = io_uring_peek_cqe(ring, &cqe);
		if (do_wait && ret == -EBADR) {
			unsigned int this_drop = *ring->cq.koverflow -
				start_overflow;

			dropped = true;
			start_overflow = *ring->cq.koverflow;
			assert(this_drop > 0);
			i += (this_drop - 1);
			continue;
		} else if (ret) {
			if (ret != -EAGAIN)
				fprintf(stderr, "cqe peek failed: %d\n", ret);
			break;
		}
		if (!dropped && cqe->user_data != seq) {
			fprintf(stderr, "cqe sequence out-of-order\n");
			fprintf(stderr, "got %d, wanted %d\n", (int) cqe->user_data,
				seq);
			return -EINVAL;
		}
		seq++;
		io_uring_cqe_seen(ring, cqe);
	}

	return i ? i : ret;
}

/*
 * Submit some NOPs and check that the overflow count is reported correctly
 */
static int test_overflow(void)
{
	struct io_uring ring;
	struct io_uring_params p;
	struct io_uring_sqe *sqe;
	unsigned pending;
	int ret, i, j;

	memset(&p, 0, sizeof(p));
	ret = io_uring_queue_init_params(4, &ring, &p);
	if (ret) {
		fprintf(stderr, "io_uring_queue_init failed %d\n", ret);
		return 1;
	}

	/* submit 4x4 SQEs, should overflow the ring by 8 */
	pending = 0;
	for (i = 0; i < 4; i++) {
		for (j = 0; j < 4; j++) {
			sqe = io_uring_get_sqe(&ring);
			if (!sqe) {
				fprintf(stderr, "get sqe failed\n");
				goto err;
			}

			io_uring_prep_nop(sqe);
			sqe->user_data = (i * 4) + j;
		}

		ret = io_uring_submit(&ring);
		if (ret == 4) {
			pending += 4;
			continue;
		}
		if (p.features & IORING_FEAT_NODROP) {
			if (ret == -EBUSY)
				break;
		}
		fprintf(stderr, "sqe submit failed: %d\n", ret);
		goto err;
	}

	/* we should now have 8 completions ready */
	ret = reap_events(&ring, pending, 0);
	if (ret < 0)
		goto err;

	if (!(p.features & IORING_FEAT_NODROP)) {
		if (*ring.cq.koverflow != 8) {
			fprintf(stderr, "cq ring overflow %d, expected 8\n",
					*ring.cq.koverflow);
			goto err;
		}
	}
	io_uring_queue_exit(&ring);
	return 0;
err:
	io_uring_queue_exit(&ring);
	return 1;
}


static void submit_one_nop(struct io_uring *ring, int ud)
{
	struct io_uring_sqe *sqe;
	int ret;

	sqe = io_uring_get_sqe(ring);
	assert(sqe);
	io_uring_prep_nop(sqe);
	sqe->user_data = ud;
	ret = io_uring_submit(ring);
	assert(ret == 1);
}

/*
 * Create an overflow condition and ensure that SQEs are still processed
 */
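/*
 * batch:        reap completions with io_uring_peek_batch_cqe() when set
 * cqe_multiple: the CQ ring is sized to 2 * cqe_multiple entries via CQSIZE
 * poll:         set the ring up with IORING_SETUP_IOPOLL
 * defer:        set the ring up with SINGLE_ISSUER | DEFER_TASKRUN
 */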
static int test_overflow_handling(bool batch, int cqe_multiple, bool poll,
				  bool defer)
{
	struct io_uring ring;
	struct io_uring_params p;
	int ret, i, j, ud, cqe_count;
	unsigned int count;
	int const N = 8;
	int const LOOPS = 128;
	int const QUEUE_LENGTH = 1024;
	int completions[N];
	int queue[QUEUE_LENGTH];
	int queued = 0;
	int outstanding = 0;
	bool cqe_dropped = false;

	memset(&completions, 0, sizeof(int) * N);
	memset(&p, 0, sizeof(p));
	p.cq_entries = 2 * cqe_multiple;
	p.flags |= IORING_SETUP_CQSIZE;

	if (poll)
		p.flags |= IORING_SETUP_IOPOLL;

	if (defer)
		p.flags |= IORING_SETUP_SINGLE_ISSUER |
			   IORING_SETUP_DEFER_TASKRUN;

	ret = io_uring_queue_init_params(2, &ring, &p);
	if (ret) {
		fprintf(stderr, "io_uring_queue_init failed %d\n", ret);
		return 1;
	}

	assert(p.cq_entries < N);
	/* submit N SQEs, some should overflow */
	for (i = 0; i < N; i++) {
		submit_one_nop(&ring, i);
		outstanding++;
	}

	for (i = 0; i < LOOPS; i++) {
		struct io_uring_cqe *cqes[N];

		if (io_uring_cq_has_overflow(&ring)) {
			/*
			 * Flush any overflowed CQEs and process those. Actively
			 * flush these to make sure CQEs arrive in vague order
			 * of being sent.
			 */
			ret = io_uring_get_events(&ring);
			if (ret != 0) {
				fprintf(stderr,
					"io_uring_get_events returned %d\n",
					ret);
				goto err;
			}
		} else if (!cqe_dropped) {
			for (j = 0; j < queued; j++) {
				submit_one_nop(&ring, queue[j]);
				outstanding++;
			}
			queued = 0;
		}

		/* We have lost some random cqes, stop if no remaining. */
		if (cqe_dropped && outstanding == *ring.cq.koverflow)
			break;

		ret = io_uring_wait_cqe(&ring, &cqes[0]);
		if (ret == -EBADR) {
			cqe_dropped = true;
			fprintf(stderr, "CQE dropped\n");
			continue;
		} else if (ret != 0) {
			fprintf(stderr, "io_uring_wait_cqe failed %d\n", ret);
			goto err;
		}
		cqe_count = 1;
		if (batch) {
			ret = io_uring_peek_batch_cqe(&ring, &cqes[0], 2);
			if (ret < 0) {
				fprintf(stderr,
					"io_uring_peek_batch_cqe failed %d\n",
					ret);
				goto err;
			}
			cqe_count = ret;
		}
		for (j = 0; j < cqe_count; j++) {
			assert(cqes[j]->user_data < N);
			ud = cqes[j]->user_data;
			completions[ud]++;
			assert(queued < QUEUE_LENGTH);
			queue[queued++] = (int)ud;
		}
		io_uring_cq_advance(&ring, cqe_count);
		outstanding -= cqe_count;
	}

	/* See if there were any drops by flushing the CQ ring *and* overflow */
	do {
		struct io_uring_cqe *cqe;

		ret = io_uring_get_events(&ring);
		if (ret < 0) {
			if (ret == -EBADR) {
				fprintf(stderr, "CQE dropped\n");
				cqe_dropped = true;
				break;
			}
			goto err;
		}
		if (outstanding && !io_uring_cq_ready(&ring))
			ret = io_uring_wait_cqe_timeout(&ring, &cqe, NULL);

		if (ret && ret != -ETIME) {
			if (ret == -EBADR) {
				fprintf(stderr, "CQE dropped\n");
				cqe_dropped = true;
				break;
			}
			fprintf(stderr, "wait_cqe_timeout = %d\n", ret);
			goto err;
		}
		count = io_uring_cq_ready(&ring);
		io_uring_cq_advance(&ring, count);
		outstanding -= count;
	} while (count);

	io_uring_queue_exit(&ring);

	/* Make sure that completions come back in the same order they were
	 * sent. If they come back unfairly then this will concentrate on a
	 * couple of indices.
	 */
	for (i = 1; !cqe_dropped && i < N; i++) {
		if (abs(completions[i] - completions[i - 1]) > 1) {
			fprintf(stderr, "bad completion size %d %d\n",
				completions[i], completions[i - 1]);
			/* the ring was already torn down above, so don't goto err */
			return 1;
		}
	}
	return 0;
err:
	io_uring_queue_exit(&ring);
	return 1;
}

int main(int argc, char *argv[])
{
	const char *fname = ".cq-overflow";
	unsigned iters, drops;
	unsigned long usecs;
	int ret;
	int i;
	bool can_defer;

	if (argc > 1)
		return T_EXIT_SKIP;

	can_defer = t_probe_defer_taskrun();
	for (i = 0; i < 16; i++) {
		bool batch = i & 1;
		int mult = (i & 2) ? 1 : 2;
		bool poll = i & 4;
		bool defer = i & 8;

		if (defer && !can_defer)
			continue;

		ret = test_overflow_handling(batch, mult, poll, defer);
		if (ret) {
			fprintf(stderr, "test_overflow_handling("
				"batch=%d, mult=%d, poll=%d, defer=%d) failed\n",
				batch, mult, poll, defer);
			goto err;
		}
	}

	ret = test_overflow();
	if (ret) {
		fprintf(stderr, "test_overflow failed\n");
		return ret;
	}

	t_create_file(fname, FILE_SIZE);

	vecs = t_create_buffers(BUFFERS, BS);

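	/*
	 * Grow the sleep between the two submission batches until enough of
	 * the first batch completes to overflow the CQ ring and a drop is
	 * recorded, or give up after 40 iterations.
	 */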
	iters = 0;
	usecs = 1000;
	do {
		drops = 0;

		ret = test_io(fname, usecs, &drops, 0);
		if (ret == T_EXIT_SKIP)
			break;
		else if (ret != T_EXIT_PASS) {
			fprintf(stderr, "test_io nofault failed\n");
			goto err;
		}
		if (drops)
			break;
		usecs = (usecs * 12) / 10;
		iters++;
	} while (iters < 40);

	if (test_io(fname, usecs, &drops, 0) == T_EXIT_FAIL) {
		fprintf(stderr, "test_io nofault failed\n");
		goto err;
	}

	if (test_io(fname, usecs, &drops, 1) == T_EXIT_FAIL) {
		fprintf(stderr, "test_io fault failed\n");
		goto err;
	}

	unlink(fname);
	if (vecs != NULL) {
		for (i = 0; i < BUFFERS; i++)
			free(vecs[i].iov_base);
	}
	free(vecs);
	return T_EXIT_PASS;
err:
	unlink(fname);
	if (vecs != NULL) {
		for (i = 0; i < BUFFERS; i++)
			free(vecs[i].iov_base);
	}
	free(vecs);
	return T_EXIT_FAIL;
}