/* SPDX-License-Identifier: MIT */
/*
 * Description: run various CQ ring overflow tests
 *
 */
#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <assert.h>

#include "helpers.h"
#include "liburing.h"

#define FILE_SIZE	(256 * 1024)
#define BS		4096
#define BUFFERS		(FILE_SIZE / BS)

static struct iovec *vecs;

#define ENTRIES	8

/*
 * io_uring has rare cases where CQEs are lost.
 * This happens when there is no space in the CQ ring, and also there is no
 * GFP_ATOMIC memory available. In reality this probably means that the process
 * is about to be killed as many other things might start failing, but we still
 * want to test that liburing and the kernel deal with this properly. The fault
 * injection framework allows us to test this scenario. Unfortunately this
 * requires some system wide changes and so we do not enable this by default.
 * The tests in this file should work in both cases (where overflows are queued
 * and where they are dropped) on recent kernels.
 *
 * In order to test dropped CQEs you should enable fault injection in the kernel
 * config:
 *
 * CONFIG_FAULT_INJECTION=y
 * CONFIG_FAILSLAB=y
 * CONFIG_FAULT_INJECTION_DEBUG_FS=y
 *
 * and then run the test as follows:
 * echo Y > /sys/kernel/debug/failslab/task-filter
 * echo 100 > /sys/kernel/debug/failslab/probability
 * echo 0 > /sys/kernel/debug/failslab/verbose
 * echo 100000 > /sys/kernel/debug/failslab/times
 * bash -c "echo 1 > /proc/self/make-it-fail && exec ./cq-overflow.t"
 */

static int test_io(const char *file, unsigned long usecs, unsigned *drops,
		   int fault)
{
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	struct io_uring_params p;
	unsigned reaped, total;
	struct io_uring ring;
	int nodrop, i, fd, ret;
	bool cqe_dropped = false;

	fd = open(file, O_RDONLY | O_DIRECT);
	if (fd < 0) {
		if (errno == EINVAL)
			return T_EXIT_SKIP;
		perror("file open");
		return T_EXIT_FAIL;
	}

	memset(&p, 0, sizeof(p));
	ret = io_uring_queue_init_params(ENTRIES, &ring, &p);
	if (ret) {
		close(fd);
		fprintf(stderr, "ring create failed: %d\n", ret);
		return T_EXIT_FAIL;
	}
	nodrop = 0;
	if (p.features & IORING_FEAT_NODROP)
		nodrop = 1;

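	/*
	 * Strategy: submit half of the buffers without reaping, sleep so the
	 * completions pile up and overflow the small CQ ring, then submit the
	 * remaining half. With IORING_FEAT_NODROP the kernel queues the
	 * overflowed CQEs instead of dropping them, and submit may return
	 * -EBUSY until that backlog is flushed.
	 */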
	total = 0;
	for (i = 0; i < BUFFERS / 2; i++) {
		off_t offset;

		sqe = io_uring_get_sqe(&ring);
		if (!sqe) {
			fprintf(stderr, "sqe get failed\n");
			goto err;
		}
		offset = BS * (rand() % BUFFERS);
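		/* in fault mode, poison one iovec so that read completes with -EFAULT */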
		if (fault && i == ENTRIES + 4)
			vecs[i].iov_base = NULL;
		io_uring_prep_readv(sqe, fd, &vecs[i], 1, offset);

		ret = io_uring_submit(&ring);
		if (nodrop && ret == -EBUSY) {
			*drops = 1;
			total = i;
			break;
		} else if (ret != 1) {
			fprintf(stderr, "submit got %d, wanted %d\n", ret, 1);
			total = i;
			break;
		}
		total++;
	}

	if (*drops)
		goto reap_it;

	usleep(usecs);

	for (i = total; i < BUFFERS; i++) {
		off_t offset;

		sqe = io_uring_get_sqe(&ring);
		if (!sqe) {
			fprintf(stderr, "sqe get failed\n");
			goto err;
		}
		offset = BS * (rand() % BUFFERS);
		io_uring_prep_readv(sqe, fd, &vecs[i], 1, offset);

		ret = io_uring_submit(&ring);
		if (nodrop && ret == -EBUSY) {
			*drops = 1;
			break;
		} else if (ret != 1) {
			fprintf(stderr, "submit got %d, wanted %d\n", ret, 1);
			break;
		}
		total++;
	}

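	/*
	 * Reap until every submitted request is accounted for, either as a
	 * reaped CQE or as a kernel-side overflow recorded in cq.koverflow.
	 * If the kernel reports that queued overflow CQEs were lost (-EBADR),
	 * give up on exact accounting.
	 */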
reap_it:
	reaped = 0;
	do {
		if (nodrop && !cqe_dropped) {
			/* nodrop should never lose events unless cqe_dropped */
			if (reaped == total)
				break;
		} else {
			if (reaped + *ring.cq.koverflow == total)
				break;
		}
		ret = io_uring_wait_cqe(&ring, &cqe);
		if (nodrop && ret == -EBADR) {
			cqe_dropped = true;
			continue;
		} else if (ret) {
			fprintf(stderr, "wait_cqe=%d\n", ret);
			goto err;
		}
		if (cqe->res != BS) {
			if (!(fault && cqe->res == -EFAULT)) {
				fprintf(stderr, "cqe res %d, wanted %d\n",
						cqe->res, BS);
				goto err;
			}
		}
		io_uring_cqe_seen(&ring, cqe);
		reaped++;
	} while (1);

	if (!io_uring_peek_cqe(&ring, &cqe)) {
		fprintf(stderr, "found unexpected completion\n");
		goto err;
	}

	if (!nodrop || cqe_dropped) {
		*drops = *ring.cq.koverflow;
	} else if (*ring.cq.koverflow) {
		fprintf(stderr, "Found %u overflows\n", *ring.cq.koverflow);
		goto err;
	}

	io_uring_queue_exit(&ring);
	close(fd);
	return T_EXIT_PASS;
err:
	if (fd != -1)
		close(fd);
	io_uring_queue_exit(&ring);
	return T_EXIT_SKIP;
}

static int reap_events(struct io_uring *ring, unsigned nr_events, int do_wait)
{
	struct io_uring_cqe *cqe;
	int i, ret = 0, seq = 0;
	unsigned int start_overflow = *ring->cq.koverflow;
	bool dropped = false;

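	/*
	 * Peek (or wait, if do_wait) for nr_events CQEs. If the kernel dropped
	 * CQEs outright (-EBADR), skip ahead by however many the overflow
	 * counter says were lost and stop enforcing strict user_data ordering.
	 */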
	for (i = 0; i < nr_events; i++) {
		if (do_wait)
			ret = io_uring_wait_cqe(ring, &cqe);
		else
			ret = io_uring_peek_cqe(ring, &cqe);
		if (do_wait && ret == -EBADR) {
			unsigned int this_drop = *ring->cq.koverflow -
				start_overflow;

			dropped = true;
			start_overflow = *ring->cq.koverflow;
			assert(this_drop > 0);
			i += (this_drop - 1);
			continue;
		} else if (ret) {
			if (ret != -EAGAIN)
				fprintf(stderr, "cqe peek failed: %d\n", ret);
			break;
		}
		if (!dropped && cqe->user_data != seq) {
			fprintf(stderr, "cqe sequence out-of-order\n");
			fprintf(stderr, "got %d, wanted %d\n", (int) cqe->user_data,
					seq);
			return -EINVAL;
		}
		seq++;
		io_uring_cqe_seen(ring, cqe);
	}

	return i ? i : ret;
}

/*
 * Submit some NOPs and watch if the overflow is correct
 */
static int test_overflow(void)
{
	struct io_uring ring;
	struct io_uring_params p;
	struct io_uring_sqe *sqe;
	unsigned pending;
	int ret, i, j;

	memset(&p, 0, sizeof(p));
	ret = io_uring_queue_init_params(4, &ring, &p);
	if (ret) {
		fprintf(stderr, "io_uring_queue_init failed %d\n", ret);
		return 1;
	}

	/* submit 4x4 SQEs, should overflow the ring by 8 */
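	/*
	 * Without IORING_SETUP_CQSIZE the CQ ring is sized at twice the SQ
	 * ring (2 * 4 = 8 entries here), so 16 NOPs leave 8 completions with
	 * nowhere to go.
	 */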
	pending = 0;
	for (i = 0; i < 4; i++) {
		for (j = 0; j < 4; j++) {
			sqe = io_uring_get_sqe(&ring);
			if (!sqe) {
				fprintf(stderr, "get sqe failed\n");
				goto err;
			}

			io_uring_prep_nop(sqe);
			sqe->user_data = (i * 4) + j;
		}

		ret = io_uring_submit(&ring);
		if (ret == 4) {
			pending += 4;
			continue;
		}
		if (p.features & IORING_FEAT_NODROP) {
			if (ret == -EBUSY)
				break;
		}
		fprintf(stderr, "sqe submit failed: %d\n", ret);
		goto err;
	}

	/* we should now have 8 completions ready */
	ret = reap_events(&ring, pending, 0);
	if (ret < 0)
		goto err;

	if (!(p.features & IORING_FEAT_NODROP)) {
		if (*ring.cq.koverflow != 8) {
			fprintf(stderr, "cq ring overflow %d, expected 8\n",
					*ring.cq.koverflow);
			goto err;
		}
	}
	io_uring_queue_exit(&ring);
	return 0;
err:
	io_uring_queue_exit(&ring);
	return 1;
}


static void submit_one_nop(struct io_uring *ring, int ud)
{
	struct io_uring_sqe *sqe;
	int ret;

	sqe = io_uring_get_sqe(ring);
	assert(sqe);
	io_uring_prep_nop(sqe);
	sqe->user_data = ud;
	ret = io_uring_submit(ring);
	assert(ret == 1);
}

/*
 * Create an overflow condition and ensure that SQEs are still processed
 */
static int test_overflow_handling(bool batch, int cqe_multiple, bool poll,
				  bool defer)
{
	struct io_uring ring;
	struct io_uring_params p;
	int ret, i, j, ud, cqe_count;
	unsigned int count;
	int const N = 8;
	int const LOOPS = 128;
	int const QUEUE_LENGTH = 1024;
	int completions[N];
	int queue[QUEUE_LENGTH];
	int queued = 0;
	int outstanding = 0;
	bool cqe_dropped = false;

	memset(&completions, 0, sizeof(int) * N);
	memset(&p, 0, sizeof(p));
	p.cq_entries = 2 * cqe_multiple;
	p.flags |= IORING_SETUP_CQSIZE;

	if (poll)
		p.flags |= IORING_SETUP_IOPOLL;

	if (defer)
		p.flags |= IORING_SETUP_SINGLE_ISSUER |
			   IORING_SETUP_DEFER_TASKRUN;

	ret = io_uring_queue_init_params(2, &ring, &p);
	if (ret) {
		fprintf(stderr, "io_uring_queue_init failed %d\n", ret);
		return 1;
	}

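	/* the CQ ring (2 or 4 entries here) is deliberately smaller than N */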
	assert(p.cq_entries < N);
	/* submit N SQEs, some should overflow */
	for (i = 0; i < N; i++) {
		submit_one_nop(&ring, i);
		outstanding++;
	}

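	/*
	 * Keep the ring overflowed: each iteration either flushes the overflow
	 * backlog via io_uring_get_events() or resubmits the NOPs whose
	 * completions were just consumed, then reaps one CQE (or a small
	 * batch) and counts which user_data values completed.
	 */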
	for (i = 0; i < LOOPS; i++) {
		struct io_uring_cqe *cqes[N];

		if (io_uring_cq_has_overflow(&ring)) {
			/*
			 * Flush any overflowed CQEs and process those.
			 * Actively flush these to make sure CQEs arrive in
			 * roughly the order they were submitted.
			 */
			ret = io_uring_get_events(&ring);
			if (ret != 0) {
				fprintf(stderr,
					"io_uring_get_events returned %d\n",
					ret);
				goto err;
			}
		} else if (!cqe_dropped) {
			for (j = 0; j < queued; j++) {
				submit_one_nop(&ring, queue[j]);
				outstanding++;
			}
			queued = 0;
		}

		/* We have lost some random CQEs; stop if none remain. */
		if (cqe_dropped && outstanding == *ring.cq.koverflow)
			break;

		ret = io_uring_wait_cqe(&ring, &cqes[0]);
		if (ret == -EBADR) {
			cqe_dropped = true;
			fprintf(stderr, "CQE dropped\n");
			continue;
		} else if (ret != 0) {
			fprintf(stderr, "io_uring_wait_cqes failed %d\n", ret);
			goto err;
		}
		cqe_count = 1;
		if (batch) {
			ret = io_uring_peek_batch_cqe(&ring, &cqes[0], 2);
			if (ret < 0) {
				fprintf(stderr,
					"io_uring_peek_batch_cqe failed %d\n",
					ret);
				goto err;
			}
			cqe_count = ret;
		}
		for (j = 0; j < cqe_count; j++) {
			assert(cqes[j]->user_data < N);
			ud = cqes[j]->user_data;
			completions[ud]++;
			assert(queued < QUEUE_LENGTH);
			queue[queued++] = (int)ud;
		}
		io_uring_cq_advance(&ring, cqe_count);
		outstanding -= cqe_count;
	}

	/* See if there were any drops by flushing the CQ ring *and* overflow */
	do {
		struct io_uring_cqe *cqe;

		ret = io_uring_get_events(&ring);
		if (ret < 0) {
			if (ret == -EBADR) {
				fprintf(stderr, "CQE dropped\n");
				cqe_dropped = true;
				break;
			}
			goto err;
		}
		if (outstanding && !io_uring_cq_ready(&ring))
			ret = io_uring_wait_cqe_timeout(&ring, &cqe, NULL);

		if (ret && ret != -ETIME) {
			if (ret == -EBADR) {
				fprintf(stderr, "CQE dropped\n");
				cqe_dropped = true;
				break;
			}
			fprintf(stderr, "wait_cqe_timeout = %d\n", ret);
			goto err;
		}
		count = io_uring_cq_ready(&ring);
		io_uring_cq_advance(&ring, count);
		outstanding -= count;
	} while (count);

	io_uring_queue_exit(&ring);

	/*
	 * Make sure that completions come back in the same order they were
	 * sent. If they are reaped unfairly, the counts will concentrate on a
	 * couple of indices.
	 */
	for (i = 1; !cqe_dropped && i < N; i++) {
		if (abs(completions[i] - completions[i - 1]) > 1) {
			fprintf(stderr, "bad completion size %d %d\n",
				completions[i], completions[i - 1]);
			goto err;
		}
	}
	return 0;
err:
	io_uring_queue_exit(&ring);
	return 1;
}

int main(int argc, char *argv[])
{
	const char *fname = ".cq-overflow";
	unsigned iters, drops;
	unsigned long usecs;
	int ret;
	int i;
	bool can_defer;

	if (argc > 1)
		return T_EXIT_SKIP;

	can_defer = t_probe_defer_taskrun();
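	/* each bit of i selects one dimension: batch, CQ size multiple, IOPOLL, DEFER_TASKRUN */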
	for (i = 0; i < 16; i++) {
		bool batch = i & 1;
		int mult = (i & 2) ? 1 : 2;
		bool poll = i & 4;
		bool defer = i & 8;

		if (defer && !can_defer)
			continue;

		ret = test_overflow_handling(batch, mult, poll, defer);
		if (ret) {
			fprintf(stderr, "test_overflow_handling("
				"batch=%d, mult=%d, poll=%d, defer=%d) failed\n",
				batch, mult, poll, defer);
			goto err;
		}
	}

	ret = test_overflow();
	if (ret) {
		fprintf(stderr, "test_overflow failed\n");
		return ret;
	}

	t_create_file(fname, FILE_SIZE);

	vecs = t_create_buffers(BUFFERS, BS);

	iters = 0;
	usecs = 1000;
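	/*
	 * Start with a 1ms delay inside test_io and grow it by 20% each round,
	 * until the delay is long enough that completions pile up and the CQ
	 * ring overflows (drops observed), or 40 rounds pass.
	 */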
	do {
		drops = 0;

		ret = test_io(fname, usecs, &drops, 0);
		if (ret == T_EXIT_SKIP)
			break;
		else if (ret != T_EXIT_PASS) {
			fprintf(stderr, "test_io nofault failed\n");
			goto err;
		}
		if (drops)
			break;
		usecs = (usecs * 12) / 10;
		iters++;
	} while (iters < 40);

	if (test_io(fname, usecs, &drops, 0) == T_EXIT_FAIL) {
		fprintf(stderr, "test_io nofault failed\n");
		goto err;
	}

	if (test_io(fname, usecs, &drops, 1) == T_EXIT_FAIL) {
		fprintf(stderr, "test_io fault failed\n");
		goto err;
	}

	unlink(fname);
	return T_EXIT_PASS;
err:
	unlink(fname);
	return T_EXIT_FAIL;
}