/* SPDX-License-Identifier: MIT */
/*
 * Description: run various CQ ring overflow tests
 *
 */
#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <assert.h>

#include "helpers.h"
#include "liburing.h"

#define FILE_SIZE	(256 * 1024)
#define BS		4096
#define BUFFERS		(FILE_SIZE / BS)

static struct iovec *vecs;

#define ENTRIES	8

/*
 * io_uring has rare cases where CQEs are lost.
 * This happens when there is no space in the CQ ring, and also there is no
 * GFP_ATOMIC memory available. In reality this probably means that the process
 * is about to be killed as many other things might start failing, but we still
 * want to test that liburing and the kernel deal with this properly. The fault
 * injection framework allows us to test this scenario. Unfortunately this
 * requires some system-wide changes and so we do not enable this by default.
 * The tests in this file should work in both cases (where overflows are queued
 * and where they are dropped) on recent kernels.
 *
 * In order to test dropped CQEs you should enable fault injection in the kernel
 * config:
 *
 * CONFIG_FAULT_INJECTION=y
 * CONFIG_FAILSLAB=y
 * CONFIG_FAULT_INJECTION_DEBUG_FS=y
 *
 * and then run the test as follows:
 * echo Y > /sys/kernel/debug/failslab/task-filter
 * echo 100 > /sys/kernel/debug/failslab/probability
 * echo 0 > /sys/kernel/debug/failslab/verbose
 * echo 100000 > /sys/kernel/debug/failslab/times
 * bash -c "echo 1 > /proc/self/make-it-fail && exec ./cq-overflow.t"
 */

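/*
 * test_io(): submit more reads than the CQ ring can hold, optionally with a
 * delay between the two halves and optionally with one deliberately bad
 * buffer, then verify that the completions we reap plus the kernel's
 * overflow count account for everything that was submitted.
 */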
static int test_io(const char *file, unsigned long usecs, unsigned *drops,
		   int fault)
{
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	struct io_uring_params p;
	unsigned reaped, total;
	struct io_uring ring;
	int nodrop, i, fd, ret;
	bool cqe_dropped = false;

	fd = open(file, O_RDONLY | O_DIRECT);
	if (fd < 0) {
		if (errno == EINVAL)
			return T_EXIT_SKIP;
		perror("file open");
		return T_EXIT_FAIL;
	}

	memset(&p, 0, sizeof(p));
	ret = io_uring_queue_init_params(ENTRIES, &ring, &p);
	if (ret) {
		close(fd);
		fprintf(stderr, "ring create failed: %d\n", ret);
		return T_EXIT_FAIL;
	}
	nodrop = 0;
	if (p.features & IORING_FEAT_NODROP)
		nodrop = 1;

	total = 0;
	for (i = 0; i < BUFFERS / 2; i++) {
		off_t offset;

		sqe = io_uring_get_sqe(&ring);
		if (!sqe) {
			fprintf(stderr, "sqe get failed\n");
			goto err;
		}
		offset = BS * (rand() % BUFFERS);
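		/* for the fault run, NULL one buffer so its read fails with -EFAULT */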
		if (fault && i == ENTRIES + 4)
			vecs[i].iov_base = NULL;
		io_uring_prep_readv(sqe, fd, &vecs[i], 1, offset);

		ret = io_uring_submit(&ring);
		if (nodrop && ret == -EBUSY) {
			*drops = 1;
			total = i;
			break;
		} else if (ret != 1) {
			fprintf(stderr, "submit got %d, wanted %d\n", ret, 1);
			total = i;
			break;
		}
		total++;
	}

	if (*drops)
		goto reap_it;

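	/*
	 * Let the first batch complete so the small CQ ring fills up and
	 * starts overflowing before the second half is submitted.
	 */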
	usleep(usecs);

	for (i = total; i < BUFFERS; i++) {
		off_t offset;

		sqe = io_uring_get_sqe(&ring);
		if (!sqe) {
			fprintf(stderr, "sqe get failed\n");
			goto err;
		}
		offset = BS * (rand() % BUFFERS);
		io_uring_prep_readv(sqe, fd, &vecs[i], 1, offset);

		ret = io_uring_submit(&ring);
		if (nodrop && ret == -EBUSY) {
			*drops = 1;
			break;
		} else if (ret != 1) {
			fprintf(stderr, "submit got %d, wanted %d\n", ret, 1);
			break;
		}
		total++;
	}

reap_it:
	reaped = 0;
	do {
		if (nodrop && !cqe_dropped) {
			/* nodrop should never lose events unless cqe_dropped */
			if (reaped == total)
				break;
		} else {
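			/* otherwise, reaped CQEs plus overflowed ones must add up */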
			if (reaped + *ring.cq.koverflow == total)
				break;
		}
		ret = io_uring_wait_cqe(&ring, &cqe);
		if (nodrop && ret == -EBADR) {
			cqe_dropped = true;
			continue;
		} else if (ret) {
			fprintf(stderr, "wait_cqe=%d\n", ret);
			goto err;
		}
		if (cqe->res != BS) {
			if (!(fault && cqe->res == -EFAULT)) {
				fprintf(stderr, "cqe res %d, wanted %d\n",
						cqe->res, BS);
				goto err;
			}
		}
		io_uring_cqe_seen(&ring, cqe);
		reaped++;
	} while (1);

	if (!io_uring_peek_cqe(&ring, &cqe)) {
		fprintf(stderr, "found unexpected completion\n");
		goto err;
	}

	if (!nodrop || cqe_dropped) {
		*drops = *ring.cq.koverflow;
	} else if (*ring.cq.koverflow) {
		fprintf(stderr, "Found %u overflows\n", *ring.cq.koverflow);
		goto err;
	}

	io_uring_queue_exit(&ring);
	close(fd);
	return T_EXIT_PASS;
err:
	if (fd != -1)
		close(fd);
	io_uring_queue_exit(&ring);
	return T_EXIT_FAIL;
}

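/*
 * Reap up to nr_events CQEs, checking that they complete in submission order
 * unless the kernel reported dropped CQEs (-EBADR).
 */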
static int reap_events(struct io_uring *ring, unsigned nr_events, int do_wait)
{
	struct io_uring_cqe *cqe;
	int i, ret = 0, seq = 0;
	unsigned int start_overflow = *ring->cq.koverflow;
	bool dropped = false;

	for (i = 0; i < nr_events; i++) {
		if (do_wait)
			ret = io_uring_wait_cqe(ring, &cqe);
		else
			ret = io_uring_peek_cqe(ring, &cqe);
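		/*
		 * -EBADR means the kernel dropped CQEs entirely; work out how
		 * many from the overflow counter and skip past them.
		 */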
		if (do_wait && ret == -EBADR) {
			unsigned int this_drop = *ring->cq.koverflow -
				start_overflow;

			dropped = true;
			start_overflow = *ring->cq.koverflow;
			assert(this_drop > 0);
			i += (this_drop - 1);
			continue;
		} else if (ret) {
			if (ret != -EAGAIN)
				fprintf(stderr, "cqe peek failed: %d\n", ret);
			break;
		}
		if (!dropped && cqe->user_data != seq) {
			fprintf(stderr, "cqe sequence out-of-order\n");
			fprintf(stderr, "got %d, wanted %d\n", (int) cqe->user_data,
					seq);
			return -EINVAL;
		}
		seq++;
		io_uring_cqe_seen(ring, cqe);
	}

	return i ? i : ret;
}

/*
 * Submit some NOPs and watch if the overflow is correct
 */
static int test_overflow(void)
{
	struct io_uring ring;
	struct io_uring_params p;
	struct io_uring_sqe *sqe;
	unsigned pending;
	int ret, i, j;

	memset(&p, 0, sizeof(p));
	ret = io_uring_queue_init_params(4, &ring, &p);
	if (ret) {
		fprintf(stderr, "io_uring_queue_init failed %d\n", ret);
		return 1;
	}

	/* submit 4x4 SQEs, should overflow the ring by 8 */
	pending = 0;
	for (i = 0; i < 4; i++) {
		for (j = 0; j < 4; j++) {
			sqe = io_uring_get_sqe(&ring);
			if (!sqe) {
				fprintf(stderr, "get sqe failed\n");
				goto err;
			}

			io_uring_prep_nop(sqe);
			sqe->user_data = (i * 4) + j;
		}

		ret = io_uring_submit(&ring);
		if (ret == 4) {
			pending += 4;
			continue;
		}
		if (p.features & IORING_FEAT_NODROP) {
			if (ret == -EBUSY)
				break;
		}
		fprintf(stderr, "sqe submit failed: %d\n", ret);
		goto err;
	}

	/* we should now have 8 completions ready */
	ret = reap_events(&ring, pending, 0);
	if (ret < 0)
		goto err;

	if (!(p.features & IORING_FEAT_NODROP)) {
		if (*ring.cq.koverflow != 8) {
			fprintf(stderr, "cq ring overflow %d, expected 8\n",
					*ring.cq.koverflow);
			goto err;
		}
	}
	io_uring_queue_exit(&ring);
	return 0;
err:
	io_uring_queue_exit(&ring);
	return 1;
}


static void submit_one_nop(struct io_uring *ring, int ud)
{
	struct io_uring_sqe *sqe;
	int ret;

	sqe = io_uring_get_sqe(ring);
	assert(sqe);
	io_uring_prep_nop(sqe);
	sqe->user_data = ud;
	ret = io_uring_submit(ring);
	assert(ret == 1);
}

/*
 * Create an overflow condition and ensure that SQEs are still processed
 */
static int test_overflow_handling(bool batch, int cqe_multiple, bool poll,
				  bool defer)
{
	struct io_uring ring;
	struct io_uring_params p;
	int ret, i, j, ud, cqe_count;
	unsigned int count;
	int const N = 8;
	int const LOOPS = 128;
	int const QUEUE_LENGTH = 1024;
	int completions[N];
	int queue[QUEUE_LENGTH];
	int queued = 0;
	int outstanding = 0;
	bool cqe_dropped = false;

	memset(&completions, 0, sizeof(int) * N);
	memset(&p, 0, sizeof(p));
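	/* deliberately tiny CQ ring (2 or 4 entries) so completions overflow */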
	p.cq_entries = 2 * cqe_multiple;
	p.flags |= IORING_SETUP_CQSIZE;

	if (poll)
		p.flags |= IORING_SETUP_IOPOLL;

	if (defer)
		p.flags |= IORING_SETUP_SINGLE_ISSUER |
			   IORING_SETUP_DEFER_TASKRUN;

	ret = io_uring_queue_init_params(2, &ring, &p);
	if (ret) {
		fprintf(stderr, "io_uring_queue_init failed %d\n", ret);
		return 1;
	}

	assert(p.cq_entries < N);
	/* submit N SQEs, some should overflow */
	for (i = 0; i < N; i++) {
		submit_one_nop(&ring, i);
		outstanding++;
	}

	for (i = 0; i < LOOPS; i++) {
		struct io_uring_cqe *cqes[N];

		if (io_uring_cq_has_overflow(&ring)) {
			/*
			 * Flush any overflowed CQEs and process those. Actively
			 * flush these so that CQEs arrive in roughly the order
			 * they were submitted.
			 */
			ret = io_uring_get_events(&ring);
			if (ret != 0) {
				fprintf(stderr,
					"io_uring_get_events returned %d\n",
					ret);
				goto err;
			}
		} else if (!cqe_dropped) {
			for (j = 0; j < queued; j++) {
				submit_one_nop(&ring, queue[j]);
				outstanding++;
			}
			queued = 0;
		}

		/* We have lost some random cqes; stop if none remain. */
		if (cqe_dropped && outstanding == *ring.cq.koverflow)
			break;

		ret = io_uring_wait_cqe(&ring, &cqes[0]);
		if (ret == -EBADR) {
			cqe_dropped = true;
			fprintf(stderr, "CQE dropped\n");
			continue;
		} else if (ret != 0) {
			fprintf(stderr, "io_uring_wait_cqes failed %d\n", ret);
			goto err;
		}
		cqe_count = 1;
		if (batch) {
			ret = io_uring_peek_batch_cqe(&ring, &cqes[0], 2);
			if (ret < 0) {
				fprintf(stderr,
					"io_uring_peek_batch_cqe failed %d\n",
					ret);
				goto err;
			}
			cqe_count = ret;
		}
		for (j = 0; j < cqe_count; j++) {
			assert(cqes[j]->user_data < N);
			ud = cqes[j]->user_data;
			completions[ud]++;
			assert(queued < QUEUE_LENGTH);
			queue[queued++] = (int)ud;
		}
		io_uring_cq_advance(&ring, cqe_count);
		outstanding -= cqe_count;
	}

	/* See if there were any drops by flushing the CQ ring *and* overflow */
	do {
		struct io_uring_cqe *cqe;

		ret = io_uring_get_events(&ring);
		if (ret < 0) {
			if (ret == -EBADR) {
				fprintf(stderr, "CQE dropped\n");
				cqe_dropped = true;
				break;
			}
			goto err;
		}
		if (outstanding && !io_uring_cq_ready(&ring))
			ret = io_uring_wait_cqe_timeout(&ring, &cqe, NULL);

		if (ret && ret != -ETIME) {
			if (ret == -EBADR) {
				fprintf(stderr, "CQE dropped\n");
				cqe_dropped = true;
				break;
			}
			fprintf(stderr, "wait_cqe_timeout = %d\n", ret);
			goto err;
		}
		count = io_uring_cq_ready(&ring);
		io_uring_cq_advance(&ring, count);
		outstanding -= count;
	} while (count);

	io_uring_queue_exit(&ring);

	/* Make sure that completions come back in the same order they were
	 * sent. If they come back unfairly then this will concentrate on a
	 * couple of indices.
	 */
	for (i = 1; !cqe_dropped && i < N; i++) {
		if (abs(completions[i] - completions[i - 1]) > 1) {
			fprintf(stderr, "bad completion size %d %d\n",
				completions[i], completions[i - 1]);
			goto err;
		}
	}
	return 0;
err:
	io_uring_queue_exit(&ring);
	return 1;
}

int main(int argc, char *argv[])
{
	const char *fname = ".cq-overflow";
	unsigned iters, drops;
	unsigned long usecs;
	int ret;
	int i;
	bool can_defer;

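	/* this test creates its own test file; skip if invoked with an argument */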
	if (argc > 1)
		return T_EXIT_SKIP;

	can_defer = t_probe_defer_taskrun();
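	/*
	 * The low bits of i select the parameters: batched reaping, CQ size
	 * multiple, IOPOLL and DEFER_TASKRUN.
	 */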
	for (i = 0; i < 16; i++) {
		bool batch = i & 1;
		int mult = (i & 2) ? 1 : 2;
		bool poll = i & 4;
		bool defer = i & 8;

		if (defer && !can_defer)
			continue;

		ret = test_overflow_handling(batch, mult, poll, defer);
		if (ret) {
			fprintf(stderr, "test_overflow_handling("
				"batch=%d, mult=%d, poll=%d, defer=%d) failed\n",
				batch, mult, poll, defer);
			goto err;
		}
	}

	ret = test_overflow();
	if (ret) {
		fprintf(stderr, "test_overflow failed\n");
		return ret;
	}

	t_create_file(fname, FILE_SIZE);

	vecs = t_create_buffers(BUFFERS, BS);

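	/*
	 * Ramp up the delay between the two submit batches until test_io()
	 * observes overflowed/dropped CQEs, or give up after 40 rounds.
	 */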
	iters = 0;
	usecs = 1000;
	do {
		drops = 0;

		ret = test_io(fname, usecs, &drops, 0);
		if (ret == T_EXIT_SKIP)
			break;
		else if (ret != T_EXIT_PASS) {
			fprintf(stderr, "test_io nofault failed\n");
			goto err;
		}
		if (drops)
			break;
		usecs = (usecs * 12) / 10;
		iters++;
	} while (iters < 40);

	if (test_io(fname, usecs, &drops, 0) == T_EXIT_FAIL) {
		fprintf(stderr, "test_io nofault failed\n");
		goto err;
	}

	if (test_io(fname, usecs, &drops, 1) == T_EXIT_FAIL) {
		fprintf(stderr, "test_io fault failed\n");
		goto err;
	}

	unlink(fname);
	return T_EXIT_PASS;
err:
	unlink(fname);
	return T_EXIT_FAIL;
}