/* SPDX-License-Identifier: MIT */
/*
 * Description: run various CQ ring overflow tests
 */
#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <assert.h>

#include "helpers.h"
#include "liburing.h"

#define FILE_SIZE	(256 * 1024)
#define BS		4096
#define BUFFERS		(FILE_SIZE / BS)

static struct iovec *vecs;

#define ENTRIES	8

/*
 * io_uring has rare cases where CQEs are lost.
 * This happens when there is no space in the CQ ring and no GFP_ATOMIC
 * memory is available. In reality this probably means the process is about
 * to be killed, as many other things are likely to start failing too, but
 * we still want to test that liburing and the kernel handle it properly.
 * The fault injection framework allows us to test this scenario, but it
 * requires some system-wide changes and so is not enabled by default.
 * The tests in this file should work in both cases (where overflows are
 * queued and where they are dropped) on recent kernels.
 *
 * In order to test dropped CQEs, enable fault injection in the kernel
 * config:
 *
 * CONFIG_FAULT_INJECTION=y
 * CONFIG_FAILSLAB=y
 * CONFIG_FAULT_INJECTION_DEBUG_FS=y
 *
 * and then run the test as follows:
 * echo Y > /sys/kernel/debug/failslab/task-filter
 * echo 100 > /sys/kernel/debug/failslab/probability
 * echo 0 > /sys/kernel/debug/failslab/verbose
 * echo 100000 > /sys/kernel/debug/failslab/times
 * bash -c "echo 1 > /proc/self/make-it-fail && exec ./cq-overflow.t"
 */
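/*
 * For reference, a minimal sketch (assuming only the liburing API already
 * used in this file) of how a consumer can tell the two overflow behaviours
 * apart; the tests below apply this same pattern:
 *
 *	ret = io_uring_wait_cqe(&ring, &cqe);
 *	if (ret == -EBADR) {
 *		// a NODROP kernel had to drop CQEs anyway;
 *		// *ring.cq.koverflow counts how many were lost
 *	} else if (*ring.cq.koverflow) {
 *		// legacy behaviour: completions were silently dropped
 *	}
 */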

static int test_io(const char *file, unsigned long usecs, unsigned *drops,
		   int fault)
{
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	struct io_uring_params p;
	unsigned reaped, total;
	struct io_uring ring;
	int nodrop, i, fd, ret;
	bool cqe_dropped = false;

	fd = open(file, O_RDONLY | O_DIRECT);
	if (fd < 0) {
		if (errno == EINVAL)
			return T_EXIT_SKIP;
		perror("file open");
		return T_EXIT_FAIL;
	}

	memset(&p, 0, sizeof(p));
	ret = io_uring_queue_init_params(ENTRIES, &ring, &p);
	if (ret) {
		close(fd);
		fprintf(stderr, "ring create failed: %d\n", ret);
		return T_EXIT_FAIL;
	}
	nodrop = 0;
	if (p.features & IORING_FEAT_NODROP)
		nodrop = 1;

	total = 0;
	for (i = 0; i < BUFFERS / 2; i++) {
		off_t offset;

		sqe = io_uring_get_sqe(&ring);
		if (!sqe) {
			fprintf(stderr, "sqe get failed\n");
			goto err;
		}
		offset = BS * (rand() % BUFFERS);
		if (fault && i == ENTRIES + 4) {
			free(vecs[i].iov_base);
			vecs[i].iov_base = NULL;
		}
		io_uring_prep_readv(sqe, fd, &vecs[i], 1, offset);

		ret = io_uring_submit(&ring);
		if (nodrop && ret == -EBUSY) {
			*drops = 1;
			total = i;
			break;
		} else if (ret != 1) {
			fprintf(stderr, "submit got %d, wanted %d\n", ret, 1);
			total = i;
			break;
		}
		total++;
	}

	if (*drops)
		goto reap_it;

	usleep(usecs);

	for (i = total; i < BUFFERS; i++) {
		off_t offset;

		sqe = io_uring_get_sqe(&ring);
		if (!sqe) {
			fprintf(stderr, "sqe get failed\n");
			goto err;
		}
		offset = BS * (rand() % BUFFERS);
		io_uring_prep_readv(sqe, fd, &vecs[i], 1, offset);

		ret = io_uring_submit(&ring);
		if (nodrop && ret == -EBUSY) {
			*drops = 1;
			break;
		} else if (ret != 1) {
			fprintf(stderr, "submit got %d, wanted %d\n", ret, 1);
			break;
		}
		total++;
	}

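	/*
	 * Reap completions. With IORING_FEAT_NODROP the kernel queues
	 * overflowed CQEs rather than dropping them, so unless a drop was
	 * signalled via -EBADR we must eventually see exactly 'total'
	 * completions. On legacy kernels the dropped CQEs show up in the
	 * kernel overflow counter instead, so reaped + koverflow must add
	 * up to 'total'.
	 */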
reap_it:
	reaped = 0;
	do {
		if (nodrop && !cqe_dropped) {
			/* nodrop should never lose events unless cqe_dropped */
			if (reaped == total)
				break;
		} else {
			if (reaped + *ring.cq.koverflow == total)
				break;
		}
		ret = io_uring_wait_cqe(&ring, &cqe);
		if (nodrop && ret == -EBADR) {
			cqe_dropped = true;
			continue;
		} else if (ret) {
			fprintf(stderr, "wait_cqe=%d\n", ret);
			goto err;
		}
		if (cqe->res != BS) {
			if (!(fault && cqe->res == -EFAULT)) {
				fprintf(stderr, "cqe res %d, wanted %d\n",
						cqe->res, BS);
				goto err;
			}
		}
		io_uring_cqe_seen(&ring, cqe);
		reaped++;
	} while (1);

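	/*
	 * io_uring_peek_cqe() returns 0 when a CQE is available. Every
	 * expected completion has been reaped above, so finding another one
	 * here means the ring produced more completions than were submitted.
	 */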
	if (!io_uring_peek_cqe(&ring, &cqe)) {
		fprintf(stderr, "found unexpected completion\n");
		goto err;
	}

	if (!nodrop || cqe_dropped) {
		*drops = *ring.cq.koverflow;
	} else if (*ring.cq.koverflow) {
		fprintf(stderr, "Found %u overflows\n", *ring.cq.koverflow);
		goto err;
	}

	io_uring_queue_exit(&ring);
	close(fd);
	return T_EXIT_PASS;
err:
	if (fd != -1)
		close(fd);
	io_uring_queue_exit(&ring);
	return T_EXIT_FAIL;
}

static int reap_events(struct io_uring *ring, unsigned nr_events, int do_wait)
{
	struct io_uring_cqe *cqe;
	int i, ret = 0, seq = 0;
	unsigned int start_overflow = *ring->cq.koverflow;
	bool dropped = false;

	for (i = 0; i < nr_events; i++) {
		if (do_wait)
			ret = io_uring_wait_cqe(ring, &cqe);
		else
			ret = io_uring_peek_cqe(ring, &cqe);
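		/*
		 * -EBADR signals that CQEs were dropped. The delta in the
		 * kernel overflow counter tells us how many, so charge the
		 * lost completions against the expected count and keep
		 * reaping the remainder.
		 */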
		if (do_wait && ret == -EBADR) {
			unsigned int this_drop = *ring->cq.koverflow -
				start_overflow;

			dropped = true;
			start_overflow = *ring->cq.koverflow;
			assert(this_drop > 0);
			i += (this_drop - 1);
			continue;
		} else if (ret) {
			if (ret != -EAGAIN)
				fprintf(stderr, "cqe peek failed: %d\n", ret);
			break;
		}
		if (!dropped && cqe->user_data != seq) {
			fprintf(stderr, "cqe sequence out-of-order\n");
			fprintf(stderr, "got %d, wanted %d\n", (int) cqe->user_data,
					seq);
			return -EINVAL;
		}
		seq++;
		io_uring_cqe_seen(ring, cqe);
	}

	return i ? i : ret;
}

/*
 * Submit some NOPs and watch if the overflow is correct
 */
static int test_overflow(void)
{
	struct io_uring ring;
	struct io_uring_params p;
	struct io_uring_sqe *sqe;
	unsigned pending;
	int ret, i, j;

	memset(&p, 0, sizeof(p));
	ret = io_uring_queue_init_params(4, &ring, &p);
	if (ret) {
		fprintf(stderr, "io_uring_queue_init failed %d\n", ret);
		return 1;
	}

	/*
	 * Submit 4x4 SQEs: with a 4-entry SQ ring the CQ ring defaults to
	 * 8 entries, so 16 NOPs should overflow it by 8.
	 */
	pending = 0;
	for (i = 0; i < 4; i++) {
		for (j = 0; j < 4; j++) {
			sqe = io_uring_get_sqe(&ring);
			if (!sqe) {
				fprintf(stderr, "get sqe failed\n");
				goto err;
			}

			io_uring_prep_nop(sqe);
			sqe->user_data = (i * 4) + j;
		}

		ret = io_uring_submit(&ring);
		if (ret == 4) {
			pending += 4;
			continue;
		}
		if (p.features & IORING_FEAT_NODROP) {
			if (ret == -EBUSY)
				break;
		}
		fprintf(stderr, "sqe submit failed: %d\n", ret);
		goto err;
	}

	/* we should now have 8 completions ready */
	ret = reap_events(&ring, pending, 0);
	if (ret < 0)
		goto err;

	if (!(p.features & IORING_FEAT_NODROP)) {
		if (*ring.cq.koverflow != 8) {
			fprintf(stderr, "cq ring overflow %u, expected 8\n",
					*ring.cq.koverflow);
			goto err;
		}
	}
	io_uring_queue_exit(&ring);
	return 0;
err:
	io_uring_queue_exit(&ring);
	return 1;
}


static void submit_one_nop(struct io_uring *ring, int ud)
{
	struct io_uring_sqe *sqe;
	int ret;

	sqe = io_uring_get_sqe(ring);
	assert(sqe);
	io_uring_prep_nop(sqe);
	sqe->user_data = ud;
	ret = io_uring_submit(ring);
	assert(ret == 1);
}

/*
 * Create an overflow condition and ensure that SQEs are still processed
 */
static int test_overflow_handling(bool batch, int cqe_multiple, bool poll,
				  bool defer)
{
	struct io_uring ring;
	struct io_uring_params p;
	int ret, i, j, ud, cqe_count;
	unsigned int count;
	int const N = 8;
	int const LOOPS = 128;
	int const QUEUE_LENGTH = 1024;
	int completions[N];
	int queue[QUEUE_LENGTH];
	int queued = 0;
	int outstanding = 0;
	bool cqe_dropped = false;

	memset(&completions, 0, sizeof(int) * N);
	memset(&p, 0, sizeof(p));
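	/*
	 * Deliberately undersize the CQ ring (2 or 4 entries via
	 * IORING_SETUP_CQSIZE) so the 8 NOPs kept in flight are guaranteed
	 * to hit the overflow path.
	 */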
	p.cq_entries = 2 * cqe_multiple;
	p.flags |= IORING_SETUP_CQSIZE;

	if (poll)
		p.flags |= IORING_SETUP_IOPOLL;

	if (defer)
		p.flags |= IORING_SETUP_SINGLE_ISSUER |
			   IORING_SETUP_DEFER_TASKRUN;

	ret = io_uring_queue_init_params(2, &ring, &p);
	if (ret) {
		fprintf(stderr, "io_uring_queue_init failed %d\n", ret);
		return 1;
	}

	assert(p.cq_entries < N);
	/* submit N SQEs, some should overflow */
	for (i = 0; i < N; i++) {
		submit_one_nop(&ring, i);
		outstanding++;
	}

	for (i = 0; i < LOOPS; i++) {
		struct io_uring_cqe *cqes[N];

		if (io_uring_cq_has_overflow(&ring)) {
			/*
			 * Flush any overflowed CQEs and process those.
			 * Actively flushing makes sure CQEs arrive in
			 * roughly the order they were sent.
			 */
			ret = io_uring_get_events(&ring);
			if (ret != 0) {
				fprintf(stderr,
					"io_uring_get_events returned %d\n",
					ret);
				goto err;
			}
		} else if (!cqe_dropped) {
			for (j = 0; j < queued; j++) {
				submit_one_nop(&ring, queue[j]);
				outstanding++;
			}
			queued = 0;
		}

		/* Some CQEs were lost at random; stop once the drops account
		 * for everything still outstanding. */
		if (cqe_dropped && outstanding == *ring.cq.koverflow)
			break;

		ret = io_uring_wait_cqe(&ring, &cqes[0]);
		if (ret == -EBADR) {
			cqe_dropped = true;
			fprintf(stderr, "CQE dropped\n");
			continue;
		} else if (ret != 0) {
			fprintf(stderr, "io_uring_wait_cqe failed %d\n", ret);
			goto err;
		}
		cqe_count = 1;
		if (batch) {
			ret = io_uring_peek_batch_cqe(&ring, &cqes[0], 2);
			if (ret < 0) {
				fprintf(stderr,
					"io_uring_peek_batch_cqe failed %d\n",
					ret);
				goto err;
			}
			cqe_count = ret;
		}
		for (j = 0; j < cqe_count; j++) {
			assert(cqes[j]->user_data < N);
			ud = cqes[j]->user_data;
			completions[ud]++;
			assert(queued < QUEUE_LENGTH);
			queue[queued++] = (int)ud;
		}
		io_uring_cq_advance(&ring, cqe_count);
		outstanding -= cqe_count;
	}

	/* See if there were any drops by flushing the CQ ring *and* overflow */
	do {
		struct io_uring_cqe *cqe;

		ret = io_uring_get_events(&ring);
		if (ret < 0) {
			if (ret == -EBADR) {
				fprintf(stderr, "CQE dropped\n");
				cqe_dropped = true;
				break;
			}
			goto err;
		}
		if (outstanding && !io_uring_cq_ready(&ring))
			ret = io_uring_wait_cqe_timeout(&ring, &cqe, NULL);

		if (ret && ret != -ETIME) {
			if (ret == -EBADR) {
				fprintf(stderr, "CQE dropped\n");
				cqe_dropped = true;
				break;
			}
			fprintf(stderr, "wait_cqe_timeout = %d\n", ret);
			goto err;
		}
		count = io_uring_cq_ready(&ring);
		io_uring_cq_advance(&ring, count);
		outstanding -= count;
	} while (count);

	io_uring_queue_exit(&ring);

	/*
	 * Make sure that completions come back in the same order they were
	 * sent. If they come back unfairly, the counts will concentrate on
	 * a couple of indices. The ring has already been torn down at this
	 * point, so fail directly rather than taking the err path below.
	 */
	for (i = 1; !cqe_dropped && i < N; i++) {
		if (abs(completions[i] - completions[i - 1]) > 1) {
			fprintf(stderr, "bad completion size %d %d\n",
				completions[i], completions[i - 1]);
			return 1;
		}
	}
	return 0;
err:
	io_uring_queue_exit(&ring);
	return 1;
}

int main(int argc, char *argv[])
{
	const char *fname = ".cq-overflow";
	unsigned iters, drops;
	unsigned long usecs;
	int ret;
	int i;
	bool can_defer;

	if (argc > 1)
		return T_EXIT_SKIP;

	can_defer = t_probe_defer_taskrun();
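	/*
	 * Treat the loop counter as a bitmask to walk all 16 combinations
	 * of batched reaping, CQ ring size multiplier, IOPOLL and
	 * DEFER_TASKRUN.
	 */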
	for (i = 0; i < 16; i++) {
		bool batch = i & 1;
		int mult = (i & 2) ? 1 : 2;
		bool poll = i & 4;
		bool defer = i & 8;

		if (defer && !can_defer)
			continue;

		ret = test_overflow_handling(batch, mult, poll, defer);
		if (ret) {
			fprintf(stderr, "test_overflow_handling("
				"batch=%d, mult=%d, poll=%d, defer=%d) failed\n",
				batch, mult, poll, defer);
			goto err;
		}
	}

	ret = test_overflow();
	if (ret) {
		fprintf(stderr, "test_overflow failed\n");
		return ret;
	}

	t_create_file(fname, FILE_SIZE);

	vecs = t_create_buffers(BUFFERS, BS);

	iters = 0;
	usecs = 1000;
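	/*
	 * Grow the delay between the two submission batches by 20% per
	 * iteration until completions pile up long enough to overflow the
	 * CQ ring, giving up after 40 attempts.
	 */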
	do {
		drops = 0;

		ret = test_io(fname, usecs, &drops, 0);
		if (ret == T_EXIT_SKIP)
			break;
		else if (ret != T_EXIT_PASS) {
			fprintf(stderr, "test_io nofault failed\n");
			goto err;
		}
		if (drops)
			break;
		usecs = (usecs * 12) / 10;
		iters++;
	} while (iters < 40);

	if (test_io(fname, usecs, &drops, 0) == T_EXIT_FAIL) {
		fprintf(stderr, "test_io nofault failed\n");
		goto err;
	}

	if (test_io(fname, usecs, &drops, 1) == T_EXIT_FAIL) {
		fprintf(stderr, "test_io fault failed\n");
		goto err;
	}

	unlink(fname);
	if (vecs != NULL) {
		for (i = 0; i < BUFFERS; i++)
			free(vecs[i].iov_base);
	}
	free(vecs);
	return T_EXIT_PASS;
err:
	unlink(fname);
	if (vecs != NULL) {
		for (i = 0; i < BUFFERS; i++)
			free(vecs[i].iov_base);
	}
	free(vecs);
	return T_EXIT_FAIL;
}