/* SPDX-License-Identifier: MIT */
/*
 * Description: run various reads tests, verifying data
 *
 */
#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <assert.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <linux/fs.h>

#include "helpers.h"
#include "liburing.h"

#define FSIZE 128*1024*1024
#define CHUNK_SIZE 131072
#define PUNCH_SIZE 32768

/*
 * 8 because it fits within the on-stack iov, 16 because it's larger than 8
 */
#define MIN_VECS 8
#define MAX_VECS 16

/*
 * Can be anything, let's just do something for a bit of parallelism
 */
#define READ_BATCH 16

verify_buf_sync(void * buf,size_t size,bool registered)35 static void verify_buf_sync(void *buf, size_t size, bool registered)
36 {
37 #if defined(__hppa__)
38 if (registered) {
39 unsigned long off = (unsigned long) buf & 4095;
40 unsigned long p = (unsigned long) buf & ~4095;
41 int i;
42
43 size += off;
44 for (i = 0; i < size; i += 32)
45 asm volatile("fdc 0(%0)" : : "r" (p + i));
46 }
47 #endif
48 }
49
50 /*
51 * Each offset in the file has the offset / sizeof(int) stored for every
52 * sizeof(int) address.
53 */
verify_buf(void * buf,size_t size,off_t off,bool registered)54 static int verify_buf(void *buf, size_t size, off_t off, bool registered)
55 {
56 int i, u_in_buf = size / sizeof(unsigned int);
57 unsigned int *ptr;
58
59 verify_buf_sync(buf, size, registered);
60
61 off /= sizeof(unsigned int);
62 ptr = buf;
63 for (i = 0; i < u_in_buf; i++) {
64 if (off != *ptr) {
65 fprintf(stderr, "Found %u, wanted %llu\n", *ptr,
66 (unsigned long long) off);
67 return 1;
68 }
69 ptr++;
70 off++;
71 }
72
73 return 0;
74 }
75
test_truncate(struct io_uring * ring,const char * fname,int buffered,int vectored,int provide_buf)76 static int test_truncate(struct io_uring *ring, const char *fname, int buffered,
77 int vectored, int provide_buf)
78 {
79 struct io_uring_cqe *cqe;
80 struct io_uring_sqe *sqe;
81 struct iovec vec;
82 struct stat sb;
83 off_t punch_off, off, file_size;
84 void *buf = NULL;
85 int u_in_buf, i, ret, fd, first_pass = 1;
86 unsigned int *ptr;
87
88 if (buffered)
89 fd = open(fname, O_RDWR);
90 else
91 fd = open(fname, O_DIRECT | O_RDWR);
92 if (fd < 0) {
93 perror("open");
94 return 1;
95 }
96
97 if (fstat(fd, &sb) < 0) {
98 perror("stat");
99 close(fd);
100 return 1;
101 }
102
103 if (S_ISREG(sb.st_mode)) {
104 file_size = sb.st_size;
105 } else if (S_ISBLK(sb.st_mode)) {
106 unsigned long long bytes;
107
108 if (ioctl(fd, BLKGETSIZE64, &bytes) < 0) {
109 perror("ioctl");
110 close(fd);
111 return 1;
112 }
113 file_size = bytes;
114 } else {
115 goto out;
116 }
117
118 if (file_size < CHUNK_SIZE)
119 goto out;
120
121 t_posix_memalign(&buf, 4096, CHUNK_SIZE);
122
123 off = file_size - (CHUNK_SIZE / 2);
124 punch_off = off + CHUNK_SIZE / 4;
125
126 u_in_buf = CHUNK_SIZE / sizeof(unsigned int);
127 ptr = buf;
128 for (i = 0; i < u_in_buf; i++) {
129 *ptr = i;
130 ptr++;
131 }
132 ret = pwrite(fd, buf, CHUNK_SIZE / 2, off);
133 if (ret < 0) {
134 perror("pwrite");
135 goto err;
136 } else if (ret != CHUNK_SIZE / 2)
137 goto out;
138
139 again:
140 /*
141 * Read in last bit of file so it's known cached, then remove half of that
142 * last bit so we get a short read that needs retry
143 */
144 ret = pread(fd, buf, CHUNK_SIZE / 2, off);
145 if (ret < 0) {
146 perror("pread");
147 goto err;
148 } else if (ret != CHUNK_SIZE / 2)
149 goto out;
150
151 if (posix_fadvise(fd, punch_off, CHUNK_SIZE / 4, POSIX_FADV_DONTNEED) < 0) {
152 perror("posix_fadivse");
153 goto err;
154 }
155
156 if (provide_buf) {
157 sqe = io_uring_get_sqe(ring);
158 io_uring_prep_provide_buffers(sqe, buf, CHUNK_SIZE, 1, 0, 0);
159 ret = io_uring_submit(ring);
160 if (ret != 1) {
161 fprintf(stderr, "submit failed %d\n", ret);
162 goto err;
163 }
164 ret = io_uring_wait_cqe(ring, &cqe);
165 if (ret < 0) {
166 fprintf(stderr, "wait completion %d\n", ret);
167 goto err;
168 }
169 ret = cqe->res;
170 io_uring_cqe_seen(ring, cqe);
171 if (ret) {
172 fprintf(stderr, "Provide buffer failed %d\n", ret);
173 goto err;
174 }
175 }
176
177 sqe = io_uring_get_sqe(ring);
178 if (!sqe) {
179 fprintf(stderr, "get sqe failed\n");
180 goto err;
181 }
182
183 if (vectored) {
184 assert(!provide_buf);
185 vec.iov_base = buf;
186 vec.iov_len = CHUNK_SIZE;
187 io_uring_prep_readv(sqe, fd, &vec, 1, off);
188 } else {
189 if (provide_buf) {
190 io_uring_prep_read(sqe, fd, NULL, CHUNK_SIZE, off);
191 sqe->flags |= IOSQE_BUFFER_SELECT;
192 } else {
193 io_uring_prep_read(sqe, fd, buf, CHUNK_SIZE, off);
194 }
195 }
196 memset(buf, 0, CHUNK_SIZE);
197
198 ret = io_uring_submit(ring);
199 if (ret != 1) {
200 fprintf(stderr, "Submit failed %d\n", ret);
201 goto err;
202 }
203
204 ret = io_uring_wait_cqe(ring, &cqe);
205 if (ret < 0) {
206 fprintf(stderr, "wait completion %d\n", ret);
207 goto err;
208 }
209
210 ret = cqe->res;
211 io_uring_cqe_seen(ring, cqe);
212 if (ret != CHUNK_SIZE / 2) {
213 fprintf(stderr, "Unexpected truncated read %d\n", ret);
214 goto err;
215 }
216
217 if (verify_buf(buf, CHUNK_SIZE / 2, 0, false))
218 goto err;
219
220 /*
221 * Repeat, but punch first part instead of last
222 */
223 if (first_pass) {
224 punch_off = file_size - CHUNK_SIZE / 4;
225 first_pass = 0;
226 goto again;
227 }
228
229 out:
230 free(buf);
231 close(fd);
232 return 0;
233 err:
234 free(buf);
235 close(fd);
236 return 1;
237 }
238
239 enum {
240 PUNCH_NONE,
241 PUNCH_FRONT,
242 PUNCH_MIDDLE,
243 PUNCH_END,
244 };
245
246 /*
247 * For each chunk in file, DONTNEED a start, end, or middle segment of it.
248 * We enter here with the file fully cached every time, either freshly
249 * written or after other reads. This forces (at least) the buffered reads
250 * to be handled incrementally, exercising that path.
251 */
do_punch(int fd)252 static int do_punch(int fd)
253 {
254 off_t offset = 0;
255 int punch_type;
256
257 while (offset + CHUNK_SIZE <= FSIZE) {
258 off_t punch_off;
259
260 punch_type = rand() % (PUNCH_END + 1);
261 switch (punch_type) {
262 default:
263 case PUNCH_NONE:
264 punch_off = -1; /* gcc... */
265 break;
266 case PUNCH_FRONT:
267 punch_off = offset;
268 break;
269 case PUNCH_MIDDLE:
270 punch_off = offset + PUNCH_SIZE;
271 break;
272 case PUNCH_END:
273 punch_off = offset + CHUNK_SIZE - PUNCH_SIZE;
274 break;
275 }
276
277 offset += CHUNK_SIZE;
278 if (punch_type == PUNCH_NONE)
279 continue;
280 if (posix_fadvise(fd, punch_off, PUNCH_SIZE, POSIX_FADV_DONTNEED) < 0) {
281 perror("posix_fadivse");
282 return 1;
283 }
284 }
285
286 return 0;
287 }
288
provide_buffers(struct io_uring * ring,void ** buf)289 static int provide_buffers(struct io_uring *ring, void **buf)
290 {
291 struct io_uring_cqe *cqe;
292 struct io_uring_sqe *sqe;
293 int i, ret;
294
295 /* real use case would have one buffer chopped up, but... */
296 for (i = 0; i < READ_BATCH; i++) {
297 sqe = io_uring_get_sqe(ring);
298 io_uring_prep_provide_buffers(sqe, buf[i], CHUNK_SIZE, 1, 0, i);
299 }
300
301 ret = io_uring_submit(ring);
302 if (ret != READ_BATCH) {
303 fprintf(stderr, "Submit failed %d\n", ret);
304 return 1;
305 }
306
307 for (i = 0; i < READ_BATCH; i++) {
308 ret = io_uring_wait_cqe(ring, &cqe);
309 if (ret) {
310 fprintf(stderr, "wait cqe %d\n", ret);
311 return 1;
312 }
313 if (cqe->res < 0) {
314 fprintf(stderr, "cqe res provide %d\n", cqe->res);
315 return 1;
316 }
317 io_uring_cqe_seen(ring, cqe);
318 }
319
320 return 0;
321 }
322
test(struct io_uring * ring,const char * fname,int buffered,int vectored,int small_vecs,int registered,int provide)323 static int test(struct io_uring *ring, const char *fname, int buffered,
324 int vectored, int small_vecs, int registered, int provide)
325 {
326 struct iovec vecs[READ_BATCH][MAX_VECS];
327 struct io_uring_cqe *cqe;
328 struct io_uring_sqe *sqe;
329 void *buf[READ_BATCH];
330 int ret, fd, flags;
331 int i, j, nr_vecs;
332 off_t off, voff;
333 size_t left;
334
335 if (registered) {
336 assert(!provide);
337 assert(!vectored && !small_vecs);
338 }
339 if (provide) {
340 assert(!registered);
341 assert(!vectored && !small_vecs);
342 }
343
344 flags = O_RDONLY;
345 if (!buffered)
346 flags |= O_DIRECT;
347 fd = open(fname, flags);
348 if (fd < 0) {
349 perror("open");
350 return 1;
351 }
352
353 if (do_punch(fd))
354 return 1;
355
356 if (vectored) {
357 if (small_vecs)
358 nr_vecs = MIN_VECS;
359 else
360 nr_vecs = MAX_VECS;
361
362 for (j = 0; j < READ_BATCH; j++) {
363 for (i = 0; i < nr_vecs; i++) {
364 void *ptr;
365
366 t_posix_memalign(&ptr, 4096, CHUNK_SIZE / nr_vecs);
367 vecs[j][i].iov_base = ptr;
368 vecs[j][i].iov_len = CHUNK_SIZE / nr_vecs;
369 }
370 }
371 } else {
372 for (j = 0; j < READ_BATCH; j++)
373 t_posix_memalign(&buf[j], 4096, CHUNK_SIZE);
374 nr_vecs = 0;
375 }
376
377 if (registered) {
378 struct iovec v[READ_BATCH];
379
380 for (i = 0; i < READ_BATCH; i++) {
381 v[i].iov_base = buf[i];
382 v[i].iov_len = CHUNK_SIZE;
383 }
384 ret = t_register_buffers(ring, v, READ_BATCH);
385 if (ret) {
386 if (ret == T_SETUP_SKIP) {
387 ret = 0;
388 goto free_bufs;
389 }
390 goto err;
391 }
392 }
393
394 i = 0;
395 left = FSIZE;
396 off = 0;
397 while (left) {
398 int pending = 0;
399
400 if (provide && provide_buffers(ring, buf))
401 goto err;
402
403 for (i = 0; i < READ_BATCH; i++) {
404 size_t this = left;
405
406 if (this > CHUNK_SIZE)
407 this = CHUNK_SIZE;
408
409 sqe = io_uring_get_sqe(ring);
410 if (!sqe) {
411 fprintf(stderr, "get sqe failed\n");
412 goto err;
413 }
414
415 if (vectored) {
416 io_uring_prep_readv(sqe, fd, vecs[i], nr_vecs, off);
417 } else {
418 if (registered) {
419 io_uring_prep_read_fixed(sqe, fd, buf[i], this, off, i);
420 } else if (provide) {
421 io_uring_prep_read(sqe, fd, NULL, this, off);
422 sqe->flags |= IOSQE_BUFFER_SELECT;
423 } else {
424 io_uring_prep_read(sqe, fd, buf[i], this, off);
425 }
426 }
427 sqe->user_data = ((uint64_t)off << 32) | i;
428 off += this;
429 left -= this;
430 pending++;
431 if (!left)
432 break;
433 }
434
435 ret = io_uring_submit(ring);
436 if (ret != pending) {
437 fprintf(stderr, "sqe submit failed: %d\n", ret);
438 goto err;
439 }
440
441 for (i = 0; i < pending; i++) {
442 int index;
443
444 ret = io_uring_wait_cqe(ring, &cqe);
445 if (ret < 0) {
446 fprintf(stderr, "wait completion %d\n", ret);
447 goto err;
448 }
449 if (cqe->res < 0) {
450 fprintf(stderr, "bad read %d, read %d\n", cqe->res, i);
451 goto err;
452 }
453 if (cqe->res < CHUNK_SIZE) {
454 fprintf(stderr, "short read %d, read %d\n", cqe->res, i);
455 goto err;
456 }
457 if (cqe->flags & IORING_CQE_F_BUFFER)
458 index = cqe->flags >> 16;
459 else
460 index = cqe->user_data & 0xffffffff;
461 voff = cqe->user_data >> 32;
462 io_uring_cqe_seen(ring, cqe);
463 if (vectored) {
464 for (j = 0; j < nr_vecs; j++) {
465 void *buf = vecs[index][j].iov_base;
466 size_t len = vecs[index][j].iov_len;
467
468 if (verify_buf(buf, len, voff, registered))
469 goto err;
470 voff += len;
471 }
472 } else {
473 if (verify_buf(buf[index], CHUNK_SIZE, voff, registered))
474 goto err;
475 }
476 }
477 }
478
479 ret = 0;
480 done:
481 if (registered)
482 io_uring_unregister_buffers(ring);
483 free_bufs:
484 if (vectored) {
485 for (j = 0; j < READ_BATCH; j++)
486 for (i = 0; i < nr_vecs; i++)
487 free(vecs[j][i].iov_base);
488 } else {
489 for (j = 0; j < READ_BATCH; j++)
490 free(buf[j]);
491 }
492 close(fd);
493 return ret;
494 err:
495 ret = 1;
496 goto done;
497 }
498
fill_pattern(const char * fname)499 static int fill_pattern(const char *fname)
500 {
501 size_t left = FSIZE;
502 unsigned int val, *ptr;
503 void *buf;
504 int fd, i;
505
506 fd = open(fname, O_WRONLY);
507 if (fd < 0) {
508 perror("open");
509 return 1;
510 }
511
512 val = 0;
513 buf = t_malloc(4096);
514 while (left) {
515 int u_in_buf = 4096 / sizeof(val);
516 size_t this = left;
517
518 if (this > 4096)
519 this = 4096;
520 ptr = buf;
521 for (i = 0; i < u_in_buf; i++) {
522 *ptr = val;
523 val++;
524 ptr++;
525 }
526 if (write(fd, buf, 4096) != 4096)
527 return 1;
528 left -= 4096;
529 }
530
531 fsync(fd);
532 close(fd);
533 free(buf);
534 return 0;
535 }
536
main(int argc,char * argv[])537 int main(int argc, char *argv[])
538 {
539 struct io_uring ring;
540 const char *fname;
541 char buf[32];
542 int ret;
543
544 srand(getpid());
545
546 if (argc > 1) {
547 fname = argv[1];
548 } else {
549 sprintf(buf, ".file-verify.%d", getpid());
550 fname = buf;
551 t_create_file(fname, FSIZE);
552 }
553
554 ret = io_uring_queue_init(READ_BATCH, &ring, 0);
555 if (ret) {
556 fprintf(stderr, "ring setup failed: %d\n", ret);
557 goto err;
558 }
559
560 if (fill_pattern(fname))
561 goto err;
562
563 ret = test(&ring, fname, 1, 0, 0, 0, 0);
564 if (ret) {
565 fprintf(stderr, "Buffered novec test failed\n");
566 goto err;
567 }
568 ret = test(&ring, fname, 1, 0, 0, 1, 0);
569 if (ret) {
570 fprintf(stderr, "Buffered novec reg test failed\n");
571 goto err;
572 }
573 ret = test(&ring, fname, 1, 0, 0, 0, 1);
574 if (ret) {
575 fprintf(stderr, "Buffered novec provide test failed\n");
576 goto err;
577 }
578 ret = test(&ring, fname, 1, 1, 0, 0, 0);
579 if (ret) {
580 fprintf(stderr, "Buffered vec test failed\n");
581 goto err;
582 }
583 ret = test(&ring, fname, 1, 1, 1, 0, 0);
584 if (ret) {
585 fprintf(stderr, "Buffered small vec test failed\n");
586 goto err;
587 }
588
589 ret = test(&ring, fname, 0, 0, 0, 0, 0);
590 if (ret) {
591 fprintf(stderr, "O_DIRECT novec test failed\n");
592 goto err;
593 }
594 ret = test(&ring, fname, 0, 0, 0, 1, 0);
595 if (ret) {
596 fprintf(stderr, "O_DIRECT novec reg test failed\n");
597 goto err;
598 }
599 ret = test(&ring, fname, 0, 0, 0, 0, 1);
600 if (ret) {
601 fprintf(stderr, "O_DIRECT novec provide test failed\n");
602 goto err;
603 }
604 ret = test(&ring, fname, 0, 1, 0, 0, 0);
605 if (ret) {
606 fprintf(stderr, "O_DIRECT vec test failed\n");
607 goto err;
608 }
609 ret = test(&ring, fname, 0, 1, 1, 0, 0);
610 if (ret) {
611 fprintf(stderr, "O_DIRECT small vec test failed\n");
612 goto err;
613 }
614
615 ret = test_truncate(&ring, fname, 1, 0, 0);
616 if (ret) {
617 fprintf(stderr, "Buffered end truncate read failed\n");
618 goto err;
619 }
620 ret = test_truncate(&ring, fname, 1, 1, 0);
621 if (ret) {
622 fprintf(stderr, "Buffered end truncate vec read failed\n");
623 goto err;
624 }
625 ret = test_truncate(&ring, fname, 1, 0, 1);
626 if (ret) {
627 fprintf(stderr, "Buffered end truncate pbuf read failed\n");
628 goto err;
629 }
630
631 ret = test_truncate(&ring, fname, 0, 0, 0);
632 if (ret) {
633 fprintf(stderr, "O_DIRECT end truncate read failed\n");
634 goto err;
635 }
636 ret = test_truncate(&ring, fname, 0, 1, 0);
637 if (ret) {
638 fprintf(stderr, "O_DIRECT end truncate vec read failed\n");
639 goto err;
640 }
641 ret = test_truncate(&ring, fname, 0, 0, 1);
642 if (ret) {
643 fprintf(stderr, "O_DIRECT end truncate pbuf read failed\n");
644 goto err;
645 }
646
647 if (buf == fname)
648 unlink(fname);
649 return T_EXIT_PASS;
650 err:
651 if (buf == fname)
652 unlink(fname);
653 return T_EXIT_FAIL;
654 }
655