/*
 * version of copy command using async i/o
 * From: Stephen Hemminger <shemminger@osdl.org>
 * Modified by Daniel McNeil <daniel@osdl.org> for testing aio.
 * - added -a alignment
 * - added -b blksize option
 * - added -s size option
 * - added -f open_flag option
 * - added -w (no write) option (reads from source only)
 * - added -n (num aio) option
 * - added -z (zero dest) option (writes zeros to dest only)
 * - added -D delay_ms option
 *
 * Copy file by using an async I/O state machine.
 * 1. Start read request
 * 2. When read completes turn it into a write request
 * 3. When write completes decrement counter and free resources
 *
 *
 * Usage: aiocp [-b blksize] [-n num_aio] [-w] [-z] [-s filesize]
 * [-f DIRECT|TRUNC|CREAT|SYNC|LARGEFILE] src dest
 */
23
24 #define _GNU_SOURCE
25
26 #include <unistd.h>
27 #include <stdio.h>
28 #include <sys/types.h>
29 #include <sys/stat.h>
30 #include <sys/param.h>
31 #include <fcntl.h>
32 #include <errno.h>
33 #include <stdlib.h>
34 #include <mntent.h>
35 #include <sys/select.h>
36 #include <sys/mount.h>
37
38 #include "config.h"
39 #include "tst_res_flags.h"
40
41 #ifdef HAVE_LIBAIO
42 #include <libaio.h>
43
#define AIO_BLKSIZE	(64*1024)
#define AIO_MAXIO	32

/* Transfer geometry; main() overrides these from -b and -n. */
static int aio_blksize = AIO_BLKSIZE;
static int aio_maxio = AIO_MAXIO;

/* Progress counters shared by the submit loop and the completion callbacks. */
static int busy;	/* # of I/O's in flight */
static int tocopy;	/* # of blocks left to copy */

/* File descriptors and the names they were opened from. */
static int srcfd;	/* source fd */
static int srcfd2;	/* source fd - end of file non-sector */
static int dstfd = -1;	/* destination file descriptor */
static int dstfd2 = -1;	/* Handle end of file for non-sector size */
static const char *dstname;
static const char *srcname;

/* Behaviour selected on the command line. */
static int source_open_flag = O_RDONLY;	/* open flags on source file */
static int dest_open_flag = O_WRONLY;	/* open flags on dest file */
static int no_write;	/* do not write */
static int zero;	/* write zero's only */

static int debug;
static int count_io_q_waits;	/* how many time io_queue_wait called */

/* Pool of reusable iocbs, used as a stack by alloc_iocb()/free_iocb(). */
struct iocb **iocb_free;	/* array of pointers to iocb */
int iocb_free_count;		/* current free count */
int alignment = 512;		/* buffer alignment */

struct timeval delay;		/* delay between i/o */
71
/*
 * dev_block_size_by_path() - sector size of the device a path lives on.
 *
 * Scans /proc/mounts for the mount point that is the longest path-component
 * prefix of @path (entries whose source does not start with '/' are treated
 * as pseudo filesystems and skipped), opens the backing device read-only and
 * asks it for its sector size with the BLKSSZGET ioctl.
 *
 * Returns the size reported by BLKSSZGET, or 0 when @path is NULL, no mount
 * entry matches, or the device cannot be opened or queried. Every failure
 * except a NULL @path prints a diagnostic on stderr.
 */
static int dev_block_size_by_path(const char *path)
{
	FILE *mounts;
	struct mntent *ent;
	size_t best_len = 0;
	char dev_name[1024];
	int fd, size;

	if (!path)
		return 0;

	mounts = setmntent("/proc/mounts", "r");
	if (!mounts) {
		fprintf(stderr, "Failed to open /proc/mounts\n");
		return 0;
	}

	while ((ent = getmntent(mounts))) {
		size_t len;

		/* Skip pseudo fs */
		if (ent->mnt_fsname[0] != '/')
			continue;

		len = strlen(ent->mnt_dir);
		if (len <= best_len || strncmp(path, ent->mnt_dir, len))
			continue;

		/*
		 * A plain string prefix is not enough: "/mnt/data" must not
		 * claim "/mnt/database/file". Accept the entry only if the
		 * match ends on a component boundary (len >= 1 here because
		 * len > best_len).
		 */
		if (ent->mnt_dir[len - 1] != '/' &&
		    path[len] != '/' && path[len] != '\0')
			continue;

		best_len = len;
		snprintf(dev_name, sizeof(dev_name), "%s", ent->mnt_fsname);
	}

	endmntent(mounts);

	if (!best_len) {
		fprintf(stderr, "Path '%s' not found in /proc/mounts\n", path);
		return 0;
	}

	printf("Path '%s' is on device '%s'\n", path, dev_name);

	/* open() signals failure with -1; 0 is a perfectly valid descriptor */
	fd = open(dev_name, O_RDONLY);
	if (fd < 0) {
		fprintf(stderr, "open('%s'): %s\n", dev_name, strerror(errno));
		return 0;
	}

	if (ioctl(fd, BLKSSZGET, &size)) {
		fprintf(stderr, "ioctl(BLKSSZGET): %s\n", strerror(errno));
		close(fd);
		return 0;
	}

	close(fd);
	printf("'%s' has block size %i\n", dev_name, size);

	return size;
}
130
init_iocb(int n,int iosize)131 int init_iocb(int n, int iosize)
132 {
133 void *buf;
134 int i;
135
136 if ((iocb_free = malloc(n * sizeof(struct iocb *))) == 0) {
137 return -1;
138 }
139
140 for (i = 0; i < n; i++) {
141 if (!
142 (iocb_free[i] = malloc(sizeof(struct iocb))))
143 return -1;
144 if (posix_memalign(&buf, alignment, iosize))
145 return -1;
146 if (debug > 1) {
147 printf("buf allocated at 0x%p, align:%d\n",
148 buf, alignment);
149 }
150 if (zero) {
151 /*
152 * We are writing zero's to dstfd
153 */
154 memset(buf, 0, iosize);
155 }
156 io_prep_pread(iocb_free[i], -1, buf, iosize, 0);
157 }
158 iocb_free_count = i;
159 return 0;
160 }
161
alloc_iocb(void)162 static struct iocb *alloc_iocb(void)
163 {
164 if (!iocb_free_count)
165 return 0;
166 return iocb_free[--iocb_free_count];
167 }
168
free_iocb(struct iocb * io)169 void free_iocb(struct iocb *io)
170 {
171 iocb_free[iocb_free_count++] = io;
172 }
173
174 /*
175 * io_wait_run() - wait for an io_event and then call the callback.
176 */
io_wait_run(io_context_t ctx,struct timespec * to)177 int io_wait_run(io_context_t ctx, struct timespec *to)
178 {
179 struct io_event events[aio_maxio];
180 struct io_event *ep;
181 int ret, n;
182
183 /*
184 * get up to aio_maxio events at a time.
185 */
186 ret = n = io_getevents(ctx, 1, aio_maxio, events, to);
187
188 /*
189 * Call the callback functions for each event.
190 */
191 for (ep = events; n-- > 0; ep++) {
192 io_callback_t cb = (io_callback_t) ep->data;
193 struct iocb *iocb = ep->obj;
194
195 if (debug > 1) {
196 fprintf(stderr, "ev:%p iocb:%p res:%ld res2:%ld\n",
197 ep, iocb, ep->res, ep->res2);
198 }
199 cb(ctx, iocb, ep->res, ep->res2);
200 }
201 return ret;
202 }
203
204 /* Fatal error handler */
io_error(const char * func,int rc)205 static void io_error(const char *func, int rc)
206 {
207 if (rc == -ENOSYS)
208 fprintf(stderr, "AIO not in this kernel\n");
209 else if (rc < 0)
210 fprintf(stderr, "%s: %s\n", func, strerror(-rc));
211 else
212 fprintf(stderr, "%s: error %d\n", func, rc);
213
214 if (dstfd > 0)
215 close(dstfd);
216 if (dstname && dest_open_flag & O_CREAT)
217 unlink(dstname);
218 exit(1);
219 }
220
221 /*
222 * Write complete callback.
223 * Adjust counts and free resources
224 */
wr_done(io_context_t ctx,struct iocb * iocb,long res,long res2)225 static void wr_done(io_context_t ctx, struct iocb *iocb, long res, long res2)
226 {
227 if (res2 != 0) {
228 io_error("aio write", res2);
229 }
230 if (res != iocb->u.c.nbytes) {
231 fprintf(stderr, "write missed bytes expect %lu got %ld\n",
232 iocb->u.c.nbytes, res);
233 exit(1);
234 }
235 --tocopy;
236 --busy;
237 free_iocb(iocb);
238 if (debug)
239 write(2, "w", 1);
240 }
241
242 /*
243 * Read complete callback.
244 * Change read iocb into a write iocb and start it.
245 */
rd_done(io_context_t ctx,struct iocb * iocb,long res,long res2)246 static void rd_done(io_context_t ctx, struct iocb *iocb, long res, long res2)
247 {
248 /* library needs accessors to look at iocb? */
249 int iosize = iocb->u.c.nbytes;
250 char *buf = iocb->u.c.buf;
251 off_t offset = iocb->u.c.offset;
252
253 if (res2 != 0)
254 io_error("aio read", res2);
255 if (res != iosize) {
256 fprintf(stderr, "read missing bytes expect %lu got %ld\n",
257 iocb->u.c.nbytes, res);
258 exit(1);
259 }
260
261 /* turn read into write */
262 if (no_write) {
263 --tocopy;
264 --busy;
265 free_iocb(iocb);
266 } else {
267 int fd;
268 if (iocb->aio_fildes == srcfd)
269 fd = dstfd;
270 else
271 fd = dstfd2;
272 io_prep_pwrite(iocb, fd, buf, iosize, offset);
273 io_set_callback(iocb, wr_done);
274 if (1 != (res = io_submit(ctx, 1, &iocb)))
275 io_error("io_submit write", res);
276 }
277 if (debug)
278 write(2, "r", 1);
279 if (debug > 1)
280 printf("%d", iosize);
281 }
282
/*
 * usage() - print the command line synopsis on stderr and exit with
 * status 1.
 */
static void usage(void)
{
	static const char help[] =
		"Usage: aiocp [-a align] [-s size] [-b blksize] [-n num_io]"
		" [-f open_flag] SOURCE DEST\n"
		"This copies from SOURCE to DEST using AIO.\n\n"
		"Usage: aiocp [options] -w SOURCE\n"
		"This does sequential AIO reads (no writes).\n\n"
		"Usage: aiocp [options] -z DEST\n"
		"This does sequential AIO writes of zeros.\n";

	fputs(help, stderr);
	exit(1);
}
296
/*
 * Scale value by kilo, mega, or giga.
 *
 * @scale is the suffix character that followed the number: 'k'/'K'
 * multiplies by 1024, 'm'/'M' by 1024^2 and 'g'/'G' by 1024^3. A NUL byte
 * means there was no suffix and @value is returned unchanged. Any other
 * character is a usage error and ends up in usage().
 */
long long scale_by_kmg(long long value, char scale)
{
	long long factor = 1;

	switch (scale) {
	case '\0':
		return value;
	case 'k':
	case 'K':
		factor = 1024LL;
		break;
	case 'm':
	case 'M':
		factor = 1024LL * 1024;
		break;
	case 'g':
	case 'G':
		factor = 1024LL * 1024 * 1024;
		break;
	default:
		usage();
		break;
	}

	return value * factor;
}
321
main(int argc,char * const * argv)322 int main(int argc, char *const *argv)
323 {
324 struct stat st;
325 off_t length = 0, offset = 0;
326 off_t leftover = 0;
327 io_context_t myctx;
328 int c;
329 extern char *optarg;
330 extern int optind, opterr, optopt;
331
332 while ((c = getopt(argc, argv, "a:b:df:n:s:wzD:")) != -1) {
333 char *endp;
334
335 switch (c) {
336 case 'a': /* alignment of data buffer */
337 alignment = strtol(optarg, &endp, 0);
338 alignment = (long)scale_by_kmg((long long)alignment,
339 *endp);
340 break;
341 case 'f': /* use these open flags */
342 if (strcmp(optarg, "LARGEFILE") == 0 ||
343 strcmp(optarg, "O_LARGEFILE") == 0) {
344 source_open_flag |= O_LARGEFILE;
345 dest_open_flag |= O_LARGEFILE;
346 } else if (strcmp(optarg, "TRUNC") == 0 ||
347 strcmp(optarg, "O_TRUNC") == 0) {
348 dest_open_flag |= O_TRUNC;
349 } else if (strcmp(optarg, "SYNC") == 0 ||
350 strcmp(optarg, "O_SYNC") == 0) {
351 dest_open_flag |= O_SYNC;
352 } else if (strcmp(optarg, "DIRECT") == 0 ||
353 strcmp(optarg, "O_DIRECT") == 0) {
354 source_open_flag |= O_DIRECT;
355 dest_open_flag |= O_DIRECT;
356 } else if (strncmp(optarg, "CREAT", 5) == 0 ||
357 strncmp(optarg, "O_CREAT", 5) == 0) {
358 dest_open_flag |= O_CREAT;
359 }
360 break;
361 case 'd':
362 debug++;
363 break;
364 case 'D':
365 delay.tv_usec = atoi(optarg);
366 break;
367 case 'b': /* block size */
368 aio_blksize = strtol(optarg, &endp, 0);
369 aio_blksize =
370 (long)scale_by_kmg((long long)aio_blksize, *endp);
371 break;
372
373 case 'n': /* num io */
374 aio_maxio = strtol(optarg, &endp, 0);
375 break;
376 case 's': /* size to transfer */
377 length = strtoll(optarg, &endp, 0);
378 length = scale_by_kmg(length, *endp);
379 break;
380 case 'w': /* no write */
381 no_write = 1;
382 break;
383 case 'z': /* write zero's */
384 zero = 1;
385 break;
386
387 default:
388 usage();
389 }
390 }
391
392 argc -= optind;
393 argv += optind;
394
395 if (argc < 1) {
396 usage();
397 }
398 if (!zero) {
399 if ((srcfd = open(srcname = *argv, source_open_flag)) < 0) {
400 perror(srcname);
401 exit(1);
402 }
403 argv++;
404 argc--;
405 if (fstat(srcfd, &st) < 0) {
406 perror("fstat");
407 exit(1);
408 }
409 if (length == 0)
410 length = st.st_size;
411 }
412
413 if (!no_write) {
414 /*
415 * We are either copying or writing zeros to dstname
416 */
417 if (argc < 1) {
418 usage();
419 }
420 if ((dstfd = open(dstname = *argv, dest_open_flag, 0666)) < 0) {
421 perror(dstname);
422 exit(1);
423 }
424 if (zero) {
425 /*
426 * get size of dest, if we are zeroing it.
427 * TODO: handle devices.
428 */
429 if (fstat(dstfd, &st) < 0) {
430 perror("fstat");
431 exit(1);
432 }
433 if (length == 0)
434 length = st.st_size;
435 }
436 }
437 /*
438 * O_DIRECT cannot handle non-sector sizes
439 */
440 if (dest_open_flag & O_DIRECT) {
441 int src_alignment = dev_block_size_by_path(srcname);
442 int dst_alignment = dev_block_size_by_path(dstname);
443
444 /*
445 * Given we expect the block sizes to be multiple of 2 the
446 * larger is always divideable by the smaller, so we only need
447 * to care about maximum.
448 */
449 if (src_alignment > dst_alignment)
450 dst_alignment = src_alignment;
451
452 if (alignment < dst_alignment) {
453 alignment = dst_alignment;
454 printf("Forcing aligment to %i\n", alignment);
455 }
456
457 if (aio_blksize % alignment) {
458 printf("Block size is not multiple of drive block size\n");
459 printf("Skipping the test!\n");
460 exit(0);
461 }
462
463 leftover = length % alignment;
464 if (leftover) {
465 int flag;
466
467 length -= leftover;
468 if (!zero) {
469 flag = source_open_flag & ~O_DIRECT;
470 srcfd2 = open(srcname, flag);
471 if (srcfd2 < 0) {
472 perror(srcname);
473 exit(1);
474 }
475 }
476 if (!no_write) {
477 flag = (O_SYNC | dest_open_flag) &
478 ~(O_DIRECT | O_CREAT);
479 dstfd2 = open(dstname, flag);
480 if (dstfd2 < 0) {
481 perror(dstname);
482 exit(1);
483 }
484 }
485 }
486 }
487
488 /* initialize state machine */
489 memset(&myctx, 0, sizeof(myctx));
490 io_queue_init(aio_maxio, &myctx);
491 tocopy = howmany(length, aio_blksize);
492
493 if (init_iocb(aio_maxio, aio_blksize) < 0) {
494 fprintf(stderr, "Error allocating the i/o buffers\n");
495 exit(1);
496 }
497
498 while (tocopy > 0) {
499 int i, rc;
500 /* Submit as many reads as once as possible upto aio_maxio */
501 int n = MIN(MIN(aio_maxio - busy, aio_maxio),
502 howmany(length - offset, aio_blksize));
503 if (n > 0) {
504 struct iocb *ioq[n];
505
506 for (i = 0; i < n; i++) {
507 struct iocb *io = alloc_iocb();
508 int iosize = MIN(length - offset, aio_blksize);
509
510 if (zero) {
511 /*
512 * We are writing zero's to dstfd
513 */
514 io_prep_pwrite(io, dstfd, io->u.c.buf,
515 iosize, offset);
516 io_set_callback(io, wr_done);
517 } else {
518 io_prep_pread(io, srcfd, io->u.c.buf,
519 iosize, offset);
520 io_set_callback(io, rd_done);
521 }
522 ioq[i] = io;
523 offset += iosize;
524 }
525
526 rc = io_submit(myctx, n, ioq);
527 if (rc < 0)
528 io_error("io_submit", rc);
529
530 busy += n;
531 if (debug > 1)
532 printf("io_submit(%d) busy:%d\n", n, busy);
533 if (delay.tv_usec) {
534 struct timeval t = delay;
535 (void)select(0, 0, 0, 0, &t);
536 }
537 }
538
539 /*
540 * We have submitted all the i/o requests. Wait for at least one to complete
541 * and call the callbacks.
542 */
543 count_io_q_waits++;
544 rc = io_wait_run(myctx, 0);
545 if (rc < 0)
546 io_error("io_wait_run", rc);
547
548 if (debug > 1) {
549 printf("io_wait_run: rc == %d\n", rc);
550 printf("busy:%d aio_maxio:%d tocopy:%d\n",
551 busy, aio_maxio, tocopy);
552 }
553 }
554
555 if (leftover) {
556 /* non-sector size end of file */
557 struct iocb *io = alloc_iocb();
558 int rc;
559 if (zero) {
560 /*
561 * We are writing zero's to dstfd2
562 */
563 io_prep_pwrite(io, dstfd2, io->u.c.buf,
564 leftover, offset);
565 io_set_callback(io, wr_done);
566 } else {
567 io_prep_pread(io, srcfd2, io->u.c.buf,
568 leftover, offset);
569 io_set_callback(io, rd_done);
570 }
571 rc = io_submit(myctx, 1, &io);
572 if (rc < 0)
573 io_error("io_submit", rc);
574 count_io_q_waits++;
575 rc = io_wait_run(myctx, 0);
576 if (rc < 0)
577 io_error("io_wait_run", rc);
578 }
579
580 if (srcfd != -1)
581 close(srcfd);
582 if (dstfd != -1)
583 close(dstfd);
584 exit(0);
585 }
586
587 /*
588 * Results look like:
589 * [alanm@toolbox ~/MOT3]$ ../taio -d kernel-source-2.4.8-0.4g.ppc.rpm abc
590 * rrrrrrrrrrrrrrrwwwrwrrwwrrwrwwrrwrwrwwrrwrwrrrrwwrwwwrrwrrrwwwwwwwwwwwwwwwww
591 * rrrrrrrrrrrrrrwwwrrwrwrwrwrrwwwwwwwwwwwwwwrrrrrrrrrrrrrrrrrrwwwwrwrwwrwrwrwr
592 * wrrrrrrrwwwwwwwwwwwwwrrrwrrrwrrwrwwwwwwwwwwrrrrwwrwrrrrrrrrrrrwwwwwwwwwwwrww
593 * wwwrrrrrrrrwwrrrwwrwrwrwwwrrrrrrrwwwrrwwwrrwrwwwwwwwwrrrrrrrwwwrrrrrrrwwwwww
594 * wwwwwwwrwrrrrrrrrwrrwrrwrrwrwrrrwrrrwrrrwrwwwwwwwwwwwwwwwwwwrrrwwwrrrrrrrrrr
595 * rrwrrrrrrwrrwwwwwwwwwwwwwwwwrwwwrrwrwwrrrrrrrrrrrrrrrrrrrwwwwwwwwwwwwwwwwwww
596 * rrrrrwrrwrwrwrrwrrrwwwwwwwwrrrrwrrrwrwwrwrrrwrrwrrrrwwwwwwwrwrwwwwrwwrrrwrrr
597 * rrrwwwwwwwrrrrwwrrrrrrrrrrrrwrwrrrrwwwwwwwwwwwwwwrwrrrrwwwwrwrrrrwrwwwrrrwww
598 * rwwrrrrrrrwrrrrrrrrrrrrwwwwrrrwwwrwrrwwwwwwwwwwwwwwwwwwwwwrrrrrrrwwwwwwwrw
599 */
600
601 #else
main(void)602 int main(void)
603 {
604 fprintf(stderr, "test requires libaio and it's development packages\n");
605 return TCONF;
606 }
607 #endif
608