1 /*
2 * Copyright (c) 2014-2022 Douglas Gilbert.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 *
26 * SPDX-License-Identifier: BSD-2-Clause
27 */
28
29 #include <iostream>
30 #include <vector>
31 #include <map>
32 #include <list>
33 #include <system_error>
34 #include <thread>
35 #include <mutex>
36 #include <chrono>
37 #include <atomic>
38 #include <random>
39
40 #include <unistd.h>
41 #include <fcntl.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <stdarg.h>
45 #include <string.h>
46 #include <poll.h>
47 #include <errno.h>
48 #include <ctype.h>
49 #include <time.h>
50 #include <limits.h>
51 #include <getopt.h>
52 #define __STDC_FORMAT_MACROS 1
53 #include <inttypes.h>
54 #include <sys/ioctl.h>
55 #include <sys/types.h>
56 #include <sys/stat.h>
57 #include <sys/mman.h>
58 #include <sys/resource.h> /* getrusage */
59
60
61 #ifdef HAVE_CONFIG_H
62 #include "config.h"
63 #endif
64
65 #ifndef HAVE_LINUX_SG_V4_HDR
66
67 /* Kernel uapi header contain __user decorations on user space pointers
68 * to indicate they are unsafe in the kernel space. However glibc takes
69 * all those __user decorations out from headers in /usr/include/linux .
70 * So to stop compile errors when directly importing include/uapi/scsi/sg.h
71 * undef __user before doing that include. */
72 #define __user
73
74 /* Want to block the original sg.h header from also being included. That
75 * causes lots of multiple definition errors. This will only work if this
76 * header is included _before_ the original sg.h header. */
77 #define _SCSI_GENERIC_H /* original kernel header guard */
78 #define _SCSI_SG_H /* glibc header guard */
79
80 #include "uapi_sg.h" /* local copy of include/uapi/scsi/sg.h */
81
82 #else
83 #define __user
84 #endif /* end of: ifndef HAVE_LINUX_SG_V4_HDR */
85
86 #include "sg_lib.h"
87 #include "sg_io_linux.h"
88 #include "sg_unaligned.h"
89 #include "sg_pt.h"
90 #include "sg_cmds.h"
91
92 static const char * version_str = "1.42 20220425";
93 static const char * util_name = "sg_tst_async";
94
95 /* This is a test program for checking the async usage of the Linux sg
96 * driver. Each thread opens 1 file descriptor to the next sg device (1
97 * or more can be given on the command line) and then starts up to
98 * num_per_thread commands or more while checking with the poll command (or
99 * ioctl(SG_GET_NUM_WAITING) ) for the completion of those commands. Each
100 * command has a unique "pack_id" which is a sequence starting at 1.
 * Either TEST UNIT READY, READ(16) or WRITE(16) commands are issued.
102 *
103 * This is C++ code with some things from C++11 (e.g. threads) and was
104 * only just able to compile (when some things were reverted) with gcc/g++
105 * version 4.7.3 found in Ubuntu 13.04 . C++11 "feature complete" support
106 * was not available until g++ version 4.8.1 . It should build okay on
107 * recent distributions.
108 *
109 * The build uses various object files from the <sg3_utils>/lib directory
110 * which is assumed to be a sibling of this examples directory. Those
111 * object files in the lib directory can be built with:
112 * cd <sg3_utils_package_root> ; ./configure ; cd lib; make
113 * cd ../testing
114 * make sg_tst_async
115 *
116 * Currently this utility is Linux only and uses the sg driver. The bsg
117 * driver is known to be broken (it doesn't match responses to the
118 * correct file descriptor that requested them). Around Linux kernel 4.15
119 * the async capability of the bsg driver was removed. So this test code
 * no longer applies to the bsg driver.
121 *
122 * BEWARE: >>> This utility will modify a logical block (default LBA 1000)
123 * on the given device _when_ the '-W' option is given.
124 *
125 */
126
127 using namespace std;
128 using namespace std::chrono;
129
/* Defaults reported by usage() for --numpt, --tnum, --wait and --lba */
#define DEF_NUM_PER_THREAD 1000
#define DEF_NUM_THREADS 4
#define DEF_WAIT_MS 10          /* 0: yield or no wait */
#define DEF_NANOSEC_WAIT 25000  /* 25 microsecs */
#define DEF_TIMEOUT_MS 20000    /* 20 seconds; per command timeout */
#define DEF_LB_SZ 512           /* logical block size, bytes */
#define DEF_BLOCKING 0
#define DEF_DIRECT false        /* true: direct_io */
#define DEF_MMAP_IO false       /* true: mmap-ed IO with sg */
#define DEF_NO_XFER 0
#define DEF_LBA 1000U

/* Limits and fixed names */
#define MAX_Q_PER_FD 16383      /* sg driver per file descriptor limit */
#define MAX_CONSEC_NOMEMS 4     /* was 16 */
#define URANDOM_DEV "/dev/urandom"

/* Queueing flags: only defined here when the sg header did not supply them */
#ifndef SG_FLAG_Q_AT_TAIL
#define SG_FLAG_Q_AT_TAIL 0x10
#endif
#ifndef SG_FLAG_Q_AT_HEAD
#define SG_FLAG_Q_AT_HEAD 0x20
#endif


#define DEF_PT_TIMEOUT 60       /* 60 seconds; generic pass-through timeout */

#define EBUFF_SZ 256            /* size of error message scratch buffers */
157
/* Serializes writes to stderr made through pr2serr_lk()/pr_errno_lk() */
static mutex console_mutex;
/* Held by get_urandom_uint() while it reads the random device */
static mutex rand_lba_mutex;

/* Process wide tallies; the worker threads fold their own counts into
 * these atomics */
static atomic<int> async_starts{0};
static atomic<int> sync_starts{0};
static atomic<int> async_finishes{0};
static atomic<int> start_ebusy_count{0};
static atomic<int> start_e2big_count{0};
static atomic<int> start_eagain_count{0};
static atomic<int> fin_eagain_count{0};
static atomic<int> fin_ebusy_count{0};
static atomic<int> start_edom_count{0};
static atomic<int> enomem_count{0};
/* Source of pack_id values, the first one handed out is 1 */
static atomic<int> uniq_pack_id{1};
// static atomic<int> generic_errs(0);

static int page_size = 4096;    /* rough guess, will ask sysconf() */
174
/* Which SCSI command the worker threads issue */
enum command2execute {
    SCSI_TUR,
    SCSI_READ16,
    SCSI_WRITE16
};

/* Linux Block layer queue disciplines: */
enum blkLQDiscipline {
    BLQ_DEFAULT,
    BLQ_AT_HEAD,
    BLQ_AT_TAIL
};

/* Queue disciplines of this utility. When both completions and
 * queuing a new command are both possible: */
enum myQDiscipline {
    MYQD_LOW,           /* favour completions over new cmds */
    MYQD_MEDIUM,
    MYQD_HIGH           /* favour new cmds over completions */
};
183
184 struct opts_t {
185 vector<const char *> dev_names;
186 vector<int> blk_szs;
187 bool block;
188 bool cmd_time;
189 bool direct;
190 bool excl;
191 bool generic_sync;
192 bool masync;
193 bool mmap_io;
194 bool no_xfer;
195 bool pack_id_force;
196 bool sg_vn_ge_40000;
197 bool sg_vn_ge_40030;
198 bool submit;
199 bool verbose_given;
200 bool v3;
201 bool v3_given;
202 bool v4;
203 bool v4_given;
204 bool version_given;
205 int maxq_per_thread;
206 int num_per_thread;
207 uint64_t lba;
208 unsigned int hi_lba; /* last one, inclusive range */
209 vector<unsigned int> hi_lbas; /* only used when hi_lba=-1 */
210 int lb_sz;
211 int num_lbs;
212 int ovn; /* override number for submission */
213 int stats;
214 int verbose;
215 int wait_ms;
216 command2execute c2e;
217 blkLQDiscipline blqd; /* --qat= 0|1 -> at_head|at_tail */
218 myQDiscipline myqd; /* --qfav= value (def: 2 --> MYQD_HIGH) */
219 };
220
221 static struct opts_t a_opts; /* Expect zero fill on simple types */
222
223 static int pr_rusage(int id);
224
225 #if 0
226 class Rand_uint {
227 public:
228 Rand_uint(unsigned int lo, unsigned int hi) : p{lo, hi} {}
229 unsigned int operator()() const { return r(); }
230 private:
231 uniform_int_distribution<unsigned int>::param_type p;
232 auto r = bind(uniform_int_distribution<unsigned int>{p},
233 default_random_engine());
234 /* compiler thinks auto should be a static, bs again? */
235 };
236 #endif
237
238 #if 0
239 class Rand_uint {
240 public:
241 Rand_uint(unsigned int lo, unsigned int hi, unsigned int my_seed)
242 : r(bind(uniform_int_distribution<unsigned int>{lo, hi},
243 default_random_engine())) { r.seed(myseed); }
244 unsigned int operator()() const { return r(); }
245 private:
246 function<unsigned int()> r;
247 };
248 #endif
249
/* Small wrapper around C++11 <random>: each call to get() yields a uniformly
 * distributed unsigned int from the closed interval [lo, hi]. The sequence
 * is fully determined by a_seed. */
class Rand_uint {
public:
    /* the distribution's constructor treats [lo, hi] as inclusive when the
     * result type is integral */
    Rand_uint(unsigned int lo, unsigned int hi, unsigned int a_seed)
        : dist_(lo, hi), engine_(a_seed)
    {
    }

    /* Advances the engine and returns the next value */
    unsigned int get()
    {
        return dist_(engine_);
    }

private:
    uniform_int_distribution<unsigned int> dist_;
    default_random_engine engine_;
};
264
/* Long option table handed to getopt_long(). No entry uses a flag pointer,
 * so each match yields the short option character in the last column.
 * Hyphen and underscore spellings of the same option share a character. */
static struct option long_options[] = {
    {"v3", no_argument, nullptr, '3'},
    {"v4", no_argument, nullptr, '4'},
    {"more-async", no_argument, nullptr, 'a'},
    {"more_async", no_argument, nullptr, 'a'},
    {"masync", no_argument, nullptr, 'a'},
    {"cmd-time", no_argument, nullptr, 'c'},
    {"cmd_time", no_argument, nullptr, 'c'},
    {"direct", no_argument, nullptr, 'd'},
    {"excl", no_argument, nullptr, 'e'},
    {"force", no_argument, nullptr, 'f'},
    {"generic-sync", no_argument, nullptr, 'g'},
    {"generic_sync", no_argument, nullptr, 'g'},
    {"help", no_argument, nullptr, 'h'},
    {"lba", required_argument, nullptr, 'l'},
    {"lbsz", required_argument, nullptr, 'L'},
    {"maxqpt", required_argument, nullptr, 'M'},
    {"mmap-io", no_argument, nullptr, 'm'},
    {"mmap_io", no_argument, nullptr, 'm'},
    {"numpt", required_argument, nullptr, 'n'},
    {"num-pt", required_argument, nullptr, 'n'},
    {"num_pt", required_argument, nullptr, 'n'},
    {"noxfer", no_argument, nullptr, 'N'},
    {"override", required_argument, nullptr, 'O'},
    {"pack-id", no_argument, nullptr, 'p'},
    {"pack_id", no_argument, nullptr, 'p'},
    {"qat", required_argument, nullptr, 'q'},
    {"qfav", required_argument, nullptr, 'Q'},
    {"read", no_argument, nullptr, 'R'},
    {"stats", no_argument, nullptr, 'S'},
    {"submit", no_argument, nullptr, 'u'},
    {"szlb", required_argument, nullptr, 's'},
    {"tnum", required_argument, nullptr, 't'},
    {"tur", no_argument, nullptr, 'T'},
    {"verbose", no_argument, nullptr, 'v'},
    {"version", no_argument, nullptr, 'V'},
    {"wait", required_argument, nullptr, 'w'},
    {"write", no_argument, nullptr, 'W'},
    {nullptr, 0, nullptr, 0},   /* terminator */
};
305
306
307 static void
usage(void)308 usage(void)
309 {
310 printf("Usage: %s [--cmd-time] [--direct] [--excl] [--force]\n"
311 " [--generic-sync] [--help] [--lba=LBA+] "
312 "[--lbsz=LBSZ]\n"
313 " [--masync] [--maxqpt=QPT] [--mmap-io] "
314 "[--no-waitq]\n"
315 " [--noxfer] [--numpt=NPT] [--override=OVN] "
316 "[--pack-id]\n"
317 " [--qat=AT] [-qfav=FAV] [--read] [--stats] "
318 "[--submit]\n"
319 " [--szlb=LB[,NLBS]] [--tnum=NT] [--tur] "
320 "[--v3] [--v4]\n"
321 " [--verbose] [--version] [--wait=MS] "
322 "[--write]\n"
323 " <sg_disk_device>*\n",
324 util_name);
325 printf(" where\n");
326 printf(" --cmd-time|-c calculate per command average time (ns)\n");
327 printf(" --direct|-d do direct_io (def: indirect)\n");
328 printf(" --excl|-e do wait_exclusive calls\n");
329 printf(" --force|-f force: any sg device (def: only scsi_debug "
330 "owned)\n");
331 printf(" WARNING: <lba> written to if '-W' given\n");
332 printf(" --generic-sync|-g use generic synchronous SG_IO ioctl "
333 "instead\n");
334 printf(" of Linux sg driver assuming /dev/sg* "
335 "(def)\n");
336 printf(" --help|-h print this usage message then exit\n");
337 printf(" --lba=LBA|-l LBA logical block to access (def: %u)\n",
338 DEF_LBA);
339 printf(" --lba=LBA,HI_LBA|-l LBA,HI_LBA logical block range "
340 "(inclusive)\n"
341 " if hi_lba=-1 assume last block on "
342 "device\n");
343 printf(" --lbsz=LBSZ|-L LBSZ logical block size in bytes (def: "
344 "512)\n"
345 " should be power of 2 (0 --> 512)\n");
346 printf(" --masync|-a set 'more async' flag on devices\n");
347 printf(" --maxqpt=QPT|-M QPT maximum commands queued per thread "
348 "(def:%d)\n", MAX_Q_PER_FD);
349 printf(" --mmap-io|-m mmap-ed IO (1 cmd outstanding per thread)\n");
350 printf(" --noxfer|-N no data xfer (def: xfer on READ and "
351 "WRITE)\n");
352 printf(" --numpt=NPT|-n NPT number of commands per thread "
353 "(def: %d)\n", DEF_NUM_PER_THREAD);
354 printf(" --override OVN|-O OVN override FAV=2 when OVN queue "
355 "depth\n"
356 " reached (def: 0 -> no override)\n");
357 printf(" --pack-id|-p set FORCE_PACK_ID, pack-id input to "
358 "read/finish\n");
359 printf(" --qat=AT|-q AT AT=0: q_at_head; AT=1: q_at_tail (def: "
360 "(drv): head)\n");
361 printf(" --qfav=FAV|-Q FAV FAV=0: favour completions (smaller q),\n"
362 " FAV=1: medium,\n"
363 " FAV=2: favour submissions (larger q, "
364 "default)\n");
365 printf(" --read|-R do READs (def: TUR)\n");
366 printf(" --stats|-S show more statistics on completion\n");
367 printf(" --submit|-u use SG_IOSUBMIT+SG_IORECEIVE instead of "
368 "write+read\n");
369 printf(" --szlb=LB[,NLBS]| LB is logical block size (def: 512)\n");
370 printf(" -s LB[,NLBS] NLBS is number of logical blocks (def: "
371 "1)\n");
372 printf(" --tnum=NT|-t NT number of threads (def: %d)\n",
373 DEF_NUM_THREADS);
374 printf(" --tur|-T do TEST UNIT READYs (default is TURs)\n");
375 printf(" --v3|-3 use sg v3 interface (def: v3 if driver < "
376 "3.9)\n");
377 printf(" --v4|-4 use sg v4 interface (def if v4 driver). Sets "
378 "--submit\n");
379 printf(" --verbose|-v increase verbosity\n");
380 printf(" --version|-V print version number then exit\n");
381 printf(" --wait=MS|-w MS >0: poll(<wait_ms>); =0: poll(0); (def: "
382 "%d)\n", DEF_WAIT_MS);
383 printf(" --write|-W do WRITEs (def: TUR)\n\n");
384 printf("Multiple threads send READ(16), WRITE(16) or TEST UNIT READY "
385 "(TUR) SCSI\ncommands. There can be 1 or more <sg_disk_device>s "
386 "and each thread takes\nthe next in a round robin fashion. "
387 "Each thread queues up to NT commands.\nOne block is transferred "
388 "by each READ and WRITE; zeros are written. If a\nlogical block "
389 "range is given, a uniform distribution generates a pseudo\n"
390 "random sequence of LBAs. Set environment variable\n"
391 "SG3_UTILS_LINUX_NANO to get command timings in nanoseconds\n");
392 }
393
394 #ifdef __GNUC__
395 static int pr2serr_lk(const char * fmt, ...)
396 __attribute__ ((format (printf, 1, 2)));
397 static void pr_errno_lk(int e_no, const char * fmt, ...)
398 __attribute__ ((format (printf, 2, 3)));
399 #else
400 static int pr2serr_lk(const char * fmt, ...);
401 static void pr_errno_lk(int e_no, const char * fmt, ...);
402 #endif
403
404
405 static int
pr2serr_lk(const char * fmt,...)406 pr2serr_lk(const char * fmt, ...)
407 {
408 int n;
409 va_list args;
410 lock_guard<mutex> lg(console_mutex);
411
412 va_start(args, fmt);
413 n = vfprintf(stderr, fmt, args);
414 va_end(args);
415 return n;
416 }
417
418 static void
pr_errno_lk(int e_no,const char * fmt,...)419 pr_errno_lk(int e_no, const char * fmt, ...)
420 {
421 char b[160];
422 va_list args;
423 lock_guard<mutex> lg(console_mutex);
424
425 va_start(args, fmt);
426 vsnprintf(b, sizeof(b), fmt, args);
427 fprintf(stderr, "%s: %s\n", b, strerror(e_no));
428 va_end(args);
429 }
430
431 static unsigned int
get_urandom_uint(void)432 get_urandom_uint(void)
433 {
434 unsigned int res = 0;
435 lock_guard<mutex> lg(rand_lba_mutex);
436
437 int fd = open(URANDOM_DEV, O_RDONLY);
438 if (fd >= 0) {
439 uint8_t b[sizeof(unsigned int)];
440 int n = read(fd, b, sizeof(unsigned int));
441
442 if (sizeof(unsigned int) == n)
443 memcpy(&res, b, sizeof(unsigned int));
444 close(fd);
445 }
446 return res;
447 }
448
/* CDB sizes (in bytes) of the three SCSI commands this utility builds */
#define TUR_CMD_LEN 6
#define READ16_CMD_LEN 16
#define WRITE16_CMD_LEN 16
/* Reply buffer sizes in bytes */
#define READ16_REPLY_LEN 4096
#define WRITE16_REPLY_LEN 4096
454
455 /* Returns 0 if command injected okay, return -1 for error and 2 for
456 * not done due to queue data size limit struck. */
457 static int
start_sg3_cmd(int sg_fd,command2execute cmd2exe,int pack_id,uint64_t lba,uint8_t * lbp,int xfer_bytes,int flags,bool submit,unsigned int & enomem,unsigned int & eagains,unsigned int & ebusy,unsigned int & e2big,unsigned int & edom)458 start_sg3_cmd(int sg_fd, command2execute cmd2exe, int pack_id, uint64_t lba,
459 uint8_t * lbp, int xfer_bytes, int flags, bool submit,
460 unsigned int & enomem, unsigned int & eagains,
461 unsigned int & ebusy, unsigned int & e2big, unsigned int & edom)
462 {
463 struct sg_io_hdr pt;
464 struct sg_io_v4 p4t;
465 uint8_t turCmdBlk[TUR_CMD_LEN] = {0, 0, 0, 0, 0, 0};
466 uint8_t r16CmdBlk[READ16_CMD_LEN] =
467 {0x88, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
468 uint8_t w16CmdBlk[WRITE16_CMD_LEN] =
469 {0x8a, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
470 uint8_t sense_buffer[64] SG_C_CPP_ZERO_INIT;
471 const char * np = NULL;
472 struct sg_io_hdr * ptp;
473
474 if (submit) { /* nest a v3 interface inside a store for v4 */
475 memset(&p4t, 0, sizeof(p4t));
476 ptp = (struct sg_io_hdr *)&p4t; /* p4t is larger than pt */
477 } else {
478 ptp = &pt;
479 memset(ptp, 0, sizeof(*ptp));
480 }
481 switch (cmd2exe) {
482 case SCSI_TUR:
483 np = "TEST UNIT READY";
484 ptp->cmdp = turCmdBlk;
485 ptp->cmd_len = sizeof(turCmdBlk);
486 ptp->dxfer_direction = SG_DXFER_NONE;
487 break;
488 case SCSI_READ16:
489 np = "READ(16)";
490 if (lba > 0xffffffff)
491 sg_put_unaligned_be32(lba >> 32, &r16CmdBlk[2]);
492 sg_put_unaligned_be32(lba & 0xffffffff, &r16CmdBlk[6]);
493 ptp->cmdp = r16CmdBlk;
494 ptp->cmd_len = sizeof(r16CmdBlk);
495 ptp->dxfer_direction = SG_DXFER_FROM_DEV;
496 ptp->dxferp = lbp;
497 ptp->dxfer_len = xfer_bytes;
498 break;
499 case SCSI_WRITE16:
500 np = "WRITE(16)";
501 if (lba > 0xffffffff)
502 sg_put_unaligned_be32(lba >> 32, &w16CmdBlk[2]);
503 sg_put_unaligned_be32(lba & 0xffffffff, &w16CmdBlk[6]);
504 ptp->cmdp = w16CmdBlk;
505 ptp->cmd_len = sizeof(w16CmdBlk);
506 ptp->dxfer_direction = SG_DXFER_TO_DEV;
507 ptp->dxferp = lbp;
508 ptp->dxfer_len = xfer_bytes;
509 break;
510 }
511 ptp->interface_id = 'S';
512 ptp->mx_sb_len = sizeof(sense_buffer);
513 ptp->sbp = sense_buffer; /* ignored .... */
514 ptp->timeout = DEF_TIMEOUT_MS;
515 ptp->pack_id = pack_id;
516 ptp->flags = flags;
517
518 for (int k = 0;
519 (submit ? ioctl(sg_fd, SG_IOSUBMIT_V3, ptp) :
520 write(sg_fd, ptp, sizeof(*ptp)) < 0);
521 ++k) {
522 if ((ENOMEM == errno) && (k < MAX_CONSEC_NOMEMS)) {
523 ++enomem;
524 this_thread::yield();
525 continue;
526 } else if (EAGAIN == errno) {
527 ++eagains;
528 this_thread::yield();
529 continue;
530 } else if (EBUSY == errno) {
531 ++ebusy;
532 this_thread::yield();
533 continue;
534 } else if (E2BIG == errno) {
535 ++e2big;
536 return 2;
537 } else if (EDOM == errno)
538 ++edom;
539 else if (ENOMEM == errno)
540 pr_rusage(-1);
541 pr_errno_lk(errno, "%s: %s, pack_id=%d", __func__, np, pack_id);
542 return -1;
543 }
544 return 0;
545 }
546
547 static int
finish_sg3_cmd(int sg_fd,command2execute cmd2exe,int & pack_id,bool receive,int wait_ms,unsigned int & enomem,unsigned int & eagains,unsigned int & ebusys,unsigned int & nanosecs)548 finish_sg3_cmd(int sg_fd, command2execute cmd2exe, int & pack_id,
549 bool receive, int wait_ms, unsigned int & enomem,
550 unsigned int & eagains, unsigned int & ebusys,
551 unsigned int & nanosecs)
552 {
553 bool ok;
554 int res, k;
555 uint8_t sense_buffer[64] SG_C_CPP_ZERO_INIT;
556 const char * np = NULL;
557 struct sg_io_hdr pt;
558 struct sg_io_hdr * ptp;
559 struct sg_io_v4 p4t;
560
561 if (receive) { /* nest a v3 interface inside a store for v4 */
562 memset(&p4t, 0, sizeof(p4t));
563 ptp = (struct sg_io_hdr *)&p4t; /* p4t is larger than pt */
564 } else {
565 ptp = &pt;
566 memset(ptp, 0, sizeof(*ptp));
567 }
568 switch (cmd2exe) {
569 case SCSI_TUR:
570 np = "TEST UNIT READY";
571 ptp->dxfer_direction = SG_DXFER_NONE;
572 break;
573 case SCSI_READ16:
574 np = "READ(16)";
575 ptp->dxfer_direction = SG_DXFER_FROM_DEV;
576 break;
577 case SCSI_WRITE16:
578 np = "WRITE(16)";
579 ptp->dxfer_direction = SG_DXFER_TO_DEV;
580 break;
581 }
582 ptp->interface_id = 'S';
583 ptp->mx_sb_len = sizeof(sense_buffer);
584 ptp->sbp = sense_buffer;
585 ptp->timeout = DEF_TIMEOUT_MS;
586 /* if SG_SET_FORCE_PACK_ID, then need to set ptp->dxfer_direction */
587 ptp->pack_id = pack_id;
588
589 k = 0;
590 while ((((res = receive ? ioctl(sg_fd, SG_IORECEIVE_V3, ptp) :
591 read(sg_fd, ptp, sizeof(*ptp)))) < 0) &&
592 ((EAGAIN == errno) || (EBUSY == errno) || (ENOMEM == errno))) {
593 if (ENOMEM == errno)
594 ++enomem;
595 else if (EAGAIN == errno)
596 ++eagains;
597 else
598 ++ebusys;
599 ++k;
600 if (k > 10000) {
601 pr2serr_lk("%s: sg_fd=%d: after %d EAGAINs, unable to find "
602 "pack_id=%d\n", __func__, sg_fd, k, pack_id);
603 return -1; /* crash out */
604 }
605 if (wait_ms > 0)
606 this_thread::sleep_for(milliseconds{wait_ms});
607 else if (0 == wait_ms)
608 this_thread::yield();
609 else if (-2 == wait_ms)
610 sleep(0); // process yield ??
611 }
612 if (res < 0) {
613 if (ENOMEM == errno)
614 pr_rusage(-1);
615 pr_errno_lk(errno, "%s: %s", __func__, np);
616 return -1;
617 }
618 /* now for the error processing */
619 pack_id = ptp->pack_id;
620 ok = false;
621 switch (sg_err_category3(ptp)) {
622 case SG_LIB_CAT_CLEAN:
623 ok = true;
624 break;
625 case SG_LIB_CAT_RECOVERED:
626 pr2serr_lk("%s: Recovered error on %s, continuing\n", __func__, np);
627 ok = true;
628 break;
629 default: /* won't bother decoding other categories */
630 {
631 lock_guard<mutex> lg(console_mutex);
632 sg_chk_n_print3(np, ptp, 1);
633 }
634 break;
635 }
636 if (ok)
637 nanosecs = ptp->duration;
638 return ok ? 0 : -1;
639 }
640
641 /* Returns 0 if command injected okay, return -1 for error and 2 for
642 * not done due to queue data size limit struck. */
643 static int
start_sg4_cmd(int sg_fd,command2execute cmd2exe,int pack_id,uint64_t lba,uint8_t * lbp,int xfer_bytes,int flags,bool submit,unsigned int & enomem,unsigned int & eagains,unsigned int & ebusy,unsigned int & e2big,unsigned int & edom)644 start_sg4_cmd(int sg_fd, command2execute cmd2exe, int pack_id, uint64_t lba,
645 uint8_t * lbp, int xfer_bytes, int flags, bool submit,
646 unsigned int & enomem, unsigned int & eagains,
647 unsigned int & ebusy, unsigned int & e2big, unsigned int & edom)
648 {
649 struct sg_io_v4 p4t;
650 uint8_t turCmdBlk[TUR_CMD_LEN] = {0, 0, 0, 0, 0, 0};
651 uint8_t r16CmdBlk[READ16_CMD_LEN] =
652 {0x88, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
653 uint8_t w16CmdBlk[WRITE16_CMD_LEN] =
654 {0x8a, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
655 uint8_t sense_buffer[64] SG_C_CPP_ZERO_INIT;
656 const char * np = NULL;
657 struct sg_io_v4 * ptp;
658
659 if (! submit) {
660 pr2serr_lk("%s: logic error, submit must be true, isn't\n", __func__);
661 return -1;
662 }
663 ptp = &p4t;
664 memset(ptp, 0, sizeof(*ptp));
665 switch (cmd2exe) {
666 case SCSI_TUR:
667 np = "TEST UNIT READY";
668 ptp->request = (uint64_t)turCmdBlk;
669 ptp->request_len = sizeof(turCmdBlk);
670 break;
671 case SCSI_READ16:
672 np = "READ(16)";
673 if (lba > 0xffffffff)
674 sg_put_unaligned_be32(lba >> 32, &r16CmdBlk[2]);
675 sg_put_unaligned_be32(lba & 0xffffffff, &r16CmdBlk[6]);
676 ptp->request = (uint64_t)r16CmdBlk;
677 ptp->request_len = sizeof(r16CmdBlk);
678 ptp->din_xferp = (uint64_t)lbp;
679 ptp->din_xfer_len = xfer_bytes;
680 break;
681 case SCSI_WRITE16:
682 np = "WRITE(16)";
683 if (lba > 0xffffffff)
684 sg_put_unaligned_be32(lba >> 32, &w16CmdBlk[2]);
685 sg_put_unaligned_be32(lba & 0xffffffff, &w16CmdBlk[6]);
686 ptp->request = (uint64_t)w16CmdBlk;
687 ptp->request_len = sizeof(w16CmdBlk);
688 ptp->dout_xferp = (uint64_t)lbp;
689 ptp->dout_xfer_len = xfer_bytes;
690 break;
691 }
692 ptp->guard = 'Q';
693 ptp->max_response_len = sizeof(sense_buffer);
694 ptp->response = (uint64_t)sense_buffer; /* ignored .... */
695 ptp->timeout = DEF_TIMEOUT_MS;
696 ptp->request_extra = pack_id;
697 ptp->flags = flags;
698
699 for (int k = 0; ioctl(sg_fd, SG_IOSUBMIT, ptp) < 0; ++k) {
700 if ((ENOMEM == errno) && (k < MAX_CONSEC_NOMEMS)) {
701 ++enomem;
702 this_thread::yield();
703 continue;
704 } else if (EAGAIN == errno) {
705 ++eagains;
706 this_thread::yield();
707 continue;
708 } else if (EBUSY == errno) {
709 ++ebusy;
710 this_thread::yield();
711 continue;
712 } else if (E2BIG == errno) {
713 ++e2big;
714 return 2;
715 } else if (EDOM == errno)
716 ++edom;
717 else if (ENOMEM == errno)
718 pr_rusage(-1);
719 pr_errno_lk(errno, "%s: %s, pack_id=%d", __func__, np, pack_id);
720 return -1;
721 }
722 return 0;
723 }
724
725 static int
finish_sg4_cmd(int sg_fd,command2execute cmd2exe,int & pack_id,bool receive,int wait_ms,unsigned int & enomem,unsigned int & eagains,unsigned int & ebusys,unsigned int & nanosecs)726 finish_sg4_cmd(int sg_fd, command2execute cmd2exe, int & pack_id,
727 bool receive, int wait_ms, unsigned int & enomem,
728 unsigned int & eagains, unsigned int & ebusys,
729 unsigned int & nanosecs)
730 {
731 bool ok;
732 int res, k;
733 uint8_t sense_buffer[64] SG_C_CPP_ZERO_INIT;
734 const char * np = NULL;
735 struct sg_io_v4 * ptp;
736 struct sg_io_v4 p4t;
737
738 if (! receive) {
739 pr2serr_lk("%s: logic error, receive must be true, isn't\n",
740 __func__);
741 return -1;
742 }
743 ptp = &p4t;
744 memset(ptp, 0, sizeof(*ptp));
745 switch (cmd2exe) {
746 case SCSI_TUR:
747 np = "TEST UNIT READY";
748 break;
749 case SCSI_READ16:
750 np = "READ(16)";
751 break;
752 case SCSI_WRITE16:
753 np = "WRITE(16)";
754 break;
755 }
756 ptp->guard = 'Q';
757 ptp->max_response_len = sizeof(sense_buffer);
758 ptp->response = (uint64_t)sense_buffer;
759 ptp->timeout = DEF_TIMEOUT_MS;
760 /* if SG_SET_FORCE_PACK_ID, then need to set ptp->dxfer_direction */
761 ptp->request_extra = pack_id;
762
763 k = 0;
764 while ((((res = ioctl(sg_fd, SG_IORECEIVE, ptp))) < 0) &&
765 ((EAGAIN == errno) || (EBUSY == errno))) {
766 if (EAGAIN == errno)
767 ++eagains;
768 else
769 ++ebusys;
770 ++k;
771 if (k > 10000) {
772 pr2serr_lk("%s: sg_fd=%d: after %d EAGAINs, unable to find "
773 "pack_id=%d\n", __func__, sg_fd, k, pack_id);
774 return -1; /* crash out */
775 }
776 if (wait_ms > 0)
777 this_thread::sleep_for(milliseconds{wait_ms});
778 else if (0 == wait_ms)
779 this_thread::yield();
780 else if (-2 == wait_ms)
781 sleep(0); // process yield ??
782 }
783 if (res < 0) {
784 if (ENOMEM == errno) {
785 ++enomem;
786 pr_rusage(-1);
787 }
788 pr_errno_lk(errno, "%s: %s", __func__, np);
789 return -1;
790 }
791 /* now for the error processing */
792 pack_id = ptp->request_extra;
793 ok = false;
794 res = sg_err_category_new(ptp->device_status, ptp->transport_status,
795 ptp->driver_status,
796 (const uint8_t *)ptp->response,
797 ptp->response_len);
798 switch (res) {
799 case SG_LIB_CAT_CLEAN:
800 ok = true;
801 break;
802 case SG_LIB_CAT_RECOVERED:
803 pr2serr_lk("%s: Recovered error on %s, continuing\n", __func__, np);
804 ok = true;
805 break;
806 default: /* won't bother decoding other categories */
807 {
808 lock_guard<mutex> lg(console_mutex);
809
810 sg_linux_sense_print(np, ptp->device_status,
811 ptp->transport_status,
812 ptp->driver_status,
813 (const uint8_t *)ptp->response,
814 ptp->response_len, true);
815 }
816 break;
817 }
818 if (ok)
819 nanosecs = ptp->duration;
820 return ok ? 0 : -1;
821 }
822
823 static int
num_submitted(int sg_fd)824 num_submitted(int sg_fd)
825 {
826 uint32_t num_subm_wait = 0;
827 struct sg_extended_info sei;
828 struct sg_extended_info *seip = &sei;
829 const char * err = NULL;
830
831 memset(seip, 0, sizeof(*seip));
832 seip->sei_wr_mask |= SG_SEIM_READ_VAL;
833 seip->sei_rd_mask |= SG_SEIM_READ_VAL;
834 seip->read_value = SG_SEIRV_SUBMITTED;
835 if (ioctl(sg_fd, SG_SET_GET_EXTENDED, seip) < 0)
836 err = "ioctl(SG_SET_GET_EXTENDED) failed\n";
837 else
838 num_subm_wait = seip->read_value;
839 if (err)
840 pr2serr_lk("%s: %s, errno=%d\n", __func__, err, errno);
841 return err ? -1 : (int)num_subm_wait;
842 }
843
844 static int
pr_rusage(int id)845 pr_rusage(int id)
846 {
847 int res;
848 struct rusage ru;
849
850 res = getrusage(RUSAGE_SELF /* RUSAGE_THREAD */, &ru);
851 if (res < 0) {
852 pr2serr_lk("%d->id: %s: getrusage() failed, errno=%d\n", id,
853 __func__, errno);
854 return res;
855 }
856 pr2serr_lk("%d->id: maxrss=%ldKB nvcsw=%ld nivcsw=%ld majflt=%ld\n", id,
857 ru.ru_maxrss, ru.ru_nvcsw, ru.ru_nivcsw, ru.ru_majflt);
858 return 0;
859 }
860
861 static void
work_sync_thread(int id,const char * dev_name,unsigned int,struct opts_t * op)862 work_sync_thread(int id, const char * dev_name, unsigned int /* hi_lba */,
863 struct opts_t * op)
864 {
865 bool is_rw = (SCSI_TUR != op->c2e);
866 int k, sg_fd, err, rs, n, sense_cat, ret;
867 int vb = op->verbose;
868 int num_errs = 0;
869 int thr_sync_starts = 0;
870 struct sg_pt_base * ptp = NULL;
871 uint8_t cdb[6];
872 uint8_t sense_b[32] SG_C_CPP_ZERO_INIT;
873 char b[120];
874
875 if (is_rw) {
876 pr2serr_lk("id=%d: only support TUR here for now\n", id);
877 goto err_out;
878 }
879 if (op->verbose)
880 pr2serr_lk("id=%d: using libsgutils generic sync passthrough\n", id);
881
882 if ((sg_fd = sg_cmds_open_device(dev_name, false /* ro */, vb)) < 0) {
883 pr2serr_lk("id=%d: error opening file: %s: %s\n", id, dev_name,
884 safe_strerror(-sg_fd));
885 if (ENOMEM == -sg_fd)
886 pr_rusage(id);
887 goto err_out;
888 }
889 if (vb > 2)
890 pr2serr_lk(">>>> id=%d: open(%s) --> fd=%d\n", id, dev_name, sg_fd);
891
892 ptp = construct_scsi_pt_obj_with_fd(sg_fd, vb);
893 err = 0;
894 if ((NULL == ptp) || ((err = get_scsi_pt_os_err(ptp)))) {
895 ret = sg_convert_errno(err ? err : ENOMEM);
896 sg_exit2str(ret, true, sizeof(b), b);
897 pr2serr_lk("id=%d: construct_scsi_pt_obj_with_fd: %s\n", id, b);
898 goto err_out;
899 }
900 for (k = 0; k < op->num_per_thread; ++k) {
901 /* Might get Unit Attention on first invocation */
902 memset(cdb, 0, sizeof(cdb)); /* TUR's cdb is 6 zeros */
903 set_scsi_pt_cdb(ptp, cdb, sizeof(cdb));
904 set_scsi_pt_sense(ptp, sense_b, sizeof(sense_b));
905 set_scsi_pt_packet_id(ptp, uniq_pack_id.fetch_add(1));
906 ++thr_sync_starts;
907 rs = do_scsi_pt(ptp, -1, DEF_PT_TIMEOUT, vb);
908 n = sg_cmds_process_resp(ptp, "Test unit ready", rs,
909 (0 == k), vb, &sense_cat);
910 if (-1 == n) {
911 ret = sg_convert_errno(get_scsi_pt_os_err(ptp));
912 sg_exit2str(ret, true, sizeof(b), b);
913 pr2serr_lk("id=%d: do_scsi_pt: %s\n", id, b);
914 goto err_out;
915 } else if (-2 == n) {
916 switch (sense_cat) {
917 case SG_LIB_CAT_RECOVERED:
918 case SG_LIB_CAT_NO_SENSE:
919 break;
920 case SG_LIB_CAT_NOT_READY:
921 ++num_errs;
922 if (1 == op->num_per_thread) {
923 pr2serr_lk("id=%d: device not ready\n", id);
924 }
925 break;
926 case SG_LIB_CAT_UNIT_ATTENTION:
927 ++num_errs;
928 if (vb)
929 pr2serr_lk("Ignoring Unit attention (sense key)\n");
930 break;
931 default:
932 ++num_errs;
933 if (1 == op->num_per_thread) {
934 sg_get_category_sense_str(sense_cat, sizeof(b), b, vb);
935 pr2serr_lk("%s\n", b);
936 goto err_out;
937 }
938 break;
939 }
940 }
941 clear_scsi_pt_obj(ptp);
942 }
943 err_out:
944 if (ptp)
945 destruct_scsi_pt_obj(ptp);
946 if (num_errs > 0)
947 pr2serr_lk("id=%d: number of errors: %d\n", id, num_errs);
948 sync_starts += thr_sync_starts;
949 }
950
951 static void
work_thread(int id,struct opts_t * op)952 work_thread(int id, struct opts_t * op)
953 {
954 bool is_rw = (SCSI_TUR != op->c2e);
955 bool need_finish, repeat;
956 bool once = false;
957 bool once1000 = false;
958 bool once_2000 = false;
959 bool once_4000 = false;
960 bool once5000 = false;
961 bool once_6000 = false;
962 bool once_7000 = false;
963 bool once10_000 = false;
964 bool once20_000 = false;
965 int open_flags = O_RDWR;
966 int thr_async_starts = 0;
967 int thr_async_finishes = 0;
968 int vb = op->verbose;
969 int k, n, res, sg_fd, num_outstanding, do_inc, npt, pack_id, sg_flags;
970 int num_waiting_read, sz, encore_pack_id, ask, j, m, o;
971 int prev_pack_id, blk_sz;
972 unsigned int thr_enomem_count = 0;
973 unsigned int thr_start_eagain_count = 0;
974 unsigned int thr_start_ebusy_count = 0;
975 unsigned int thr_start_e2big_count = 0;
976 unsigned int thr_fin_eagain_count = 0;
977 unsigned int thr_fin_ebusy_count = 0;
978 unsigned int thr_start_edom_count = 0;
979 int needed_sz = op->lb_sz * op->num_lbs;
980 unsigned int nanosecs;
981 unsigned int hi_lba;
982 uint64_t lba;
983 uint64_t sum_nanosecs = 0;
984 uint8_t * lbp;
985 uint8_t * free_lbp = NULL;
986 uint8_t * wrkMmap = NULL;
987 const char * dev_name;
988 const char * err = NULL;
989 Rand_uint * ruip = NULL;
990 char ebuff[EBUFF_SZ];
991 struct pollfd pfd[1];
992 list<pair<uint8_t *, uint8_t *> > free_lst; /* of aligned lb buffers */
993 map<int, pair<uint8_t *, uint8_t *> > pi2buff;/* pack_id -> lb buffer */
994 map<int, uint64_t> pi_2_lba; /* pack_id -> LBA */
995 pair<uint8_t *, uint8_t *> encore_lbps;
996
997 /* device name and hi_lba may depend on id */
998 n = op->dev_names.size();
999 dev_name = op->dev_names[id % n];
1000 if (op->blk_szs.size() >= (unsigned)n)
1001 blk_sz = op->blk_szs[id % n];
1002 else
1003 blk_sz = DEF_LB_SZ;
1004 if ((UINT_MAX == op->hi_lba) && (n == (int)op->hi_lbas.size()))
1005 hi_lba = op->hi_lbas[id % n];
1006 else
1007 hi_lba = op->hi_lba;
1008
1009 if (vb) {
1010 if ((vb > 1) && hi_lba)
1011 pr2serr_lk("Enter work_t_id=%d using %s\n"
1012 " LBA range: 0x%x to 0x%x (inclusive)\n",
1013 id, dev_name, (unsigned int)op->lba, hi_lba);
1014 else
1015 pr2serr_lk("Enter work_t_id=%d using %s\n", id, dev_name);
1016 }
1017 if (op->generic_sync) {
1018 work_sync_thread(id, dev_name, hi_lba, op);
1019 return;
1020 }
1021 if (! op->block)
1022 open_flags |= O_NONBLOCK;
1023
1024 sg_fd = open(dev_name, open_flags);
1025 if (sg_fd < 0) {
1026 pr_errno_lk(errno, "%s: id=%d, error opening file: %s", __func__, id,
1027 dev_name);
1028 if (ENOMEM == -sg_fd)
1029 pr_rusage(id);
1030 return;
1031 }
1032 if (vb > 2)
1033 pr2serr_lk(">>>> id=%d: open(%s) --> fd=%d\n", id, dev_name, sg_fd);
1034 if (op->pack_id_force) {
1035 k = 1;
1036 if (ioctl(sg_fd, SG_SET_FORCE_PACK_ID, &k) < 0)
1037 pr2serr_lk("ioctl(SG_SET_FORCE_PACK_ID) failed, errno=%d %s\n",
1038 errno, strerror(errno));
1039 }
1040 if (op->sg_vn_ge_40000) {
1041 if (ioctl(sg_fd, SG_GET_RESERVED_SIZE, &k) >= 0) {
1042 if (needed_sz > k)
1043 ioctl(sg_fd, SG_SET_RESERVED_SIZE, &needed_sz);
1044 }
1045 if (op->sg_vn_ge_40030 && (op->cmd_time || op->masync)) {
1046 struct sg_extended_info sei;
1047 struct sg_extended_info * seip;
1048
1049 seip = &sei;
1050 memset(seip, 0, sizeof(*seip));
1051 seip->sei_wr_mask |= SG_SEIM_CTL_FLAGS;
1052 seip->sei_rd_mask |= SG_SEIM_CTL_FLAGS;
1053 if (op->cmd_time) {
1054 seip->ctl_flags_wr_mask |= SG_CTL_FLAGM_TIME_IN_NS;
1055 seip->ctl_flags_rd_mask |= SG_CTL_FLAGM_TIME_IN_NS;
1056 seip->ctl_flags |= SG_CTL_FLAGM_TIME_IN_NS;
1057 }
1058 if (op->masync) {
1059 seip->ctl_flags_wr_mask |= SG_CTL_FLAGM_MORE_ASYNC;
1060 seip->ctl_flags |= SG_CTL_FLAGM_MORE_ASYNC;
1061 }
1062 if (op->excl) {
1063 seip->ctl_flags_wr_mask |= SG_CTL_FLAGM_EXCL_WAITQ;
1064 seip->ctl_flags |= SG_CTL_FLAGM_EXCL_WAITQ;
1065 }
1066 if (ioctl(sg_fd, SG_SET_GET_EXTENDED, seip) < 0) {
1067 pr2serr_lk("ioctl(EXTENDED(TIME_IN_NS)) failed, errno=%d %s\n",
1068 errno, strerror(errno));
1069 }
1070 if (op->cmd_time &&
1071 (! (SG_CTL_FLAGM_TIME_IN_NS & seip->ctl_flags))) {
1072 memset(seip, 0, sizeof(*seip));
1073 seip->sei_rd_mask |= SG_SEIM_CTL_FLAGS;
1074 seip->sei_wr_mask |= SG_SEIM_CTL_FLAGS;
1075 seip->ctl_flags_wr_mask |= SG_CTL_FLAGM_TIME_IN_NS;
1076 seip->ctl_flags |= SG_CTL_FLAGM_TIME_IN_NS;
1077 if (ioctl(sg_fd, SG_SET_GET_EXTENDED, seip) < 0)
1078 pr2serr_lk("ioctl(EXTENDED(TIME_IN_NS)) failed, "
1079 "errno=%d %s\n", errno, strerror(errno));
1080 else if (vb > 1)
1081 pr2serr_lk("t_id: %d: set TIME_IN_NS flag\n", id);
1082 }
1083 }
1084 }
1085 if (is_rw && op->mmap_io) {
1086
1087 if (ioctl(sg_fd, SG_GET_RESERVED_SIZE, &sz) < 0) {
1088 pr2serr_lk("t_id=%d: ioctl(SG_GET_RESERVED_SIZE) errno=%d\n",
1089 id, errno);
1090 return;
1091 }
1092 if (sz < needed_sz) {
1093 sz = needed_sz;
1094 if (ioctl(sg_fd, SG_SET_RESERVED_SIZE, &sz) < 0) {
1095 pr2serr_lk("t_id=%d: ioctl(SG_SET_RESERVED_SIZE) errno=%d\n",
1096 id, errno);
1097 return;
1098 }
1099 if (ioctl(sg_fd, SG_GET_RESERVED_SIZE, &sz) < 0) {
1100 pr2serr_lk("t_id=%d: ioctl(SG_GET_RESERVED_SIZE) errno=%d\n",
1101 id, errno);
1102 return;
1103 }
1104 if (sz < needed_sz) {
1105 pr2serr_lk("t_id=%d: unable to grow reserve buffer to %d "
1106 "bytes\n", id, needed_sz);
1107 return;
1108 }
1109 }
1110 wrkMmap = (uint8_t *)mmap(NULL, needed_sz, PROT_READ | PROT_WRITE,
1111 MAP_SHARED, sg_fd, 0);
1112 if (MAP_FAILED == wrkMmap) {
1113 int ern = errno;
1114
1115 pr2serr_lk("t_id=%d: mmap() failed, errno=%d\n", id, ern);
1116 return;
1117 }
1118 }
1119 pfd[0].fd = sg_fd;
1120 pfd[0].events = POLLIN;
1121 if (is_rw && hi_lba) {
1122 unsigned int seed = get_urandom_uint();
1123
1124 if (vb > 1)
1125 pr2serr_lk(" id=%d, /dev/urandom seed=0x%x\n", id, seed);
1126 ruip = new Rand_uint((unsigned int)op->lba, hi_lba, seed);
1127 }
1128
1129 sg_flags = 0;
1130 if (BLQ_AT_TAIL == op->blqd)
1131 sg_flags |= SG_FLAG_Q_AT_TAIL;
1132 else if (BLQ_AT_HEAD == op->blqd)
1133 sg_flags |= SG_FLAG_Q_AT_HEAD;
1134 if (op->direct)
1135 sg_flags |= SG_FLAG_DIRECT_IO;
1136 if (op->mmap_io)
1137 sg_flags |= SG_FLAG_MMAP_IO;
1138 if (op->no_xfer)
1139 sg_flags |= SG_FLAG_NO_DXFER;
1140 if (vb > 1)
1141 pr2serr_lk(" id=%d, sg_flags=0x%x, %s cmds\n", id, sg_flags,
1142 ((SCSI_TUR == op->c2e) ? "TUR":
1143 ((SCSI_READ16 == op->c2e) ? "READ" : "WRITE")));
1144
1145 npt = op->num_per_thread;
1146 need_finish = false;
1147 lba = 0;
1148 pack_id = 0;
1149 prev_pack_id = 0;
1150 encore_pack_id = 0;
1151 do_inc = 0;
1152 /* main loop, continues until num_per_thread exhausted and there are
1153 * no more outstanding responses */
1154 for (k = 0, m = 0, o=0, num_outstanding = 0; (k < npt) || num_outstanding;
1155 k = do_inc ? k + 1 : k, ++o) {
1156 int num_to_read = 0;
1157
1158 if (do_inc)
1159 m = 0;
1160 else {
1161 ++m;
1162 if (m > 100) {
1163 if (vb)
1164 pr2serr_lk("%d->id: no main loop inc =%d times\n", id, m);
1165 m = 0;
1166 }
1167 }
1168 if (vb && (! once1000) && (num_outstanding >= 1000)) {
1169 int num_waiting;
1170 int num_subm = (op->sg_vn_ge_40030) ? num_submitted(sg_fd) :
1171 pi2buff.size();
1172
1173 once1000 = true;
1174 if (ioctl(sg_fd, SG_GET_NUM_WAITING, &num_waiting) < 0) {
1175 err = "ioctl(SG_GET_NUM_WAITING) failed";
1176 break;
1177 }
1178 pr2serr_lk("%d->id: once 1000: k=%d, submitted=%d waiting=%d; "
1179 "pi2buff.sz=%u\n", id, k, num_subm, num_waiting,
1180 (uint32_t)pi2buff.size());
1181 pr_rusage(id);
1182 }
1183 if (vb && ! once5000 && num_outstanding >= 5000) {
1184 int num_waiting;
1185 int num_subm = (op->sg_vn_ge_40030) ? num_submitted(sg_fd) :
1186 pi2buff.size();
1187
1188 once5000 = true;
1189 if (ioctl(sg_fd, SG_GET_NUM_WAITING, &num_waiting) < 0) {
1190 err = "ioctl(SG_GET_NUM_WAITING) failed";
1191 break;
1192 }
1193 pr2serr_lk("%d->id: once 5000: k=%d, submitted=%d waiting=%d\n",
1194 id, k, num_subm, num_waiting);
1195 pr_rusage(id);
1196 }
1197 if (vb && ! once_7000 && num_outstanding >= 7000) {
1198 int num_waiting;
1199 int num_subm = (op->sg_vn_ge_40030) ? num_submitted(sg_fd) :
1200 pi2buff.size();
1201
1202 once_7000 = true;
1203 if (ioctl(sg_fd, SG_GET_NUM_WAITING, &num_waiting) < 0) {
1204 err = "ioctl(SG_GET_NUM_WAITING) failed";
1205 break;
1206 }
1207 pr2serr_lk("%d->id: once 7000: k=%d, submitted=%d waiting=%d\n",
1208 id, k, num_subm, num_waiting);
1209 pr_rusage(id);
1210 }
1211 if (vb && ! once10_000 && num_outstanding >= 10000) {
1212 int num_waiting;
1213 int num_subm = (op->sg_vn_ge_40030) ? num_submitted(sg_fd) :
1214 pi2buff.size();
1215
1216 once10_000 = true;
1217 if (ioctl(sg_fd, SG_GET_NUM_WAITING, &num_waiting) < 0) {
1218 err = "ioctl(SG_GET_NUM_WAITING) failed";
1219 break;
1220 }
1221 pr2serr_lk("%d->id: once 10^4: k=%d, submitted=%d waiting=%d\n",
1222 id, k, num_subm, num_waiting);
1223 pr_rusage(id);
1224 }
1225 if (vb && ! once20_000 && num_outstanding >= 20000) {
1226 int num_waiting;
1227 int num_subm = (op->sg_vn_ge_40030) ? num_submitted(sg_fd) :
1228 pi2buff.size();
1229
1230 once20_000 = true;
1231 if (ioctl(sg_fd, SG_GET_NUM_WAITING, &num_waiting) < 0) {
1232 err = "ioctl(SG_GET_NUM_WAITING) failed";
1233 break;
1234 }
1235 pr2serr_lk("%d->id: once 20000: k=%d, submitted=%d waiting=%d\n",
1236 id, k, num_subm, num_waiting);
1237 pr_rusage(id);
1238 }
1239 do_inc = 0;
1240 if ((num_outstanding < op->maxq_per_thread) && (k < npt)) {
1241 do_inc = 1;
1242 if (need_finish) {
1243 pack_id = encore_pack_id;
1244 need_finish = false;
1245 repeat = true;
1246 } else {
1247 prev_pack_id = pack_id;
1248 pack_id = uniq_pack_id.fetch_add(1);
1249 repeat = false;
1250 }
1251 if (is_rw) { /* get new lb buffer or one from free list */
1252 if (free_lst.empty()) {
1253 lbp = sg_memalign(op->lb_sz * op->num_lbs, 0, &free_lbp,
1254 false);
1255 if (NULL == lbp) {
1256 err = "out of memory";
1257 break;
1258 }
1259 } else if (! repeat) {
1260 lbp = free_lst.back().first;
1261 free_lbp = free_lst.back().second;
1262 free_lst.pop_back();
1263 } else {
1264 lbp = encore_lbps.first;
1265 free_lbp = encore_lbps.second;
1266 if (vb && !once && free_lst.size() > 1000) {
1267 once = true;
1268 pr2serr_lk("%d->id: free_lst.size() over 1000\n", id);
1269 }
1270 if (vb && !once_2000 && free_lst.size() > 2000) {
1271 once_2000 = true;
1272 pr2serr_lk("%d->id: free_lst.size() over 2000\n", id);
1273 }
1274 if (vb && !once_6000 && free_lst.size() > 6000) {
1275 once_2000 = true;
1276 pr2serr_lk("%d->id: free_lst.size() over 6000\n", id);
1277 }
1278 }
1279 } else
1280 lbp = NULL;
1281 if (is_rw) {
1282 if (ruip) {
1283 if (! repeat) {
1284 lba = ruip->get(); /* fetch a random LBA */
1285 if (vb > 3)
1286 pr2serr_lk(" id=%d: start IO at lba=0x%" PRIx64
1287 "\n", id, lba);
1288 }
1289 } else
1290 lba = op->lba;
1291 } else
1292 lba = 0;
1293 if (vb > 4)
1294 pr2serr_lk("t_id=%d: starting pack_id=%d\n", id, pack_id);
1295 res = (op->v4) ?
1296 start_sg4_cmd(sg_fd, op->c2e, pack_id, lba, lbp,
1297 blk_sz * op->num_lbs, sg_flags, op->submit,
1298 thr_enomem_count, thr_start_eagain_count,
1299 thr_start_ebusy_count, thr_start_e2big_count,
1300 thr_start_edom_count) :
1301 start_sg3_cmd(sg_fd, op->c2e, pack_id, lba, lbp,
1302 blk_sz * op->num_lbs, sg_flags, op->submit,
1303 thr_enomem_count, thr_start_eagain_count,
1304 thr_start_ebusy_count, thr_start_e2big_count,
1305 thr_start_edom_count);
1306 if (res) {
1307 if (res > 1) { /* here if E2BIG, start not done, try finish */
1308 do_inc = 0;
1309 need_finish = true;
1310 encore_pack_id = pack_id;
1311 pack_id = prev_pack_id;
1312 encore_lbps = make_pair(lbp, free_lbp);
1313 if (vb > 2)
1314 pr2serr_lk("t_id=%d: E2BIG hit, prev_pack_id=%d, "
1315 "encore_pack_id=%d\n", id, prev_pack_id,
1316 encore_pack_id);
1317 } else {
1318 err = "start_sg3_cmd()";
1319 break;
1320 }
1321 } else { /* no error */
1322 ++thr_async_starts;
1323 ++num_outstanding;
1324 pi2buff[pack_id] = make_pair(lbp, free_lbp);
1325 if (ruip)
1326 pi_2_lba[pack_id] = lba;
1327 }
1328 if (vb && !once && (pi2buff.size() > 1000)) {
1329 once = true;
1330 pr2serr_lk("%d->id: pi2buff.size() over 1000 (b)\n", id);
1331 }
1332 if (vb && !once_2000 && free_lst.size() > 2000) {
1333 once_2000 = true;
1334 pr2serr_lk("%d->id: free_lst.size() over 2000 (b)\n", id);
1335 }
1336 if (vb && !once_6000 && free_lst.size() > 6000) {
1337 once_2000 = true;
1338 pr2serr_lk("%d->id: free_lst.size() over 6000 (b)\n", id);
1339 }
1340 }
1341 if (need_finish) {
1342 num_waiting_read = 0;
1343 if (ioctl(sg_fd, SG_GET_NUM_WAITING, &num_waiting_read) < 0) {
1344 err = "ioctl(SG_GET_NUM_WAITING) failed";
1345 break;
1346 } else if (vb > 3)
1347 pr2serr_lk("t_id=%d: num_waiting_read=%d\n", id,
1348 num_waiting_read);
1349 if (num_waiting_read > 0)
1350 num_to_read = num_waiting_read;
1351 else {
1352 struct timespec tspec = {0, 100000 /* 100 usecs */};
1353
1354 nanosleep(&tspec, NULL);
1355 if (vb > 3)
1356 pr2serr_lk("t_id=%d: E2BIG, 100 usecs sleep\n", id);
1357 // err = "strange, E2BIG but nothing to read";
1358 // break;
1359 }
1360 } else if ((num_outstanding >= op->maxq_per_thread) || (k >= npt)) {
1361 /* full queue or finished injecting */
1362 num_waiting_read = 0;
1363 if (ioctl(sg_fd, SG_GET_NUM_WAITING, &num_waiting_read) < 0) {
1364 err = "ioctl(SG_GET_NUM_WAITING) failed";
1365 break;
1366 }
1367 if (1 == num_waiting_read)
1368 num_to_read = num_waiting_read;
1369 else if (num_waiting_read > 0) {
1370 if (k >= npt)
1371 num_to_read = num_waiting_read;
1372 else {
1373 switch (op->myqd) {
1374 case MYQD_LOW:
1375 num_to_read = num_waiting_read;
1376 break;
1377 case MYQD_MEDIUM:
1378 num_to_read = num_waiting_read / 2;
1379 break;
1380 case MYQD_HIGH:
1381 default:
1382 if (op->ovn > 0) {
1383 if (op->sg_vn_ge_40030) {
1384 int num_subm = num_submitted(sg_fd);
1385
1386 if (num_subm > op->ovn) {
1387 num_to_read = num_waiting_read > 0 ?
1388 num_waiting_read : 1;
1389 break;
1390 }
1391 } else {
1392 if (num_waiting_read > (op->ovn / 2)) {
1393 num_to_read = num_waiting_read / 2;
1394 break;
1395 }
1396 }
1397 }
1398 num_to_read = 1;
1399 break;
1400 }
1401 }
1402 } else { /* nothing waiting to be read */
1403 if (op->sg_vn_ge_40030) {
1404 int val = num_submitted(sg_fd);
1405
1406 if (0 == val) {
1407 err = "nothing submitted now ??";
1408 break;
1409 } else if (val < 0) {
1410 err = "num_submitted failed";
1411 break;
1412 }
1413 }
1414 n = (op->wait_ms > 0) ? op->wait_ms : 0;
1415 if (n > 0) {
1416 for (j = 0; (j < 1000000) &&
1417 (0 == (res = poll(pfd, 1, n)));
1418 ++j)
1419 ;
1420 if (j >= 1000000) {
1421 err = "poll() looped 1 million times";
1422 break;
1423 }
1424 if (res < 0) {
1425 err = "poll(wait_ms) failed";
1426 break;
1427 }
1428 } else {
1429 struct timespec ts;
1430
1431 ts.tv_sec = 0;
1432 ts.tv_nsec = DEF_NANOSEC_WAIT;
1433 if (nanosleep(&ts, NULL) < 0) {
1434 err = "nanosleep() failed";
1435 break;
1436 }
1437 }
1438 }
1439 } else { /* not full, not finished injecting */
1440 if (MYQD_HIGH == op->myqd) {
1441 num_to_read = 0;
1442 if (op->ovn) {
1443 if (op->sg_vn_ge_40030) {
1444 int num_subm = num_submitted(sg_fd);
1445
1446 if (num_subm > op->ovn)
1447 num_to_read = num_waiting_read > 0 ?
1448 num_waiting_read : 1;
1449 } else {
1450 num_waiting_read = 0;
1451 if (ioctl(sg_fd, SG_GET_NUM_WAITING,
1452 &num_waiting_read) < 0) {
1453 err = "ioctl(SG_GET_NUM_WAITING) failed";
1454 break;
1455 }
1456 if (num_waiting_read > (op->ovn / 2))
1457 num_to_read = num_waiting_read / 2;
1458 }
1459 }
1460 } else {
1461 num_waiting_read = 0;
1462 if (ioctl(sg_fd, SG_GET_NUM_WAITING, &num_waiting_read) < 0) {
1463 err = "ioctl(SG_GET_NUM_WAITING) failed";
1464 break;
1465 }
1466 if (num_waiting_read > 0)
1467 num_to_read = num_waiting_read /
1468 ((MYQD_LOW == op->myqd) ? 1 : 2);
1469 else
1470 num_to_read = 0;
1471 }
1472 }
1473
1474 if (vb && !once_4000 && (num_to_read > 4000)) {
1475 once_4000 = true;
1476 pr2serr_lk("%d->id: num_to_read=%d\n", id, num_to_read);
1477 }
1478 while (num_to_read > 0) {
1479 --num_to_read;
1480 if (op->pack_id_force) {
1481 j = pi2buff.size();
1482 if (j > 0)
1483 pack_id = pi2buff.begin()->first;
1484 else
1485 pack_id = -1;
1486 } else
1487 pack_id = -1;
1488 ask = pack_id;
1489 res = (op->v4) ?
1490 finish_sg4_cmd(sg_fd, op->c2e, pack_id, op->submit,
1491 op->wait_ms, thr_enomem_count,
1492 thr_fin_eagain_count, thr_fin_ebusy_count,
1493 nanosecs) :
1494 finish_sg3_cmd(sg_fd, op->c2e, pack_id, op->submit,
1495 op->wait_ms, thr_enomem_count,
1496 thr_fin_eagain_count, thr_fin_ebusy_count,
1497 nanosecs);
1498 if (res) {
1499 err = "finish_sg3_cmd()";
1500 if (ruip && (pack_id > 0)) {
1501 auto q = pi_2_lba.find(pack_id);
1502
1503 if (q != pi_2_lba.end()) {
1504 snprintf(ebuff, sizeof(ebuff), "%s: lba=0x%" PRIx64 ,
1505 err, q->second);
1506 err = ebuff;
1507 }
1508 }
1509 break;
1510 }
1511 if (op->cmd_time && op->sg_vn_ge_40030)
1512 sum_nanosecs += nanosecs;
1513 ++thr_async_finishes;
1514 --num_outstanding;
1515 if (vb > 4)
1516 pr2serr_lk("t_id=%d: finishing pack_id ask=%d, got=%d, "
1517 "outstanding=%d\n", id, ask, pack_id,
1518 num_outstanding);
1519 auto p = pi2buff.find(pack_id);
1520
1521 if (p == pi2buff.end()) {
1522 snprintf(ebuff, sizeof(ebuff), "pack_id=%d from "
1523 "finish_sg3_cmd() not found\n", pack_id);
1524 if (! err)
1525 err = ebuff;
1526 } else {
1527 lbp = p->second.first;
1528 free_lbp = p->second.second;
1529 pi2buff.erase(p);
1530 if (lbp)
1531 free_lst.push_front(make_pair(lbp, free_lbp));
1532 }
1533 if (ruip && (pack_id > 0)) {
1534 auto q = pi_2_lba.find(pack_id);
1535
1536 if (q != pi_2_lba.end()) {
1537 if (vb > 3)
1538 pr2serr_lk(" id=%d: finish IO at lba=0x%" PRIx64
1539 "\n", id, q->second);
1540 pi_2_lba.erase(q);
1541 }
1542 }
1543 if (err)
1544 break;
1545 } /* end of while loop counting down num_to_read */
1546 if (err)
1547 break;
1548 } /* end of for loop over npt (number per thread) */
1549 if (vb)
1550 pr2serr_lk("%d->id: leaving main thread loop; k=%d, o=%d\n", id, k,
1551 o);
1552 close(sg_fd); // sg driver will handle any commands "in flight"
1553 if (ruip)
1554 delete ruip;
1555
1556 if (err || (k < npt)) {
1557 if (k < npt)
1558 pr2serr_lk("t_id=%d FAILed at iteration %d%s%s\n", id, k,
1559 (err ? ", Reason: " : ""), (err ? err : ""));
1560 else
1561 pr2serr_lk("t_id=%d FAILed on last%s%s\n", id,
1562 (err ? ", Reason: " : ""), (err ? err : ""));
1563 }
1564 n = pi2buff.size();
1565 if (n > 0)
1566 pr2serr_lk("t_id=%d Still %d elements in pi2buff map on "
1567 "exit\n", id, n);
1568 for (k = 0; ! free_lst.empty(); ++k) {
1569 lbp = free_lst.back().first;
1570 free_lbp = free_lst.back().second;
1571 free_lst.back().second = NULL;
1572 free_lst.pop_back();
1573 if (vb > 6)
1574 pr2serr_lk("t_id=%d freeing %p (free_ %p)\n", id, lbp, free_lbp);
1575 if (free_lbp) {
1576 free(free_lbp);
1577 free_lbp = NULL;
1578 }
1579 }
1580 if ((vb > 2) && (k > 0))
1581 pr2serr_lk("%d->id: Maximum number of READ/WRITEs queued: %d\n",
1582 id, k);
1583 async_starts += thr_async_starts;
1584 async_finishes += thr_async_finishes;
1585 start_eagain_count += thr_start_eagain_count;
1586 start_ebusy_count += thr_start_ebusy_count;
1587 start_e2big_count += thr_start_e2big_count;
1588 fin_eagain_count += thr_fin_eagain_count;
1589 fin_ebusy_count += thr_fin_ebusy_count;
1590 enomem_count += thr_enomem_count;
1591 start_edom_count += thr_start_edom_count;
1592 if (op->cmd_time && op->sg_vn_ge_40030 && (npt > 0)) {
1593 pr2serr_lk("t_id=%d average nanosecs per cmd: %" PRId64
1594 "\n", id, sum_nanosecs / npt);
1595 }
1596 }
1597
1598 #define INQ_REPLY_LEN 96
1599 #define INQ_CMD_LEN 6
1600
1601 /* Send INQUIRY and fetches response. If okay puts PRODUCT ID field
1602 * in b (up to m_blen bytes). Does not use O_EXCL flag. Returns 0 on success,
1603 * else -1 . */
1604 static int
do_inquiry_prod_id(const char * dev_name,int block,int & sg_ver_num,char * b,int b_mlen)1605 do_inquiry_prod_id(const char * dev_name, int block, int & sg_ver_num,
1606 char * b, int b_mlen)
1607 {
1608 int sg_fd, ok, ret;
1609 struct sg_io_hdr pt;
1610 uint8_t inqCmdBlk [INQ_CMD_LEN] =
1611 {0x12, 0, 0, 0, INQ_REPLY_LEN, 0};
1612 uint8_t inqBuff[INQ_REPLY_LEN];
1613 uint8_t sense_buffer[64] SG_C_CPP_ZERO_INIT;
1614 int open_flags = O_RDWR; /* O_EXCL | O_RDONLY fails with EPERM */
1615
1616 if (! block)
1617 open_flags |= O_NONBLOCK;
1618 sg_fd = open(dev_name, open_flags);
1619 if (sg_fd < 0) {
1620 pr_errno_lk(errno, "%s: error opening file: %s", __func__, dev_name);
1621 return -1;
1622 }
1623 if (ioctl(sg_fd, SG_GET_VERSION_NUM, &sg_ver_num) < 0)
1624 sg_ver_num = 0;
1625 /* Prepare INQUIRY command */
1626 memset(&pt, 0, sizeof(pt));
1627 pt.interface_id = 'S';
1628 pt.cmd_len = sizeof(inqCmdBlk);
1629 /* pt.iovec_count = 0; */ /* memset takes care of this */
1630 pt.mx_sb_len = sizeof(sense_buffer);
1631 pt.dxfer_direction = SG_DXFER_FROM_DEV;
1632 pt.dxfer_len = INQ_REPLY_LEN;
1633 pt.dxferp = inqBuff;
1634 pt.cmdp = inqCmdBlk;
1635 pt.sbp = sense_buffer;
1636 pt.timeout = 20000; /* 20000 millisecs == 20 seconds */
1637 /* pt.flags = 0; */ /* take defaults: indirect IO, etc */
1638 /* pt.pack_id = 0; */
1639 /* pt.usr_ptr = NULL; */
1640
1641 if (ioctl(sg_fd, SG_IO, &pt) < 0) {
1642 pr_errno_lk(errno, "%s: Inquiry SG_IO ioctl error", __func__);
1643 close(sg_fd);
1644 return -1;
1645 }
1646
1647 /* now for the error processing */
1648 ok = 0;
1649 switch (sg_err_category3(&pt)) {
1650 case SG_LIB_CAT_CLEAN:
1651 ok = 1;
1652 break;
1653 case SG_LIB_CAT_RECOVERED:
1654 pr2serr_lk("Recovered error on INQUIRY, continuing\n");
1655 ok = 1;
1656 break;
1657 default: /* won't bother decoding other categories */
1658 {
1659 lock_guard<mutex> lg(console_mutex);
1660 sg_chk_n_print3("INQUIRY command error", &pt, 1);
1661 }
1662 break;
1663 }
1664 if (ok) {
1665 /* Good, so fetch Product ID from response, copy to 'b' */
1666 if (b_mlen > 0) {
1667 if (b_mlen > 16) {
1668 memcpy(b, inqBuff + 16, 16);
1669 b[16] = '\0';
1670 } else {
1671 memcpy(b, inqBuff + 16, b_mlen - 1);
1672 b[b_mlen - 1] = '\0';
1673 }
1674 }
1675 ret = 0;
1676 } else
1677 ret = -1;
1678
1679 close(sg_fd);
1680 return ret;
1681 }
1682
1683 /* Only allow ranges up to 2**32-1 upper limit, so READ CAPACITY(10)
1684 * sufficient. Return of 0 -> success, -1 -> failure, 2 -> try again */
1685 static int
do_read_capacity(const char * dev_name,int block,unsigned int * last_lba,unsigned int * blk_sz)1686 do_read_capacity(const char * dev_name, int block, unsigned int * last_lba,
1687 unsigned int * blk_sz)
1688 {
1689 int res, sg_fd;
1690 uint8_t rcCmdBlk [10] = {0x25, 0, 0, 0, 0, 0, 0, 0, 0, 0};
1691 uint8_t rcBuff[64];
1692 uint8_t sense_b[64] SG_C_CPP_ZERO_INIT;
1693 sg_io_hdr_t io_hdr SG_C_CPP_ZERO_INIT;
1694 int open_flags = O_RDWR; /* O_EXCL | O_RDONLY fails with EPERM */
1695
1696 if (! block)
1697 open_flags |= O_NONBLOCK;
1698 sg_fd = open(dev_name, open_flags);
1699 if (sg_fd < 0) {
1700 pr_errno_lk(errno, "%s: error opening file: %s", __func__, dev_name);
1701 return -1;
1702 }
1703 /* Prepare READ CAPACITY(10) command */
1704 io_hdr.interface_id = 'S';
1705 io_hdr.cmd_len = sizeof(rcCmdBlk);
1706 io_hdr.mx_sb_len = sizeof(sense_b);
1707 io_hdr.dxfer_direction = SG_DXFER_FROM_DEV;
1708 io_hdr.dxfer_len = sizeof(rcBuff);
1709 io_hdr.dxferp = rcBuff;
1710 io_hdr.cmdp = rcCmdBlk;
1711 io_hdr.sbp = sense_b;
1712 io_hdr.timeout = 20000; /* 20000 millisecs == 20 seconds */;
1713
1714 if (ioctl(sg_fd, SG_IO, &io_hdr) < 0) {
1715 pr_errno_lk(errno, "%s (SG_IO) error", __func__);
1716 close(sg_fd);
1717 return -1;
1718 }
1719 res = sg_err_category3(&io_hdr);
1720 if (SG_LIB_CAT_UNIT_ATTENTION == res) {
1721 lock_guard<mutex> lg(console_mutex);
1722 sg_chk_n_print3("read capacity", &io_hdr, 1);
1723 close(sg_fd);
1724 return 2; /* probably have another go ... */
1725 } else if (SG_LIB_CAT_CLEAN != res) {
1726 lock_guard<mutex> lg(console_mutex);
1727 sg_chk_n_print3("read capacity", &io_hdr, 1);
1728 close(sg_fd);
1729 return -1;
1730 }
1731 *last_lba = sg_get_unaligned_be32(&rcBuff[0]);
1732 *blk_sz = sg_get_unaligned_be32(&rcBuff[4]);
1733 close(sg_fd);
1734 return 0;
1735 }
1736
1737
1738 int
main(int argc,char * argv[])1739 main(int argc, char * argv[])
1740 {
1741 bool maxq_per_thread_given = false;
1742 int n;
1743 int force = 0;
1744 int64_t ll;
1745 int num_threads = DEF_NUM_THREADS;
1746 struct timespec start_tm, end_tm;
1747 struct opts_t * op;
1748 const char * cp;
1749
1750 op = &a_opts;
1751 #if 0
1752 memset(op, 0, sizeof(*op)); // C++ doesn't like this
1753 #endif
1754 op->direct = DEF_DIRECT;
1755 op->lba = DEF_LBA;
1756 op->hi_lba = 0;
1757 op->lb_sz = DEF_LB_SZ;
1758 op->maxq_per_thread = MAX_Q_PER_FD;
1759 op->mmap_io = DEF_MMAP_IO;
1760 op->num_per_thread = DEF_NUM_PER_THREAD;
1761 op->num_lbs = 1;
1762 op->no_xfer = !! DEF_NO_XFER;
1763 op->verbose = 0;
1764 op->wait_ms = DEF_WAIT_MS;
1765 op->c2e = SCSI_TUR;
1766 op->blqd = BLQ_DEFAULT;
1767 op->block = !! DEF_BLOCKING;
1768 op->myqd = MYQD_HIGH;
1769 page_size = sysconf(_SC_PAGESIZE);
1770
1771 while (1) {
1772 int option_index = 0;
1773 int c;
1774
1775 c = getopt_long(argc, argv,
1776 "34acdefghl:L:mM:n:NO:pq:Q:Rs:St:TuvVw:W",
1777 long_options, &option_index);
1778 if (c == -1)
1779 break;
1780
1781 switch (c) {
1782 case '3':
1783 op->v3 = true;
1784 op->v3_given = true;
1785 op->v4 = false; /* if '-4 -3' take latter */
1786 op->v4_given = false;
1787 break;
1788 case '4':
1789 op->v4 = true;
1790 op->v4_given = true;
1791 op->v3 = false;
1792 op->v3_given = false;
1793 break;
1794 case 'a':
1795 op->masync = true;
1796 break;
1797 case 'c':
1798 op->cmd_time = true;
1799 break;
1800 case 'd':
1801 op->direct = true;
1802 break;
1803 case 'e':
1804 op->excl = true;
1805 break;
1806 case 'f':
1807 force = true;
1808 break;
1809 case 'g':
1810 op->generic_sync = true;
1811 break;
1812 case 'h':
1813 case '?':
1814 usage();
1815 return 0;
1816 case 'l':
1817 if (isdigit(*optarg)) {
1818 ll = sg_get_llnum(optarg);
1819 if (-1 == ll) {
1820 pr2serr_lk("could not decode lba\n");
1821 return 1;
1822 } else
1823 op->lba = (uint64_t)ll;
1824 cp = strchr(optarg, ',');
1825 if (cp) {
1826 if (0 == strcmp("-1", cp + 1))
1827 op->hi_lba = UINT_MAX;
1828 else {
1829 ll = sg_get_llnum(cp + 1);
1830 if ((-1 == ll) || (ll > UINT_MAX)) {
1831 pr2serr_lk("could not decode hi_lba, or > "
1832 "UINT_MAX\n");
1833 return 1;
1834 } else
1835 op->hi_lba = (unsigned int)ll;
1836 }
1837 }
1838 } else {
1839 pr2serr_lk("--lba= expects a number\n");
1840 return 1;
1841 }
1842 break;
1843 case 'L':
1844 op->lb_sz = sg_get_num(optarg);
1845 if (op->lb_sz < 0) {
1846 pr2serr_lk("--lbsz= expects power of 2\n");
1847 return 1;
1848 }
1849 if (0 == op->lb_sz)
1850 op->lb_sz = DEF_LB_SZ;
1851 break;
1852 case 'm':
1853 op->mmap_io = true;
1854 break;
1855 case 'M':
1856 if (isdigit(*optarg)) {
1857 n = atoi(optarg);
1858 if ((n < 1) || (n > MAX_Q_PER_FD)) {
1859 pr2serr_lk("-M expects a value from 1 to %d\n",
1860 MAX_Q_PER_FD);
1861 return 1;
1862 }
1863 maxq_per_thread_given = true;
1864 op->maxq_per_thread = n;
1865 } else {
1866 pr2serr_lk("--maxqpt= expects a number\n");
1867 return 1;
1868 }
1869 break;
1870 case 'n':
1871 if (isdigit(*optarg))
1872 op->num_per_thread = sg_get_num(optarg);
1873 else {
1874 pr2serr_lk("--numpt= expects a number\n");
1875 return 1;
1876 }
1877 break;
1878 case 'N':
1879 op->no_xfer = true;
1880 break;
1881 case 'O':
1882 if (isdigit(*optarg))
1883 op->ovn = sg_get_num(optarg);
1884 else {
1885 pr2serr_lk("--override= expects a number\n");
1886 return 1;
1887 }
1888 if (op->ovn < 0) {
1889 pr2serr_lk("--override= bad number\n");
1890 return 1;
1891 }
1892 break;
1893 case 'p':
1894 op->pack_id_force = true;
1895 break;
1896 case 'q':
1897 if (isdigit(*optarg)) {
1898 n = atoi(optarg);
1899 if (0 == n)
1900 op->blqd = BLQ_AT_HEAD;
1901 else if (1 == n)
1902 op->blqd = BLQ_AT_TAIL;
1903 } else {
1904 pr2serr_lk("--qat= expects a number: 0 or 1\n");
1905 return 1;
1906 }
1907 break;
1908 case 'Q':
1909 if (isdigit(*optarg)) {
1910 n = atoi(optarg);
1911 if (0 == n)
1912 op->myqd = MYQD_LOW;
1913 else if (1 == n)
1914 op->myqd = MYQD_MEDIUM;
1915 else if (2 == n)
1916 op->myqd = MYQD_HIGH;
1917 } else {
1918 pr2serr_lk("--qfav= expects a number: 0, 1 or 2\n");
1919 return 1;
1920 }
1921 break;
1922 case 'R':
1923 op->c2e = SCSI_READ16;
1924 break;
1925 case 's':
1926 if (isdigit(*optarg)) {
1927 op->lb_sz = atoi(optarg);
1928 if (op->lb_sz < 256) {
1929 cerr << "Strange lb_sz, using 256" << endl;
1930 op->lb_sz = 256;
1931 }
1932 } else {
1933 pr2serr_lk("--szlb= expects a number\n");
1934 return 1;
1935 }
1936 if ((cp = strchr(optarg, ','))) {
1937 n = sg_get_num(cp + 1);
1938 if (n < 1) {
1939 pr2serr_lk("could not decode 2nd part of "
1940 "--szlb=LBS,NLBS\n");
1941 return 1;
1942 }
1943 op->num_lbs = n;
1944 }
1945 break;
1946 case 'S':
1947 ++op->stats;
1948 break;
1949 case 't':
1950 if (isdigit(*optarg))
1951 num_threads = atoi(optarg);
1952 else {
1953 pr2serr_lk("--tnum= expects a number\n");
1954 return 1;
1955 }
1956 break;
1957 case 'T':
1958 op->c2e = SCSI_TUR;
1959 break;
1960 case 'u':
1961 op->submit = true;
1962 break;
1963 case 'v':
1964 op->verbose_given = true;
1965 ++op->verbose;
1966 break;
1967 case 'V':
1968 op->version_given = true;
1969 break;
1970 case 'w':
1971 if ((isdigit(*optarg) || ('-' == *optarg))) {
1972 if ('-' == *optarg)
1973 op->wait_ms = - atoi(optarg + 1);
1974 else
1975 op->wait_ms = atoi(optarg);
1976 } else {
1977 pr2serr_lk("--wait= expects a number\n");
1978 return 1;
1979 }
1980 break;
1981 case 'W':
1982 op->c2e = SCSI_WRITE16;
1983 break;
1984 default:
1985 pr2serr_lk("unrecognised option code 0x%x ??\n", c);
1986 usage();
1987 return 1;
1988 }
1989 }
1990 if (optind < argc) {
1991 for (; optind < argc; ++optind)
1992 op->dev_names.push_back(argv[optind]);
1993 }
1994 #ifdef DEBUG
1995 pr2serr_lk("In DEBUG mode, ");
1996 if (op->verbose_given && op->version_given) {
1997 pr2serr_lk("but override: '-vV' given, zero verbose and continue\n");
1998 op->verbose_given = false;
1999 op->version_given = false;
2000 op->verbose = 0;
2001 } else if (! op->verbose_given) {
2002 pr2serr_lk("set '-vv'\n");
2003 op->verbose = 2;
2004 } else
2005 pr2serr_lk("keep verbose=%d\n", op->verbose);
2006 #else
2007 if (op->verbose_given && op->version_given)
2008 pr2serr_lk("Not in DEBUG mode, so '-vV' has no special action\n");
2009 #endif
2010 if (op->version_given) {
2011 pr2serr_lk("version: %s\n", version_str);
2012 return 0;
2013 }
2014 if (op->mmap_io) {
2015 if (maxq_per_thread_given && (op->maxq_per_thread > 1)) {
2016 pr2serr_lk("With mmap_io selected, QPT cannot exceed 1\n");
2017 return 1;
2018 } else if (op->direct) {
2019 pr2serr_lk("direct IO and mmap-ed IO cannot both be selected\n");
2020 return 1;
2021 } else if (op->generic_sync) {
2022 pr2serr_lk("--generic-sync and and mmap-ed IO are compatible\n");
2023 return 1;
2024 } else
2025 op->maxq_per_thread = 1;
2026 }
2027 if (! op->cmd_time && getenv("SG3_UTILS_LINUX_NANO")) {
2028 op->cmd_time = true;
2029 if (op->verbose)
2030 fprintf(stderr, "setting nanosecond timing due to environment "
2031 "variable: SG3_UTILS_LINUX_NANO\n");
2032 }
2033 if (0 == op->dev_names.size()) {
2034 fprintf(stderr, "No sg_disk_device-s given\n\n");
2035 usage();
2036 return 1;
2037 }
2038 if (op->hi_lba && (op->lba > op->hi_lba)) {
2039 cerr << "lba,hi_lba range is illegal" << endl;
2040 return 1;
2041 }
2042 if (op->v4) {
2043 if (! op->submit) {
2044 op->submit = true;
2045 if (op->verbose > 1)
2046 cerr << "when --v4 is given, --submit will be set" << endl;
2047 }
2048 }
2049
2050 try {
2051 int k, sg_ver_num;
2052 unsigned int last_lba;
2053 unsigned int blk_sz;
2054 struct stat a_stat;
2055
2056 for (k = 0; k < (int)op->dev_names.size(); ++k) {
2057 int res;
2058 const char * dev_name;
2059 char b[128];
2060
2061 dev_name = op->dev_names[k];
2062 if (stat(dev_name, &a_stat) < 0) {
2063 snprintf(b, sizeof(b), "could not stat() %s", dev_name);
2064 perror(b);
2065 return 1;
2066 }
2067 if (! S_ISCHR(a_stat.st_mode)) {
2068 pr2serr_lk("%s should be a sg device which is a char "
2069 "device. %s\n", dev_name, dev_name);
2070 pr2serr_lk("is not a char device and damage could be done "
2071 "if it is a BLOCK\ndevice, exiting ...\n");
2072 return 1;
2073 }
2074 res = do_inquiry_prod_id(dev_name, op->block, sg_ver_num,
2075 b, sizeof(b));
2076 if (! force) {
2077 if (res) {
2078 pr2serr_lk("INQUIRY failed on %s\n", dev_name);
2079 return 1;
2080 }
2081 // For safety, since <lba> written to, only permit scsi_debug
2082 // devices. Bypass this with '-f' option.
2083 if (0 != memcmp("scsi_debug", b, 10)) {
2084 pr2serr_lk("Since this utility may write to LBAs, "
2085 "only devices with the\n"
2086 "product ID 'scsi_debug' accepted. Use '-f' "
2087 "to override.\n");
2088 return 2;
2089 }
2090 }
2091 if (sg_ver_num < 30000) {
2092 pr2serr_lk("%s either not sg device or too old\n", dev_name);
2093 return 2;
2094 } else if (sg_ver_num >= 40030) {
2095 op->sg_vn_ge_40030 = true;
2096 op->sg_vn_ge_40000 = true;
2097 if (! (op->v3_given || op->v4_given)) {
2098 op->v4 = true;
2099 op->v3 = false;
2100 op->submit = true;
2101 }
2102 } else if (sg_ver_num >= 40000) {
2103 op->sg_vn_ge_40030 = false;
2104 op->sg_vn_ge_40000 = true;
2105 if (! (op->v3_given || op->v4_given)) {
2106 op->v4 = true;
2107 op->v3 = false;
2108 op->submit = true;
2109 }
2110 } else {
2111 if (! (op->v3_given || op->v4_given)) {
2112 op->v4 = false;
2113 op->v3 = true;
2114 op->submit = false;
2115 }
2116 }
2117
2118 if ((SCSI_WRITE16 == op->c2e) || (SCSI_READ16 == op->c2e)) {
2119 res = do_read_capacity(dev_name, op->block, &last_lba,
2120 &blk_sz);
2121 if (2 == res)
2122 res = do_read_capacity(dev_name, op->block, &last_lba,
2123 &blk_sz);
2124 if (res) {
2125 pr2serr_lk("READ CAPACITY(10) failed on %s\n", dev_name);
2126 return 1;
2127 }
2128 if (blk_sz != (unsigned int)op->lb_sz) {
2129 pr2serr_lk(">>> Logical block size (%d) of %s\n"
2130 " differs from command line option (or "
2131 "default)\n", blk_sz, dev_name);
2132 pr2serr_lk("... continue anyway\n");
2133 }
2134 op->blk_szs.push_back(blk_sz);
2135 if (UINT_MAX == op->hi_lba)
2136 op->hi_lbas.push_back(last_lba);
2137 }
2138 }
2139
2140 start_tm.tv_sec = 0;
2141 start_tm.tv_nsec = 0;
2142 if (clock_gettime(CLOCK_MONOTONIC, &start_tm) < 0)
2143 perror("clock_gettime failed");
2144
2145 vector<thread *> vt;
2146
2147 /* start multi-threaded section */
2148 for (k = 0; k < num_threads; ++k) {
2149 thread * tp = new thread {work_thread, k, op};
2150 vt.push_back(tp);
2151 }
2152
2153 // g++ 4.7.3 didn't like range-for loop here
2154 for (k = 0; k < (int)vt.size(); ++k)
2155 vt[k]->join();
2156 /* end multi-threaded section, just this main thread left */
2157
2158 for (k = 0; k < (int)vt.size(); ++k)
2159 delete vt[k];
2160
2161 n = uniq_pack_id.load() - 1;
2162 if (((n > 0) || op->generic_sync) &&
2163 (0 == clock_gettime(CLOCK_MONOTONIC, &end_tm))) {
2164 struct timespec res_tm;
2165 double a, b;
2166
2167 if (op->generic_sync)
2168 n = op->num_per_thread * num_threads;
2169 res_tm.tv_sec = end_tm.tv_sec - start_tm.tv_sec;
2170 res_tm.tv_nsec = end_tm.tv_nsec - start_tm.tv_nsec;
2171 if (res_tm.tv_nsec < 0) {
2172 --res_tm.tv_sec;
2173 res_tm.tv_nsec += 1000000000;
2174 }
2175 a = res_tm.tv_sec;
2176 a += (0.000001 * (res_tm.tv_nsec / 1000));
2177 b = (double)n;
2178 if (a > 0.000001) {
2179 printf("Time to complete %d commands was %d.%06d seconds\n",
2180 n, (int)res_tm.tv_sec, (int)(res_tm.tv_nsec / 1000));
2181 printf("Implies %.0f IOPS\n", (b / a));
2182 }
2183 }
2184
2185 if (op->verbose || op->stats) {
2186 cout << "Number of sync_starts: " << sync_starts.load() << endl;
2187 cout << "Number of async_starts: " << async_starts.load() << endl;
2188 cout << "Number of async_finishes: " << async_finishes.load() <<
2189 endl;
2190 cout << "Last pack_id: " << n << endl;
2191 }
2192 n = start_ebusy_count.load();
2193 if (op->verbose || op->stats || (n > 0))
2194 cout << "Number of start EBUSYs: " << n << endl;
2195 n = fin_ebusy_count.load();
2196 if (op->verbose || op->stats || (n > 0))
2197 cout << "Number of finish EBUSYs: " << n << endl;
2198 n = start_eagain_count.load();
2199 if (op->verbose || op->stats || (n > 0))
2200 cout << "Number of start EAGAINs: " << n << endl;
2201 n = fin_eagain_count.load();
2202 if (op->verbose || op->stats || (n > 0))
2203 cout << "Number of finish EAGAINs: " << n << endl;
2204 n = start_e2big_count.load();
2205 if (op->verbose || op->stats || (n > 0))
2206 cout << "Number of E2BIGs: " << n << endl;
2207 n = start_edom_count.load();
2208 if (op->verbose || op->stats || (n > 0))
2209 cout << "Number of EDOMs: " << n << endl;
2210 n = enomem_count.load();
2211 if (op->verbose || op->stats || (n > 0))
2212 cout << "Number of ENOMEMs: " << n << endl;
2213 }
2214 catch(system_error& e) {
2215 cerr << "got a system_error exception: " << e.what() << '\n';
2216 auto ec = e.code();
2217 cerr << "category: " << ec.category().name() << '\n';
2218 cerr << "value: " << ec.value() << '\n';
2219 cerr << "message: " << ec.message() << '\n';
2220 cerr << "\nNote: if g++ may need '-pthread' or similar in "
2221 "compile/link line" << '\n';
2222 }
2223 catch(...) {
2224 cerr << "got another exception: " << '\n';
2225 }
2226 return 0;
2227 }
2228