1 /*
2  * Copyright (c) 2014-2022 Douglas Gilbert.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * SPDX-License-Identifier: BSD-2-Clause
27  */
28 
29 #include <iostream>
30 #include <vector>
31 #include <map>
32 #include <list>
33 #include <system_error>
34 #include <thread>
35 #include <mutex>
36 #include <chrono>
37 #include <atomic>
38 #include <random>
39 
40 #include <unistd.h>
41 #include <fcntl.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <stdarg.h>
45 #include <string.h>
46 #include <poll.h>
47 #include <errno.h>
48 #include <ctype.h>
49 #include <time.h>
50 #include <limits.h>
51 #include <getopt.h>
52 #define __STDC_FORMAT_MACROS 1
53 #include <inttypes.h>
54 #include <sys/ioctl.h>
55 #include <sys/types.h>
56 #include <sys/stat.h>
57 #include <sys/mman.h>
58 #include <sys/resource.h>       /* getrusage */
59 
60 
61 #ifdef HAVE_CONFIG_H
62 #include "config.h"
63 #endif
64 
65 #ifndef HAVE_LINUX_SG_V4_HDR
66 
/* Kernel uapi headers contain __user decorations on user space pointers
 * to indicate they are unsafe in the kernel space. However glibc takes
 * all those __user decorations out from headers in /usr/include/linux .
 * So to stop compile errors when directly importing include/uapi/scsi/sg.h
 * define __user as empty before doing that include. */
72 #define __user
73 
74 /* Want to block the original sg.h header from also being included. That
75  * causes lots of multiple definition errors. This will only work if this
76  * header is included _before_ the original sg.h header.  */
77 #define _SCSI_GENERIC_H         /* original kernel header guard */
78 #define _SCSI_SG_H              /* glibc header guard */
79 
80 #include "uapi_sg.h"    /* local copy of include/uapi/scsi/sg.h */
81 
82 #else
83 #define __user
84 #endif  /* end of: ifndef HAVE_LINUX_SG_V4_HDR */
85 
86 #include "sg_lib.h"
87 #include "sg_io_linux.h"
88 #include "sg_unaligned.h"
89 #include "sg_pt.h"
90 #include "sg_cmds.h"
91 
/* Version string of this utility (looks like a release number followed by
 * a date stamp). */
static const char * version_str = "1.42 20220425";
/* Name of this utility; usage() prints it at the start of the synopsis. */
static const char * util_name = "sg_tst_async";
94 
95 /* This is a test program for checking the async usage of the Linux sg
96  * driver. Each thread opens 1 file descriptor to the next sg device (1
97  * or more can be given on the command line) and then starts up to
98  * num_per_thread commands or more while checking with the poll command (or
99  * ioctl(SG_GET_NUM_WAITING) ) for the completion of those commands. Each
100  * command has a unique "pack_id" which is a sequence starting at 1.
 * Either TEST UNIT READY, READ(16) or WRITE(16) commands are issued.
102  *
103  * This is C++ code with some things from C++11 (e.g. threads) and was
104  * only just able to compile (when some things were reverted) with gcc/g++
105  * version 4.7.3 found in Ubuntu 13.04 . C++11 "feature complete" support
106  * was not available until g++ version 4.8.1 . It should build okay on
107  * recent distributions.
108  *
109  * The build uses various object files from the <sg3_utils>/lib directory
110  * which is assumed to be a sibling of this examples directory. Those
111  * object files in the lib directory can be built with:
112  *   cd <sg3_utils_package_root> ; ./configure ; cd lib; make
113  *   cd ../testing
114  *   make sg_tst_async
115  *
116  * Currently this utility is Linux only and uses the sg driver. The bsg
117  * driver is known to be broken (it doesn't match responses to the
118  * correct file descriptor that requested them). Around Linux kernel 4.15
119  * the async capability of the bsg driver was removed. So this test code
 * no longer applies to the bsg driver.
121  *
122  * BEWARE: >>> This utility will modify a logical block (default LBA 1000)
123  * on the given device _when_ the '-W' option is given.
124  *
125  */
126 
127 using namespace std;
128 using namespace std::chrono;
129 
/* Compile time defaults and limits. Several of these are printed by usage()
 * as the default value of the corresponding command line option. */
#define DEF_NUM_PER_THREAD 1000 /* shown by usage() as --numpt default */
#define DEF_NUM_THREADS 4       /* shown by usage() as --tnum default */
#define DEF_WAIT_MS 10          /* 0: yield or no wait */
#define DEF_NANOSEC_WAIT 25000  /* 25 microsecs */
#define DEF_TIMEOUT_MS 20000    /* 20 seconds */
#define DEF_LB_SZ 512
#define DEF_BLOCKING 0
#define DEF_DIRECT false        /* true: direct_io */
#define DEF_MMAP_IO false       /* true: mmap-ed IO with sg */
#define DEF_NO_XFER 0
#define DEF_LBA 1000U           /* shown by usage() as --lba default */

#define MAX_Q_PER_FD 16383      /* sg driver per file descriptor limit */
#define MAX_CONSEC_NOMEMS 4     /* was 16 */
#define URANDOM_DEV "/dev/urandom"

/* Fallback definitions in case the sg header in use does not supply them */
#ifndef SG_FLAG_Q_AT_TAIL
#define SG_FLAG_Q_AT_TAIL 0x10
#endif
#ifndef SG_FLAG_Q_AT_HEAD
#define SG_FLAG_Q_AT_HEAD 0x20
#endif


#define DEF_PT_TIMEOUT 60       /* 60 seconds */

#define EBUFF_SZ 256
157 
/* console_mutex is held by pr2serr_lk() and pr_errno_lk() while they write
 * to stderr; rand_lba_mutex is held by get_urandom_uint(). */
static mutex console_mutex;
static mutex rand_lba_mutex;
/* Process wide counters. Their users are outside this part of the file;
 * presumably worker threads add their per-thread tallies to them. */
static atomic<int> async_starts(0);
static atomic<int> sync_starts(0);
static atomic<int> async_finishes(0);
static atomic<int> start_ebusy_count(0);
static atomic<int> start_e2big_count(0);
static atomic<int> start_eagain_count(0);
static atomic<int> fin_eagain_count(0);
static atomic<int> fin_ebusy_count(0);
static atomic<int> start_edom_count(0);
static atomic<int> enomem_count(0);
static atomic<int> uniq_pack_id(1);     /* starts at 1 */
// static atomic<int> generic_errs(0);

static int page_size = 4096;   /* rough guess, will ask sysconf() */
174 
/* Selects which SCSI command the start_*_cmd() and finish_*_cmd() functions
 * build: TEST UNIT READY, READ(16) or WRITE(16). */
enum command2execute {SCSI_TUR, SCSI_READ16, SCSI_WRITE16};
/* Linux Block layer queue disciplines: */
enum blkLQDiscipline {BLQ_DEFAULT, BLQ_AT_HEAD, BLQ_AT_TAIL};
/* Queue disciplines of this utility. When both completions and
 * queuing a new command are both possible: */
enum myQDiscipline {MYQD_LOW,   /* favour completions over new cmds */
                    MYQD_MEDIUM,
                    MYQD_HIGH}; /* favour new cmds over completions */
183 
/* Holds the settings of one run of this utility. Most member names mirror
 * the long options described in usage(); the code that fills this structure
 * is outside this part of the file, so that correspondence is presumed
 * rather than shown here. */
struct opts_t {
    vector<const char *> dev_names;
    vector<int> blk_szs;
    bool block;
    bool cmd_time;
    bool direct;
    bool excl;
    bool generic_sync;
    bool masync;
    bool mmap_io;
    bool no_xfer;
    bool pack_id_force;
    bool sg_vn_ge_40000;
    bool sg_vn_ge_40030;
    bool submit;
    bool verbose_given;
    bool v3;
    bool v3_given;
    bool v4;
    bool v4_given;
    bool version_given;
    int maxq_per_thread;
    int num_per_thread;
    uint64_t lba;
    unsigned int hi_lba;        /* last one, inclusive range */
    vector<unsigned int> hi_lbas; /* only used when hi_lba=-1 */
    int lb_sz;
    int num_lbs;
    int ovn;            /* override number for submission */
    int stats;
    int verbose;
    int wait_ms;
    command2execute c2e;
    blkLQDiscipline blqd;       /* --qat= 0|1 -> at_head|at_tail */
    myQDiscipline myqd;         /* --qfav= value (def: 2 --> MYQD_HIGH) */
};
220 
static struct opts_t a_opts;    /* Expect zero fill on simple types */

/* Forward declaration: pr_rusage() is called by the start_*_cmd() and
 * finish_*_cmd() functions that appear before its definition. */
static int pr_rusage(int id);
224 
/* The two '#if 0' sections below are earlier attempts at the Rand_uint
 * class. Neither is compiled. Note that the second one names its
 * constructor parameter 'my_seed' but then refers to 'myseed'. */
#if 0
class Rand_uint {
public:
    Rand_uint(unsigned int lo, unsigned int hi) : p{lo, hi} {}
    unsigned int operator()() const { return r(); }
private:
    uniform_int_distribution<unsigned int>::param_type p;
    auto r = bind(uniform_int_distribution<unsigned int>{p},
                  default_random_engine());
    /* compiler thinks auto should be a static, bs again? */
};
#endif

#if 0
class Rand_uint {
public:
    Rand_uint(unsigned int lo, unsigned int hi, unsigned int my_seed)
        : r(bind(uniform_int_distribution<unsigned int>{lo, hi},
                 default_random_engine())) { r.seed(myseed); }
    unsigned int operator()() const { return r(); }
private:
    function<unsigned int()> r;
};
#endif
249 
/* Small wrapper around the C++11 <random> facilities. Each call to get()
 * yields a uniformly distributed unsigned int in [lo, hi] (both ends
 * included); the sequence is determined by a_seed. */
class Rand_uint {
public:
    /* For integral types the distribution's constructor takes an
     * inclusive range */
    Rand_uint(unsigned int lo, unsigned int hi, unsigned int a_seed)
        : distrib_(lo, hi), engine_(a_seed)
    {
    }

    /* Advances the engine and returns the next value */
    unsigned int
    get()
    {
        return distrib_(engine_);
    }

private:
    std::uniform_int_distribution<unsigned int> distrib_;
    std::default_random_engine engine_;
};
264 
/* Table of long option names, each mapped to a short option letter.
 * Several options have alternate spellings (hyphen, underscore or none)
 * that map to the same letter. The table ends with an all zero entry.
 * Presumably it is handed to getopt_long(); that call is outside this part
 * of the file. */
static struct option long_options[] = {
        {"v3", no_argument, 0, '3'},
        {"v4", no_argument, 0, '4'},
        {"more-async", no_argument, 0, 'a'},
        {"more_async", no_argument, 0, 'a'},
        {"masync", no_argument, 0, 'a'},
        {"cmd-time", no_argument, 0, 'c'},
        {"cmd_time", no_argument, 0, 'c'},
        {"direct", no_argument, 0, 'd'},
        {"excl", no_argument, 0, 'e'},
        {"force", no_argument, 0, 'f'},
        {"generic-sync", no_argument, 0, 'g'},
        {"generic_sync", no_argument, 0, 'g'},
        {"help", no_argument, 0, 'h'},
        {"lba", required_argument, 0, 'l'},
        {"lbsz", required_argument, 0, 'L'},
        {"maxqpt", required_argument, 0, 'M'},
        {"mmap-io", no_argument, 0, 'm'},
        {"mmap_io", no_argument, 0, 'm'},
        {"numpt", required_argument, 0, 'n'},
        {"num-pt", required_argument, 0, 'n'},
        {"num_pt", required_argument, 0, 'n'},
        {"noxfer", no_argument, 0, 'N'},
        {"override", required_argument, 0, 'O'},
        {"pack-id", no_argument, 0, 'p'},
        {"pack_id", no_argument, 0, 'p'},
        {"qat", required_argument, 0, 'q'},
        {"qfav", required_argument, 0, 'Q'},
        {"read", no_argument, 0, 'R'},
        {"stats", no_argument, 0, 'S'},
        {"submit", no_argument, 0, 'u'},
        {"szlb", required_argument, 0, 's'},
        {"tnum", required_argument, 0, 't'},
        {"tur", no_argument, 0, 'T'},
        {"verbose", no_argument, 0, 'v'},
        {"version", no_argument, 0, 'V'},
        {"wait", required_argument, 0, 'w'},
        {"write", no_argument, 0, 'W'},
        {0, 0, 0, 0},
};
305 
306 
307 static void
usage(void)308 usage(void)
309 {
310     printf("Usage: %s [--cmd-time] [--direct] [--excl] [--force]\n"
311            "                    [--generic-sync] [--help] [--lba=LBA+] "
312            "[--lbsz=LBSZ]\n"
313            "                    [--masync] [--maxqpt=QPT] [--mmap-io] "
314            "[--no-waitq]\n"
315            "                    [--noxfer] [--numpt=NPT] [--override=OVN] "
316            "[--pack-id]\n"
317            "                    [--qat=AT] [-qfav=FAV] [--read] [--stats] "
318            "[--submit]\n"
319            "                    [--szlb=LB[,NLBS]] [--tnum=NT] [--tur] "
320            "[--v3] [--v4]\n"
321            "                    [--verbose] [--version] [--wait=MS] "
322            "[--write]\n"
323            "                    <sg_disk_device>*\n",
324            util_name);
325     printf("  where\n");
326     printf("    --cmd-time|-c    calculate per command average time (ns)\n");
327     printf("    --direct|-d     do direct_io (def: indirect)\n");
328     printf("    --excl|-e       do wait_exclusive calls\n");
329     printf("    --force|-f      force: any sg device (def: only scsi_debug "
330            "owned)\n");
331     printf("                    WARNING: <lba> written to if '-W' given\n");
332     printf("    --generic-sync|-g    use generic synchronous SG_IO ioctl "
333            "instead\n");
334     printf("                       of Linux sg driver assuming /dev/sg* "
335            "(def)\n");
336     printf("    --help|-h       print this usage message then exit\n");
337     printf("    --lba=LBA|-l LBA    logical block to access (def: %u)\n",
338            DEF_LBA);
339     printf("    --lba=LBA,HI_LBA|-l LBA,HI_LBA    logical block range "
340            "(inclusive)\n"
341            "                          if hi_lba=-1 assume last block on "
342            "device\n");
343     printf("    --lbsz=LBSZ|-L LBSZ    logical block size in bytes (def: "
344            "512)\n"
345            "                           should be power of 2 (0 --> 512)\n");
346     printf("    --masync|-a     set 'more async' flag on devices\n");
347     printf("    --maxqpt=QPT|-M QPT    maximum commands queued per thread "
348            "(def:%d)\n", MAX_Q_PER_FD);
349     printf("    --mmap-io|-m    mmap-ed IO (1 cmd outstanding per thread)\n");
350     printf("    --noxfer|-N          no data xfer (def: xfer on READ and "
351            "WRITE)\n");
352     printf("    --numpt=NPT|-n NPT    number of commands per thread "
353            "(def: %d)\n", DEF_NUM_PER_THREAD);
354     printf("    --override OVN|-O OVN    override FAV=2 when OVN queue "
355            "depth\n"
356            "                             reached (def: 0 -> no override)\n");
357     printf("    --pack-id|-p    set FORCE_PACK_ID, pack-id input to "
358            "read/finish\n");
359     printf("    --qat=AT|-q AT       AT=0: q_at_head; AT=1: q_at_tail (def: "
360            "(drv): head)\n");
361     printf("    --qfav=FAV|-Q FAV    FAV=0: favour completions (smaller q),\n"
362            "                         FAV=1: medium,\n"
363            "                         FAV=2: favour submissions (larger q, "
364            "default)\n");
365     printf("    --read|-R       do READs (def: TUR)\n");
366     printf("    --stats|-S      show more statistics on completion\n");
367     printf("    --submit|-u     use SG_IOSUBMIT+SG_IORECEIVE instead of "
368            "write+read\n");
369     printf("    --szlb=LB[,NLBS]|    LB is logical block size (def: 512)\n");
370     printf("         -s LB[,NLBS]    NLBS is number of logical blocks (def: "
371            "1)\n");
372     printf("    --tnum=NT|-t NT    number of threads (def: %d)\n",
373            DEF_NUM_THREADS);
374     printf("    --tur|-T        do TEST UNIT READYs (default is TURs)\n");
375     printf("    --v3|-3         use sg v3 interface (def: v3 if driver < "
376            "3.9)\n");
377     printf("    --v4|-4         use sg v4 interface (def if v4 driver). Sets "
378            "--submit\n");
379     printf("    --verbose|-v    increase verbosity\n");
380     printf("    --version|-V    print version number then exit\n");
381     printf("    --wait=MS|-w MS    >0: poll(<wait_ms>); =0: poll(0); (def: "
382            "%d)\n", DEF_WAIT_MS);
383     printf("    --write|-W      do WRITEs (def: TUR)\n\n");
384     printf("Multiple threads send READ(16), WRITE(16) or TEST UNIT READY "
385            "(TUR) SCSI\ncommands. There can be 1 or more <sg_disk_device>s "
386            "and each thread takes\nthe next in a round robin fashion. "
387            "Each thread queues up to NT commands.\nOne block is transferred "
388            "by each READ and WRITE; zeros are written. If a\nlogical block "
389            "range is given, a uniform distribution generates a pseudo\n"
390            "random sequence of LBAs. Set environment variable\n"
391            "SG3_UTILS_LINUX_NANO to get command timings in nanoseconds\n");
392 }
393 
/* Prototypes of the locked stderr helpers. With GNU compatible compilers
 * the format attribute asks for printf style checking of the format string
 * against the variadic arguments. */
#ifdef __GNUC__
static int pr2serr_lk(const char * fmt, ...)
        __attribute__ ((format (printf, 1, 2)));
static void pr_errno_lk(int e_no, const char * fmt, ...)
        __attribute__ ((format (printf, 2, 3)));
#else
static int pr2serr_lk(const char * fmt, ...);
static void pr_errno_lk(int e_no, const char * fmt, ...);
#endif
403 
404 
405 static int
pr2serr_lk(const char * fmt,...)406 pr2serr_lk(const char * fmt, ...)
407 {
408     int n;
409     va_list args;
410     lock_guard<mutex> lg(console_mutex);
411 
412     va_start(args, fmt);
413     n = vfprintf(stderr, fmt, args);
414     va_end(args);
415     return n;
416 }
417 
418 static void
pr_errno_lk(int e_no,const char * fmt,...)419 pr_errno_lk(int e_no, const char * fmt, ...)
420 {
421     char b[160];
422     va_list args;
423     lock_guard<mutex> lg(console_mutex);
424 
425     va_start(args, fmt);
426     vsnprintf(b, sizeof(b), fmt, args);
427     fprintf(stderr, "%s: %s\n", b, strerror(e_no));
428     va_end(args);
429 }
430 
431 static unsigned int
get_urandom_uint(void)432 get_urandom_uint(void)
433 {
434     unsigned int res = 0;
435     lock_guard<mutex> lg(rand_lba_mutex);
436 
437     int fd = open(URANDOM_DEV, O_RDONLY);
438     if (fd >= 0) {
439         uint8_t b[sizeof(unsigned int)];
440         int n = read(fd, b, sizeof(unsigned int));
441 
442         if (sizeof(unsigned int) == n)
443             memcpy(&res, b, sizeof(unsigned int));
444         close(fd);
445     }
446     return res;
447 }
448 
/* The *_CMD_LEN values size the command descriptor block arrays in the
 * start_*_cmd() functions (bytes). The *_REPLY_LEN values are not used in
 * this part of the file. */
#define TUR_CMD_LEN 6
#define READ16_CMD_LEN 16
#define READ16_REPLY_LEN 4096
#define WRITE16_REPLY_LEN 4096
#define WRITE16_CMD_LEN 16
454 
455 /* Returns 0 if command injected okay, return -1 for error and 2 for
456  * not done due to queue data size limit struck. */
457 static int
start_sg3_cmd(int sg_fd,command2execute cmd2exe,int pack_id,uint64_t lba,uint8_t * lbp,int xfer_bytes,int flags,bool submit,unsigned int & enomem,unsigned int & eagains,unsigned int & ebusy,unsigned int & e2big,unsigned int & edom)458 start_sg3_cmd(int sg_fd, command2execute cmd2exe, int pack_id, uint64_t lba,
459               uint8_t * lbp, int xfer_bytes, int flags, bool submit,
460               unsigned int & enomem, unsigned int & eagains,
461               unsigned int & ebusy, unsigned int & e2big, unsigned int & edom)
462 {
463     struct sg_io_hdr pt;
464     struct sg_io_v4 p4t;
465     uint8_t turCmdBlk[TUR_CMD_LEN] = {0, 0, 0, 0, 0, 0};
466     uint8_t r16CmdBlk[READ16_CMD_LEN] =
467                 {0x88, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
468     uint8_t w16CmdBlk[WRITE16_CMD_LEN] =
469                 {0x8a, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
470     uint8_t sense_buffer[64] SG_C_CPP_ZERO_INIT;
471     const char * np = NULL;
472     struct sg_io_hdr * ptp;
473 
474     if (submit) {       /* nest a v3 interface inside a store for v4 */
475         memset(&p4t, 0, sizeof(p4t));
476         ptp = (struct sg_io_hdr *)&p4t; /* p4t is larger than pt */
477     } else {
478         ptp = &pt;
479         memset(ptp, 0, sizeof(*ptp));
480     }
481     switch (cmd2exe) {
482     case SCSI_TUR:
483         np = "TEST UNIT READY";
484         ptp->cmdp = turCmdBlk;
485         ptp->cmd_len = sizeof(turCmdBlk);
486         ptp->dxfer_direction = SG_DXFER_NONE;
487         break;
488     case SCSI_READ16:
489         np = "READ(16)";
490         if (lba > 0xffffffff)
491             sg_put_unaligned_be32(lba >> 32, &r16CmdBlk[2]);
492         sg_put_unaligned_be32(lba & 0xffffffff, &r16CmdBlk[6]);
493         ptp->cmdp = r16CmdBlk;
494         ptp->cmd_len = sizeof(r16CmdBlk);
495         ptp->dxfer_direction = SG_DXFER_FROM_DEV;
496         ptp->dxferp = lbp;
497         ptp->dxfer_len = xfer_bytes;
498         break;
499     case SCSI_WRITE16:
500         np = "WRITE(16)";
501         if (lba > 0xffffffff)
502             sg_put_unaligned_be32(lba >> 32, &w16CmdBlk[2]);
503         sg_put_unaligned_be32(lba & 0xffffffff, &w16CmdBlk[6]);
504         ptp->cmdp = w16CmdBlk;
505         ptp->cmd_len = sizeof(w16CmdBlk);
506         ptp->dxfer_direction = SG_DXFER_TO_DEV;
507         ptp->dxferp = lbp;
508         ptp->dxfer_len = xfer_bytes;
509         break;
510     }
511     ptp->interface_id = 'S';
512     ptp->mx_sb_len = sizeof(sense_buffer);
513     ptp->sbp = sense_buffer;      /* ignored .... */
514     ptp->timeout = DEF_TIMEOUT_MS;
515     ptp->pack_id = pack_id;
516     ptp->flags = flags;
517 
518     for (int k = 0;
519          (submit ? ioctl(sg_fd, SG_IOSUBMIT_V3, ptp) :
520                    write(sg_fd, ptp, sizeof(*ptp)) < 0);
521          ++k) {
522         if ((ENOMEM == errno) && (k < MAX_CONSEC_NOMEMS)) {
523             ++enomem;
524             this_thread::yield();
525             continue;
526         } else if (EAGAIN == errno) {
527             ++eagains;
528             this_thread::yield();
529             continue;
530         } else if (EBUSY == errno) {
531             ++ebusy;
532             this_thread::yield();
533             continue;
534         } else if (E2BIG == errno) {
535             ++e2big;
536             return 2;
537         } else if (EDOM == errno)
538             ++edom;
539         else if (ENOMEM == errno)
540             pr_rusage(-1);
541         pr_errno_lk(errno, "%s: %s, pack_id=%d", __func__, np, pack_id);
542         return -1;
543     }
544     return 0;
545 }
546 
547 static int
finish_sg3_cmd(int sg_fd,command2execute cmd2exe,int & pack_id,bool receive,int wait_ms,unsigned int & enomem,unsigned int & eagains,unsigned int & ebusys,unsigned int & nanosecs)548 finish_sg3_cmd(int sg_fd, command2execute cmd2exe, int & pack_id,
549                bool receive, int wait_ms, unsigned int & enomem,
550                unsigned int & eagains, unsigned int & ebusys,
551                unsigned int & nanosecs)
552 {
553     bool ok;
554     int res, k;
555     uint8_t sense_buffer[64] SG_C_CPP_ZERO_INIT;
556     const char * np = NULL;
557     struct sg_io_hdr pt;
558     struct sg_io_hdr * ptp;
559     struct sg_io_v4 p4t;
560 
561     if (receive) {      /* nest a v3 interface inside a store for v4 */
562         memset(&p4t, 0, sizeof(p4t));
563         ptp = (struct sg_io_hdr *)&p4t; /* p4t is larger than pt */
564     } else {
565         ptp = &pt;
566         memset(ptp, 0, sizeof(*ptp));
567     }
568     switch (cmd2exe) {
569     case SCSI_TUR:
570         np = "TEST UNIT READY";
571         ptp->dxfer_direction = SG_DXFER_NONE;
572         break;
573     case SCSI_READ16:
574         np = "READ(16)";
575         ptp->dxfer_direction = SG_DXFER_FROM_DEV;
576         break;
577     case SCSI_WRITE16:
578         np = "WRITE(16)";
579         ptp->dxfer_direction = SG_DXFER_TO_DEV;
580         break;
581     }
582     ptp->interface_id = 'S';
583     ptp->mx_sb_len = sizeof(sense_buffer);
584     ptp->sbp = sense_buffer;
585     ptp->timeout = DEF_TIMEOUT_MS;
586     /* if SG_SET_FORCE_PACK_ID, then need to set ptp->dxfer_direction */
587     ptp->pack_id = pack_id;
588 
589     k = 0;
590     while ((((res = receive ? ioctl(sg_fd, SG_IORECEIVE_V3, ptp) :
591                               read(sg_fd, ptp, sizeof(*ptp)))) < 0) &&
592            ((EAGAIN == errno) || (EBUSY == errno) || (ENOMEM == errno))) {
593         if (ENOMEM == errno)
594             ++enomem;
595         else if (EAGAIN == errno)
596             ++eagains;
597         else
598             ++ebusys;
599         ++k;
600         if (k > 10000) {
601             pr2serr_lk("%s: sg_fd=%d: after %d EAGAINs, unable to find "
602                        "pack_id=%d\n", __func__, sg_fd, k, pack_id);
603             return -1;      /* crash out */
604         }
605         if (wait_ms > 0)
606             this_thread::sleep_for(milliseconds{wait_ms});
607         else if (0 == wait_ms)
608             this_thread::yield();
609         else if (-2 == wait_ms)
610             sleep(0);                   // process yield ??
611     }
612     if (res < 0) {
613         if (ENOMEM == errno)
614             pr_rusage(-1);
615         pr_errno_lk(errno, "%s: %s", __func__, np);
616         return -1;
617     }
618     /* now for the error processing */
619     pack_id = ptp->pack_id;
620     ok = false;
621     switch (sg_err_category3(ptp)) {
622     case SG_LIB_CAT_CLEAN:
623         ok = true;
624         break;
625     case SG_LIB_CAT_RECOVERED:
626         pr2serr_lk("%s: Recovered error on %s, continuing\n", __func__, np);
627         ok = true;
628         break;
629     default: /* won't bother decoding other categories */
630         {
631             lock_guard<mutex> lg(console_mutex);
632             sg_chk_n_print3(np, ptp, 1);
633         }
634         break;
635     }
636     if (ok)
637         nanosecs = ptp->duration;
638     return ok ? 0 : -1;
639 }
640 
641 /* Returns 0 if command injected okay, return -1 for error and 2 for
642  * not done due to queue data size limit struck. */
643 static int
start_sg4_cmd(int sg_fd,command2execute cmd2exe,int pack_id,uint64_t lba,uint8_t * lbp,int xfer_bytes,int flags,bool submit,unsigned int & enomem,unsigned int & eagains,unsigned int & ebusy,unsigned int & e2big,unsigned int & edom)644 start_sg4_cmd(int sg_fd, command2execute cmd2exe, int pack_id, uint64_t lba,
645               uint8_t * lbp, int xfer_bytes, int flags, bool submit,
646               unsigned int & enomem, unsigned int & eagains,
647               unsigned int & ebusy, unsigned int & e2big, unsigned int & edom)
648 {
649     struct sg_io_v4 p4t;
650     uint8_t turCmdBlk[TUR_CMD_LEN] = {0, 0, 0, 0, 0, 0};
651     uint8_t r16CmdBlk[READ16_CMD_LEN] =
652                 {0x88, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
653     uint8_t w16CmdBlk[WRITE16_CMD_LEN] =
654                 {0x8a, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0};
655     uint8_t sense_buffer[64] SG_C_CPP_ZERO_INIT;
656     const char * np = NULL;
657     struct sg_io_v4 * ptp;
658 
659     if (! submit) {
660         pr2serr_lk("%s: logic error, submit must be true, isn't\n", __func__);
661         return -1;
662     }
663     ptp = &p4t;
664     memset(ptp, 0, sizeof(*ptp));
665     switch (cmd2exe) {
666     case SCSI_TUR:
667         np = "TEST UNIT READY";
668         ptp->request = (uint64_t)turCmdBlk;
669         ptp->request_len = sizeof(turCmdBlk);
670         break;
671     case SCSI_READ16:
672         np = "READ(16)";
673         if (lba > 0xffffffff)
674             sg_put_unaligned_be32(lba >> 32, &r16CmdBlk[2]);
675         sg_put_unaligned_be32(lba & 0xffffffff, &r16CmdBlk[6]);
676         ptp->request = (uint64_t)r16CmdBlk;
677         ptp->request_len = sizeof(r16CmdBlk);
678         ptp->din_xferp = (uint64_t)lbp;
679         ptp->din_xfer_len = xfer_bytes;
680         break;
681     case SCSI_WRITE16:
682         np = "WRITE(16)";
683         if (lba > 0xffffffff)
684             sg_put_unaligned_be32(lba >> 32, &w16CmdBlk[2]);
685         sg_put_unaligned_be32(lba & 0xffffffff, &w16CmdBlk[6]);
686         ptp->request = (uint64_t)w16CmdBlk;
687         ptp->request_len = sizeof(w16CmdBlk);
688         ptp->dout_xferp = (uint64_t)lbp;
689         ptp->dout_xfer_len = xfer_bytes;
690         break;
691     }
692     ptp->guard = 'Q';
693     ptp->max_response_len = sizeof(sense_buffer);
694     ptp->response = (uint64_t)sense_buffer;      /* ignored .... */
695     ptp->timeout = DEF_TIMEOUT_MS;
696     ptp->request_extra = pack_id;
697     ptp->flags = flags;
698 
699     for (int k = 0; ioctl(sg_fd, SG_IOSUBMIT, ptp) < 0; ++k) {
700         if ((ENOMEM == errno) && (k < MAX_CONSEC_NOMEMS)) {
701             ++enomem;
702             this_thread::yield();
703             continue;
704         } else if (EAGAIN == errno) {
705             ++eagains;
706             this_thread::yield();
707             continue;
708         } else if (EBUSY == errno) {
709             ++ebusy;
710             this_thread::yield();
711             continue;
712         } else if (E2BIG == errno) {
713             ++e2big;
714             return 2;
715         } else if (EDOM == errno)
716             ++edom;
717         else if (ENOMEM == errno)
718             pr_rusage(-1);
719         pr_errno_lk(errno, "%s: %s, pack_id=%d", __func__, np, pack_id);
720         return -1;
721     }
722     return 0;
723 }
724 
725 static int
finish_sg4_cmd(int sg_fd,command2execute cmd2exe,int & pack_id,bool receive,int wait_ms,unsigned int & enomem,unsigned int & eagains,unsigned int & ebusys,unsigned int & nanosecs)726 finish_sg4_cmd(int sg_fd, command2execute cmd2exe, int & pack_id,
727                bool receive, int wait_ms, unsigned int & enomem,
728                unsigned int & eagains, unsigned int & ebusys,
729                unsigned int & nanosecs)
730 {
731     bool ok;
732     int res, k;
733     uint8_t sense_buffer[64] SG_C_CPP_ZERO_INIT;
734     const char * np = NULL;
735     struct sg_io_v4 * ptp;
736     struct sg_io_v4 p4t;
737 
738     if (! receive) {
739         pr2serr_lk("%s: logic error, receive must be true, isn't\n",
740                    __func__);
741         return -1;
742     }
743     ptp = &p4t;
744     memset(ptp, 0, sizeof(*ptp));
745     switch (cmd2exe) {
746     case SCSI_TUR:
747         np = "TEST UNIT READY";
748         break;
749     case SCSI_READ16:
750         np = "READ(16)";
751         break;
752     case SCSI_WRITE16:
753         np = "WRITE(16)";
754         break;
755     }
756     ptp->guard = 'Q';
757     ptp->max_response_len = sizeof(sense_buffer);
758     ptp->response = (uint64_t)sense_buffer;
759     ptp->timeout = DEF_TIMEOUT_MS;
760     /* if SG_SET_FORCE_PACK_ID, then need to set ptp->dxfer_direction */
761     ptp->request_extra = pack_id;
762 
763     k = 0;
764     while ((((res = ioctl(sg_fd, SG_IORECEIVE, ptp))) < 0) &&
765            ((EAGAIN == errno) || (EBUSY == errno))) {
766         if (EAGAIN == errno)
767             ++eagains;
768         else
769             ++ebusys;
770         ++k;
771         if (k > 10000) {
772             pr2serr_lk("%s: sg_fd=%d: after %d EAGAINs, unable to find "
773                        "pack_id=%d\n", __func__, sg_fd, k, pack_id);
774             return -1;      /* crash out */
775         }
776         if (wait_ms > 0)
777             this_thread::sleep_for(milliseconds{wait_ms});
778         else if (0 == wait_ms)
779             this_thread::yield();
780         else if (-2 == wait_ms)
781             sleep(0);                   // process yield ??
782     }
783     if (res < 0) {
784         if (ENOMEM == errno) {
785             ++enomem;
786             pr_rusage(-1);
787         }
788         pr_errno_lk(errno, "%s: %s", __func__, np);
789         return -1;
790     }
791     /* now for the error processing */
792     pack_id = ptp->request_extra;
793     ok = false;
794     res = sg_err_category_new(ptp->device_status, ptp->transport_status,
795                               ptp->driver_status,
796                               (const uint8_t *)ptp->response,
797                               ptp->response_len);
798     switch (res) {
799     case SG_LIB_CAT_CLEAN:
800         ok = true;
801         break;
802     case SG_LIB_CAT_RECOVERED:
803         pr2serr_lk("%s: Recovered error on %s, continuing\n", __func__, np);
804         ok = true;
805         break;
806     default: /* won't bother decoding other categories */
807         {
808             lock_guard<mutex> lg(console_mutex);
809 
810             sg_linux_sense_print(np, ptp->device_status,
811                                  ptp->transport_status,
812                                  ptp->driver_status,
813                                  (const uint8_t *)ptp->response,
814                                  ptp->response_len, true);
815         }
816         break;
817     }
818     if (ok)
819         nanosecs = ptp->duration;
820     return ok ? 0 : -1;
821 }
822 
823 static int
num_submitted(int sg_fd)824 num_submitted(int sg_fd)
825 {
826     uint32_t num_subm_wait = 0;
827     struct sg_extended_info sei;
828     struct sg_extended_info *seip = &sei;
829     const char * err = NULL;
830 
831     memset(seip, 0, sizeof(*seip));
832     seip->sei_wr_mask |= SG_SEIM_READ_VAL;
833     seip->sei_rd_mask |= SG_SEIM_READ_VAL;
834     seip->read_value = SG_SEIRV_SUBMITTED;
835     if (ioctl(sg_fd, SG_SET_GET_EXTENDED, seip) < 0)
836         err = "ioctl(SG_SET_GET_EXTENDED) failed\n";
837     else
838         num_subm_wait = seip->read_value;
839     if (err)
840         pr2serr_lk("%s: %s, errno=%d\n", __func__, err, errno);
841     return err ? -1 : (int)num_subm_wait;
842 }
843 
844 static int
pr_rusage(int id)845 pr_rusage(int id)
846 {
847     int res;
848     struct rusage ru;
849 
850     res = getrusage(RUSAGE_SELF /* RUSAGE_THREAD */, &ru);
851     if (res < 0) {
852         pr2serr_lk("%d->id: %s: getrusage() failed, errno=%d\n", id,
853                    __func__, errno);
854         return res;
855     }
856     pr2serr_lk("%d->id: maxrss=%ldKB  nvcsw=%ld nivcsw=%ld  majflt=%ld\n", id,
857                ru.ru_maxrss, ru.ru_nvcsw, ru.ru_nivcsw, ru.ru_majflt);
858     return 0;
859 }
860 
861 static void
work_sync_thread(int id,const char * dev_name,unsigned int,struct opts_t * op)862 work_sync_thread(int id, const char * dev_name, unsigned int /* hi_lba */,
863                  struct opts_t * op)
864 {
865     bool is_rw = (SCSI_TUR != op->c2e);
866     int k, sg_fd, err, rs, n, sense_cat, ret;
867     int vb = op->verbose;
868     int num_errs = 0;
869     int thr_sync_starts = 0;
870     struct sg_pt_base * ptp = NULL;
871     uint8_t cdb[6];
872     uint8_t sense_b[32] SG_C_CPP_ZERO_INIT;
873     char b[120];
874 
875     if (is_rw) {
876         pr2serr_lk("id=%d: only support TUR here for now\n", id);
877         goto err_out;
878     }
879     if (op->verbose)
880         pr2serr_lk("id=%d: using libsgutils generic sync passthrough\n", id);
881 
882     if ((sg_fd = sg_cmds_open_device(dev_name, false /* ro */, vb)) < 0) {
883         pr2serr_lk("id=%d: error opening file: %s: %s\n", id, dev_name,
884                    safe_strerror(-sg_fd));
885         if (ENOMEM == -sg_fd)
886             pr_rusage(id);
887         goto err_out;
888     }
889     if (vb > 2)
890         pr2serr_lk(">>>> id=%d: open(%s) --> fd=%d\n", id, dev_name, sg_fd);
891 
892     ptp = construct_scsi_pt_obj_with_fd(sg_fd, vb);
893     err = 0;
894     if ((NULL == ptp) || ((err = get_scsi_pt_os_err(ptp)))) {
895         ret = sg_convert_errno(err ? err : ENOMEM);
896         sg_exit2str(ret, true, sizeof(b), b);
897         pr2serr_lk("id=%d: construct_scsi_pt_obj_with_fd: %s\n", id, b);
898         goto err_out;
899     }
900     for (k = 0; k < op->num_per_thread; ++k) {
901         /* Might get Unit Attention on first invocation */
902         memset(cdb, 0, sizeof(cdb));    /* TUR's cdb is 6 zeros */
903         set_scsi_pt_cdb(ptp, cdb, sizeof(cdb));
904         set_scsi_pt_sense(ptp, sense_b, sizeof(sense_b));
905         set_scsi_pt_packet_id(ptp, uniq_pack_id.fetch_add(1));
906         ++thr_sync_starts;
907         rs = do_scsi_pt(ptp, -1, DEF_PT_TIMEOUT, vb);
908         n = sg_cmds_process_resp(ptp, "Test unit ready", rs,
909                                  (0 == k), vb, &sense_cat);
910         if (-1 == n) {
911             ret = sg_convert_errno(get_scsi_pt_os_err(ptp));
912             sg_exit2str(ret, true, sizeof(b), b);
913             pr2serr_lk("id=%d: do_scsi_pt: %s\n", id, b);
914             goto err_out;
915         } else if (-2 == n) {
916             switch (sense_cat) {
917             case SG_LIB_CAT_RECOVERED:
918             case SG_LIB_CAT_NO_SENSE:
919                 break;
920             case SG_LIB_CAT_NOT_READY:
921                 ++num_errs;
922                 if (1 ==  op->num_per_thread) {
923                     pr2serr_lk("id=%d: device not ready\n", id);
924                 }
925                 break;
926             case SG_LIB_CAT_UNIT_ATTENTION:
927                 ++num_errs;
928                 if (vb)
929                     pr2serr_lk("Ignoring Unit attention (sense key)\n");
930                 break;
931             default:
932                 ++num_errs;
933                 if (1 == op->num_per_thread) {
934                     sg_get_category_sense_str(sense_cat, sizeof(b), b, vb);
935                     pr2serr_lk("%s\n", b);
936                     goto err_out;
937                 }
938                 break;
939             }
940         }
941         clear_scsi_pt_obj(ptp);
942     }
943 err_out:
944     if (ptp)
945         destruct_scsi_pt_obj(ptp);
946     if (num_errs > 0)
947         pr2serr_lk("id=%d: number of errors: %d\n", id, num_errs);
948     sync_starts += thr_sync_starts;
949 }
950 
951 static void
work_thread(int id,struct opts_t * op)952 work_thread(int id, struct opts_t * op)
953 {
954     bool is_rw = (SCSI_TUR != op->c2e);
955     bool need_finish, repeat;
956     bool once = false;
957     bool once1000 = false;
958     bool once_2000 = false;
959     bool once_4000 = false;
960     bool once5000 = false;
961     bool once_6000 = false;
962     bool once_7000 = false;
963     bool once10_000 = false;
964     bool once20_000 = false;
965     int open_flags = O_RDWR;
966     int thr_async_starts = 0;
967     int thr_async_finishes = 0;
968     int vb = op->verbose;
969     int k, n, res, sg_fd, num_outstanding, do_inc, npt, pack_id, sg_flags;
970     int num_waiting_read, sz, encore_pack_id, ask, j, m, o;
971     int prev_pack_id, blk_sz;
972     unsigned int thr_enomem_count = 0;
973     unsigned int thr_start_eagain_count = 0;
974     unsigned int thr_start_ebusy_count = 0;
975     unsigned int thr_start_e2big_count = 0;
976     unsigned int thr_fin_eagain_count = 0;
977     unsigned int thr_fin_ebusy_count = 0;
978     unsigned int thr_start_edom_count = 0;
979     int needed_sz = op->lb_sz * op->num_lbs;
980     unsigned int nanosecs;
981     unsigned int hi_lba;
982     uint64_t lba;
983     uint64_t sum_nanosecs = 0;
984     uint8_t * lbp;
985     uint8_t * free_lbp = NULL;
986     uint8_t * wrkMmap = NULL;
987     const char * dev_name;
988     const char * err = NULL;
989     Rand_uint * ruip = NULL;
990     char ebuff[EBUFF_SZ];
991     struct pollfd  pfd[1];
992     list<pair<uint8_t *, uint8_t *> > free_lst;   /* of aligned lb buffers */
993     map<int, pair<uint8_t *, uint8_t *> > pi2buff;/* pack_id -> lb buffer */
994     map<int, uint64_t> pi_2_lba;            /* pack_id -> LBA */
995     pair<uint8_t *, uint8_t *> encore_lbps;
996 
997     /* device name and hi_lba may depend on id */
998     n = op->dev_names.size();
999     dev_name = op->dev_names[id % n];
1000     if (op->blk_szs.size() >= (unsigned)n)
1001         blk_sz = op->blk_szs[id % n];
1002     else
1003         blk_sz = DEF_LB_SZ;
1004     if ((UINT_MAX == op->hi_lba) && (n == (int)op->hi_lbas.size()))
1005         hi_lba = op->hi_lbas[id % n];
1006     else
1007         hi_lba = op->hi_lba;
1008 
1009     if (vb) {
1010         if ((vb > 1) && hi_lba)
1011             pr2serr_lk("Enter work_t_id=%d using %s\n"
1012                        "    LBA range: 0x%x to 0x%x (inclusive)\n",
1013                        id, dev_name, (unsigned int)op->lba, hi_lba);
1014         else
1015             pr2serr_lk("Enter work_t_id=%d using %s\n", id, dev_name);
1016     }
1017     if (op->generic_sync) {
1018         work_sync_thread(id, dev_name, hi_lba, op);
1019         return;
1020     }
1021     if (! op->block)
1022         open_flags |= O_NONBLOCK;
1023 
1024     sg_fd = open(dev_name, open_flags);
1025     if (sg_fd < 0) {
1026         pr_errno_lk(errno, "%s: id=%d, error opening file: %s", __func__, id,
1027                     dev_name);
1028         if (ENOMEM == -sg_fd)
1029             pr_rusage(id);
1030         return;
1031     }
1032     if (vb > 2)
1033         pr2serr_lk(">>>> id=%d: open(%s) --> fd=%d\n", id, dev_name, sg_fd);
1034     if (op->pack_id_force) {
1035         k = 1;
1036         if (ioctl(sg_fd, SG_SET_FORCE_PACK_ID, &k) < 0)
1037             pr2serr_lk("ioctl(SG_SET_FORCE_PACK_ID) failed, errno=%d %s\n",
1038                        errno, strerror(errno));
1039     }
1040     if (op->sg_vn_ge_40000) {
1041         if (ioctl(sg_fd, SG_GET_RESERVED_SIZE, &k) >= 0) {
1042             if (needed_sz > k)
1043                 ioctl(sg_fd, SG_SET_RESERVED_SIZE, &needed_sz);
1044         }
1045         if (op->sg_vn_ge_40030 && (op->cmd_time || op->masync)) {
1046             struct sg_extended_info sei;
1047             struct sg_extended_info * seip;
1048 
1049             seip = &sei;
1050             memset(seip, 0, sizeof(*seip));
1051             seip->sei_wr_mask |= SG_SEIM_CTL_FLAGS;
1052             seip->sei_rd_mask |= SG_SEIM_CTL_FLAGS;
1053             if (op->cmd_time) {
1054                 seip->ctl_flags_wr_mask |= SG_CTL_FLAGM_TIME_IN_NS;
1055                 seip->ctl_flags_rd_mask |= SG_CTL_FLAGM_TIME_IN_NS;
1056                 seip->ctl_flags |= SG_CTL_FLAGM_TIME_IN_NS;
1057             }
1058             if (op->masync) {
1059                 seip->ctl_flags_wr_mask |= SG_CTL_FLAGM_MORE_ASYNC;
1060                 seip->ctl_flags |= SG_CTL_FLAGM_MORE_ASYNC;
1061             }
1062             if (op->excl) {
1063                 seip->ctl_flags_wr_mask |= SG_CTL_FLAGM_EXCL_WAITQ;
1064                 seip->ctl_flags |= SG_CTL_FLAGM_EXCL_WAITQ;
1065             }
1066             if (ioctl(sg_fd, SG_SET_GET_EXTENDED, seip) < 0) {
1067                 pr2serr_lk("ioctl(EXTENDED(TIME_IN_NS)) failed, errno=%d %s\n",
1068                            errno, strerror(errno));
1069             }
1070             if (op->cmd_time &&
1071                 (! (SG_CTL_FLAGM_TIME_IN_NS & seip->ctl_flags))) {
1072                 memset(seip, 0, sizeof(*seip));
1073                 seip->sei_rd_mask |= SG_SEIM_CTL_FLAGS;
1074                 seip->sei_wr_mask |= SG_SEIM_CTL_FLAGS;
1075                 seip->ctl_flags_wr_mask |= SG_CTL_FLAGM_TIME_IN_NS;
1076                 seip->ctl_flags |= SG_CTL_FLAGM_TIME_IN_NS;
1077                 if (ioctl(sg_fd, SG_SET_GET_EXTENDED, seip) < 0)
1078                     pr2serr_lk("ioctl(EXTENDED(TIME_IN_NS)) failed, "
1079                                "errno=%d %s\n", errno, strerror(errno));
1080                 else if (vb > 1)
1081                     pr2serr_lk("t_id: %d: set TIME_IN_NS flag\n", id);
1082             }
1083         }
1084     }
1085     if (is_rw && op->mmap_io) {
1086 
1087         if (ioctl(sg_fd, SG_GET_RESERVED_SIZE, &sz) < 0) {
1088             pr2serr_lk("t_id=%d: ioctl(SG_GET_RESERVED_SIZE) errno=%d\n",
1089                        id, errno);
1090             return;
1091         }
1092         if (sz < needed_sz) {
1093             sz = needed_sz;
1094             if (ioctl(sg_fd, SG_SET_RESERVED_SIZE, &sz) < 0) {
1095                 pr2serr_lk("t_id=%d: ioctl(SG_SET_RESERVED_SIZE) errno=%d\n",
1096                            id, errno);
1097                 return;
1098             }
1099             if (ioctl(sg_fd, SG_GET_RESERVED_SIZE, &sz) < 0) {
1100                 pr2serr_lk("t_id=%d: ioctl(SG_GET_RESERVED_SIZE) errno=%d\n",
1101                            id, errno);
1102                 return;
1103             }
1104             if (sz < needed_sz) {
1105                 pr2serr_lk("t_id=%d: unable to grow reserve buffer to %d "
1106                            "bytes\n", id, needed_sz);
1107                 return;
1108             }
1109         }
1110         wrkMmap = (uint8_t *)mmap(NULL, needed_sz, PROT_READ | PROT_WRITE,
1111                                   MAP_SHARED, sg_fd, 0);
1112         if (MAP_FAILED == wrkMmap) {
1113             int ern = errno;
1114 
1115             pr2serr_lk("t_id=%d: mmap() failed, errno=%d\n", id, ern);
1116             return;
1117         }
1118     }
1119     pfd[0].fd = sg_fd;
1120     pfd[0].events = POLLIN;
1121     if (is_rw && hi_lba) {
1122         unsigned int seed = get_urandom_uint();
1123 
1124         if (vb > 1)
1125             pr2serr_lk("  id=%d, /dev/urandom seed=0x%x\n", id, seed);
1126         ruip = new Rand_uint((unsigned int)op->lba, hi_lba, seed);
1127     }
1128 
1129     sg_flags = 0;
1130     if (BLQ_AT_TAIL == op->blqd)
1131         sg_flags |= SG_FLAG_Q_AT_TAIL;
1132     else if (BLQ_AT_HEAD == op->blqd)
1133         sg_flags |= SG_FLAG_Q_AT_HEAD;
1134     if (op->direct)
1135         sg_flags |= SG_FLAG_DIRECT_IO;
1136     if (op->mmap_io)
1137         sg_flags |= SG_FLAG_MMAP_IO;
1138     if (op->no_xfer)
1139         sg_flags |= SG_FLAG_NO_DXFER;
1140     if (vb > 1)
1141         pr2serr_lk("  id=%d, sg_flags=0x%x, %s cmds\n", id, sg_flags,
1142                    ((SCSI_TUR == op->c2e) ? "TUR":
1143                     ((SCSI_READ16 == op->c2e) ? "READ" : "WRITE")));
1144 
1145     npt = op->num_per_thread;
1146     need_finish = false;
1147     lba = 0;
1148     pack_id = 0;
1149     prev_pack_id = 0;
1150     encore_pack_id = 0;
1151     do_inc = 0;
1152     /* main loop, continues until num_per_thread exhausted and there are
1153      * no more outstanding responses */
1154     for (k = 0, m = 0, o=0, num_outstanding = 0; (k < npt) || num_outstanding;
1155          k = do_inc ? k + 1 : k, ++o) {
1156         int num_to_read = 0;
1157 
1158         if (do_inc)
1159             m = 0;
1160         else {
1161             ++m;
1162             if (m > 100) {
1163                 if (vb)
1164                     pr2serr_lk("%d->id: no main loop inc =%d times\n", id, m);
1165                 m = 0;
1166             }
1167         }
1168         if (vb && (! once1000) && (num_outstanding >= 1000)) {
1169             int num_waiting;
1170             int num_subm = (op->sg_vn_ge_40030) ? num_submitted(sg_fd) :
1171                                                   pi2buff.size();
1172 
1173             once1000 = true;
1174             if (ioctl(sg_fd, SG_GET_NUM_WAITING, &num_waiting) < 0) {
1175                 err = "ioctl(SG_GET_NUM_WAITING) failed";
1176                 break;
1177             }
1178             pr2serr_lk("%d->id: once 1000: k=%d, submitted=%d waiting=%d; "
1179                        "pi2buff.sz=%u\n", id, k, num_subm, num_waiting,
1180                        (uint32_t)pi2buff.size());
1181             pr_rusage(id);
1182         }
1183         if (vb && ! once5000 && num_outstanding >= 5000) {
1184             int num_waiting;
1185             int num_subm = (op->sg_vn_ge_40030) ? num_submitted(sg_fd) :
1186                                                   pi2buff.size();
1187 
1188             once5000 = true;
1189             if (ioctl(sg_fd, SG_GET_NUM_WAITING, &num_waiting) < 0) {
1190                 err = "ioctl(SG_GET_NUM_WAITING) failed";
1191                 break;
1192             }
1193             pr2serr_lk("%d->id: once 5000: k=%d, submitted=%d waiting=%d\n",
1194                        id, k, num_subm, num_waiting);
1195             pr_rusage(id);
1196         }
1197         if (vb && ! once_7000 && num_outstanding >= 7000) {
1198             int num_waiting;
1199             int num_subm = (op->sg_vn_ge_40030) ? num_submitted(sg_fd) :
1200                                                   pi2buff.size();
1201 
1202             once_7000 = true;
1203             if (ioctl(sg_fd, SG_GET_NUM_WAITING, &num_waiting) < 0) {
1204                 err = "ioctl(SG_GET_NUM_WAITING) failed";
1205                 break;
1206             }
1207             pr2serr_lk("%d->id: once 7000: k=%d, submitted=%d waiting=%d\n",
1208                        id, k, num_subm, num_waiting);
1209             pr_rusage(id);
1210         }
1211         if (vb && ! once10_000 && num_outstanding >= 10000) {
1212             int num_waiting;
1213             int num_subm = (op->sg_vn_ge_40030) ? num_submitted(sg_fd) :
1214                                                   pi2buff.size();
1215 
1216             once10_000 = true;
1217             if (ioctl(sg_fd, SG_GET_NUM_WAITING, &num_waiting) < 0) {
1218                 err = "ioctl(SG_GET_NUM_WAITING) failed";
1219                 break;
1220             }
1221             pr2serr_lk("%d->id: once 10^4: k=%d, submitted=%d waiting=%d\n",
1222                        id, k, num_subm, num_waiting);
1223             pr_rusage(id);
1224         }
1225         if (vb && ! once20_000 && num_outstanding >= 20000) {
1226             int num_waiting;
1227             int num_subm = (op->sg_vn_ge_40030) ? num_submitted(sg_fd) :
1228                                                   pi2buff.size();
1229 
1230             once20_000 = true;
1231             if (ioctl(sg_fd, SG_GET_NUM_WAITING, &num_waiting) < 0) {
1232                 err = "ioctl(SG_GET_NUM_WAITING) failed";
1233                 break;
1234             }
1235             pr2serr_lk("%d->id: once 20000: k=%d, submitted=%d waiting=%d\n",
1236                        id, k, num_subm, num_waiting);
1237             pr_rusage(id);
1238         }
1239         do_inc = 0;
1240         if ((num_outstanding < op->maxq_per_thread) && (k < npt)) {
1241             do_inc = 1;
1242             if (need_finish) {
1243                 pack_id = encore_pack_id;
1244                 need_finish = false;
1245                 repeat = true;
1246             } else {
1247                 prev_pack_id = pack_id;
1248                 pack_id = uniq_pack_id.fetch_add(1);
1249                 repeat = false;
1250             }
1251             if (is_rw) {    /* get new lb buffer or one from free list */
1252                 if (free_lst.empty()) {
1253                     lbp = sg_memalign(op->lb_sz * op->num_lbs, 0, &free_lbp,
1254                                       false);
1255                     if (NULL == lbp) {
1256                         err = "out of memory";
1257                         break;
1258                     }
1259                 } else if (! repeat) {
1260                     lbp = free_lst.back().first;
1261                     free_lbp = free_lst.back().second;
1262                     free_lst.pop_back();
1263                 } else {
1264                     lbp = encore_lbps.first;
1265                     free_lbp = encore_lbps.second;
1266                     if (vb && !once && free_lst.size() > 1000) {
1267                         once = true;
1268                         pr2serr_lk("%d->id: free_lst.size() over 1000\n", id);
1269                     }
1270                     if (vb && !once_2000 && free_lst.size() > 2000) {
1271                         once_2000 = true;
1272                         pr2serr_lk("%d->id: free_lst.size() over 2000\n", id);
1273                     }
1274                     if (vb && !once_6000 && free_lst.size() > 6000) {
1275                         once_2000 = true;
1276                         pr2serr_lk("%d->id: free_lst.size() over 6000\n", id);
1277                     }
1278                 }
1279             } else
1280                 lbp = NULL;
1281             if (is_rw) {
1282                 if (ruip) {
1283                     if (! repeat) {
1284                         lba = ruip->get();  /* fetch a random LBA */
1285                         if (vb > 3)
1286                             pr2serr_lk("  id=%d: start IO at lba=0x%" PRIx64
1287                                        "\n", id, lba);
1288                     }
1289                 } else
1290                     lba = op->lba;
1291             } else
1292                 lba = 0;
1293             if (vb > 4)
1294                 pr2serr_lk("t_id=%d: starting pack_id=%d\n", id, pack_id);
1295             res = (op->v4) ?
1296                 start_sg4_cmd(sg_fd, op->c2e, pack_id, lba, lbp,
1297                               blk_sz * op->num_lbs, sg_flags, op->submit,
1298                               thr_enomem_count, thr_start_eagain_count,
1299                               thr_start_ebusy_count, thr_start_e2big_count,
1300                               thr_start_edom_count)  :
1301                 start_sg3_cmd(sg_fd, op->c2e, pack_id, lba, lbp,
1302                               blk_sz * op->num_lbs, sg_flags, op->submit,
1303                               thr_enomem_count, thr_start_eagain_count,
1304                               thr_start_ebusy_count, thr_start_e2big_count,
1305                               thr_start_edom_count);
1306             if (res) {
1307                 if (res > 1) { /* here if E2BIG, start not done, try finish */
1308                     do_inc = 0;
1309                     need_finish = true;
1310                     encore_pack_id = pack_id;
1311                     pack_id = prev_pack_id;
1312                     encore_lbps = make_pair(lbp, free_lbp);
1313                     if (vb > 2)
1314                         pr2serr_lk("t_id=%d: E2BIG hit, prev_pack_id=%d, "
1315                                    "encore_pack_id=%d\n", id, prev_pack_id,
1316                                    encore_pack_id);
1317                 } else {
1318                     err = "start_sg3_cmd()";
1319                     break;
1320                 }
1321             } else {    /* no error */
1322                 ++thr_async_starts;
1323                 ++num_outstanding;
1324                 pi2buff[pack_id] = make_pair(lbp, free_lbp);
1325                 if (ruip)
1326                     pi_2_lba[pack_id] = lba;
1327             }
1328             if (vb && !once && (pi2buff.size() > 1000)) {
1329                 once = true;
1330                 pr2serr_lk("%d->id: pi2buff.size() over 1000 (b)\n", id);
1331             }
1332             if (vb && !once_2000 && free_lst.size() > 2000) {
1333                 once_2000 = true;
1334                 pr2serr_lk("%d->id: free_lst.size() over 2000 (b)\n", id);
1335             }
1336             if (vb && !once_6000 && free_lst.size() > 6000) {
1337                 once_2000 = true;
1338                 pr2serr_lk("%d->id: free_lst.size() over 6000 (b)\n", id);
1339             }
1340         }
1341         if (need_finish) {
1342             num_waiting_read = 0;
1343             if (ioctl(sg_fd, SG_GET_NUM_WAITING, &num_waiting_read) < 0) {
1344                 err = "ioctl(SG_GET_NUM_WAITING) failed";
1345                 break;
1346             } else if (vb > 3)
1347                 pr2serr_lk("t_id=%d: num_waiting_read=%d\n", id,
1348                            num_waiting_read);
1349             if (num_waiting_read > 0)
1350                 num_to_read = num_waiting_read;
1351             else {
1352                 struct timespec tspec = {0, 100000 /* 100 usecs */};
1353 
1354                 nanosleep(&tspec, NULL);
1355                 if (vb > 3)
1356                     pr2serr_lk("t_id=%d: E2BIG, 100 usecs sleep\n", id);
1357                 // err = "strange, E2BIG but nothing to read";
1358                 // break;
1359             }
1360         } else if ((num_outstanding >= op->maxq_per_thread) || (k >= npt)) {
1361             /* full queue or finished injecting */
1362             num_waiting_read = 0;
1363             if (ioctl(sg_fd, SG_GET_NUM_WAITING, &num_waiting_read) < 0) {
1364                 err = "ioctl(SG_GET_NUM_WAITING) failed";
1365                 break;
1366             }
1367             if (1 == num_waiting_read)
1368                 num_to_read = num_waiting_read;
1369             else if (num_waiting_read > 0) {
1370                 if (k >= npt)
1371                     num_to_read = num_waiting_read;
1372                 else {
1373                     switch (op->myqd) {
1374                     case MYQD_LOW:
1375                         num_to_read = num_waiting_read;
1376                         break;
1377                     case MYQD_MEDIUM:
1378                         num_to_read = num_waiting_read / 2;
1379                         break;
1380                     case MYQD_HIGH:
1381                     default:
1382                         if (op->ovn > 0) {
1383                             if (op->sg_vn_ge_40030) {
1384                                 int num_subm = num_submitted(sg_fd);
1385 
1386                                 if (num_subm > op->ovn) {
1387                                     num_to_read = num_waiting_read > 0 ?
1388                                                     num_waiting_read : 1;
1389                                     break;
1390                                 }
1391                             } else {
1392                                 if (num_waiting_read > (op->ovn / 2)) {
1393                                     num_to_read = num_waiting_read / 2;
1394                                     break;
1395                                 }
1396                             }
1397                         }
1398                         num_to_read = 1;
1399                         break;
1400                     }
1401                 }
1402             } else {    /* nothing waiting to be read */
1403                 if (op->sg_vn_ge_40030) {
1404                     int val = num_submitted(sg_fd);
1405 
1406                     if (0 == val) {
1407                         err = "nothing submitted now ??";
1408                         break;
1409                     } else if (val < 0) {
1410                         err = "num_submitted failed";
1411                         break;
1412                     }
1413                 }
1414                 n = (op->wait_ms > 0) ? op->wait_ms : 0;
1415                 if (n > 0) {
1416                     for (j = 0; (j < 1000000) &&
1417                          (0 == (res = poll(pfd, 1, n)));
1418                          ++j)
1419                         ;
1420                     if (j >= 1000000) {
1421                         err = "poll() looped 1 million times";
1422                         break;
1423                     }
1424                     if (res < 0) {
1425                         err = "poll(wait_ms) failed";
1426                         break;
1427                     }
1428                 } else {
1429                     struct timespec ts;
1430 
1431                     ts.tv_sec = 0;
1432                     ts.tv_nsec = DEF_NANOSEC_WAIT;
1433                     if (nanosleep(&ts, NULL) < 0) {
1434                         err = "nanosleep() failed";
1435                         break;
1436                     }
1437                 }
1438             }
1439         } else {        /* not full, not finished injecting */
1440             if (MYQD_HIGH == op->myqd) {
1441                 num_to_read = 0;
1442                 if (op->ovn) {
1443                     if (op->sg_vn_ge_40030) {
1444                         int num_subm = num_submitted(sg_fd);
1445 
1446                         if (num_subm > op->ovn)
1447                             num_to_read = num_waiting_read > 0 ?
1448                                             num_waiting_read : 1;
1449                     } else {
1450                         num_waiting_read = 0;
1451                         if (ioctl(sg_fd, SG_GET_NUM_WAITING,
1452                                   &num_waiting_read) < 0) {
1453                             err = "ioctl(SG_GET_NUM_WAITING) failed";
1454                             break;
1455                         }
1456                         if (num_waiting_read > (op->ovn / 2))
1457                             num_to_read = num_waiting_read / 2;
1458                     }
1459                 }
1460             } else {
1461                 num_waiting_read = 0;
1462                 if (ioctl(sg_fd, SG_GET_NUM_WAITING, &num_waiting_read) < 0) {
1463                     err = "ioctl(SG_GET_NUM_WAITING) failed";
1464                     break;
1465                 }
1466                 if (num_waiting_read > 0)
1467                     num_to_read = num_waiting_read /
1468                                   ((MYQD_LOW == op->myqd) ? 1 : 2);
1469                 else
1470                     num_to_read = 0;
1471             }
1472         }
1473 
1474         if (vb && !once_4000 && (num_to_read > 4000)) {
1475             once_4000 = true;
1476             pr2serr_lk("%d->id: num_to_read=%d\n", id, num_to_read);
1477         }
1478         while (num_to_read > 0) {
1479             --num_to_read;
1480             if (op->pack_id_force) {
1481                 j = pi2buff.size();
1482                 if (j > 0)
1483                     pack_id = pi2buff.begin()->first;
1484                 else
1485                     pack_id = -1;
1486             } else
1487                 pack_id = -1;
1488             ask = pack_id;
1489             res = (op->v4) ?
1490                     finish_sg4_cmd(sg_fd, op->c2e, pack_id, op->submit,
1491                                    op->wait_ms, thr_enomem_count,
1492                                    thr_fin_eagain_count, thr_fin_ebusy_count,
1493                                    nanosecs)           :
1494                     finish_sg3_cmd(sg_fd, op->c2e, pack_id, op->submit,
1495                                    op->wait_ms, thr_enomem_count,
1496                                    thr_fin_eagain_count, thr_fin_ebusy_count,
1497                                    nanosecs);
1498             if (res) {
1499                 err = "finish_sg3_cmd()";
1500                 if (ruip && (pack_id > 0)) {
1501                     auto q = pi_2_lba.find(pack_id);
1502 
1503                     if (q != pi_2_lba.end()) {
1504                         snprintf(ebuff, sizeof(ebuff), "%s: lba=0x%" PRIx64 ,
1505                                  err, q->second);
1506                         err = ebuff;
1507                     }
1508                 }
1509                 break;
1510             }
1511             if (op->cmd_time && op->sg_vn_ge_40030)
1512                 sum_nanosecs += nanosecs;
1513             ++thr_async_finishes;
1514             --num_outstanding;
1515             if (vb > 4)
1516                 pr2serr_lk("t_id=%d: finishing pack_id ask=%d, got=%d, "
1517                            "outstanding=%d\n", id, ask, pack_id,
1518                            num_outstanding);
1519             auto p = pi2buff.find(pack_id);
1520 
1521             if (p == pi2buff.end()) {
1522                 snprintf(ebuff, sizeof(ebuff), "pack_id=%d from "
1523                          "finish_sg3_cmd() not found\n", pack_id);
1524                 if (! err)
1525                     err = ebuff;
1526             } else {
1527                 lbp = p->second.first;
1528                 free_lbp = p->second.second;
1529                 pi2buff.erase(p);
1530                 if (lbp)
1531                     free_lst.push_front(make_pair(lbp, free_lbp));
1532             }
1533             if (ruip && (pack_id > 0)) {
1534                 auto q = pi_2_lba.find(pack_id);
1535 
1536                 if (q != pi_2_lba.end()) {
1537                     if (vb > 3)
1538                         pr2serr_lk("    id=%d: finish IO at lba=0x%" PRIx64
1539                                    "\n", id, q->second);
1540                     pi_2_lba.erase(q);
1541                 }
1542             }
1543             if (err)
1544                 break;
1545         }       /* end of while loop counting down num_to_read */
1546         if (err)
1547             break;
1548     }           /* end of for loop over npt (number per thread) */
1549     if (vb)
1550         pr2serr_lk("%d->id: leaving main thread loop; k=%d, o=%d\n", id, k,
1551                    o);
1552     close(sg_fd);       // sg driver will handle any commands "in flight"
1553     if (ruip)
1554         delete ruip;
1555 
1556     if (err || (k < npt)) {
1557         if (k < npt)
1558             pr2serr_lk("t_id=%d FAILed at iteration %d%s%s\n", id, k,
1559                        (err ? ", Reason: " : ""), (err ? err : ""));
1560         else
1561             pr2serr_lk("t_id=%d FAILed on last%s%s\n", id,
1562                        (err ? ", Reason: " : ""), (err ? err : ""));
1563     }
1564     n = pi2buff.size();
1565     if (n > 0)
1566         pr2serr_lk("t_id=%d Still %d elements in pi2buff map on "
1567                    "exit\n", id, n);
1568     for (k = 0; ! free_lst.empty(); ++k) {
1569         lbp = free_lst.back().first;
1570         free_lbp = free_lst.back().second;
1571         free_lst.back().second = NULL;
1572         free_lst.pop_back();
1573         if (vb > 6)
1574             pr2serr_lk("t_id=%d freeing %p (free_ %p)\n", id, lbp, free_lbp);
1575         if (free_lbp) {
1576             free(free_lbp);
1577             free_lbp = NULL;
1578         }
1579     }
1580     if ((vb > 2) && (k > 0))
1581         pr2serr_lk("%d->id: Maximum number of READ/WRITEs queued: %d\n",
1582                    id, k);
1583     async_starts += thr_async_starts;
1584     async_finishes += thr_async_finishes;
1585     start_eagain_count += thr_start_eagain_count;
1586     start_ebusy_count += thr_start_ebusy_count;
1587     start_e2big_count += thr_start_e2big_count;
1588     fin_eagain_count += thr_fin_eagain_count;
1589     fin_ebusy_count += thr_fin_ebusy_count;
1590     enomem_count += thr_enomem_count;
1591     start_edom_count += thr_start_edom_count;
1592     if (op->cmd_time && op->sg_vn_ge_40030 && (npt > 0)) {
1593         pr2serr_lk("t_id=%d average nanosecs per cmd: %" PRId64
1594                    "\n", id, sum_nanosecs / npt);
1595     }
1596 }
1597 
1598 #define INQ_REPLY_LEN 96
1599 #define INQ_CMD_LEN 6
1600 
1601 /* Send INQUIRY and fetches response. If okay puts PRODUCT ID field
1602  * in b (up to m_blen bytes). Does not use O_EXCL flag. Returns 0 on success,
1603  * else -1 . */
1604 static int
do_inquiry_prod_id(const char * dev_name,int block,int & sg_ver_num,char * b,int b_mlen)1605 do_inquiry_prod_id(const char * dev_name, int block, int & sg_ver_num,
1606                    char * b, int b_mlen)
1607 {
1608     int sg_fd, ok, ret;
1609     struct sg_io_hdr pt;
1610     uint8_t inqCmdBlk [INQ_CMD_LEN] =
1611                                 {0x12, 0, 0, 0, INQ_REPLY_LEN, 0};
1612     uint8_t inqBuff[INQ_REPLY_LEN];
1613     uint8_t sense_buffer[64] SG_C_CPP_ZERO_INIT;
1614     int open_flags = O_RDWR;    /* O_EXCL | O_RDONLY fails with EPERM */
1615 
1616     if (! block)
1617         open_flags |= O_NONBLOCK;
1618     sg_fd = open(dev_name, open_flags);
1619     if (sg_fd < 0) {
1620         pr_errno_lk(errno, "%s: error opening file: %s", __func__, dev_name);
1621         return -1;
1622     }
1623     if (ioctl(sg_fd, SG_GET_VERSION_NUM, &sg_ver_num) < 0)
1624         sg_ver_num = 0;
1625     /* Prepare INQUIRY command */
1626     memset(&pt, 0, sizeof(pt));
1627     pt.interface_id = 'S';
1628     pt.cmd_len = sizeof(inqCmdBlk);
1629     /* pt.iovec_count = 0; */  /* memset takes care of this */
1630     pt.mx_sb_len = sizeof(sense_buffer);
1631     pt.dxfer_direction = SG_DXFER_FROM_DEV;
1632     pt.dxfer_len = INQ_REPLY_LEN;
1633     pt.dxferp = inqBuff;
1634     pt.cmdp = inqCmdBlk;
1635     pt.sbp = sense_buffer;
1636     pt.timeout = 20000;     /* 20000 millisecs == 20 seconds */
1637     /* pt.flags = 0; */     /* take defaults: indirect IO, etc */
1638     /* pt.pack_id = 0; */
1639     /* pt.usr_ptr = NULL; */
1640 
1641     if (ioctl(sg_fd, SG_IO, &pt) < 0) {
1642         pr_errno_lk(errno, "%s: Inquiry SG_IO ioctl error", __func__);
1643         close(sg_fd);
1644         return -1;
1645     }
1646 
1647     /* now for the error processing */
1648     ok = 0;
1649     switch (sg_err_category3(&pt)) {
1650     case SG_LIB_CAT_CLEAN:
1651         ok = 1;
1652         break;
1653     case SG_LIB_CAT_RECOVERED:
1654         pr2serr_lk("Recovered error on INQUIRY, continuing\n");
1655         ok = 1;
1656         break;
1657     default: /* won't bother decoding other categories */
1658         {
1659             lock_guard<mutex> lg(console_mutex);
1660             sg_chk_n_print3("INQUIRY command error", &pt, 1);
1661         }
1662         break;
1663     }
1664     if (ok) {
1665         /* Good, so fetch Product ID from response, copy to 'b' */
1666         if (b_mlen > 0) {
1667             if (b_mlen > 16) {
1668                 memcpy(b, inqBuff + 16, 16);
1669                 b[16] = '\0';
1670             } else {
1671                 memcpy(b, inqBuff + 16, b_mlen - 1);
1672                 b[b_mlen - 1] = '\0';
1673             }
1674         }
1675         ret = 0;
1676     } else
1677         ret = -1;
1678 
1679     close(sg_fd);
1680     return ret;
1681 }
1682 
1683 /* Only allow ranges up to 2**32-1 upper limit, so READ CAPACITY(10)
1684  * sufficient. Return of 0 -> success, -1 -> failure, 2 -> try again */
1685 static int
do_read_capacity(const char * dev_name,int block,unsigned int * last_lba,unsigned int * blk_sz)1686 do_read_capacity(const char * dev_name, int block, unsigned int * last_lba,
1687                  unsigned int * blk_sz)
1688 {
1689     int res, sg_fd;
1690     uint8_t rcCmdBlk [10] = {0x25, 0, 0, 0, 0, 0, 0, 0, 0, 0};
1691     uint8_t rcBuff[64];
1692     uint8_t sense_b[64] SG_C_CPP_ZERO_INIT;
1693     sg_io_hdr_t io_hdr SG_C_CPP_ZERO_INIT;
1694     int open_flags = O_RDWR;    /* O_EXCL | O_RDONLY fails with EPERM */
1695 
1696     if (! block)
1697         open_flags |= O_NONBLOCK;
1698     sg_fd = open(dev_name, open_flags);
1699     if (sg_fd < 0) {
1700         pr_errno_lk(errno, "%s: error opening file: %s", __func__, dev_name);
1701         return -1;
1702     }
1703     /* Prepare READ CAPACITY(10) command */
1704     io_hdr.interface_id = 'S';
1705     io_hdr.cmd_len = sizeof(rcCmdBlk);
1706     io_hdr.mx_sb_len = sizeof(sense_b);
1707     io_hdr.dxfer_direction = SG_DXFER_FROM_DEV;
1708     io_hdr.dxfer_len = sizeof(rcBuff);
1709     io_hdr.dxferp = rcBuff;
1710     io_hdr.cmdp = rcCmdBlk;
1711     io_hdr.sbp = sense_b;
1712     io_hdr.timeout = 20000;     /* 20000 millisecs == 20 seconds */;
1713 
1714     if (ioctl(sg_fd, SG_IO, &io_hdr) < 0) {
1715         pr_errno_lk(errno, "%s (SG_IO) error", __func__);
1716         close(sg_fd);
1717         return -1;
1718     }
1719     res = sg_err_category3(&io_hdr);
1720     if (SG_LIB_CAT_UNIT_ATTENTION == res) {
1721         lock_guard<mutex> lg(console_mutex);
1722         sg_chk_n_print3("read capacity", &io_hdr, 1);
1723         close(sg_fd);
1724         return 2; /* probably have another go ... */
1725     } else if (SG_LIB_CAT_CLEAN != res) {
1726         lock_guard<mutex> lg(console_mutex);
1727         sg_chk_n_print3("read capacity", &io_hdr, 1);
1728         close(sg_fd);
1729         return -1;
1730     }
1731     *last_lba = sg_get_unaligned_be32(&rcBuff[0]);
1732     *blk_sz = sg_get_unaligned_be32(&rcBuff[4]);
1733     close(sg_fd);
1734     return 0;
1735 }
1736 
1737 
1738 int
main(int argc,char * argv[])1739 main(int argc, char * argv[])
1740 {
1741     bool maxq_per_thread_given = false;
1742     int n;
1743     int force = 0;
1744     int64_t ll;
1745     int num_threads = DEF_NUM_THREADS;
1746     struct timespec start_tm, end_tm;
1747     struct opts_t * op;
1748     const char * cp;
1749 
1750     op = &a_opts;
1751 #if 0
1752     memset(op, 0, sizeof(*op));         // C++ doesn't like this
1753 #endif
1754     op->direct = DEF_DIRECT;
1755     op->lba = DEF_LBA;
1756     op->hi_lba = 0;
1757     op->lb_sz = DEF_LB_SZ;
1758     op->maxq_per_thread = MAX_Q_PER_FD;
1759     op->mmap_io = DEF_MMAP_IO;
1760     op->num_per_thread = DEF_NUM_PER_THREAD;
1761     op->num_lbs = 1;
1762     op->no_xfer = !! DEF_NO_XFER;
1763     op->verbose = 0;
1764     op->wait_ms = DEF_WAIT_MS;
1765     op->c2e = SCSI_TUR;
1766     op->blqd = BLQ_DEFAULT;
1767     op->block = !! DEF_BLOCKING;
1768     op->myqd = MYQD_HIGH;
1769     page_size = sysconf(_SC_PAGESIZE);
1770 
1771     while (1) {
1772         int option_index = 0;
1773         int c;
1774 
1775         c = getopt_long(argc, argv,
1776                         "34acdefghl:L:mM:n:NO:pq:Q:Rs:St:TuvVw:W",
1777                         long_options, &option_index);
1778         if (c == -1)
1779             break;
1780 
1781         switch (c) {
1782         case '3':
1783             op->v3 = true;
1784             op->v3_given = true;
1785             op->v4 = false;     /* if '-4 -3' take latter */
1786             op->v4_given = false;
1787             break;
1788         case '4':
1789             op->v4 = true;
1790             op->v4_given = true;
1791             op->v3 = false;
1792             op->v3_given = false;
1793             break;
1794         case 'a':
1795             op->masync = true;
1796             break;
1797         case 'c':
1798             op->cmd_time = true;
1799             break;
1800         case 'd':
1801             op->direct = true;
1802             break;
1803         case 'e':
1804             op->excl = true;
1805             break;
1806         case 'f':
1807             force = true;
1808             break;
1809         case 'g':
1810             op->generic_sync = true;
1811             break;
1812         case 'h':
1813         case '?':
1814             usage();
1815             return 0;
1816         case 'l':
1817             if (isdigit(*optarg)) {
1818                 ll = sg_get_llnum(optarg);
1819                 if (-1 == ll) {
1820                     pr2serr_lk("could not decode lba\n");
1821                     return 1;
1822                 } else
1823                     op->lba = (uint64_t)ll;
1824                 cp = strchr(optarg, ',');
1825                 if (cp) {
1826                     if (0 == strcmp("-1", cp + 1))
1827                         op->hi_lba = UINT_MAX;
1828                     else {
1829                         ll = sg_get_llnum(cp + 1);
1830                         if ((-1 == ll) || (ll > UINT_MAX)) {
1831                             pr2serr_lk("could not decode hi_lba, or > "
1832                                        "UINT_MAX\n");
1833                             return 1;
1834                         } else
1835                             op->hi_lba = (unsigned int)ll;
1836                     }
1837                 }
1838             } else {
1839                 pr2serr_lk("--lba= expects a number\n");
1840                 return 1;
1841             }
1842             break;
1843         case 'L':
1844             op->lb_sz = sg_get_num(optarg);
1845             if (op->lb_sz < 0) {
1846                 pr2serr_lk("--lbsz= expects power of 2\n");
1847                 return 1;
1848             }
1849             if (0 == op->lb_sz)
1850                 op->lb_sz = DEF_LB_SZ;
1851             break;
1852         case 'm':
1853             op->mmap_io = true;
1854             break;
1855         case 'M':
1856             if (isdigit(*optarg)) {
1857                 n = atoi(optarg);
1858                 if ((n < 1) || (n > MAX_Q_PER_FD)) {
1859                     pr2serr_lk("-M expects a value from 1 to %d\n",
1860                                MAX_Q_PER_FD);
1861                     return 1;
1862                 }
1863                 maxq_per_thread_given = true;
1864                 op->maxq_per_thread = n;
1865             } else {
1866                 pr2serr_lk("--maxqpt= expects a number\n");
1867                 return 1;
1868             }
1869             break;
1870         case 'n':
1871             if (isdigit(*optarg))
1872                 op->num_per_thread = sg_get_num(optarg);
1873             else {
1874                 pr2serr_lk("--numpt= expects a number\n");
1875                 return 1;
1876             }
1877             break;
1878         case 'N':
1879             op->no_xfer = true;
1880             break;
1881         case 'O':
1882             if (isdigit(*optarg))
1883                 op->ovn = sg_get_num(optarg);
1884             else {
1885                 pr2serr_lk("--override= expects a number\n");
1886                 return 1;
1887             }
1888             if (op->ovn < 0) {
1889                 pr2serr_lk("--override= bad number\n");
1890                 return 1;
1891             }
1892             break;
1893         case 'p':
1894             op->pack_id_force = true;
1895             break;
1896         case 'q':
1897             if (isdigit(*optarg)) {
1898                 n = atoi(optarg);
1899                 if (0 == n)
1900                     op->blqd = BLQ_AT_HEAD;
1901                 else if (1 == n)
1902                     op->blqd = BLQ_AT_TAIL;
1903             } else {
1904                 pr2serr_lk("--qat= expects a number: 0 or 1\n");
1905                 return 1;
1906             }
1907             break;
1908         case 'Q':
1909             if (isdigit(*optarg)) {
1910                 n = atoi(optarg);
1911                 if (0 == n)
1912                     op->myqd = MYQD_LOW;
1913                 else if (1 == n)
1914                     op->myqd = MYQD_MEDIUM;
1915                 else if (2 == n)
1916                     op->myqd = MYQD_HIGH;
1917             } else {
1918                 pr2serr_lk("--qfav= expects a number: 0, 1 or 2\n");
1919                 return 1;
1920             }
1921             break;
1922         case 'R':
1923             op->c2e = SCSI_READ16;
1924             break;
1925         case 's':
1926             if (isdigit(*optarg)) {
1927                 op->lb_sz = atoi(optarg);
1928                 if (op->lb_sz < 256) {
1929                     cerr << "Strange lb_sz, using 256" << endl;
1930                     op->lb_sz = 256;
1931                 }
1932             } else {
1933                 pr2serr_lk("--szlb= expects a number\n");
1934                 return 1;
1935             }
1936             if ((cp = strchr(optarg, ','))) {
1937                 n = sg_get_num(cp + 1);
1938                 if (n < 1) {
1939                     pr2serr_lk("could not decode 2nd part of "
1940                                "--szlb=LBS,NLBS\n");
1941                     return 1;
1942                 }
1943                 op->num_lbs = n;
1944             }
1945             break;
1946         case 'S':
1947             ++op->stats;
1948             break;
1949         case 't':
1950             if (isdigit(*optarg))
1951                 num_threads = atoi(optarg);
1952             else {
1953                 pr2serr_lk("--tnum= expects a number\n");
1954                 return 1;
1955             }
1956             break;
1957         case 'T':
1958             op->c2e = SCSI_TUR;
1959             break;
1960         case 'u':
1961             op->submit = true;
1962             break;
1963         case 'v':
1964             op->verbose_given = true;
1965             ++op->verbose;
1966             break;
1967         case 'V':
1968             op->version_given = true;
1969             break;
1970         case 'w':
1971             if ((isdigit(*optarg) || ('-' == *optarg))) {
1972                 if ('-' == *optarg)
1973                     op->wait_ms = - atoi(optarg + 1);
1974                 else
1975                     op->wait_ms = atoi(optarg);
1976             } else {
1977                 pr2serr_lk("--wait= expects a number\n");
1978                 return 1;
1979             }
1980             break;
1981         case 'W':
1982             op->c2e = SCSI_WRITE16;
1983             break;
1984         default:
1985             pr2serr_lk("unrecognised option code 0x%x ??\n", c);
1986             usage();
1987             return 1;
1988         }
1989     }
1990     if (optind < argc) {
1991         for (; optind < argc; ++optind)
1992             op->dev_names.push_back(argv[optind]);
1993     }
1994 #ifdef DEBUG
1995     pr2serr_lk("In DEBUG mode, ");
1996     if (op->verbose_given && op->version_given) {
1997         pr2serr_lk("but override: '-vV' given, zero verbose and continue\n");
1998         op->verbose_given = false;
1999         op->version_given = false;
2000         op->verbose = 0;
2001     } else if (! op->verbose_given) {
2002         pr2serr_lk("set '-vv'\n");
2003         op->verbose = 2;
2004     } else
2005         pr2serr_lk("keep verbose=%d\n", op->verbose);
2006 #else
2007     if (op->verbose_given && op->version_given)
2008         pr2serr_lk("Not in DEBUG mode, so '-vV' has no special action\n");
2009 #endif
2010     if (op->version_given) {
2011         pr2serr_lk("version: %s\n", version_str);
2012         return 0;
2013     }
2014     if (op->mmap_io) {
2015         if (maxq_per_thread_given && (op->maxq_per_thread > 1)) {
2016             pr2serr_lk("With mmap_io selected, QPT cannot exceed 1\n");
2017             return 1;
2018         } else if (op->direct) {
2019             pr2serr_lk("direct IO and mmap-ed IO cannot both be selected\n");
2020             return 1;
2021         } else if (op->generic_sync) {
2022             pr2serr_lk("--generic-sync and and mmap-ed IO are compatible\n");
2023             return 1;
2024         } else
2025             op->maxq_per_thread = 1;
2026     }
2027     if (! op->cmd_time && getenv("SG3_UTILS_LINUX_NANO")) {
2028         op->cmd_time = true;
2029         if (op->verbose)
2030             fprintf(stderr, "setting nanosecond timing due to environment "
2031                     "variable: SG3_UTILS_LINUX_NANO\n");
2032     }
2033     if (0 == op->dev_names.size()) {
2034         fprintf(stderr, "No sg_disk_device-s given\n\n");
2035         usage();
2036         return 1;
2037     }
2038     if (op->hi_lba && (op->lba > op->hi_lba)) {
2039         cerr << "lba,hi_lba range is illegal" << endl;
2040         return 1;
2041     }
2042     if (op->v4) {
2043         if (! op->submit) {
2044             op->submit = true;
2045             if (op->verbose > 1)
2046                 cerr << "when --v4 is given, --submit will be set" << endl;
2047         }
2048     }
2049 
2050     try {
2051         int k, sg_ver_num;
2052         unsigned int last_lba;
2053         unsigned int blk_sz;
2054         struct stat a_stat;
2055 
2056         for (k = 0; k < (int)op->dev_names.size(); ++k) {
2057             int res;
2058             const char * dev_name;
2059             char b[128];
2060 
2061             dev_name = op->dev_names[k];
2062             if (stat(dev_name, &a_stat) < 0) {
2063                 snprintf(b, sizeof(b), "could not stat() %s", dev_name);
2064                 perror(b);
2065                 return 1;
2066             }
2067             if (! S_ISCHR(a_stat.st_mode)) {
2068                 pr2serr_lk("%s should be a sg device which is a char "
2069                            "device. %s\n", dev_name, dev_name);
2070                 pr2serr_lk("is not a char device and damage could be done "
2071                            "if it is a BLOCK\ndevice, exiting ...\n");
2072                 return 1;
2073             }
2074             res = do_inquiry_prod_id(dev_name, op->block, sg_ver_num,
2075                                      b, sizeof(b));
2076             if (! force) {
2077                 if (res) {
2078                     pr2serr_lk("INQUIRY failed on %s\n", dev_name);
2079                     return 1;
2080                 }
2081                 // For safety, since <lba> written to, only permit scsi_debug
2082                 // devices. Bypass this with '-f' option.
2083                 if (0 != memcmp("scsi_debug", b, 10)) {
2084                     pr2serr_lk("Since this utility may write to LBAs, "
2085                                "only devices with the\n"
2086                                "product ID 'scsi_debug' accepted. Use '-f' "
2087                                "to override.\n");
2088                     return 2;
2089                 }
2090             }
2091             if (sg_ver_num < 30000) {
2092                 pr2serr_lk("%s either not sg device or too old\n", dev_name);
2093                 return 2;
2094             } else if (sg_ver_num >= 40030) {
2095                 op->sg_vn_ge_40030 = true;
2096                 op->sg_vn_ge_40000 = true;
2097                 if (! (op->v3_given || op->v4_given)) {
2098                     op->v4 = true;
2099                     op->v3 = false;
2100                     op->submit = true;
2101                 }
2102             } else if (sg_ver_num >= 40000) {
2103                 op->sg_vn_ge_40030 = false;
2104                 op->sg_vn_ge_40000 = true;
2105                 if (! (op->v3_given || op->v4_given)) {
2106                     op->v4 = true;
2107                     op->v3 = false;
2108                     op->submit = true;
2109                 }
2110             } else {
2111                 if (! (op->v3_given || op->v4_given)) {
2112                     op->v4 = false;
2113                     op->v3 = true;
2114                     op->submit = false;
2115                 }
2116             }
2117 
2118             if ((SCSI_WRITE16 == op->c2e) || (SCSI_READ16 == op->c2e)) {
2119                 res = do_read_capacity(dev_name, op->block, &last_lba,
2120                                        &blk_sz);
2121                 if (2 == res)
2122                     res = do_read_capacity(dev_name, op->block, &last_lba,
2123                                            &blk_sz);
2124                 if (res) {
2125                     pr2serr_lk("READ CAPACITY(10) failed on %s\n", dev_name);
2126                     return 1;
2127                 }
2128                 if (blk_sz != (unsigned int)op->lb_sz) {
2129                     pr2serr_lk(">>> Logical block size (%d) of %s\n"
2130                                "    differs from command line option (or "
2131                                "default)\n", blk_sz, dev_name);
2132                    pr2serr_lk("... continue anyway\n");
2133                 }
2134                 op->blk_szs.push_back(blk_sz);
2135                 if (UINT_MAX == op->hi_lba)
2136                     op->hi_lbas.push_back(last_lba);
2137             }
2138         }
2139 
2140         start_tm.tv_sec = 0;
2141         start_tm.tv_nsec = 0;
2142         if (clock_gettime(CLOCK_MONOTONIC, &start_tm) < 0)
2143             perror("clock_gettime failed");
2144 
2145         vector<thread *> vt;
2146 
2147         /* start multi-threaded section */
2148         for (k = 0; k < num_threads; ++k) {
2149             thread * tp = new thread {work_thread, k, op};
2150             vt.push_back(tp);
2151         }
2152 
2153         // g++ 4.7.3 didn't like range-for loop here
2154         for (k = 0; k < (int)vt.size(); ++k)
2155             vt[k]->join();
2156         /* end multi-threaded section, just this main thread left */
2157 
2158         for (k = 0; k < (int)vt.size(); ++k)
2159             delete vt[k];
2160 
2161         n = uniq_pack_id.load() - 1;
2162         if (((n > 0) || op->generic_sync) &&
2163             (0 == clock_gettime(CLOCK_MONOTONIC, &end_tm))) {
2164             struct timespec res_tm;
2165             double a, b;
2166 
2167             if (op->generic_sync)
2168                 n = op->num_per_thread * num_threads;
2169             res_tm.tv_sec = end_tm.tv_sec - start_tm.tv_sec;
2170             res_tm.tv_nsec = end_tm.tv_nsec - start_tm.tv_nsec;
2171             if (res_tm.tv_nsec < 0) {
2172                 --res_tm.tv_sec;
2173                 res_tm.tv_nsec += 1000000000;
2174             }
2175             a = res_tm.tv_sec;
2176             a += (0.000001 * (res_tm.tv_nsec / 1000));
2177             b = (double)n;
2178             if (a > 0.000001) {
2179                 printf("Time to complete %d commands was %d.%06d seconds\n",
2180                        n, (int)res_tm.tv_sec, (int)(res_tm.tv_nsec / 1000));
2181                 printf("Implies %.0f IOPS\n", (b / a));
2182             }
2183         }
2184 
2185         if (op->verbose || op->stats) {
2186             cout << "Number of sync_starts: " << sync_starts.load() << endl;
2187             cout << "Number of async_starts: " << async_starts.load() << endl;
2188             cout << "Number of async_finishes: " << async_finishes.load() <<
2189                     endl;
2190             cout << "Last pack_id: " << n << endl;
2191         }
2192         n = start_ebusy_count.load();
2193         if (op->verbose || op->stats || (n > 0))
2194             cout << "Number of start EBUSYs: " << n << endl;
2195         n = fin_ebusy_count.load();
2196         if (op->verbose || op->stats || (n > 0))
2197             cout << "Number of finish EBUSYs: " << n << endl;
2198         n = start_eagain_count.load();
2199         if (op->verbose || op->stats || (n > 0))
2200             cout << "Number of start EAGAINs: " << n << endl;
2201         n = fin_eagain_count.load();
2202         if (op->verbose || op->stats || (n > 0))
2203             cout << "Number of finish EAGAINs: " << n << endl;
2204         n = start_e2big_count.load();
2205         if (op->verbose || op->stats || (n > 0))
2206             cout << "Number of E2BIGs: " << n << endl;
2207         n = start_edom_count.load();
2208         if (op->verbose || op->stats || (n > 0))
2209             cout << "Number of EDOMs: " << n << endl;
2210         n = enomem_count.load();
2211         if (op->verbose || op->stats || (n > 0))
2212             cout << "Number of ENOMEMs: " << n << endl;
2213     }
2214     catch(system_error& e)  {
2215         cerr << "got a system_error exception: " << e.what() << '\n';
2216         auto ec = e.code();
2217         cerr << "category: " << ec.category().name() << '\n';
2218         cerr << "value: " << ec.value() << '\n';
2219         cerr << "message: " << ec.message() << '\n';
2220         cerr << "\nNote: if g++ may need '-pthread' or similar in "
2221                 "compile/link line" << '\n';
2222     }
2223     catch(...) {
2224         cerr << "got another exception: " << '\n';
2225     }
2226     return 0;
2227 }
2228