• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2 // Copyright (c) 2020 Wenbo Zhang
3 //
4 // Based on biolatency(8) from BCC by Brendan Gregg.
5 // 15-Jun-2020   Wenbo Zhang   Created this.
#include <argp.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/resource.h>
#include <bpf/libbpf.h>
#include <bpf/bpf.h>
#include "blk_types.h"
#include "biolatency.h"
#include "biolatency.skel.h"
#include "trace_helpers.h"
19 
/* Number of elements in a true array (must not be used on a pointer). */
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
21 
/*
 * Command-line configuration, filled in by parse_arg().
 * Fields not listed in the initializer start out zero/false/NULL.
 */
static struct env {
	char *disk;		/* -d DISK: name of the only disk to trace */
	time_t interval;	/* 1st positional arg: seconds between reports */
	int times;		/* 2nd positional arg: number of reports */
	bool timestamp;		/* -T: print a timestamp with each report */
	bool queued;		/* -Q: include OS queued time */
	bool per_disk;		/* -D: one histogram per disk */
	bool per_flag;		/* -F: one histogram per set of I/O flags */
	bool milliseconds;	/* -m: millisecond histogram */
	bool verbose;		/* -v: let libbpf debug messages through */
	char *cgroupspath;	/* -c CG: cgroup path to filter on */
	bool cg;		/* set together with cgroupspath */
} env = {
	/* Large defaults: a single report, printed when the sleep is interrupted. */
	.interval = 99999999,
	.times = 99999999,
};
38 
/*
 * Set from sig_handler() and polled by the main loop.  sig_atomic_t is the
 * object type the C standard permits a signal handler to write.
 */
static volatile sig_atomic_t exiting;
40 
41 const char *argp_program_version = "biolatency 0.1";
42 const char *argp_program_bug_address =
43 	"https://github.com/iovisor/bcc/tree/master/libbpf-tools";
44 const char argp_program_doc[] =
45 "Summarize block device I/O latency as a histogram.\n"
46 "\n"
47 "USAGE: biolatency [--help] [-T] [-m] [-Q] [-D] [-F] [-d DISK] [-c CG] [interval] [count]\n"
48 "\n"
49 "EXAMPLES:\n"
50 "    biolatency              # summarize block I/O latency as a histogram\n"
51 "    biolatency 1 10         # print 1 second summaries, 10 times\n"
52 "    biolatency -mT 1        # 1s summaries, milliseconds, and timestamps\n"
53 "    biolatency -Q           # include OS queued time in I/O time\n"
54 "    biolatency -D           # show each disk device separately\n"
55 "    biolatency -F           # show I/O flags separately\n"
56 "    biolatency -d sdc       # Trace sdc only\n"
57 "    biolatency -c CG        # Trace process under cgroupsPath CG\n";
58 
/*
 * Command-line options understood by parse_arg().
 *
 * Every long name must be unique.  -D and -d used to share the long name
 * "disk", so --disk could only ever select one of them.  -D now uses
 * "disks" (the spelling used by BCC's biolatency(8)); the short options
 * are unchanged.
 */
static const struct argp_option opts[] = {
	{ "timestamp", 'T', NULL, 0, "Include timestamp on output" },
	{ "milliseconds", 'm', NULL, 0, "Millisecond histogram" },
	{ "queued", 'Q', NULL, 0, "Include OS queued time in I/O time" },
	{ "disks", 'D', NULL, 0, "Print a histogram per disk device" },
	{ "flag", 'F', NULL, 0, "Print a histogram per set of I/O flags" },
	{ "disk",  'd', "DISK",  0, "Trace this disk only" },
	{ "verbose", 'v', NULL, 0, "Verbose debug output" },
	{ "cgroup", 'c', "/sys/fs/cgroup/unified", 0, "Trace process in cgroup path"},
	{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
	{},
};
71 
parse_arg(int key,char * arg,struct argp_state * state)72 static error_t parse_arg(int key, char *arg, struct argp_state *state)
73 {
74 	static int pos_args;
75 
76 	switch (key) {
77 	case 'h':
78 		argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
79 		break;
80 	case 'v':
81 		env.verbose = true;
82 		break;
83 	case 'm':
84 		env.milliseconds = true;
85 		break;
86 	case 'Q':
87 		env.queued = true;
88 		break;
89 	case 'D':
90 		env.per_disk = true;
91 		break;
92 	case 'F':
93 		env.per_flag = true;
94 		break;
95 	case 'T':
96 		env.timestamp = true;
97 		break;
98 	case 'c':
99 		env.cgroupspath = arg;
100 		env.cg = true;
101 		break;
102 	case 'd':
103 		env.disk = arg;
104 		if (strlen(arg) + 1 > DISK_NAME_LEN) {
105 			fprintf(stderr, "invaild disk name: too long\n");
106 			argp_usage(state);
107 		}
108 		break;
109 	case ARGP_KEY_ARG:
110 		errno = 0;
111 		if (pos_args == 0) {
112 			env.interval = strtol(arg, NULL, 10);
113 			if (errno) {
114 				fprintf(stderr, "invalid internal\n");
115 				argp_usage(state);
116 			}
117 		} else if (pos_args == 1) {
118 			env.times = strtol(arg, NULL, 10);
119 			if (errno) {
120 				fprintf(stderr, "invalid times\n");
121 				argp_usage(state);
122 			}
123 		} else {
124 			fprintf(stderr,
125 				"unrecognized positional argument: %s\n", arg);
126 			argp_usage(state);
127 		}
128 		pos_args++;
129 		break;
130 	default:
131 		return ARGP_ERR_UNKNOWN;
132 	}
133 	return 0;
134 }
135 
libbpf_print_fn(enum libbpf_print_level level,const char * format,va_list args)136 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
137 {
138 	if (level == LIBBPF_DEBUG && !env.verbose)
139 		return 0;
140 	return vfprintf(stderr, format, args);
141 }
142 
sig_handler(int sig)143 static void sig_handler(int sig)
144 {
145 	exiting = true;
146 }
147 
print_cmd_flags(int cmd_flags)148 static void print_cmd_flags(int cmd_flags)
149 {
150 	static struct { int bit; const char *str; } flags[] = {
151 		{ REQ_NOWAIT, "NoWait-" },
152 		{ REQ_BACKGROUND, "Background-" },
153 		{ REQ_RAHEAD, "ReadAhead-" },
154 		{ REQ_PREFLUSH, "PreFlush-" },
155 		{ REQ_FUA, "FUA-" },
156 		{ REQ_INTEGRITY, "Integrity-" },
157 		{ REQ_IDLE, "Idle-" },
158 		{ REQ_NOMERGE, "NoMerge-" },
159 		{ REQ_PRIO, "Priority-" },
160 		{ REQ_META, "Metadata-" },
161 		{ REQ_SYNC, "Sync-" },
162 	};
163 	static const char *ops[] = {
164 		[REQ_OP_READ] = "Read",
165 		[REQ_OP_WRITE] = "Write",
166 		[REQ_OP_FLUSH] = "Flush",
167 		[REQ_OP_DISCARD] = "Discard",
168 		[REQ_OP_SECURE_ERASE] = "SecureErase",
169 		[REQ_OP_ZONE_RESET] = "ZoneReset",
170 		[REQ_OP_WRITE_SAME] = "WriteSame",
171 		[REQ_OP_ZONE_RESET_ALL] = "ZoneResetAll",
172 		[REQ_OP_WRITE_ZEROES] = "WriteZeroes",
173 		[REQ_OP_ZONE_OPEN] = "ZoneOpen",
174 		[REQ_OP_ZONE_CLOSE] = "ZoneClose",
175 		[REQ_OP_ZONE_FINISH] = "ZoneFinish",
176 		[REQ_OP_SCSI_IN] = "SCSIIn",
177 		[REQ_OP_SCSI_OUT] = "SCSIOut",
178 		[REQ_OP_DRV_IN] = "DrvIn",
179 		[REQ_OP_DRV_OUT] = "DrvOut",
180 	};
181 	int i;
182 
183 	printf("flags = ");
184 
185 	for (i = 0; i < ARRAY_SIZE(flags); i++) {
186 		if (cmd_flags & flags[i].bit)
187 			printf("%s", flags[i].str);
188 	}
189 
190 	if ((cmd_flags & REQ_OP_MASK) < ARRAY_SIZE(ops))
191 		printf("%s", ops[cmd_flags & REQ_OP_MASK]);
192 	else
193 		printf("Unknown");
194 }
195 
196 static
print_log2_hists(struct bpf_map * hists,struct partitions * partitions)197 int print_log2_hists(struct bpf_map *hists, struct partitions *partitions)
198 {
199 	struct hist_key lookup_key = { .cmd_flags = -1 }, next_key;
200 	const char *units = env.milliseconds ? "msecs" : "usecs";
201 	const struct partition *partition;
202 	int err, fd = bpf_map__fd(hists);
203 	struct hist hist;
204 
205 	while (!bpf_map_get_next_key(fd, &lookup_key, &next_key)) {
206 		err = bpf_map_lookup_elem(fd, &next_key, &hist);
207 		if (err < 0) {
208 			fprintf(stderr, "failed to lookup hist: %d\n", err);
209 			return -1;
210 		}
211 		if (env.per_disk) {
212 			partition = partitions__get_by_dev(partitions,
213 							next_key.dev);
214 			printf("\ndisk = %s\t", partition ? partition->name :
215 				"Unknown");
216 		}
217 		if (env.per_flag)
218 			print_cmd_flags(next_key.cmd_flags);
219 		printf("\n");
220 		print_log2_hist(hist.slots, MAX_SLOTS, units);
221 		lookup_key = next_key;
222 	}
223 
224 	lookup_key.cmd_flags = -1;
225 	while (!bpf_map_get_next_key(fd, &lookup_key, &next_key)) {
226 		err = bpf_map_delete_elem(fd, &next_key);
227 		if (err < 0) {
228 			fprintf(stderr, "failed to cleanup hist : %d\n", err);
229 			return -1;
230 		}
231 		lookup_key = next_key;
232 	}
233 
234 	return 0;
235 }
236 
main(int argc,char ** argv)237 int main(int argc, char **argv)
238 {
239 	struct partitions *partitions = NULL;
240 	const struct partition *partition;
241 	static const struct argp argp = {
242 		.options = opts,
243 		.parser = parse_arg,
244 		.doc = argp_program_doc,
245 	};
246 	struct biolatency_bpf *obj;
247 	struct tm *tm;
248 	char ts[32];
249 	time_t t;
250 	int err;
251 	int idx, cg_map_fd;
252 	int cgfd = -1;
253 
254 	err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
255 	if (err)
256 		return err;
257 
258 	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
259 	libbpf_set_print(libbpf_print_fn);
260 
261 	obj = biolatency_bpf__open();
262 	if (!obj) {
263 		fprintf(stderr, "failed to open BPF object\n");
264 		return 1;
265 	}
266 
267 	partitions = partitions__load();
268 	if (!partitions) {
269 		fprintf(stderr, "failed to load partitions info\n");
270 		goto cleanup;
271 	}
272 
273 	/* initialize global data (filtering options) */
274 	if (env.disk) {
275 		partition = partitions__get_by_name(partitions, env.disk);
276 		if (!partition) {
277 			fprintf(stderr, "invaild partition name: not exist\n");
278 			goto cleanup;
279 		}
280 		obj->rodata->filter_dev = true;
281 		obj->rodata->targ_dev = partition->dev;
282 	}
283 	obj->rodata->targ_per_disk = env.per_disk;
284 	obj->rodata->targ_per_flag = env.per_flag;
285 	obj->rodata->targ_ms = env.milliseconds;
286 	obj->rodata->targ_queued = env.queued;
287 	obj->rodata->filter_cg = env.cg;
288 
289 	err = biolatency_bpf__load(obj);
290 	if (err) {
291 		fprintf(stderr, "failed to load BPF object: %d\n", err);
292 		goto cleanup;
293 	}
294 
295 	/* update cgroup path fd to map */
296 	if (env.cg) {
297 		idx = 0;
298 		cg_map_fd = bpf_map__fd(obj->maps.cgroup_map);
299 		cgfd = open(env.cgroupspath, O_RDONLY);
300 		if (cgfd < 0) {
301 			fprintf(stderr, "Failed opening Cgroup path: %s", env.cgroupspath);
302 			goto cleanup;
303 		}
304 		if (bpf_map_update_elem(cg_map_fd, &idx, &cgfd, BPF_ANY)) {
305 			fprintf(stderr, "Failed adding target cgroup to map");
306 			goto cleanup;
307 		}
308 	}
309 
310 	if (env.queued) {
311 		obj->links.block_rq_insert = bpf_program__attach(obj->progs.block_rq_insert);
312 		if (!obj->links.block_rq_insert) {
313 			err = -errno;
314 			fprintf(stderr, "failed to attach: %s\n", strerror(-err));
315 			goto cleanup;
316 		}
317 	}
318 	obj->links.block_rq_issue = bpf_program__attach(obj->progs.block_rq_issue);
319 	if (!obj->links.block_rq_issue) {
320 		err = -errno;
321 		fprintf(stderr, "failed to attach: %s\n", strerror(-err));
322 		goto cleanup;
323 	}
324 	obj->links.block_rq_complete = bpf_program__attach(obj->progs.block_rq_complete);
325 	if (!obj->links.block_rq_complete) {
326 		err = -errno;
327 		fprintf(stderr, "failed to attach: %s\n", strerror(-err));
328 		goto cleanup;
329 	}
330 
331 	signal(SIGINT, sig_handler);
332 
333 	printf("Tracing block device I/O... Hit Ctrl-C to end.\n");
334 
335 	/* main: poll */
336 	while (1) {
337 		sleep(env.interval);
338 		printf("\n");
339 
340 		if (env.timestamp) {
341 			time(&t);
342 			tm = localtime(&t);
343 			strftime(ts, sizeof(ts), "%H:%M:%S", tm);
344 			printf("%-8s\n", ts);
345 		}
346 
347 		err = print_log2_hists(obj->maps.hists, partitions);
348 		if (err)
349 			break;
350 
351 		if (exiting || --env.times == 0)
352 			break;
353 	}
354 
355 cleanup:
356 	biolatency_bpf__destroy(obj);
357 	partitions__free(partitions);
358 	if (cgfd > 0)
359 		close(cgfd);
360 
361 	return err != 0;
362 }
363