• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2 // Copyright (c) 2020 Wenbo Zhang
3 //
4 // Based on biosnoop(8) from BCC by Brendan Gregg.
5 // 29-Jun-2020   Wenbo Zhang   Created this.
#include <argp.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/resource.h>
#include <bpf/libbpf.h>
#include <bpf/bpf.h>
#include "blk_types.h"
#include "biosnoop.h"
#include "biosnoop.skel.h"
#include "trace_helpers.h"
19 
20 #define PERF_BUFFER_PAGES	16
21 #define PERF_POLL_TIMEOUT_MS	100
22 
23 static volatile sig_atomic_t exiting = 0;
24 
25 static struct env {
26 	char *disk;
27 	int duration;
28 	bool timestamp;
29 	bool queued;
30 	bool verbose;
31 	char *cgroupspath;
32 	bool cg;
33 } env = {};
34 
35 static volatile __u64 start_ts;
36 
/* Program metadata picked up by argp (version, bug address, help text). */
const char *argp_program_version = "biosnoop 0.1";
const char *argp_program_bug_address =
	"https://github.com/iovisor/bcc/tree/master/libbpf-tools";
/*
 * Help text. The USAGE line lists every option and the optional positional
 * duration that parse_arg() accepts.
 */
const char argp_program_doc[] =
"Trace block I/O.\n"
"\n"
"USAGE: biosnoop [--help] [-d DISK] [-c CG] [-Q] [-v] [DURATION]\n"
"\n"
"EXAMPLES:\n"
"    biosnoop              # trace all block I/O\n"
"    biosnoop -Q           # include OS queued time in I/O time\n"
"    biosnoop 10           # trace for 10 seconds only\n"
"    biosnoop -d sdc       # trace sdc only\n"
"    biosnoop -c CG        # Trace process under cgroupsPath CG\n";
51 
/* Option table handed to argp; the keys are dispatched in parse_arg(). */
static const struct argp_option opts[] = {
	{
		.name = "queued",
		.key = 'Q',
		.doc = "Include OS queued time in I/O time",
	},
	{
		.name = "disk",
		.key = 'd',
		.arg = "DISK",
		.doc = "Trace this disk only",
	},
	{
		.name = "verbose",
		.key = 'v',
		.doc = "Verbose debug output",
	},
	{
		.name = "cgroup",
		.key = 'c',
		.arg = "/sys/fs/cgroup/unified/CG",
		.doc = "Trace process in cgroup path",
	},
	{
		/* short-only -h, kept out of the generated help listing */
		.key = 'h',
		.flags = OPTION_HIDDEN,
		.doc = "Show the full help",
	},
	{ 0 },	/* terminator */
};
60 
parse_arg(int key,char * arg,struct argp_state * state)61 static error_t parse_arg(int key, char *arg, struct argp_state *state)
62 {
63 	static int pos_args;
64 
65 	switch (key) {
66 	case 'h':
67 		argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
68 		break;
69 	case 'v':
70 		env.verbose = true;
71 		break;
72 	case 'Q':
73 		env.queued = true;
74 		break;
75 	case 'c':
76 		env.cg = true;
77 		env.cgroupspath = arg;
78 		break;
79 	case 'd':
80 		env.disk = arg;
81 		if (strlen(arg) + 1 > DISK_NAME_LEN) {
82 			fprintf(stderr, "invaild disk name: too long\n");
83 			argp_usage(state);
84 		}
85 		break;
86 	case ARGP_KEY_ARG:
87 		if (pos_args++) {
88 			fprintf(stderr,
89 				"unrecognized positional argument: %s\n", arg);
90 			argp_usage(state);
91 		}
92 		errno = 0;
93 		env.duration = strtoll(arg, NULL, 10);
94 		if (errno || env.duration <= 0) {
95 			fprintf(stderr, "invalid delay (in us): %s\n", arg);
96 			argp_usage(state);
97 		}
98 		break;
99 	default:
100 		return ARGP_ERR_UNKNOWN;
101 	}
102 	return 0;
103 }
104 
libbpf_print_fn(enum libbpf_print_level level,const char * format,va_list args)105 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
106 {
107 	if (level == LIBBPF_DEBUG && !env.verbose)
108 		return 0;
109 	return vfprintf(stderr, format, args);
110 }
111 
sig_int(int signo)112 static void sig_int(int signo)
113 {
114 	exiting = 1;
115 }
116 
blk_fill_rwbs(char * rwbs,unsigned int op)117 static void blk_fill_rwbs(char *rwbs, unsigned int op)
118 {
119 	int i = 0;
120 
121 	if (op & REQ_PREFLUSH)
122 		rwbs[i++] = 'F';
123 
124 	switch (op & REQ_OP_MASK) {
125 	case REQ_OP_WRITE:
126 	case REQ_OP_WRITE_SAME:
127 		rwbs[i++] = 'W';
128 		break;
129 	case REQ_OP_DISCARD:
130 		rwbs[i++] = 'D';
131 		break;
132 	case REQ_OP_SECURE_ERASE:
133 		rwbs[i++] = 'D';
134 		rwbs[i++] = 'E';
135 		break;
136 	case REQ_OP_FLUSH:
137 		rwbs[i++] = 'F';
138 		break;
139 	case REQ_OP_READ:
140 		rwbs[i++] = 'R';
141 		break;
142 	default:
143 		rwbs[i++] = 'N';
144 	}
145 
146 	if (op & REQ_FUA)
147 		rwbs[i++] = 'F';
148 	if (op & REQ_RAHEAD)
149 		rwbs[i++] = 'A';
150 	if (op & REQ_SYNC)
151 		rwbs[i++] = 'S';
152 	if (op & REQ_META)
153 		rwbs[i++] = 'M';
154 
155 	rwbs[i] = '\0';
156 }
157 
158 static struct partitions *partitions;
159 
handle_event(void * ctx,int cpu,void * data,__u32 data_sz)160 void handle_event(void *ctx, int cpu, void *data, __u32 data_sz)
161 {
162 	const struct partition *partition;
163 	const struct event *e = data;
164 	char rwbs[RWBS_LEN];
165 
166 	if (!start_ts)
167 		start_ts = e->ts;
168 	blk_fill_rwbs(rwbs, e->cmd_flags);
169 	partition = partitions__get_by_dev(partitions, e->dev);
170 	printf("%-11.6f %-14.14s %-6d %-7s %-4s %-10lld %-7d ",
171 		(e->ts - start_ts) / 1000000000.0,
172 		e->comm, e->pid, partition ? partition->name : "Unknown", rwbs,
173 		e->sector, e->len);
174 	if (env.queued)
175 		printf("%7.3f ", e->qdelta != -1 ?
176 			e->qdelta / 1000000.0 : -1);
177 	printf("%7.3f\n", e->delta / 1000000.0);
178 }
179 
/*
 * Perf-buffer lost-sample callback: report on stderr how many events were
 * dropped on the given CPU. ctx is unused.
 */
void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt)
{
	(void)ctx;

	fprintf(stderr, "lost %llu events on CPU #%d\n", lost_cnt, cpu);
}
184 
main(int argc,char ** argv)185 int main(int argc, char **argv)
186 {
187 	const struct partition *partition;
188 	static const struct argp argp = {
189 		.options = opts,
190 		.parser = parse_arg,
191 		.doc = argp_program_doc,
192 	};
193 	struct perf_buffer *pb = NULL;
194 	struct ksyms *ksyms = NULL;
195 	struct biosnoop_bpf *obj;
196 	__u64 time_end = 0;
197 	int err;
198 	int idx, cg_map_fd;
199 	int cgfd = -1;
200 
201 	err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
202 	if (err)
203 		return err;
204 
205 	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
206 	libbpf_set_print(libbpf_print_fn);
207 
208 	obj = biosnoop_bpf__open();
209 	if (!obj) {
210 		fprintf(stderr, "failed to open BPF object\n");
211 		return 1;
212 	}
213 
214 	partitions = partitions__load();
215 	if (!partitions) {
216 		fprintf(stderr, "failed to load partitions info\n");
217 		goto cleanup;
218 	}
219 
220 	/* initialize global data (filtering options) */
221 	if (env.disk) {
222 		partition = partitions__get_by_name(partitions, env.disk);
223 		if (!partition) {
224 			fprintf(stderr, "invaild partition name: not exist\n");
225 			goto cleanup;
226 		}
227 		obj->rodata->filter_dev = true;
228 		obj->rodata->targ_dev = partition->dev;
229 	}
230 	obj->rodata->targ_queued = env.queued;
231 	obj->rodata->filter_cg = env.cg;
232 
233 	err = biosnoop_bpf__load(obj);
234 	if (err) {
235 		fprintf(stderr, "failed to load BPF object: %d\n", err);
236 		goto cleanup;
237 	}
238 
239 	/* update cgroup path fd to map */
240 	if (env.cg) {
241 		idx = 0;
242 		cg_map_fd = bpf_map__fd(obj->maps.cgroup_map);
243 		cgfd = open(env.cgroupspath, O_RDONLY);
244 		if (cgfd < 0) {
245 			fprintf(stderr, "Failed opening Cgroup path: %s\n", env.cgroupspath);
246 			goto cleanup;
247 		}
248 		if (bpf_map_update_elem(cg_map_fd, &idx, &cgfd, BPF_ANY)) {
249 			fprintf(stderr, "Failed adding target cgroup to map\n");
250 			goto cleanup;
251 		}
252 	}
253 
254 	obj->links.blk_account_io_start = bpf_program__attach(obj->progs.blk_account_io_start);
255 	if (!obj->links.blk_account_io_start) {
256 		err = -errno;
257 		fprintf(stderr, "failed to attach blk_account_io_start: %s\n",
258 			strerror(-err));
259 		goto cleanup;
260 	}
261 	ksyms = ksyms__load();
262 	if (!ksyms) {
263 		err = -ENOMEM;
264 		fprintf(stderr, "failed to load kallsyms\n");
265 		goto cleanup;
266 	}
267 	if (ksyms__get_symbol(ksyms, "blk_account_io_merge_bio")) {
268 		obj->links.blk_account_io_merge_bio =
269 			bpf_program__attach(obj->progs.blk_account_io_merge_bio);
270 		if (!obj->links.blk_account_io_merge_bio) {
271 			err = -errno;
272 			fprintf(stderr, "failed to attach blk_account_io_merge_bio: %s\n",
273 				strerror(-err));
274 			goto cleanup;
275 		}
276 	}
277 	if (env.queued) {
278 		obj->links.block_rq_insert =
279 			bpf_program__attach(obj->progs.block_rq_insert);
280 		if (!obj->links.block_rq_insert) {
281 			err = -errno;
282 			fprintf(stderr, "failed to attach block_rq_insert: %s\n", strerror(-err));
283 			goto cleanup;
284 		}
285 	}
286 	obj->links.block_rq_issue = bpf_program__attach(obj->progs.block_rq_issue);
287 	if (!obj->links.block_rq_issue) {
288 		err = -errno;
289 		fprintf(stderr, "failed to attach block_rq_issue: %s\n", strerror(-err));
290 		goto cleanup;
291 	}
292 	obj->links.block_rq_complete = bpf_program__attach(obj->progs.block_rq_complete);
293 	if (!obj->links.block_rq_complete) {
294 		err = -errno;
295 		fprintf(stderr, "failed to attach block_rq_complete: %s\n", strerror(-err));
296 		goto cleanup;
297 	}
298 
299 	pb = perf_buffer__new(bpf_map__fd(obj->maps.events), PERF_BUFFER_PAGES,
300 			      handle_event, handle_lost_events, NULL, NULL);
301 	if (!pb) {
302 		err = -errno;
303 		fprintf(stderr, "failed to open perf buffer: %d\n", err);
304 		goto cleanup;
305 	}
306 
307 	printf("%-11s %-14s %-6s %-7s %-4s %-10s %-7s ",
308 		"TIME(s)", "COMM", "PID", "DISK", "T", "SECTOR", "BYTES");
309 	if (env.queued)
310 		printf("%7s ", "QUE(ms)");
311 	printf("%7s\n", "LAT(ms)");
312 
313 	/* setup duration */
314 	if (env.duration)
315 		time_end = get_ktime_ns() + env.duration * NSEC_PER_SEC;
316 
317 	if (signal(SIGINT, sig_int) == SIG_ERR) {
318 		fprintf(stderr, "can't set signal handler: %s\n", strerror(errno));
319 		err = 1;
320 		goto cleanup;
321 	}
322 
323 	/* main: poll */
324 	while (!exiting) {
325 		err = perf_buffer__poll(pb, PERF_POLL_TIMEOUT_MS);
326 		if (err < 0 && err != -EINTR) {
327 			fprintf(stderr, "error polling perf buffer: %s\n", strerror(-err));
328 			goto cleanup;
329 		}
330 		if (env.duration && get_ktime_ns() > time_end)
331 			goto cleanup;
332 		/* reset err to return 0 if exiting */
333 		err = 0;
334 	}
335 
336 cleanup:
337 	perf_buffer__free(pb);
338 	biosnoop_bpf__destroy(obj);
339 	ksyms__free(ksyms);
340 	partitions__free(partitions);
341 	if (cgfd > 0)
342 		close(cgfd);
343 
344 	return err != 0;
345 }
346