// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2020 Wenbo Zhang
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_tracing.h>
#include "biosnoop.h"

#define MAX_ENTRIES	10240

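/*
 * Tunables; as const volatile globals they are expected to be filled in by
 * the user-space loader before the skeleton is loaded.
 */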
const volatile bool filter_cg = false;
const volatile bool targ_queued = false;
const volatile bool filter_dev = false;
const volatile __u32 targ_dev = 0;

extern __u32 LINUX_KERNEL_VERSION __kconfig;

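/* single-slot cgroup array consulted when filter_cg is set */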
struct {
	__uint(type, BPF_MAP_TYPE_CGROUP_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, 1);
} cgroup_map SEC(".maps");

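/* identity of the task that started the I/O */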
struct piddata {
	char comm[TASK_COMM_LEN];
	u32 pid;
};

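/* in-flight request -> issuing task (pid/comm) */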
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, MAX_ENTRIES);
	__type(key, struct request *);
	__type(value, struct piddata);
	__uint(map_flags, BPF_F_NO_PREALLOC);
} infobyreq SEC(".maps");

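/* per-request timestamps and device number */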
struct stage {
	u64 insert;
	u64 issue;
	__u32 dev;
};

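/* in-flight request -> insert/issue timestamps */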
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, MAX_ENTRIES);
	__type(key, struct request *);
	__type(value, struct stage);
} start SEC(".maps");

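/* perf buffer used to stream completion events to user space */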
struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(u32));
} events SEC(".maps");

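/* remember the current task's pid/comm for this request */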
static __always_inline
int trace_pid(struct request *rq)
{
	u64 id = bpf_get_current_pid_tgid();
	struct piddata piddata = {};

	piddata.pid = id >> 32;
	bpf_get_current_comm(&piddata.comm, sizeof(piddata.comm));
	bpf_map_update_elem(&infobyreq, &rq, &piddata, 0);
	return 0;
}

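/* request accounting starts: attribute the request to the current task */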
69 SEC("fentry/blk_account_io_start")
BPF_PROG(blk_account_io_start,struct request * rq)70 int BPF_PROG(blk_account_io_start, struct request *rq)
71 {
72 	if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
73 		return 0;
74 
75 	return trace_pid(rq);
76 }
77 
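/* a bio merged into an existing request is attributed the same way */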
78 SEC("kprobe/blk_account_io_merge_bio")
BPF_KPROBE(blk_account_io_merge_bio,struct request * rq)79 int BPF_KPROBE(blk_account_io_merge_bio, struct request *rq)
80 {
81 	if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
82 		return 0;
83 
84 	return trace_pid(rq);
85 }
86 
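/*
 * Record the insert or issue timestamp for a request; on first sight,
 * resolve the device number and apply the optional device filter.
 */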
static __always_inline
int trace_rq_start(struct request *rq, bool insert)
{
	struct stage *stagep, stage = {};
	u64 ts = bpf_ktime_get_ns();

	stagep = bpf_map_lookup_elem(&start, &rq);
	if (!stagep) {
		struct gendisk *disk = BPF_CORE_READ(rq, rq_disk);

		stage.dev = disk ? MKDEV(BPF_CORE_READ(disk, major),
				BPF_CORE_READ(disk, first_minor)) : 0;
		if (filter_dev && targ_dev != stage.dev)
			return 0;
		stagep = &stage;
	}
	if (insert)
		stagep->insert = ts;
	else
		stagep->issue = ts;
	if (stagep == &stage)
		bpf_map_update_elem(&start, &rq, stagep, 0);
	return 0;
}

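/* request inserted into the I/O scheduler queue */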
112 SEC("tp_btf/block_rq_insert")
BPF_PROG(block_rq_insert)113 int BPF_PROG(block_rq_insert)
114 {
115 	if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
116 		return 0;
117 
118 	/**
119 	 * commit a54895fa (v5.11-rc1) changed tracepoint argument list
120 	 * from TP_PROTO(struct request_queue *q, struct request *rq)
121 	 * to TP_PROTO(struct request *rq)
122 	 */
123 	if (LINUX_KERNEL_VERSION > KERNEL_VERSION(5, 10, 0))
124 		return trace_rq_start((void *)ctx[0], true);
125 	else
126 		return trace_rq_start((void *)ctx[1], true);
127 }
128 
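/* request issued to the device driver */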
129 SEC("tp_btf/block_rq_issue")
BPF_PROG(block_rq_issue)130 int BPF_PROG(block_rq_issue)
131 {
132 	if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
133 		return 0;
134 
135 	/**
136 	 * commit a54895fa (v5.11-rc1) changed tracepoint argument list
137 	 * from TP_PROTO(struct request_queue *q, struct request *rq)
138 	 * to TP_PROTO(struct request *rq)
139 	 */
140 	if (LINUX_KERNEL_VERSION > KERNEL_VERSION(5, 10, 0))
141 		return trace_rq_start((void *)ctx[0], false);
142 	else
143 		return trace_rq_start((void *)ctx[1], false);
144 }
145 
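/*
 * Request completed: compute the device latency (issue -> complete) and,
 * if requested, the queued time (insert -> issue), then emit one event.
 */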
146 SEC("tp_btf/block_rq_complete")
BPF_PROG(block_rq_complete,struct request * rq,int error,unsigned int nr_bytes)147 int BPF_PROG(block_rq_complete, struct request *rq, int error,
148 	     unsigned int nr_bytes)
149 {
150 	if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
151 		return 0;
152 
153 	u64 ts = bpf_ktime_get_ns();
154 	struct piddata *piddatap;
155 	struct event event = {};
156 	struct stage *stagep;
157 	s64 delta;
158 
	stagep = bpf_map_lookup_elem(&start, &rq);
	if (!stagep)
		return 0;
	delta = (s64)(ts - stagep->issue);
	if (delta < 0)
		goto cleanup;
	piddatap = bpf_map_lookup_elem(&infobyreq, &rq);
	if (!piddatap) {
		event.comm[0] = '?';
	} else {
		__builtin_memcpy(&event.comm, piddatap->comm,
				sizeof(event.comm));
		event.pid = piddatap->pid;
	}
	event.delta = delta;
	if (targ_queued && BPF_CORE_READ(rq, q, elevator)) {
		if (!stagep->insert)
			event.qdelta = -1; /* insert stage missed or not recorded */
		else
			event.qdelta = stagep->issue - stagep->insert;
	}
	event.ts = ts;
	event.sector = rq->__sector;
	event.len = rq->__data_len;
	event.cmd_flags = rq->cmd_flags;
	event.dev = stagep->dev;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event,
			sizeof(event));

cleanup:
	bpf_map_delete_elem(&start, &rq);
	bpf_map_delete_elem(&infobyreq, &rq);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";