// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2020 Wenbo Zhang
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_tracing.h>
#include "biosnoop.h"

#define MAX_ENTRIES	10240

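/*
 * Runtime knobs: these const volatile globals live in the BPF object's
 * read-only data section, so the userspace loader can set them (cgroup
 * filtering, queued-time accounting, device filtering) before the
 * programs are loaded and verified.
 */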
const volatile bool filter_cg = false;
const volatile bool targ_queued = false;
const volatile bool filter_dev = false;
const volatile __u32 targ_dev = 0;

extern __u32 LINUX_KERNEL_VERSION __kconfig;

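/*
 * Single-slot cgroup array, consulted via bpf_current_task_under_cgroup()
 * when filter_cg is set.
 */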
struct {
	__uint(type, BPF_MAP_TYPE_CGROUP_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, 1);
} cgroup_map SEC(".maps");

struct piddata {
	char comm[TASK_COMM_LEN];
	u32 pid;
};

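/*
 * Per-request cache of the submitting task's pid and comm, keyed by the
 * struct request pointer and filled in when the I/O is first accounted.
 */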
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, MAX_ENTRIES);
	__type(key, struct request *);
	__type(value, struct piddata);
	__uint(map_flags, BPF_F_NO_PREALLOC);
} infobyreq SEC(".maps");

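/*
 * Per-request timing state: when the request was inserted into the queue,
 * when it was issued to the device, and which device it targets.
 */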
struct stage {
	u64 insert;
	u64 issue;
	__u32 dev;
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, MAX_ENTRIES);
	__type(key, struct request *);
	__type(value, struct stage);
} start SEC(".maps");

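/* Perf event array used to push completed-I/O events to userspace. */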
struct {
	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(u32));
} events SEC(".maps");

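/*
 * Remember the current task's pid and comm for this request so the
 * completion handler can attribute the I/O to the task that started it.
 */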
static __always_inline
int trace_pid(struct request *rq)
{
	u64 id = bpf_get_current_pid_tgid();
	struct piddata piddata = {};

	piddata.pid = id >> 32;
	bpf_get_current_comm(&piddata.comm, sizeof(piddata.comm));
	bpf_map_update_elem(&infobyreq, &rq, &piddata, 0);
	return 0;
}

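/*
 * Attach at blk_account_io_start() to capture the task that initiates
 * each request.
 */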
SEC("fentry/blk_account_io_start")
int BPF_PROG(blk_account_io_start, struct request *rq)
{
	if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
		return 0;

	return trace_pid(rq);
}

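/* Also record the current task when a bio is merged into an existing request. */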
SEC("kprobe/blk_account_io_merge_bio")
int BPF_KPROBE(blk_account_io_merge_bio, struct request *rq)
{
	if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
		return 0;

	return trace_pid(rq);
}

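/*
 * Record a timestamp for the request: either its queue-insert time or its
 * device-issue time. On first sight of a request, resolve its device
 * number and apply the optional device filter.
 */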
static __always_inline
int trace_rq_start(struct request *rq, bool insert)
{
	struct stage *stagep, stage = {};
	u64 ts = bpf_ktime_get_ns();

	stagep = bpf_map_lookup_elem(&start, &rq);
	if (!stagep) {
		struct gendisk *disk = BPF_CORE_READ(rq, rq_disk);

		stage.dev = disk ? MKDEV(BPF_CORE_READ(disk, major),
					BPF_CORE_READ(disk, first_minor)) : 0;
		if (filter_dev && targ_dev != stage.dev)
			return 0;
		stagep = &stage;
	}
	if (insert)
		stagep->insert = ts;
	else
		stagep->issue = ts;
	if (stagep == &stage)
		bpf_map_update_elem(&start, &rq, stagep, 0);
	return 0;
}

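/* Request inserted into the I/O scheduler queue: record the insert timestamp. */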
SEC("tp_btf/block_rq_insert")
int BPF_PROG(block_rq_insert)
{
	if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
		return 0;

	/**
	 * commit a54895fa (v5.11-rc1) changed tracepoint argument list
	 * from TP_PROTO(struct request_queue *q, struct request *rq)
	 * to TP_PROTO(struct request *rq)
	 */
	if (LINUX_KERNEL_VERSION > KERNEL_VERSION(5, 10, 0))
		return trace_rq_start((void *)ctx[0], true);
	else
		return trace_rq_start((void *)ctx[1], true);
}

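/* Request issued to the device driver: record the issue timestamp. */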
SEC("tp_btf/block_rq_issue")
int BPF_PROG(block_rq_issue)
{
	if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
		return 0;

	/**
	 * commit a54895fa (v5.11-rc1) changed tracepoint argument list
	 * from TP_PROTO(struct request_queue *q, struct request *rq)
	 * to TP_PROTO(struct request *rq)
	 */
	if (LINUX_KERNEL_VERSION > KERNEL_VERSION(5, 10, 0))
		return trace_rq_start((void *)ctx[0], false);
	else
		return trace_rq_start((void *)ctx[1], false);
}

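/*
 * Request completed: compute device latency (and, optionally, time spent
 * queued in the scheduler), fill in the event, emit it to userspace, and
 * drop the per-request state.
 */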
SEC("tp_btf/block_rq_complete")
int BPF_PROG(block_rq_complete, struct request *rq, int error,
	     unsigned int nr_bytes)
{
	u64 ts = bpf_ktime_get_ns();
	struct piddata *piddatap;
	struct event event = {};
	struct stage *stagep;
	s64 delta;

	if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0))
		return 0;

	stagep = bpf_map_lookup_elem(&start, &rq);
	if (!stagep)
		return 0;
	delta = (s64)(ts - stagep->issue);
	if (delta < 0)
		goto cleanup;
	piddatap = bpf_map_lookup_elem(&infobyreq, &rq);
	if (!piddatap) {
		event.comm[0] = '?';
	} else {
		__builtin_memcpy(&event.comm, piddatap->comm,
				sizeof(event.comm));
		event.pid = piddatap->pid;
	}
	event.delta = delta;
	if (targ_queued && BPF_CORE_READ(rq, q, elevator)) {
		if (!stagep->insert)
			event.qdelta = -1;	/* missed or never inserted */
		else
			event.qdelta = stagep->issue - stagep->insert;
	}
	event.ts = ts;
	event.sector = rq->__sector;
	event.len = rq->__data_len;
	event.cmd_flags = rq->cmd_flags;
	event.dev = stagep->dev;
	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event,
			sizeof(event));

cleanup:
	bpf_map_delete_elem(&start, &rq);
	bpf_map_delete_elem(&infobyreq, &rq);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";