// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Google LLC.
 *
 * Based on klockstat from BCC by Jiri Olsa and others
 * 2021-10-26   Barret Rhoden   Created this.
 */
#include "vmlinux.h"
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "klockstat.h"
#include "bits.bpf.h"

const volatile pid_t targ_tgid = 0;
const volatile pid_t targ_pid = 0;
struct mutex *const volatile targ_lock = NULL;
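
/*
 * These read-only globals act as filters and are meant to be set from
 * userspace before the program is loaded (0/NULL means "no filter").
 * A minimal sketch, assuming a libbpf skeleton named "klockstat_bpf"
 * (the actual loader may differ):
 *
 *	struct klockstat_bpf *obj = klockstat_bpf__open();
 *	obj->rodata->targ_tgid = some_tgid;
 *	klockstat_bpf__load(obj);
 *	klockstat_bpf__attach(obj);
 */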

struct {
	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
	__uint(max_entries, MAX_ENTRIES);
	__uint(key_size, sizeof(u32));
	__uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
} stack_map SEC(".maps");

/*
 * Uniquely identifies a task grabbing a particular lock; a task can only hold
 * the same lock once (non-recursive mutexes).
 */
struct task_lock {
	u64 task_id;
	u64 lock_ptr;
};

struct lockholder_info {
	s32 stack_id;
	u64 task_id;
	u64 try_at;
	u64 acq_at;
	u64 rel_at;
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, MAX_ENTRIES);
	__type(key, struct task_lock);
	__type(value, struct lockholder_info);
} lockholder_map SEC(".maps");

/*
 * Keyed by stack_id.
 *
 * Multiple call sites may have the same underlying lock, but we only know the
 * stats for a particular stack frame. Multiple tasks may have the same
 * stack frame.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, MAX_ENTRIES);
	__type(key, s32);
	__type(value, struct lock_stat);
} stat_map SEC(".maps");
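
/*
 * Userspace reporting sketch (an assumption about the consumer, not part of
 * this program): iterate stat_map with bpf_map_get_next_key() and
 * bpf_map_lookup_elem(), then look each stack_id up in stack_map to get up
 * to PERF_MAX_STACK_DEPTH instruction pointers for symbolization:
 *
 *	u64 ip[PERF_MAX_STACK_DEPTH];
 *	bpf_map_lookup_elem(stack_map_fd, &stack_id, ip);
 */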

/* Returns true if the task matches the (optional) tgid/pid filters. */
static bool tracing_task(u64 task_id)
{
	u32 tgid = task_id >> 32;
	u32 pid = task_id;

	if (targ_tgid && targ_tgid != tgid)
		return false;
	if (targ_pid && targ_pid != pid)
		return false;
	return true;
}
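
/*
 * Note on task_id: bpf_get_current_pid_tgid() packs the tgid (userspace
 * "PID") into the upper 32 bits and the thread's pid (userspace "TID") into
 * the lower 32 bits. For example, tgid 1000 and pid 1003 yield
 * task_id == 0x3e8000003eb.
 */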

/*
 * Records the time at which the current task started waiting for @lock and
 * stashes it in lockholder_map, keyed by (task, lock).
 */
static void lock_contended(void *ctx, struct mutex *lock)
{
	u64 task_id;
	struct lockholder_info li[1] = {0};	/* array-of-one so li acts as a pointer */
	struct task_lock tl = {};

	if (targ_lock && targ_lock != lock)
		return;
	task_id = bpf_get_current_pid_tgid();
	if (!tracing_task(task_id))
		return;

	li->task_id = task_id;
	/*
	 * Skip 4 frames, e.g.:
	 *	__this_module+0x34ef
	 *	__this_module+0x34ef
	 *	__this_module+0x8c44
	 *	mutex_lock+0x5
	 *
	 * Note: if you make major changes to this bpf program, double check
	 * that you aren't skipping too many frames.
	 */
	li->stack_id = bpf_get_stackid(ctx, &stack_map,
				       4 | BPF_F_FAST_STACK_CMP);
	/* Legit failures include EEXIST */
	if (li->stack_id < 0)
		return;
	li->try_at = bpf_ktime_get_ns();

	tl.task_id = task_id;
	tl.lock_ptr = (u64)lock;
	bpf_map_update_elem(&lockholder_map, &tl, li, BPF_ANY);
}

/* Drops the pending entry if the lock attempt was interrupted or killed. */
static void lock_aborted(struct mutex *lock)
{
	u64 task_id;
	struct task_lock tl = {};

	if (targ_lock && targ_lock != lock)
		return;
	task_id = bpf_get_current_pid_tgid();
	if (!tracing_task(task_id))
		return;
	tl.task_id = task_id;
	tl.lock_ptr = (u64)lock;
	bpf_map_delete_elem(&lockholder_map, &tl);
}

/* Timestamps the moment the lock was actually acquired. */
static void lock_acquired(struct mutex *lock)
{
	u64 task_id;
	struct lockholder_info *li;
	struct task_lock tl = {};

	if (targ_lock && targ_lock != lock)
		return;
	task_id = bpf_get_current_pid_tgid();
	if (!tracing_task(task_id))
		return;

	tl.task_id = task_id;
	tl.lock_ptr = (u64)lock;
	li = bpf_map_lookup_elem(&lockholder_map, &tl);
	if (!li)
		return;

	li->acq_at = bpf_ktime_get_ns();
}

/* Folds one completed acquire/hold cycle into the per-stack statistics. */
static void account(struct lockholder_info *li)
{
	struct lock_stat *ls;
	u64 delta;

	/*
	 * Multiple threads may have the same stack_id. Even though we are
	 * holding the lock, dynamically allocated mutexes can have the same
	 * callgraph but represent different locks. They will be accounted as
	 * the same lock, which is what we want, but we need to use atomics to
	 * avoid corruption, especially for the total_time variables.
	 */
	ls = bpf_map_lookup_elem(&stat_map, &li->stack_id);
	if (!ls) {
		struct lock_stat fresh = {0};

		bpf_map_update_elem(&stat_map, &li->stack_id, &fresh, BPF_ANY);
		ls = bpf_map_lookup_elem(&stat_map, &li->stack_id);
		if (!ls)
			return;
	}

	delta = li->acq_at - li->try_at;
	__sync_fetch_and_add(&ls->acq_count, 1);
	__sync_fetch_and_add(&ls->acq_total_time, delta);
	if (delta > READ_ONCE(ls->acq_max_time)) {
		WRITE_ONCE(ls->acq_max_time, delta);
		WRITE_ONCE(ls->acq_max_id, li->task_id);
		/*
		 * Potentially racy, if multiple threads think they are the
		 * max, so you may get a clobbered write.
		 */
		bpf_get_current_comm(ls->acq_max_comm, TASK_COMM_LEN);
	}

	delta = li->rel_at - li->acq_at;
	__sync_fetch_and_add(&ls->hld_count, 1);
	__sync_fetch_and_add(&ls->hld_total_time, delta);
	if (delta > READ_ONCE(ls->hld_max_time)) {
		WRITE_ONCE(ls->hld_max_time, delta);
		WRITE_ONCE(ls->hld_max_id, li->task_id);
		bpf_get_current_comm(ls->hld_max_comm, TASK_COMM_LEN);
	}
}

/* Timestamps the release, accounts the full cycle, and frees the entry. */
static void lock_released(struct mutex *lock)
{
	u64 task_id;
	struct lockholder_info *li;
	struct task_lock tl = {};

	if (targ_lock && targ_lock != lock)
		return;
	task_id = bpf_get_current_pid_tgid();
	if (!tracing_task(task_id))
		return;
	tl.task_id = task_id;
	tl.lock_ptr = (u64)lock;
	li = bpf_map_lookup_elem(&lockholder_map, &tl);
	if (!li)
		return;

	li->rel_at = bpf_ktime_get_ns();
	account(li);

	bpf_map_delete_elem(&lockholder_map, &tl);
}
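/*
 * Attachment strategy: an fentry program on each mutex_lock* variant records
 * when the wait begins, and a matching fexit program records when the lock
 * was acquired (or the attempt aborted). mutex_unlock only needs an fentry
 * hook. BPF_PROG() is a libbpf convenience macro from bpf_tracing.h that
 * unpacks the typed arguments and also provides the hidden "ctx" parameter
 * used below.
 */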
SEC("fentry/mutex_lock")
int BPF_PROG(mutex_lock, struct mutex *lock)
{
	lock_contended(ctx, lock);
	return 0;
}

SEC("fexit/mutex_lock")
int BPF_PROG(mutex_lock_exit, struct mutex *lock, long ret)
{
	lock_acquired(lock);
	return 0;
}
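/*
 * A successful trylock never sleeps, so lock_contended() and lock_acquired()
 * run back to back and the recorded acquisition wait is effectively zero;
 * only the hold time is meaningful. A failed trylock is not recorded at all.
 */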
SEC("fexit/mutex_trylock")
int BPF_PROG(mutex_trylock_exit, struct mutex *lock, long ret)
{
	if (ret) {
		lock_contended(ctx, lock);
		lock_acquired(lock);
	}
	return 0;
}

SEC("fentry/mutex_lock_interruptible")
int BPF_PROG(mutex_lock_interruptible, struct mutex *lock)
{
	lock_contended(ctx, lock);
	return 0;
}

SEC("fexit/mutex_lock_interruptible")
int BPF_PROG(mutex_lock_interruptible_exit, struct mutex *lock, long ret)
{
	if (ret)
		lock_aborted(lock);
	else
		lock_acquired(lock);
	return 0;
}

SEC("fentry/mutex_lock_killable")
int BPF_PROG(mutex_lock_killable, struct mutex *lock)
{
	lock_contended(ctx, lock);
	return 0;
}

SEC("fexit/mutex_lock_killable")
int BPF_PROG(mutex_lock_killable_exit, struct mutex *lock, long ret)
{
	if (ret)
		lock_aborted(lock);
	else
		lock_acquired(lock);
	return 0;
}

SEC("fentry/mutex_unlock")
int BPF_PROG(mutex_unlock, struct mutex *lock)
{
	lock_released(lock);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";