// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Google LLC.
 *
 * Based on klockstat from BCC by Jiri Olsa and others
 * 2021-10-26   Barret Rhoden   Created this.
 */
#include "vmlinux.h"
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "klockstat.h"
#include "bits.bpf.h"

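/*
 * Optional filters, set by user space before the program is loaded.
 * A value of 0 (or NULL for targ_lock) means "do not filter".
 */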
const volatile pid_t targ_tgid = 0;
const volatile pid_t targ_pid = 0;
struct mutex *const volatile targ_lock = NULL;

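/*
 * Call stacks captured at contention time, keyed by the id returned from
 * bpf_get_stackid().
 */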
struct {
	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
	__uint(max_entries, MAX_ENTRIES);
	__uint(key_size, sizeof(u32));
	__uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
} stack_map SEC(".maps");

/*
 * Uniquely identifies a task grabbing a particular lock; a task can only hold
 * the same lock once (non-recursive mutexes).
 */
struct task_lock {
	u64 task_id;
	u64 lock_ptr;
};

struct lockholder_info {
	s32 stack_id;
	u64 task_id;
	u64 try_at;
	u64 acq_at;
	u64 rel_at;
};

struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, MAX_ENTRIES);
	__type(key, struct task_lock);
	__type(value, struct lockholder_info);
} lockholder_map SEC(".maps");

/*
 * Keyed by stack_id.
 *
 * Multiple call sites may have the same underlying lock, but we only know the
 * stats for a particular stack frame.  Multiple tasks may have the same
 * stack frame.
 */
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, MAX_ENTRIES);
	__type(key, s32);
	__type(value, struct lock_stat);
} stat_map SEC(".maps");

static bool tracing_task(u64 task_id)
{
	u32 tgid = task_id >> 32;
	u32 pid = task_id;

	if (targ_tgid && targ_tgid != tgid)
		return false;
	if (targ_pid && targ_pid != pid)
		return false;
	return true;
}

static void lock_contended(void *ctx, struct mutex *lock)
{
	u64 task_id;
	struct lockholder_info li[1] = {0};
	struct task_lock tl = {};

	if (targ_lock && targ_lock != lock)
		return;
	task_id = bpf_get_current_pid_tgid();
	if (!tracing_task(task_id))
		return;

	li->task_id = task_id;
	/*
	 * Skip 4 frames, e.g.:
	 *       __this_module+0x34ef
	 *       __this_module+0x34ef
	 *       __this_module+0x8c44
	 *             mutex_lock+0x5
	 *
	 * Note: if you make major changes to this bpf program, double check
	 * that you aren't skipping too many frames.
	 */
	li->stack_id = bpf_get_stackid(ctx, &stack_map,
				       4 | BPF_F_FAST_STACK_CMP);
	/* Legit failures include EEXIST */
	if (li->stack_id < 0)
		return;
	li->try_at = bpf_ktime_get_ns();

	tl.task_id = task_id;
	tl.lock_ptr = (u64)lock;
	bpf_map_update_elem(&lockholder_map, &tl, li, BPF_ANY);
}

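/*
 * Drop the pending lockholder entry when a sleeping lock attempt returns
 * with an error, so the aborted attempt is never accounted.
 */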
static void lock_aborted(struct mutex *lock)
{
	u64 task_id;
	struct task_lock tl = {};

	if (targ_lock && targ_lock != lock)
		return;
	task_id = bpf_get_current_pid_tgid();
	if (!tracing_task(task_id))
		return;
	tl.task_id = task_id;
	tl.lock_ptr = (u64)lock;
	bpf_map_delete_elem(&lockholder_map, &tl);
}

static void lock_acquired(struct mutex *lock)
{
	u64 task_id;
	struct lockholder_info *li;
	struct task_lock tl = {};

	if (targ_lock && targ_lock != lock)
		return;
	task_id = bpf_get_current_pid_tgid();
	if (!tracing_task(task_id))
		return;

	tl.task_id = task_id;
	tl.lock_ptr = (u64)lock;
	li = bpf_map_lookup_elem(&lockholder_map, &tl);
	if (!li)
		return;

	li->acq_at = bpf_ktime_get_ns();
}

static void account(struct lockholder_info *li)
{
	struct lock_stat *ls;
	u64 delta;

	/*
	 * Multiple threads may have the same stack_id.  Even though we are
	 * holding the lock, dynamically allocated mutexes can have the same
	 * callgraph but represent different locks.  They will be accounted as
	 * the same lock, which is what we want, but we need to use atomics to
	 * avoid corruption, especially for the total_time variables.
	 */
	ls = bpf_map_lookup_elem(&stat_map, &li->stack_id);
	if (!ls) {
		struct lock_stat fresh = {0};

		bpf_map_update_elem(&stat_map, &li->stack_id, &fresh, BPF_ANY);
		ls = bpf_map_lookup_elem(&stat_map, &li->stack_id);
		if (!ls)
			return;
	}

	delta = li->acq_at - li->try_at;
	__sync_fetch_and_add(&ls->acq_count, 1);
	__sync_fetch_and_add(&ls->acq_total_time, delta);
	if (delta > READ_ONCE(ls->acq_max_time)) {
		WRITE_ONCE(ls->acq_max_time, delta);
		WRITE_ONCE(ls->acq_max_id, li->task_id);
		/*
		 * Potentially racy, if multiple threads think they are the max,
		 * so you may get a clobbered write.
		 */
		bpf_get_current_comm(ls->acq_max_comm, TASK_COMM_LEN);
	}

	delta = li->rel_at - li->acq_at;
	__sync_fetch_and_add(&ls->hld_count, 1);
	__sync_fetch_and_add(&ls->hld_total_time, delta);
	if (delta > READ_ONCE(ls->hld_max_time)) {
		WRITE_ONCE(ls->hld_max_time, delta);
		WRITE_ONCE(ls->hld_max_id, li->task_id);
		bpf_get_current_comm(ls->hld_max_comm, TASK_COMM_LEN);
	}
}

static void lock_released(struct mutex *lock)
{
	u64 task_id;
	struct lockholder_info *li;
	struct task_lock tl = {};

	if (targ_lock && targ_lock != lock)
		return;
	task_id = bpf_get_current_pid_tgid();
	if (!tracing_task(task_id))
		return;
	tl.task_id = task_id;
	tl.lock_ptr = (u64)lock;
	li = bpf_map_lookup_elem(&lockholder_map, &tl);
	if (!li)
		return;

	li->rel_at = bpf_ktime_get_ns();
	account(li);

	bpf_map_delete_elem(&lockholder_map, &tl);
}

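/*
 * Attach points: the fentry probes mark the start of a lock attempt, and
 * the matching fexit probes record whether it succeeded, failed, or was
 * aborted by a signal.
 */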
214 SEC("fentry/mutex_lock")
BPF_PROG(mutex_lock,struct mutex * lock)215 int BPF_PROG(mutex_lock, struct mutex *lock)
216 {
217 	lock_contended(ctx, lock);
218 	return 0;
219 }
220 
221 SEC("fexit/mutex_lock")
BPF_PROG(mutex_lock_exit,struct mutex * lock,long ret)222 int BPF_PROG(mutex_lock_exit, struct mutex *lock, long ret)
223 {
224 	lock_acquired(lock);
225 	return 0;
226 }
227 
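/*
 * mutex_trylock() never sleeps and returns 1 on success, 0 if the lock is
 * already held, so only record the (instantaneous) acquisition on success.
 */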
228 SEC("fexit/mutex_trylock")
BPF_PROG(mutex_trylock_exit,struct mutex * lock,long ret)229 int BPF_PROG(mutex_trylock_exit, struct mutex *lock, long ret)
230 {
231 	if (ret) {
232 		lock_contended(ctx, lock);
233 		lock_acquired(lock);
234 	}
235 	return 0;
236 }
237 
SEC("fentry/mutex_lock_interruptible")
int BPF_PROG(mutex_lock_interruptible, struct mutex *lock)
{
	lock_contended(ctx, lock);
	return 0;
}

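/*
 * mutex_lock_interruptible() returns 0 on success or -EINTR if the sleep
 * was interrupted by a signal; in the latter case the attempt is dropped.
 */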
SEC("fexit/mutex_lock_interruptible")
int BPF_PROG(mutex_lock_interruptible_exit, struct mutex *lock, long ret)
{
	if (ret)
		lock_aborted(lock);
	else
		lock_acquired(lock);
	return 0;
}

SEC("fentry/mutex_lock_killable")
int BPF_PROG(mutex_lock_killable, struct mutex *lock)
{
	lock_contended(ctx, lock);
	return 0;
}

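/*
 * Same as the interruptible variant, except mutex_lock_killable() only
 * aborts the wait on a fatal signal.
 */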
SEC("fexit/mutex_lock_killable")
int BPF_PROG(mutex_lock_killable_exit, struct mutex *lock, long ret)
{
	if (ret)
		lock_aborted(lock);
	else
		lock_acquired(lock);
	return 0;
}

SEC("fentry/mutex_unlock")
int BPF_PROG(mutex_unlock, struct mutex *lock)
{
	lock_released(lock);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";