/*
 * Copyright (c) 2024 Huawei Device Co., Ltd. All rights reserved.
 *
 * The hiebpf.bpf.c is dual licensed: you can use it either under the terms of
 * the GPL V2, or the 3-Clause BSD license, at your option.
 * See the LICENSE file in the root of this repository for complete details.
 */

#include <linux/version.h>
#include "vmlinux.h"
#include "bpf_helpers.h"
#include "bpf_tracing.h"
#include "bpf_core_read.h"
#include "hiebpf_types.h"
#include "bpf_log_writer.h"

#ifndef VM_FAULT_ERROR
#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | \
                        VM_FAULT_SIGSEGV | VM_FAULT_HWPOISON | \
                        VM_FAULT_HWPOISON_LARGE | VM_FAULT_FALLBACK)
#endif

extern int LINUX_KERNEL_VERSION __kconfig;
// read-only configuration; expected to be set by the loader before the BPF object is loaded
const volatile unsigned int g_stack_limit = 0;
static const int SHIFT_32 = 32;

// global configuration data
// const volatile int tracer_pid = -1;
// const volatile int bpf_log_level = BPF_LOG_DEBUG;
// const volatile int max_stack_depth = MAX_STACK_LIMIT;
// const volatile int target_pids[MAX_TARGET_PIDS];

/*********************************************************************************
 * use BPF maps to store global configuration data instead, in case global
 * variables are not available
 ********************/
struct {
    /*
     * config_var_map[0] = tracer_pid, tracer_pid != -1 means the tracer itself is not traced
     * config_var_map[1] = bpf_log_level
     * config_var_map[2] = max_stack_limit
     * config_var_map[3] = max_stack_depth
     * config_var_map[4] = unwind_stack, non-zero means the user stack is unwound, otherwise it is not
     */
    __uint(type, BPF_MAP_TYPE_ARRAY);
    __uint(key_size, sizeof(u32));
    __uint(value_size, sizeof(u32));
    __uint(max_entries, NR_CONFIG_VARIABLES);
} config_var_map SEC(".maps");

struct {
    /*
     * target_pid_map[0] != 0 means tracing all processes
     */
    __uint(type, BPF_MAP_TYPE_ARRAY);
    __uint(key_size, sizeof(u32));
    __uint(value_size, sizeof(u32));
    __uint(max_entries, MAX_TARGET_PIDS + 1);
} target_pid_map SEC(".maps");
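
/*
 * Userspace is expected to fill both configuration maps before the programs
 * are attached. A minimal libbpf-based sketch (illustrative only: the skeleton
 * name and the chosen values are assumptions, not part of this file):
 *
 *     u32 key = 0;
 *     u32 val = getpid();   // config_var_map[0] = tracer_pid
 *     bpf_map_update_elem(bpf_map__fd(skel->maps.config_var_map), &key, &val, BPF_ANY);
 *     val = 1;              // target_pid_map[0] != 0: trace all processes
 *     bpf_map_update_elem(bpf_map__fd(skel->maps.target_pid_map), &key, &val, BPF_ANY);
 */
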
/**********************
 *************************************************************************************/

/******************************** BPF maps BEGIN*************************************/
/*start event map*/
struct {
    /* Since syscalls of the same thread never overlap,
     * we can simply use pid_tgid as the identifier of the start of a syscall
     */
    __uint(type, BPF_MAP_TYPE_HASH);
    __uint(key_size, sizeof(u64));
    __uint(value_size, sizeof(struct start_event_t));
    __uint(max_entries, MAX_START_EVENTS_NUM);
} start_event_map SEC(".maps");

/*pftrace stats map*/
struct {
    __uint(type, BPF_MAP_TYPE_ARRAY);
    __uint(key_size, sizeof(u32));
    __uint(value_size, sizeof(struct pf_stat_t));
    __uint(max_entries, PF_MAX_EVENT_TYPE);
} pftrace_stats_map SEC(".maps");

/*bpf ringbuffers*/
struct {
    __uint(type, BPF_MAP_TYPE_RINGBUF);
    __uint(max_entries, BPF_RINGBUF_SIZE);
} bpf_ringbuf_map SEC(".maps");
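
/*
 * All complete events from every tracer share this single ring buffer. On the
 * host side a libbpf consumer would typically look like the sketch below
 * (illustrative only: handle_event and the skeleton name are assumptions):
 *
 *     struct ring_buffer *rb = ring_buffer__new(bpf_map__fd(skel->maps.bpf_ringbuf_map),
 *                                               handle_event, NULL, NULL);
 *     while (!exiting) {
 *         ring_buffer__poll(rb, 100);   // timeout in milliseconds
 *     }
 */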

/********************************* BPF maps END *************************************/

/******************************** inline funcs BEGIN ********************************/
static __always_inline
int unwind_stack()
{
    u32 index = UNWIND_FLAG_INDEX;
    const u32 *unwind_ptr = bpf_map_lookup_elem(&config_var_map, &index);
    u32 unwind = 0;
    int err = bpf_probe_read_kernel(&unwind, sizeof(u32), unwind_ptr);
    if (err) {
        BPFLOGW(BPF_TRUE, "failed to read unwind configuration");
    }
    return unwind;
}

static __always_inline
int emit_fstrace_event(void* ctx, int64_t retval)
{
    // get exit timestamp as soon as possible
    u64 ctime = bpf_ktime_get_ns();
    u64 pid_tgid = bpf_get_current_pid_tgid();

    const u64 event_size = sizeof(struct fstrace_cmplt_event_t);
    struct fstrace_cmplt_event_t *cmplt_event = bpf_ringbuf_reserve(&bpf_ringbuf_map, event_size, 0);
    if (cmplt_event == NULL) {
        BPFLOGD(BPF_TRUE, "failed to reserve space for fstrace event from BPF ringbuffer");
        return -1;
    }
    __builtin_memset(cmplt_event, 0, event_size);

    const struct fstrace_start_event_t* start_event = bpf_map_lookup_elem(&start_event_map, &pid_tgid);
    int err = bpf_probe_read_kernel(&cmplt_event->start_event, sizeof(struct fstrace_start_event_t), start_event);
    if (err) {
        BPFLOGD(BPF_TRUE, "fstrace event discarded: failed to read fstrace_start_event");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }
    if (cmplt_event->start_event.type == 0) {
        BPFLOGI(BPF_TRUE, "fstrace event discarded: invalid fstrace start event");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }

    cmplt_event->ctime = ctime;
    cmplt_event->tracer = FSTRACE;
    // pid_tgid packs the thread id in the lower 32 bits and the tgid in the upper 32 bits
    cmplt_event->pid = (u32) pid_tgid;
    cmplt_event->tgid = (u32) (pid_tgid >> SHIFT_32);
    // bpf_get_current_uid_gid() packs the uid in the lower 32 bits and the gid in the upper 32 bits
    cmplt_event->uid = bpf_get_current_uid_gid();
    cmplt_event->gid = (bpf_get_current_uid_gid() >> SHIFT_32);
    cmplt_event->retval = retval;
    err = bpf_get_current_comm(cmplt_event->comm, MAX_COMM_LEN);
    if (err) {
        BPFLOGD(BPF_TRUE, "fstrace event discarded: failed to get process command");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }

    // get user callchain
    if (unwind_stack()) {
        cmplt_event->nips =
            bpf_get_stack(ctx, cmplt_event->ips, g_stack_limit * sizeof(u64), BPF_F_USER_STACK) / sizeof(u64);
    }

    // send out the complete event data to the BPF ring buffer
    bpf_ringbuf_submit(cmplt_event, BPF_RB_FORCE_WAKEUP);
    return 0;
}

static __always_inline
int handle_pftrace_start_event(struct vm_fault *vmf, u32 type)
{
    struct start_event_t start_event = {};
    __builtin_memset(&start_event, 0, sizeof(start_event));
    struct pftrace_start_event_t *pf_se = &start_event.pf_se;
    pf_se->stime = bpf_ktime_get_ns();
    pf_se->type = type;
    int err = bpf_probe_read_kernel(&pf_se->addr, sizeof(vmf->address), &vmf->address);
    if (err) {
        BPFLOGW(BPF_TRUE, "failed to read page fault address for pftrace start event");
        // clear the start event to indicate the error
        __builtin_memset(&start_event, 0, sizeof(start_event));
    }
    u64 pid_tgid = bpf_get_current_pid_tgid();
    err = (int) bpf_map_update_elem(&start_event_map, &pid_tgid, &start_event, BPF_ANY);
    if (err < 0) {
        // this should never happen
        BPFLOGF(BPF_TRUE, "failed to update pftrace_start_event for pid_tgid = %lld", pid_tgid);
        return -1;
    }
    return 0;
}

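/*
 * Page fault statistics are kept as running aggregates per event type, so no
 * per-sample history has to be stored. With n = stat.count samples seen so
 * far and a new sample d (the fault duration), the update below computes
 *     tot' = tot + d
 *     avg' = tot' / (n + 1)
 *     dev' = (d * d + n * dev + tot * avg - tot' * avg') / (n + 1)
 * which is the incremental form of the population variance of the durations
 * (using sum_of_squares = n * dev + tot * avg).
 */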
static __always_inline
int read_modify_update_page_fault_stats(u32 type, u32 duration)
{
    const struct pf_stat_t *stat_ptr = bpf_map_lookup_elem(&pftrace_stats_map, &type);
    if (stat_ptr == NULL) {
        BPFLOGD(BPF_TRUE, "failed to lookup pftrace stat");
        return -1;
    }
    struct pf_stat_t stat;
    int err = (int) bpf_probe_read_kernel(&stat, sizeof(stat), stat_ptr);
    if (err < 0) {
        BPFLOGD(BPF_TRUE, "failed to read pftrace stat");
        return -1;
    }
    u32 tot_duration = stat.tot_duration + duration;
    u32 avg_duration = tot_duration / (stat.count + 1);
    stat.dev_duration = (duration * duration + stat.count * stat.dev_duration
                         + stat.tot_duration * stat.avg_duration - tot_duration * avg_duration)
                        / (stat.count + 1);
    ++stat.count;
    stat.tot_duration = tot_duration;
    stat.avg_duration = avg_duration;
    if (duration < stat.min_duration) {
        stat.min_duration = duration;
    } else if (duration > stat.max_duration) {
        stat.max_duration = duration;
    }
    err = (int) bpf_map_update_elem(&pftrace_stats_map, &type, &stat, BPF_ANY);
    if (err < 0) {
        BPFLOGD(BPF_TRUE, "failed to update pftrace stat");
        return -1;
    }

    return 0;
}

static __always_inline
int read_modify_update_current_type(u32 type)
{
    u64 pid_tgid = bpf_get_current_pid_tgid();
    const struct pftrace_start_event_t *se_ptr = bpf_map_lookup_elem(&start_event_map, &pid_tgid);
    struct pftrace_start_event_t start_event;
    int err = bpf_probe_read_kernel(&start_event, sizeof(start_event), se_ptr);
    if (err) {
        BPFLOGW(BPF_TRUE, "failed to read pftrace start event to update event type");
        return 0;
    }
    u32 old_type = start_event.type;
    if (type) {
        start_event.type = type;
    }
    return old_type;
}

static __always_inline
int handle_return_value(struct pftrace_cmplt_event_t* cmplt_event, long long retval)
{
    switch (read_modify_update_current_type(0)) {
        case PF_PAGE_CACHE_HIT: {
            struct file *fpin = (struct file *) retval;
            if (fpin == NULL) {
                cmplt_event->size = 0;
            } else {
                cmplt_event->size = 1;
            }
            break;
        }
        case PF_FILE_BACKED_IN:
        case PF_SWAP_FROM_ZRAM:
        case PF_SWAP_FROM_DISK:
        case PF_ZERO_FILL_PAGE:
        case PF_FAKE_ZERO_PAGE:
        case PF_COPY_ON_WRITE: {
            vm_fault_t vmf_flags = (vm_fault_t) retval;
            if (vmf_flags & VM_FAULT_ERROR) {
                cmplt_event->size = 0;
            } else {
                cmplt_event->size = 1;
            }
            break;
        }
        default: return -1;
    }
    return 0;
}

static __always_inline
int emit_pftrace_event(void* ctx, int64_t retval)
{
    // get timestamp as soon as possible
    u64 ctime = bpf_ktime_get_ns();
    u64 pid_tgid = bpf_get_current_pid_tgid();

    const u64 event_size = sizeof(struct pftrace_cmplt_event_t);
    struct pftrace_cmplt_event_t* cmplt_event = bpf_ringbuf_reserve(&bpf_ringbuf_map, event_size, 0);
    if (cmplt_event == NULL) {
        BPFLOGD(BPF_TRUE, "failed to reserve space for pftrace event from BPF ringbuffer");
        return -1;
    }
    __builtin_memset(cmplt_event, 0, event_size);

    const struct pftrace_start_event_t *se_ptr = bpf_map_lookup_elem(&start_event_map, &pid_tgid);
    int err = bpf_probe_read_kernel(&cmplt_event->start_event, sizeof(cmplt_event->start_event), se_ptr);
    if (err) {
        BPFLOGI(BPF_TRUE, "pftrace event discarded: failed to read pftrace start event");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }
    if (cmplt_event->start_event.type == 0) {
        BPFLOGI(BPF_TRUE, "pftrace event discarded: invalid pftrace start event");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }
    cmplt_event->tracer = PFTRACE;
    cmplt_event->ctime = ctime;
    cmplt_event->pid = (u32) pid_tgid;
    cmplt_event->tgid = (pid_tgid >> SHIFT_32);
    cmplt_event->uid = bpf_get_current_uid_gid();
    cmplt_event->gid = (bpf_get_current_uid_gid() >> SHIFT_32);
    err = bpf_get_current_comm(cmplt_event->comm, MAX_COMM_LEN);
    if (err < 0) {
        BPFLOGD(BPF_TRUE, "pftrace event discarded: failed to get process command");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }
    err = handle_return_value(cmplt_event, retval);
    if (err) {
        BPFLOGW(BPF_TRUE, "pftrace event discarded: failed to handle pftrace return value");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }

    // get user callchain
    if (unwind_stack()) {
        cmplt_event->nips =
            bpf_get_stack(ctx, cmplt_event->ips, g_stack_limit * sizeof(u64), BPF_F_USER_STACK) / sizeof(u64);
    }

    if (read_modify_update_page_fault_stats(cmplt_event->start_event.type,
                                            cmplt_event->ctime - cmplt_event->start_event.stime) != 0) {
        BPFLOGD(BPF_TRUE, "pftrace event discarded: failed to update pftrace stats");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }

    bpf_ringbuf_submit(cmplt_event, BPF_RB_FORCE_WAKEUP);
    return 0;
}

static __always_inline
int emit_event(void* ctx, int64_t retval, u32 tracer)
{
    switch (tracer) {
        case FSTRACE: return emit_fstrace_event(ctx, retval);
        case PFTRACE: return emit_pftrace_event(ctx, retval);
        default:
            BPFLOGD(BPF_TRUE, "unknown event source with id = %d", tracer);
            return -1;
    }
}

static __always_inline
int is_target_process(const char* target_comm, const size_t comm_size)
{
    char curr_comm[MAX_COMM_LEN] = "\0";
    long retval = bpf_get_current_comm(curr_comm, sizeof(curr_comm));
    if (retval == 0) {
        size_t min_comm = comm_size < sizeof(curr_comm) ? comm_size : sizeof(curr_comm);
        for (size_t j = 0; j <= min_comm; ++j) {
            if (j == min_comm) {
                char fmt[] = "current comm is %s\n";
                bpf_trace_printk(fmt, sizeof(fmt), curr_comm);
                return 0;
            }
            if (target_comm[j] != curr_comm[j]) {
                break;
            }
        }
    }
    return -1;
}

// if pid or tgid is -1, the corresponding id of the current task is used for the check
static __always_inline
int check_current_pid(const int32_t pid, const int32_t tgid)
{
    u32 index = 0;
    const int32_t* tracer_pid_ptr = bpf_map_lookup_elem(&config_var_map, &index);
    int32_t tracer_pid = -1;
    bpf_probe_read_kernel(&tracer_pid, sizeof(int32_t), tracer_pid_ptr);
    int32_t curr_pid = pid;
    if (curr_pid < 0) {
        curr_pid = (int32_t)bpf_get_current_pid_tgid();
    }
    int32_t curr_tgid = tgid;
    if (curr_tgid < 0) {
        curr_tgid = (int32_t)(bpf_get_current_pid_tgid() >> SHIFT_32);
    }
    if (curr_pid == tracer_pid || curr_tgid == tracer_pid) {
        // the current process is the tracer itself, not a target process
        return -1;
    }
    const u32 trace_all_index = 0;
    int32_t target_pids[MAX_TARGET_PIDS + 1];
    __builtin_memset(target_pids, 0, sizeof(target_pids));
    const int32_t *target_pids_ptr = bpf_map_lookup_elem(&target_pid_map, &trace_all_index);
    if (target_pids_ptr == NULL) {
        BPFLOGW(BPF_TRUE, "failed to lookup target pid map, will trace all processes");
        return 0;
    }
    int err = bpf_probe_read_kernel(target_pids, sizeof(int32_t) * (MAX_TARGET_PIDS + 1), target_pids_ptr);
    if (err) {
        BPFLOGW(BPF_TRUE, "failed to read target pids, will trace all processes");
        return 0;
    }
    if (target_pids[trace_all_index] != 0) {
        return 0;
    }
    for (u32 index = 1; index != MAX_TARGET_PIDS; ++index) {
        if (target_pids[index] < 0) {
            break;
        }
        if (target_pids[index] == curr_pid || target_pids[index] == curr_tgid) {
            return 0;
        }
    }
    return -1;
}

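/*
 * The filename helpers below rebuild a path from kernel objects rather than
 * from user memory: get_filename_by_inode() walks the dentry chain from the
 * leaf towards the root via d_parent, appending each component followed by a
 * '/', and get_mountpoint_by_inode() then appends the mount point chain of
 * the owning superblock. The components are therefore emitted leaf-first;
 * the consumer of the strtrace event is presumably expected to re-order them
 * when decoding the record.
 */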
static __always_inline
size_t get_mountpoint_by_inode(char *filename, int len, const struct inode *host)
{
    struct list_head *mountsHead = BPF_CORE_READ(host, i_sb, s_mounts.next);
    struct mount *mnt = NULL;
    mnt = list_entry(mountsHead, struct mount, mnt_instance);
    const u32 MAX_MOUNT_POINT = 5;
    size_t pos = 0;
    for (u32 cnt = MAX_MOUNT_POINT; cnt != 0; --cnt) {
        long name_len = 0;
        const u8 *name = BPF_CORE_READ(mnt, mnt_mountpoint, d_name.name);
        if (BPF_CORE_READ(mnt, mnt_mountpoint, d_name.len) <= 1) {
            break;
        }
        name_len = bpf_probe_read_kernel_str(filename + pos, MAX_DENTRY_NAME_LEN, name);
        if (name_len <= 1) {
            BPFLOGD(BPF_TRUE, "failed to read dentry name from kernel stack buffer");
            break;
        }
        pos += (size_t)name_len;
        filename[pos - 1] = '/';
        mnt = BPF_CORE_READ(mnt, mnt_parent);
    }
    return pos;
}

static __always_inline
int get_filename_by_inode(char *filename, const size_t len, const struct inode *host)
{
    int err = 0;
    struct hlist_node *curr_hlist_node = BPF_CORE_READ(host, i_dentry.first);
    if (curr_hlist_node == NULL) {
        BPFLOGD(BPF_TRUE, "failed to get alias dentries of the inode");
        return -1;
    }

    const struct dentry *curr_dentry = NULL;
    const u32 MAX_ALIAS_DENTRY = 100;
    for (u32 cnt = MAX_ALIAS_DENTRY; cnt != 0; --cnt) {
        curr_dentry = container_of(curr_hlist_node, struct dentry, d_u);
        struct inode *curr_inode = BPF_CORE_READ(curr_dentry, d_inode);
        if (curr_inode == NULL) {
            BPFLOGD(BPF_TRUE, "failed to get the current inode");
            return -1;
        }
        if (curr_inode == host) {
            break;
        }
        curr_hlist_node = BPF_CORE_READ(curr_hlist_node, next);
        if (curr_hlist_node == NULL) {
            BPFLOGD(BPF_TRUE, "failed to get the next hlist_node");
            break;
        }
    }

    unsigned int flags = BPF_CORE_READ(curr_dentry, d_flags);
    flags = (flags & 0xFFFFFF) >> FILE_TYPE_BITS;
    if (flags != DCACHE_DIRECTORY_TYPE && flags != DCACHE_REGULAR_TYPE) {
        return 0;
    }

    size_t pos = 0;
    const u32 MAX_BACKTRACE_DEPTH = 20;
    for (u32 cnt = MAX_BACKTRACE_DEPTH; cnt != 0; --cnt) {
        if (err || curr_dentry == NULL) {
            break;
        }
        unsigned int name_len = BPF_CORE_READ(curr_dentry, d_name.len);
        const u8 *name = BPF_CORE_READ(curr_dentry, d_name.name);
        if (name_len <= 1) {
            break;
        }
        long dentry_name_len = bpf_probe_read_kernel_str(filename + pos, MAX_DENTRY_NAME_LEN, name);
        if (dentry_name_len <= 1) {
            BPFLOGD(BPF_TRUE, "failed to read dentry name from kernel stack buffer");
            break;
        }
        pos += (size_t)dentry_name_len;
        size_t index = pos > 1 ? pos - 1 : 0;
        filename[index] = '/';
        struct dentry *temp_dentry = BPF_CORE_READ(curr_dentry, d_parent);
        if (temp_dentry == curr_dentry || temp_dentry == NULL) {
            break;
        }
        curr_dentry = temp_dentry;
    }
    pos += get_mountpoint_by_inode(filename + pos, len - pos, host);
    return pos + 1;
}

static __always_inline
int get_filename_by_bio(char *filename, const size_t len, const struct bio *bio)
{
    if (filename == NULL || len == 0 || bio == NULL) {
        BPFLOGD(BPF_TRUE, "get_filename_by_bio() error: invalid argument");
        return -1;
    }
    struct inode *host = BPF_CORE_READ(bio, bi_io_vec, bv_page, mapping, host);
    if (host == NULL) {
        BPFLOGD(BPF_TRUE, "failed to get the bio associated inode");
        return -1;
    }

    return get_filename_by_inode(filename, len, host);
}

static __always_inline
struct file* get_file_by_fd(const struct files_struct *files, const unsigned int fd)
{
    if (files == NULL) {
        BPFLOGD(BPF_TRUE, "get_file_by_fd() error: invalid argument");
        return NULL;
    }
    if (fd >= BPF_CORE_READ(files, fdt, max_fds)) {
        BPFLOGD(BPF_TRUE, "get_file_by_fd() error: invalid argument");
        return NULL;
    }
    struct file **fd_array = BPF_CORE_READ(files, fdt, fd);
    if (fd_array == NULL) {
        BPFLOGD(BPF_TRUE, "failed to get fd array");
        return NULL;
    }
    struct file *filp = NULL;
    int err = bpf_probe_read_kernel(&filp, sizeof(filp), &fd_array[fd]);
    if (err || filp == NULL) {
        BPFLOGD(BPF_TRUE, "failed to get file");
        return NULL;
    }
    return filp;
}

static __always_inline
int get_filename_by_file(char *filename, const size_t len, const struct file *filp)
{
    if (filename == NULL || filp == NULL || len == 0) {
        BPFLOGD(BPF_TRUE, "get_filename_by_file() error: invalid argument");
        return -1;
    }
    struct inode *f_inode = BPF_CORE_READ(filp, f_inode);
    if (f_inode == NULL) {
        BPFLOGD(BPF_TRUE, "failed to get inode");
        return -1;
    }
    return get_filename_by_inode(filename, len, f_inode);
}

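/*
 * A strtrace event is a companion record that carries the resolved file path
 * for another tracer's event. It copies the source event's stime, type,
 * source tracer id and address into its own start_event so that the host side
 * can later match the path back to the fstrace or biotrace record it belongs
 * to.
 */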
static __always_inline
int emit_strtrace_event(u64 stime, u32 type, const void *addr, u32 stracer)
{
    if (addr == NULL) {
        BPFLOGD(BPF_TRUE, "strtrace event discarded: invalid argument");
        return -1;
    }
    const u64 event_size = sizeof(struct strtrace_cmplt_event_t);
    struct strtrace_cmplt_event_t *cmplt_event = bpf_ringbuf_reserve(&bpf_ringbuf_map, event_size, 0);
    if (cmplt_event == NULL) {
        BPFLOGD(BPF_TRUE, "failed to reserve space for strtrace event from BPF ringbuffer");
        return -1;
    }
    __builtin_memset(cmplt_event, 0, event_size);
    cmplt_event->tracer = STRTRACE;
    cmplt_event->start_event.type = type;
    cmplt_event->start_event.stracer = stracer;
    cmplt_event->start_event.stime = stime;
    cmplt_event->start_event.addr = addr;
    cmplt_event->pid = bpf_get_current_pid_tgid();
    cmplt_event->tgid = (bpf_get_current_pid_tgid() >> SHIFT_32);
    int err = 0;
    switch (stracer) {
        case BIOTRACE: {
            err = get_filename_by_bio(cmplt_event->filename, MAX_FILENAME_LEN, addr);
            break;
        }
        case FSTRACE: {
            if (type == SYS_OPENAT2) {
                err = bpf_probe_read_user_str(cmplt_event->filename, MAX_FILENAME_LEN, addr);
                break;
            }
            if (type == SYS_CLOSE) {
                const struct sys_close_args_t *args = (const struct sys_close_args_t *) addr;
                const struct file *filp = get_file_by_fd(args->files, args->fd);
                err = get_filename_by_file(cmplt_event->filename, MAX_FILENAME_LEN, filp);
                break;
            }
            BPFLOGD(BPF_TRUE, "strtrace event discarded: bad source event type = %d of fstrace", type);
            bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
            return -1;
        }
        default: {
            BPFLOGD(BPF_TRUE, "strtrace event discarded: bad strtrace source tracer = %d", stracer);
            bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
            return -1;
        }
    }

    if (err <= 0) {
        BPFLOGD(BPF_TRUE, "strtrace event discarded: failed to read path for tracer = %d", stracer);
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }
    cmplt_event->len = (__u32)err;
    bpf_ringbuf_submit(cmplt_event, BPF_RB_FORCE_WAKEUP);
    return 0;
}

static __always_inline
u32 get_biotrace_event_type_by_flags(unsigned int cmd_flags)
{
    if (cmd_flags & REQ_META) {
        if ((cmd_flags & REQ_OP_MASK) == REQ_OP_READ) {
            return BIO_METADATA_READ;
        }
        if ((cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE) {
            return BIO_METADATA_WRITE;
        }
        return 0;
    }
    if (cmd_flags & REQ_SWAP) {
        if ((cmd_flags & REQ_OP_MASK) == REQ_OP_READ) {
            return BIO_PAGE_IN;
        }
        if ((cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE) {
            return BIO_PAGE_OUT;
        }
        return 0;
    }
    if ((cmd_flags & REQ_OP_MASK) == REQ_OP_READ) {
        return BIO_DATA_READ;
    }
    if ((cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE) {
        return BIO_DATA_WRITE;
    }
    return 0;
}
/***************************** inline funcs END **********************************/

/***************************** pftrace BPF progs BEGIN *****************************/
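/*
 * Each pftrace probe pair below follows the same pattern: the kprobe on entry
 * records a start event keyed by the current pid_tgid in start_event_map, and
 * the matching kretprobe looks it up again, adds the completion time and
 * return value, and emits the merged record through the ring buffer via
 * emit_event().
 */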
SEC("kprobe/__do_fault")
int BPF_KPROBE(__do_fault_entry, struct vm_fault *vmf)
{
    if (check_current_pid(-1, -1) != 0) {
        // not one of the target processes, skip it
        return 0;
    }
    return handle_pftrace_start_event(vmf, (u32) PF_FILE_BACKED_IN);
}

SEC("kretprobe/__do_fault")
int BPF_KRETPROBE(__do_fault_exit, int64_t vmf_flags)
{
    if (check_current_pid(-1, -1) != 0) {
        // not one of the target processes, skip it
        return 0;
    }
    return emit_event(ctx, vmf_flags, PFTRACE);
}

SEC("kprobe/do_swap_page")
int BPF_KPROBE(do_swap_page_entry, struct vm_fault *vmf)
{
    if (check_current_pid(-1, -1) != 0) {
        // not one of the target processes, skip it
        return 0;
    }
    return handle_pftrace_start_event(vmf, (u32) PF_SWAP_FROM_DISK);
}

SEC("kretprobe/do_swap_page")
int BPF_KRETPROBE(do_swap_page_exit, int64_t vmf_flags)
{
    if (check_current_pid(-1, -1) != 0) {
        // not one of the target processes, skip it
        return 0;
    }
    return emit_event(ctx, vmf_flags, PFTRACE);
}

SEC("kprobe/do_wp_page")
int BPF_KPROBE(do_wp_page_entry, struct vm_fault *vmf)
{
    if (check_current_pid(-1, -1) != 0) {
        // not one of the target processes, skip it
        return 0;
    }
    return handle_pftrace_start_event(vmf, (u32) PF_COPY_ON_WRITE);
}

SEC("kretprobe/do_wp_page")
int BPF_KRETPROBE(do_wp_page_exit, int64_t vmf_flags)
{
    if (check_current_pid(-1, -1) != 0) {
        // not one of the target processes, skip it
        return 0;
    }
    return emit_event(ctx, vmf_flags, PFTRACE);
}
/*************************** pftrace BPF progs END *******************************/

/**************************** bio BPF progs BEGIN *******************************/
static __always_inline
int handle_blk_issue(struct request *rq)
{
    if (rq == NULL) {
        BPFLOGD(BPF_TRUE, "request is NULL");
        return 0;
    }
    const u64 start_event_map_key = (const u64)rq;
    struct start_event_t start_event = {};
    __builtin_memset(&start_event, 0, sizeof(start_event));
    struct biotrace_start_event_t *bio_se = &start_event.bio_se;
    bio_se->stime = bpf_ktime_get_ns();
    bio_se->type = get_biotrace_event_type_by_flags(BPF_CORE_READ(rq, cmd_flags));
    if (bio_se->type == 0) {
        BPFLOGD(BPF_TRUE, "failed to get biotrace event type");
        return 0;
    }
    emit_strtrace_event(bio_se->stime, bio_se->type, BPF_CORE_READ(rq, bio), BIOTRACE);
    u64 tgid_pid = bpf_get_current_pid_tgid();
    bio_se->pid = (u32) tgid_pid;
    bio_se->tgid = (u32) (tgid_pid >> SHIFT_32);
    bpf_get_current_comm(bio_se->comm, MAX_COMM_LEN);
    bio_se->size = BPF_CORE_READ(rq, bio, bi_iter.bi_size);
    bpf_map_update_elem(&start_event_map, &start_event_map_key, &start_event, BPF_ANY);
    return 0;
}

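/*
 * The raw argument layout of tp_btf/block_rq_issue depends on the kernel:
 * older kernels pass (struct request_queue *q, struct request *rq), while
 * newer ones dropped the queue argument and pass only the request, so the
 * request pointer moves from ctx[1] to ctx[0]. The 5.10.137 threshold below
 * appears to be the point where the kernels targeted here picked up that
 * change.
 */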
SEC("tp_btf/block_rq_issue")
int block_issue(u64 *ctx)
{
    if (check_current_pid(-1, -1) != 0) {
        return 0;
    }
    if (LINUX_KERNEL_VERSION < KERNEL_VERSION(5, 10, 137)) {
        return handle_blk_issue((void *)ctx[1]);
    } else {
        return handle_blk_issue((void *)ctx[0]);
    }
}

SEC("kprobe/blk_update_request")
int BPF_PROG(blk_update_request, struct request *rq)
{
    if (check_current_pid(-1, -1) != 0) {
        return 0;
    }
    u64 ctime = bpf_ktime_get_ns();
    const u64 event_size = sizeof(struct biotrace_cmplt_event_t);
    struct biotrace_cmplt_event_t *cmplt_event = bpf_ringbuf_reserve(&bpf_ringbuf_map, event_size, 0);
    if (cmplt_event == NULL) {
        BPFLOGD(BPF_TRUE, "failed to reserve space for biotrace event from BPF ringbuffer");
        return 0;
    }
    __builtin_memset(cmplt_event, 0, event_size);
    const u64 start_event_map_key = (const u64)rq;
    const struct biotrace_start_event_t *start_event = bpf_map_lookup_elem(&start_event_map, &start_event_map_key);
    if (start_event == NULL) {
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return 0;
    }
    cmplt_event->start_event = *start_event;
    if (cmplt_event->start_event.type == 0) {
        BPFLOGI(BPF_TRUE, "biotrace event discarded: invalid biotrace start event");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return 0;
    }
    cmplt_event->tracer = BIOTRACE;
    cmplt_event->ctime = ctime;
    cmplt_event->prio = BPF_CORE_READ(rq, bio, bi_ioprio);
    cmplt_event->blkcnt = BPF_CORE_READ(rq, bio, bi_iter.bi_sector);
    if (unwind_stack()) {
        cmplt_event->nips = bpf_get_stack(ctx, cmplt_event->ips, g_stack_limit * sizeof(u64), 0) / sizeof(u64);
    }
    bpf_ringbuf_submit(cmplt_event, BPF_RB_FORCE_WAKEUP);
    return 0;
}
/*************************** bio BPF progs END *******************************/

/*************************** user BPF progs START ****************************/
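/*
 * Return probe on dlopen() in the musl loader: whenever a traced process
 * successfully loads a shared object, a small event carrying its tgid is
 * emitted, presumably so the host side can refresh that process's symbol
 * maps.
 */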
SEC("uretprobe//system/lib/ld-musl-aarch64.so.1:dlopen")
int BPF_KRETPROBE(uretprobe_dlopen, void *ret)
{
    if (check_current_pid(-1, -1) != 0) {
        return 0;
    }
    if (ret == NULL) {
        return 0;
    }

    const u64 event_size = sizeof(struct dlopen_trace_start_event_t);
    struct dlopen_trace_start_event_t *start_event = bpf_ringbuf_reserve(&bpf_ringbuf_map, event_size, 0);
    if (start_event == NULL) {
        BPFLOGD(BPF_TRUE, "failed to reserve space for dlopen trace event from BPF ringbuffer");
        return -1;
    }
    start_event->type = DLOPEN_TRACE;
    u64 tgid_pid = bpf_get_current_pid_tgid();
    start_event->tgid = (u32)(tgid_pid >> SHIFT_32);
    bpf_ringbuf_submit(start_event, BPF_RB_FORCE_WAKEUP);
    return 0;
}
/*************************** user BPF progs END ****************************/
#include "fstrace_progs.h"

char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;