/*
 * Copyright (c) 2024 Huawei Device Co., Ltd. All rights reserved.
 *
 * The hiebpf.bpf.c is dual licensed: you can use it either under the terms of
 * the GPL V2, or the 3-Clause BSD license, at your option.
 * See the LICENSE file in the root of this repository for complete details.
 */

#include <linux/version.h>
#include "vmlinux.h"
#include "bpf_helpers.h"
#include "bpf_tracing.h"
#include "bpf_core_read.h"
#include "hiebpf_types.h"
#include "bpf_log_writer.h"

#ifndef VM_FAULT_ERROR
#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS |    \
            VM_FAULT_SIGSEGV | VM_FAULT_HWPOISON |    \
            VM_FAULT_HWPOISON_LARGE | VM_FAULT_FALLBACK)
#endif

extern int LINUX_KERNEL_VERSION __kconfig;
const volatile unsigned int g_stack_limit = 0;
static const int SHIFT_32 = 32;

// global configuration data
// const volatile int tracer_pid = -1;
// const volatile int bpf_log_level = BPF_LOG_DEBUG;
// const volatile int max_stack_depth = MAX_STACK_LIMIT;
// const volatile int target_pids[MAX_TARGET_PIDS];

/*********************************************************************************
 * use BPF maps to store global configuration data in case global
 * variables are not available
 ********************/
struct {
    /*
     * config_var_map[0] = tracer_pid, tracer_pid != -1 means the tracer itself is not traced
     * config_var_map[1] = bpf_log_level
     * config_var_map[2] = max_stack_limit
     * config_var_map[3] = max_stack_depth
     * config_var_map[4] = unwind_stack, non-zero means unwind the user stack, otherwise do not unwind
     */
    __uint(type, BPF_MAP_TYPE_ARRAY);
    __uint(key_size, sizeof(u32));
    __uint(value_size, sizeof(u32));
    __uint(max_entries, NR_CONFIG_VARIABLES);
} config_var_map SEC(".maps");

struct {
    /*
     * target_pid_map[0] != 0 means tracing all processes
     */
    __uint(type, BPF_MAP_TYPE_ARRAY);
    __uint(key_size, sizeof(u32));
    __uint(value_size, sizeof(u32));
    __uint(max_entries, MAX_TARGET_PIDS + 1);
} target_pid_map SEC(".maps");
/**********************
*************************************************************************************/

/******************************** BPF maps BEGIN *************************************/
/* start event map */
struct {
    /* Since syscalls of the same process never overlap, we can simply use
     * the pid as the identifier of the start of a syscall
     */
    __uint(type, BPF_MAP_TYPE_HASH);
    __uint(key_size, sizeof(u64));
    __uint(value_size, sizeof(struct start_event_t));
    __uint(max_entries, MAX_START_EVENTS_NUM);
} start_event_map SEC(".maps");

/* pftrace stats map */
struct {
    __uint(type, BPF_MAP_TYPE_ARRAY);
    __uint(key_size, sizeof(u32));
    __uint(value_size, sizeof(struct pf_stat_t));
    __uint(max_entries, PF_MAX_EVENT_TYPE);
} pftrace_stats_map SEC(".maps");

/* bpf ringbuffers */
struct {
    __uint(type, BPF_MAP_TYPE_RINGBUF);
    __uint(max_entries, BPF_RINGBUF_SIZE);
} bpf_ringbuf_map SEC(".maps");

/********************************* BPF maps END *************************************/

/******************************** inline funcs BEGIN ********************************/
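/*
 * Read the unwind flag from config_var_map; a non-zero return value means the
 * user-space call chain should be captured for the current event.
 */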
static __always_inline
int unwind_stack()
{
    u32 index = UNWIND_FLAG_INDEX;
    const u32 *unwind_ptr = bpf_map_lookup_elem(&config_var_map, &index);
    u32 unwind = 0;
    int err = bpf_probe_read_kernel(&unwind, sizeof(u32), unwind_ptr);
    if (err) {
        BPFLOGW(BPF_TRUE, "failed to read unwind configuration");
    }
    return unwind;
}

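/*
 * Build a complete fstrace event from the start event recorded at syscall
 * entry (keyed by pid_tgid in start_event_map) and submit it to the BPF
 * ring buffer. Returns 0 on success, -1 if the event has to be discarded.
 */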
static __always_inline
int emit_fstrace_event(void* ctx, int64_t retval)
{
    // get exit timestamp as soon as possible
    u64 ctime = bpf_ktime_get_ns();
    u64 pid_tgid = bpf_get_current_pid_tgid();

    const u64 event_size = sizeof(struct fstrace_cmplt_event_t);
    struct fstrace_cmplt_event_t *cmplt_event = bpf_ringbuf_reserve(&bpf_ringbuf_map, event_size, 0);
    if (cmplt_event == NULL) {
        BPFLOGD(BPF_TRUE, "failed to reserve space for fstrace event from BPF ringbuffer");
        return -1;
    }
    __builtin_memset(cmplt_event, 0, event_size);

    const struct fstrace_start_event_t* start_event = bpf_map_lookup_elem(&start_event_map, &pid_tgid);
    int err = bpf_probe_read_kernel(&cmplt_event->start_event, sizeof(struct fstrace_start_event_t), start_event);
    if (err) {
        BPFLOGD(BPF_TRUE, "fstrace event discarded: failed to read fstrace_start_event");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }
    if (cmplt_event->start_event.type == 0) {
        BPFLOGI(BPF_TRUE, "fstrace event discarded: invalid fstrace start event");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }

    cmplt_event->ctime = ctime;
    cmplt_event->tracer = FSTRACE;
    cmplt_event->pid = (u32) pid_tgid;
    cmplt_event->tgid = (u32) (pid_tgid >> SHIFT_32);
    cmplt_event->uid = bpf_get_current_uid_gid();
    cmplt_event->gid = (bpf_get_current_uid_gid() >> SHIFT_32);
    cmplt_event->retval = retval;
    err = bpf_get_current_comm(cmplt_event->comm, MAX_COMM_LEN);
    if (err) {
        BPFLOGD(BPF_TRUE, "fstrace event discarded: failed to get process command");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }

    // get user callchain
    if (unwind_stack()) {
        cmplt_event->nips =
            bpf_get_stack(ctx, cmplt_event->ips, g_stack_limit * sizeof(u64), BPF_F_USER_STACK) / sizeof(u64);
    }

    // send out the complete event data to the BPF ring buffer
    bpf_ringbuf_submit(cmplt_event, BPF_RB_FORCE_WAKEUP);
    return 0;
}

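/*
 * Record a page-fault start event (timestamp, type and faulting address) in
 * start_event_map, keyed by the current pid_tgid.
 */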
static __always_inline
int handle_pftrace_start_event(struct vm_fault *vmf, u32 type)
{
    struct start_event_t start_event = {};
    __builtin_memset(&start_event, 0, sizeof(start_event));
    struct pftrace_start_event_t *pf_se = &start_event.pf_se;
    pf_se->stime = bpf_ktime_get_ns();
    pf_se->type = type;
    int err = bpf_probe_read_kernel(&pf_se->addr, sizeof(vmf->address), &vmf->address);
    if (err) {
        BPFLOGW(BPF_TRUE, "failed to read page fault address for pftrace start event");
        // clear the start event to indicate error
        __builtin_memset(&start_event, 0, sizeof(start_event));
    }
    u64 pid_tgid = bpf_get_current_pid_tgid();
    err = (int) bpf_map_update_elem(&start_event_map, &pid_tgid, &start_event, BPF_ANY);
    if (err < 0) {
        // this should never happen
        BPFLOGF(BPF_TRUE, "failed to update pftrace_start_event for pid_tgid = %lld", pid_tgid);
        return -1;
    }
    return 0;
}

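/*
 * Update the per-type page fault statistics in pftrace_stats_map with a new
 * duration sample: count, total, average, min/max and deviation are
 * recomputed incrementally, e.g.
 *   avg' = (tot_duration + duration) / (count + 1)
 */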
static __always_inline
int read_modify_update_page_fault_stats(u32 type, u32 duration)
{
    const struct pf_stat_t *stat_ptr = bpf_map_lookup_elem(&pftrace_stats_map, &type);
    if (stat_ptr == NULL) {
        BPFLOGD(BPF_TRUE, "failed to lookup pftrace stat");
        return -1;
    }
    struct pf_stat_t stat;
    int err = (int) bpf_probe_read_kernel(&stat, sizeof(stat), stat_ptr);
    if (err < 0) {
        BPFLOGD(BPF_TRUE, "failed to read pftrace stat");
        return -1;
    }
    u32 tot_duration = stat.tot_duration + duration;
    u32 avg_duration = tot_duration / (stat.count + 1);
    stat.dev_duration = (duration * duration + stat.count * stat.dev_duration
                        + stat.tot_duration * stat.avg_duration - tot_duration * avg_duration)
                        / (stat.count + 1);
    ++stat.count;
    stat.tot_duration = tot_duration;
    stat.avg_duration = avg_duration;
    if (duration < stat.min_duration) {
        stat.min_duration = duration;
    } else if (duration > stat.max_duration) {
        stat.max_duration = duration;
    }
    err = (int) bpf_map_update_elem(&pftrace_stats_map, &type, &stat, BPF_ANY);
    if (err < 0) {
        BPFLOGD(BPF_TRUE, "failed to update pftrace stat");
        return -1;
    }

    return 0;
}

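/*
 * Return the event type recorded in the current thread's pftrace start event,
 * or 0 if it cannot be read. A non-zero type argument is written to the local
 * copy of the start event.
 */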
static __always_inline
int read_modify_update_current_type(u32 type)
{
    u64 pid_tgid = bpf_get_current_pid_tgid();
    const struct pftrace_start_event_t *se_ptr = bpf_map_lookup_elem(&start_event_map, &pid_tgid);
    struct pftrace_start_event_t start_event;
    int err = bpf_probe_read_kernel(&start_event, sizeof(start_event), se_ptr);
    if (err) {
        BPFLOGW(BPF_TRUE, "failed to read pftrace start event to update event type");
        return 0;
    }
    u32 old_type = start_event.type;
    if (type) {
        start_event.type = type;
    }
    return old_type;
}

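/*
 * Translate the probed function's return value into the event size field:
 * PF_PAGE_CACHE_HIT events return a struct file pointer, the other page fault
 * types return vm_fault_t flags; size is 1 on success and 0 on failure.
 */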
static __always_inline
int handle_return_value(struct pftrace_cmplt_event_t* cmplt_event, long long retval)
{
    switch (read_modify_update_current_type(0)) {
        case PF_PAGE_CACHE_HIT: {
            struct file *fpin = (struct file *) retval;
            if (fpin == NULL) {
                cmplt_event->size = 0;
            } else {
                cmplt_event->size = 1;
            }
            break;
        }
        case PF_FILE_BACKED_IN:
        case PF_SWAP_FROM_ZRAM:
        case PF_SWAP_FROM_DISK:
        case PF_ZERO_FILL_PAGE:
        case PF_FAKE_ZERO_PAGE:
        case PF_COPY_ON_WRITE: {
            vm_fault_t vmf_flags = (vm_fault_t) retval;
            if (vmf_flags & VM_FAULT_ERROR) {
                cmplt_event->size = 0;
            } else {
                cmplt_event->size = 1;
            }
            break;
        }
        default: return -1;
    }
    return 0;
}

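/*
 * Build a complete pftrace event from the start event recorded at page fault
 * entry, update the per-type statistics, and submit the event to the BPF
 * ring buffer.
 */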
static __always_inline
int emit_pftrace_event(void* ctx, int64_t retval)
{
    // get timestamp as soon as possible
    u64 ctime = bpf_ktime_get_ns();
    u64 pid_tgid = bpf_get_current_pid_tgid();

    const u64 event_size = sizeof(struct pftrace_cmplt_event_t);
    struct pftrace_cmplt_event_t* cmplt_event = bpf_ringbuf_reserve(&bpf_ringbuf_map, event_size, 0);
    if (cmplt_event == NULL) {
        BPFLOGD(BPF_TRUE, "failed to reserve space for pftrace event from BPF ringbuffer");
        return -1;
    }
    __builtin_memset(cmplt_event, 0, event_size);

    const struct pftrace_start_event_t *se_ptr = bpf_map_lookup_elem(&start_event_map, &pid_tgid);
    int err = bpf_probe_read_kernel(&cmplt_event->start_event, sizeof(cmplt_event->start_event), se_ptr);
    if (err) {
        BPFLOGI(BPF_TRUE, "pftrace event discarded: failed to read pftrace start event");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }
    if (cmplt_event->start_event.type == 0) {
        BPFLOGI(BPF_TRUE, "pftrace event discarded: invalid pftrace start event");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }
    cmplt_event->tracer = PFTRACE;
    cmplt_event->ctime = ctime;
    cmplt_event->pid = (u32) pid_tgid;
    cmplt_event->tgid = (pid_tgid >> SHIFT_32);
    cmplt_event->uid = bpf_get_current_uid_gid();
    cmplt_event->gid = (bpf_get_current_uid_gid() >> SHIFT_32);
    err = bpf_get_current_comm(cmplt_event->comm, MAX_COMM_LEN);
    if (err < 0) {
        BPFLOGD(BPF_TRUE, "pftrace event discarded: failed to get process command");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }
    err = handle_return_value(cmplt_event, retval);
    if (err) {
        BPFLOGW(BPF_TRUE, "pftrace event discarded: failed to handle pftrace return value");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }

    // get user callchain
    if (unwind_stack()) {
        cmplt_event->nips =
            bpf_get_stack(ctx, cmplt_event->ips, g_stack_limit * sizeof(u64), BPF_F_USER_STACK) / sizeof(u64);
    }

    if (read_modify_update_page_fault_stats(cmplt_event->start_event.type,
                                            cmplt_event->ctime - cmplt_event->start_event.stime) != 0) {
        BPFLOGD(BPF_TRUE, "pftrace event discarded: failed to update pftrace stats");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }

    bpf_ringbuf_submit(cmplt_event, BPF_RB_FORCE_WAKEUP);
    return 0;
}

static __always_inline
int emit_event(void* ctx, int64_t retval, u32 tracer)
{
    switch (tracer) {
        case FSTRACE:   return emit_fstrace_event(ctx, retval);
        case PFTRACE:   return emit_pftrace_event(ctx, retval);
        default:
            BPFLOGD(BPF_TRUE, "unknown event source with id = %d", tracer);
            return -1;
    }
}

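/*
 * Compare the current task's comm with target_comm; returns 0 when the first
 * min(comm_size, sizeof(curr_comm)) characters match, -1 otherwise.
 */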
static __always_inline
int is_target_process(const char* target_comm, const size_t comm_size)
{
    char curr_comm[MAX_COMM_LEN] = "\0";
    long retval = bpf_get_current_comm(curr_comm, sizeof(curr_comm));
    if (retval == 0) {
        size_t min_comm = comm_size < sizeof(curr_comm) ? comm_size : sizeof(curr_comm);
        for (size_t j = 0; j <= min_comm; ++j) {
            if (j == min_comm) {
                char fmt[] = "current comm is %s\n";
                bpf_trace_printk(fmt, sizeof(fmt), curr_comm);
                return 0;
            }
            if (target_comm[j] != curr_comm[j]) {
                break;
            }
        }
    }
    return -1;
}

// if both the pid and tgid values are -1, check whether the event occurs in the current process
static __always_inline
int check_current_pid(const int32_t pid, const int32_t tgid)
{
    u32 index = 0;
    const int32_t* tracer_pid_ptr = bpf_map_lookup_elem(&config_var_map, &index);
    int32_t tracer_pid = -1;
    bpf_probe_read_kernel(&tracer_pid, sizeof(int32_t), tracer_pid_ptr);
    int32_t curr_pid = pid;
    if (curr_pid < 0) {
        curr_pid = (int32_t)bpf_get_current_pid_tgid();
    }
    int32_t curr_tgid = tgid;
    if (curr_tgid < 0) {
        curr_tgid = (int32_t)(bpf_get_current_pid_tgid() >> SHIFT_32);
    }
    if (curr_pid == tracer_pid || curr_tgid == tracer_pid) {
        // the current process is the tracer itself, not a target process
        return -1;
    }
    const u32 trace_all_index = 0;
    int32_t target_pids[MAX_TARGET_PIDS + 1];
    __builtin_memset(target_pids, 0, sizeof(target_pids));
    const int32_t *target_pids_ptr = bpf_map_lookup_elem(&target_pid_map, &trace_all_index);
    if (target_pids_ptr == NULL) {
        BPFLOGW(BPF_TRUE, "failed to lookup target pid map, will trace all processes");
        return 0;
    }
    int err = bpf_probe_read_kernel(target_pids, sizeof(int32_t) * (MAX_TARGET_PIDS + 1), target_pids_ptr);
    if (err) {
        BPFLOGW(BPF_TRUE, "failed to read target pids, will trace all processes");
        return 0;
    }
    if (target_pids[trace_all_index] != 0) {
        return 0;
    }
    for (u32 index = 1; index != MAX_TARGET_PIDS; ++index) {
        if (target_pids[index] < 0) {
            break;
        }
        if (target_pids[index] == curr_pid || target_pids[index] == curr_tgid) {
            return 0;
        }
    }
    return -1;
}

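/*
 * Append the mount point path of the inode's superblock to filename, walking
 * up the mount tree via mnt_parent for at most MAX_MOUNT_POINT levels.
 * Returns the number of bytes written.
 */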
static __always_inline
size_t get_mountpoint_by_inode(char *filename, int len, const struct inode *host)
{
    struct list_head *mountsHead = BPF_CORE_READ(host, i_sb, s_mounts.next);
    struct mount *mnt = NULL;
    mnt = list_entry(mountsHead, struct mount, mnt_instance);
    const u32 MAX_MOUNT_POINT = 5;
    size_t pos = 0;
    for (u32 cnt = MAX_MOUNT_POINT; cnt != 0; --cnt) {
        long name_len = 0;
        const u8 *name = BPF_CORE_READ(mnt, mnt_mountpoint, d_name.name);
        if (BPF_CORE_READ(mnt, mnt_mountpoint, d_name.len) <= 1) {
            break;
        }
        name_len = bpf_probe_read_kernel_str(filename + pos, MAX_DENTRY_NAME_LEN, name);
        if (name_len <= 1) {
            BPFLOGD(BPF_TRUE, "failed to read dentry name from kernel stack buffer");
            break;
        }
        pos += (size_t)name_len;
        filename[pos - 1] = '/';
        mnt = BPF_CORE_READ(mnt, mnt_parent);
    }
    return pos;
}

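/*
 * Reconstruct a file path for the inode: find the alias dentry that belongs
 * to the inode, walk the d_parent chain writing each dentry name (leaf first)
 * into filename, then append the mount point path. Returns the number of
 * bytes written plus one, or -1 on failure.
 */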
static __always_inline
int get_filename_by_inode(char *filename, const size_t len, const struct inode *host)
{
    int err = 0;
    struct hlist_node *curr_hlist_node = BPF_CORE_READ(host, i_dentry.first);
    if (curr_hlist_node == NULL) {
        BPFLOGD(BPF_TRUE, "failed to get alias dentries of the inode");
        return -1;
    }

    const struct dentry *curr_dentry = NULL;
    const u32 MAX_ALIAS_DENTRY = 100;
    for (u32 cnt = MAX_ALIAS_DENTRY; cnt != 0; --cnt) {
        curr_dentry = container_of(curr_hlist_node, struct dentry, d_u);
        struct inode *curr_inode = BPF_CORE_READ(curr_dentry, d_inode);
        if (curr_inode == NULL) {
            BPFLOGD(BPF_TRUE, "failed to get the current inode");
            return -1;
        }
        if (curr_inode == host) {
            break;
        }
        curr_hlist_node = BPF_CORE_READ(curr_hlist_node, next);
        if (curr_hlist_node == NULL) {
            BPFLOGD(BPF_TRUE, "failed to get the next hlist_node");
            break;
        }
    }

    unsigned int flags = BPF_CORE_READ(curr_dentry, d_flags);
    flags = (flags & 0xFFFFFF) >> FILE_TYPE_BITS;
    if (flags != DCACHE_DIRECTORY_TYPE && flags != DCACHE_REGULAR_TYPE) {
        return 0;
    }

    size_t pos = 0;
    const u32 MAX_BACKTRACE_DEPTH = 20;
    for (u32 cnt = MAX_BACKTRACE_DEPTH; cnt != 0; --cnt) {
        if (err || curr_dentry == NULL) {
            break;
        }
        unsigned int name_len = BPF_CORE_READ(curr_dentry, d_name.len);
        const u8 *name = BPF_CORE_READ(curr_dentry, d_name.name);
        if (name_len <= 1) {
            break;
        }
        long dentry_name_len = bpf_probe_read_kernel_str(filename + pos, MAX_DENTRY_NAME_LEN, name);
        if (dentry_name_len <= 1) {
            BPFLOGD(BPF_TRUE, "failed to read dentry name from kernel stack buffer");
            break;
        }
        pos += (size_t)dentry_name_len;
        size_t index = pos > 1 ? pos - 1 : 0;
        filename[index] = '/';
        struct dentry *temp_dentry = BPF_CORE_READ(curr_dentry, d_parent);
        if (temp_dentry == curr_dentry || temp_dentry == NULL) {
            break;
        }
        curr_dentry = temp_dentry;
    }
    pos += get_mountpoint_by_inode(filename + pos, len - pos, host);
    return pos + 1;
}

static __always_inline
int get_filename_by_bio(char *filename, const size_t len, const struct bio *bio)
{
    if (filename == NULL || len == 0 || bio == NULL) {
        BPFLOGD(BPF_TRUE, "get_filename_by_bio() error: invalid argument");
        return -1;
    }
    struct inode *host = BPF_CORE_READ(bio, bi_io_vec, bv_page, mapping, host);
    if (host == NULL) {
        BPFLOGD(BPF_TRUE, "failed to get the bio associated inode");
        return -1;
    }

    return get_filename_by_inode(filename, len, host);
}

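/*
 * Look up the struct file for a file descriptor by reading files->fdt->fd[fd].
 * Returns NULL if the descriptor is out of range or the read fails.
 */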
static __always_inline
struct file* get_file_by_fd(const struct files_struct *files, const unsigned int fd)
{
    if (files == NULL) {
        BPFLOGD(BPF_TRUE, "get_file_by_fd() error: invalid argument");
        return NULL;
    }
    if (fd >= BPF_CORE_READ(files, fdt, max_fds)) {
        BPFLOGD(BPF_TRUE, "get_file_by_fd() error: invalid argument");
        return NULL;
    }
    struct file **fd_array = BPF_CORE_READ(files, fdt, fd);
    if (fd_array == NULL) {
        BPFLOGD(BPF_TRUE, "failed to get fd array");
        return NULL;
    }
    struct file *filp = NULL;
    int err = bpf_probe_read_kernel(&filp, sizeof(filp), &fd_array[fd]);
    if (err || filp == NULL) {
        BPFLOGD(BPF_TRUE, "failed to get file");
        return NULL;
    }
    return filp;
}

static __always_inline
int get_filename_by_file(char *filename, const size_t len, const struct file *filp)
{
    if (filename == NULL || filp == NULL || len == 0) {
        BPFLOGD(BPF_TRUE, "get_filename_by_file() error: invalid argument");
        return -1;
    }
    struct inode *f_inode = BPF_CORE_READ(filp, f_inode);
    if (f_inode == NULL) {
        BPFLOGD(BPF_TRUE, "failed to get inode");
        return -1;
    }
    return get_filename_by_inode(filename, len, f_inode);
}

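/*
 * Emit a strtrace event carrying the file path associated with a start event.
 * The meaning of addr depends on the source tracer: a struct bio pointer for
 * BIOTRACE, a user-space pathname for SYS_OPENAT2, or a sys_close_args_t
 * pointer for SYS_CLOSE.
 */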
static __always_inline
int emit_strtrace_event(u64 stime, u32 type, const void *addr, u32 stracer)
{
    if (addr == NULL) {
        BPFLOGD(BPF_TRUE, "strtrace event discarded: invalid argument");
        return -1;
    }
    const u64 event_size = sizeof(struct strtrace_cmplt_event_t);
    struct strtrace_cmplt_event_t *cmplt_event = bpf_ringbuf_reserve(&bpf_ringbuf_map, event_size, 0);
    if (cmplt_event == NULL) {
        BPFLOGD(BPF_TRUE, "failed to reserve space for strtrace event from BPF ringbuffer");
        return -1;
    }
    __builtin_memset(cmplt_event, 0, event_size);
    cmplt_event->tracer = STRTRACE;
    cmplt_event->start_event.type = type;
    cmplt_event->start_event.stracer = stracer;
    cmplt_event->start_event.stime = stime;
    cmplt_event->start_event.addr = addr;
    cmplt_event->pid = bpf_get_current_pid_tgid();
    cmplt_event->tgid = (bpf_get_current_pid_tgid() >> SHIFT_32);
    int err = 0;
    switch (stracer) {
        case BIOTRACE: {
            err = get_filename_by_bio(cmplt_event->filename, MAX_FILENAME_LEN, addr);
            break;
        }
        case FSTRACE: {
            if (type == SYS_OPENAT2) {
                err = bpf_probe_read_user_str(cmplt_event->filename, MAX_FILENAME_LEN, addr);
                break;
            }
            if (type == SYS_CLOSE) {
                const struct sys_close_args_t *args = (const struct sys_close_args_t *) addr;
                const struct file *filp = get_file_by_fd(args->files, args->fd);
                err = get_filename_by_file(cmplt_event->filename, MAX_FILENAME_LEN, filp);
                break;
            }
            BPFLOGD(BPF_TRUE, "strtrace event discarded: bad source event type = %d of fstrace", type);
            bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
            return -1;
        }
        default: {
            BPFLOGD(BPF_TRUE, "strtrace event discarded: bad strtrace source tracer = %d", stracer);
            bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
            return -1;
        }
    }

    if (err <= 0) {
        BPFLOGD(BPF_TRUE, "strtrace event discarded: failed to read path for tracer = %d", stracer);
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }
    cmplt_event->len = (__u32)err;
    bpf_ringbuf_submit(cmplt_event, BPF_RB_FORCE_WAKEUP);
    return 0;
}

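/*
 * Classify a block request by its cmd_flags: metadata vs. swap vs. plain data,
 * combined with the read/write direction. Returns 0 for operations that are
 * not traced.
 */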
static __always_inline
u32 get_biotrace_event_type_by_flags(unsigned int cmd_flags)
{
    if (cmd_flags & REQ_META) {
        if ((cmd_flags & REQ_OP_MASK) == REQ_OP_READ) {
            return BIO_METADATA_READ;
        }
        if ((cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE) {
            return BIO_METADATA_WRITE;
        }
        return 0;
    }
    if (cmd_flags & REQ_SWAP) {
        if ((cmd_flags & REQ_OP_MASK) == REQ_OP_READ) {
            return BIO_PAGE_IN;
        }
        if ((cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE) {
            return BIO_PAGE_OUT;
        }
        return 0;
    }
    if ((cmd_flags & REQ_OP_MASK) == REQ_OP_READ) {
        return BIO_DATA_READ;
    }
    if ((cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE) {
        return BIO_DATA_WRITE;
    }
    return 0;
}
/***************************** inline funcs END **********************************/

/***************************** pftrace BPF progs BEGIN *****************************/
SEC("kprobe/__do_fault")
int BPF_KPROBE(__do_fault_entry, struct vm_fault *vmf)
{
    if (check_current_pid(-1, -1) != 0) {
        // not one of the target processes, skip it
        return 0;
    }
    return handle_pftrace_start_event(vmf, (u32) PF_FILE_BACKED_IN);
}

SEC("kretprobe/__do_fault")
int BPF_KRETPROBE(__do_fault_exit, int64_t vmf_flags)
{
    if (check_current_pid(-1, -1) != 0) {
        // not one of the target processes, skip it
        return 0;
    }
    return emit_event(ctx, vmf_flags, PFTRACE);
}

SEC("kprobe/do_swap_page")
int BPF_KPROBE(do_swap_page_entry, struct vm_fault *vmf)
{
    if (check_current_pid(-1, -1) != 0) {
        // not one of the target processes, skip it
        return 0;
    }
    return handle_pftrace_start_event(vmf, (u32) PF_SWAP_FROM_DISK);
}

SEC("kretprobe/do_swap_page")
int BPF_KRETPROBE(do_swap_page_exit, int64_t vmf_flags)
{
    if (check_current_pid(-1, -1) != 0) {
        // not one of the target processes, skip it
        return 0;
    }
    return emit_event(ctx, vmf_flags, PFTRACE);
}

SEC("kprobe/do_wp_page")
int BPF_KPROBE(do_wp_page_entry, struct vm_fault *vmf)
{
    if (check_current_pid(-1, -1) != 0) {
        // not one of the target processes, skip it
        return 0;
    }
    return handle_pftrace_start_event(vmf, (u32) PF_COPY_ON_WRITE);
}

SEC("kretprobe/do_wp_page")
int BPF_KRETPROBE(do_wp_page_exit, int64_t vmf_flags)
{
    if (check_current_pid(-1, -1) != 0) {
        // not one of the target processes, skip it
        return 0;
    }
    return emit_event(ctx, vmf_flags, PFTRACE);
}
/*************************** pftrace BPF progs END *******************************/

/**************************** bio BPF progs BEGIN *******************************/
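/*
 * Record a biotrace start event for the issued request, keyed by the request
 * pointer, and emit a strtrace event with the file path backing the bio.
 */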
static __always_inline
int handle_blk_issue(struct request *rq)
{
    if (rq == NULL) {
        BPFLOGD(BPF_TRUE, "request is NULL");
        return 0;
    }
    const u64 start_event_map_key = (const u64)rq;
    struct start_event_t start_event = {};
    __builtin_memset(&start_event, 0, sizeof(start_event));
    struct biotrace_start_event_t *bio_se = &start_event.bio_se;
    bio_se->stime = bpf_ktime_get_ns();
    bio_se->type = get_biotrace_event_type_by_flags(BPF_CORE_READ(rq, cmd_flags));
    if (bio_se->type == 0) {
        BPFLOGD(BPF_TRUE, "failed to get biotrace event type");
        return 0;
    }
    emit_strtrace_event(bio_se->stime, bio_se->type, BPF_CORE_READ(rq, bio), BIOTRACE);
    u64 tgid_pid = bpf_get_current_pid_tgid();
    bio_se->pid = (u32) tgid_pid;
    bio_se->tgid = (u32) (tgid_pid >> SHIFT_32);
    bpf_get_current_comm(bio_se->comm, MAX_COMM_LEN);
    bio_se->size = BPF_CORE_READ(rq, bio, bi_iter.bi_size);
    bpf_map_update_elem(&start_event_map, &start_event_map_key, &start_event, BPF_ANY);
    return 0;
}

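/*
 * On kernels older than 5.10.137 the block_rq_issue tracepoint is expected to
 * carry (struct request_queue *, struct request *), so the request is ctx[1];
 * on newer kernels the queue argument is gone and the request is ctx[0].
 */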
SEC("tp_btf/block_rq_issue")
int block_issue(u64 *ctx)
{
    if (check_current_pid(-1, -1) != 0) {
        return 0;
    }
    if (LINUX_KERNEL_VERSION < KERNEL_VERSION(5, 10, 137)) {
        return handle_blk_issue((void *)ctx[1]);
    } else {
        return handle_blk_issue((void *)ctx[0]);
    }
}

SEC("kprobe/blk_update_request")
int BPF_PROG(blk_update_request, struct request *rq)
{
    if (check_current_pid(-1, -1) != 0) {
        return 0;
    }
    u64 ctime = bpf_ktime_get_ns();
    const u64 event_size = sizeof(struct biotrace_cmplt_event_t);
    struct biotrace_cmplt_event_t *cmplt_event = bpf_ringbuf_reserve(&bpf_ringbuf_map, event_size, 0);
    if (cmplt_event == NULL) {
        BPFLOGD(BPF_TRUE, "failed to reserve space for biotrace event from BPF ringbuffer");
        return 0;
    }
    __builtin_memset(cmplt_event, 0, event_size);
    const u64 start_event_map_key = (const u64)rq;
    const struct biotrace_start_event_t *start_event = bpf_map_lookup_elem(&start_event_map, &start_event_map_key);
    if (start_event == NULL) {
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return 0;
    }
    cmplt_event->start_event = *start_event;
    if (cmplt_event->start_event.type == 0) {
        BPFLOGI(BPF_TRUE, "biotrace event discarded: invalid biotrace start event");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return 0;
    }
    cmplt_event->tracer = BIOTRACE;
    cmplt_event->ctime = ctime;
    cmplt_event->prio = BPF_CORE_READ(rq, bio, bi_ioprio);
    cmplt_event->blkcnt = BPF_CORE_READ(rq, bio, bi_iter.bi_sector);
    if (unwind_stack()) {
        cmplt_event->nips = bpf_get_stack(ctx, cmplt_event->ips, g_stack_limit * sizeof(u64), 0) / sizeof(u64);
    }
    bpf_ringbuf_submit(cmplt_event, BPF_RB_FORCE_WAKEUP);
    return 0;
}
/*************************** bio BPF progs END *******************************/

/*************************** user BPF progs START ****************************/
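/*
 * Report successful dlopen() calls (non-NULL return value) from target
 * processes by emitting a dlopen_trace_start_event_t with the caller's tgid.
 */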
SEC("uretprobe//system/lib/ld-musl-aarch64.so.1:dlopen")
int BPF_KRETPROBE(uretprobe_dlopen, void *ret)
{
    if (check_current_pid(-1, -1) != 0) {
        return 0;
    }
    if (ret == NULL) {
        return 0;
    }

    const u64 event_size = sizeof(struct dlopen_trace_start_event_t);
    struct dlopen_trace_start_event_t *start_event = bpf_ringbuf_reserve(&bpf_ringbuf_map, event_size, 0);
    if (start_event == NULL) {
        BPFLOGD(BPF_TRUE, "failed to reserve space for dlopen event from BPF ringbuffer");
        return -1;
    }
    start_event->type = DLOPEN_TRACE;
    u64 tgid_pid = bpf_get_current_pid_tgid();
    start_event->tgid = (u32)(tgid_pid >> SHIFT_32);
    bpf_ringbuf_submit(start_event, BPF_RB_FORCE_WAKEUP);
    return 0;
}
/*************************** user BPF progs END ****************************/
#include "fstrace_progs.h"

char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;