/*
 * Copyright (c) 2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <linux/version.h>
#include "vmlinux.h"
#include "bpf_helpers.h"
#include "bpf_tracing.h"
#include "bpf_core_read.h"
#include "hiebpf_types.h"
#include "bpf_log_writer.h"

#ifndef VM_FAULT_ERROR
#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS |    \
            VM_FAULT_SIGSEGV | VM_FAULT_HWPOISON |    \
            VM_FAULT_HWPOISON_LARGE | VM_FAULT_FALLBACK)
#endif

extern int LINUX_KERNEL_VERSION __kconfig;
const volatile unsigned int g_stack_limit = 0;
static const int SHIFT_32 = 32;
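// Helpers such as bpf_get_current_pid_tgid() and bpf_get_current_uid_gid() pack two
// 32-bit values into one u64 (pid/uid in the low half, tgid/gid in the high half);
// SHIFT_32 is used below to extract the upper half.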

// global configuration data
// const volatile int tracer_pid = -1;
// const volatile int bpf_log_level = BPF_LOG_DEBUG;
// const volatile int max_stack_depth = MAX_STACK_LIMIT;
// const volatile int target_pids[MAX_TARGET_PIDS];

/*********************************************************************************
 * use BPF maps to store global configuration data when global variables
 * are not available
 ********************/
struct {
    /*
     * config_var_map[0] = tracer_pid, tracer_pid != -1 means not tracing the tracer itself
     * config_var_map[1] = bpf_log_level
     * config_var_map[2] = max_stack_limit
     * config_var_map[3] = max_stack_depth
     * config_var_map[4] = unwind_stack, non-zero means unwind the stack, otherwise do not unwind
    */
    __uint(type, BPF_MAP_TYPE_ARRAY);
    __uint(key_size, sizeof(u32));
    __uint(value_size, sizeof(u32));
    __uint(max_entries, NR_CONFIG_VARIABLES);
} config_var_map SEC(".maps");
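/*
 * Illustrative sketch (user-space loader side, not part of this BPF object): the
 * configuration slots above are expected to be filled before the programs attach,
 * e.g. with libbpf (the skeleton variable "skel" is hypothetical):
 *
 *     __u32 key = 0;                              // slot 0: tracer_pid
 *     __u32 tracer_pid = (__u32)getpid();         // do not trace the tracer itself
 *     bpf_map_update_elem(bpf_map__fd(skel->maps.config_var_map), &key, &tracer_pid, BPF_ANY);
 */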

struct {
    /*
     * target_pid_map[0] != 0 means tracing all processes
     */
    __uint(type, BPF_MAP_TYPE_ARRAY);
    __uint(key_size, sizeof(u32));
    __uint(value_size, sizeof(u32));
    __uint(max_entries, MAX_TARGET_PIDS + 1);
} target_pid_map SEC(".maps");
/**********************
*************************************************************************************/

/******************************** BPF maps BEGIN*************************************/
/*start event map*/
struct {
    /* Since executions of syscalls of the same thread never overlap,
     * we can simply use pid_tgid as the identifier of the start of a syscall
     */
    __uint(type, BPF_MAP_TYPE_HASH);
    __uint(key_size, sizeof(u64));
    __uint(value_size, sizeof(struct start_event_t));
    __uint(max_entries, MAX_START_EVENTS_NUM);
} start_event_map SEC(".maps");

/*pftrace stats map*/
struct {
    __uint(type, BPF_MAP_TYPE_ARRAY);
    __uint(key_size, sizeof(u32));
    __uint(value_size, sizeof(struct pf_stat_t));
    __uint(max_entries, PF_MAX_EVENT_TYPE);
} pftrace_stats_map SEC(".maps");

/*bpf ringbuffers*/
struct {
    __uint(type, BPF_MAP_TYPE_RINGBUF);
    __uint(max_entries, BPF_RINGBUF_SIZE);
} bpf_ringbuf_map SEC(".maps");

/********************************* BPF maps END *************************************/

/******************************** inline funcs BEGIN ********************************/
static __always_inline
int unwind_stack()
{
    u32 index = UNWIND_FLAG_INDEX;
    const u32 *unwind_ptr = bpf_map_lookup_elem(&config_var_map, &index);
    u32 unwind = 0;
    int err = bpf_probe_read_kernel(&unwind, sizeof(u32), unwind_ptr);
    if (err) {
        BPFLOGW(BPF_TRUE, "failed to read unwind configuration");
    }
    return unwind;
}

static __always_inline
int emit_fstrace_event(void* ctx, int64_t retval)
{
    // get exit timestamp as soon as possible
    u64 ctime = bpf_ktime_get_ns();
    u64 pid_tgid = bpf_get_current_pid_tgid();

    const u64 event_size = sizeof(struct fstrace_cmplt_event_t);
    struct fstrace_cmplt_event_t *cmplt_event = bpf_ringbuf_reserve(&bpf_ringbuf_map, event_size, 0);
    if (cmplt_event == NULL) {
        BPFLOGD(BPF_TRUE, "failed to reserve space for fstrace event from BPF ringbuffer");
        return -1;
    }
    __builtin_memset(cmplt_event, 0, event_size);

    const struct fstrace_start_event_t* start_event = bpf_map_lookup_elem(&start_event_map, &pid_tgid);
    int err = bpf_probe_read_kernel(&cmplt_event->start_event, sizeof(struct fstrace_start_event_t), start_event);
    if (err) {
        BPFLOGD(BPF_TRUE, "fstrace event discarded: failed to read fstrace_start_event");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }
    if (cmplt_event->start_event.type == 0) {
        BPFLOGI(BPF_TRUE, "fstrace event discarded: invalid fstrace start event");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }

    cmplt_event->ctime = ctime;
    cmplt_event->tracer = FSTRACE;
    cmplt_event->pid = (u32) pid_tgid;
    cmplt_event->tgid = (u32) (pid_tgid >> SHIFT_32);
    cmplt_event->uid = bpf_get_current_uid_gid();
    cmplt_event->gid = (bpf_get_current_uid_gid() >> SHIFT_32);
    cmplt_event->retval = retval;
    err = bpf_get_current_comm(cmplt_event->comm, MAX_COMM_LEN);
    if (err) {
        BPFLOGD(BPF_TRUE, "fstrace event discarded: failed to get process command");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }

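    // Note: bpf_get_stack() returns the number of bytes written into ips[], so dividing
    // by sizeof(u64) converts it to the number of captured stack frames.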
    // get user callchain
    if (unwind_stack()) {
        cmplt_event->nips =
            bpf_get_stack(ctx, cmplt_event->ips, g_stack_limit * sizeof(u64), BPF_F_USER_STACK) / sizeof(u64);
    }

    // send out the complete event data through the BPF ring buffer
    bpf_ringbuf_submit(cmplt_event, BPF_RB_FORCE_WAKEUP);
    return 0;
}

static __always_inline
int handle_pftrace_start_event(struct vm_fault *vmf, u32 type)
{
    struct start_event_t start_event = {};
    __builtin_memset(&start_event, 0, sizeof(start_event));
    struct pftrace_start_event_t *pf_se = &start_event.pf_se;
    pf_se->stime = bpf_ktime_get_ns();
    pf_se->type = type;
    int err = bpf_probe_read_kernel(&pf_se->addr, sizeof(vmf->address), &vmf->address);
    if (err) {
        BPFLOGW(BPF_TRUE, "failed to read page fault address for pftrace start event");
        // clear the start event to indicate error
        __builtin_memset(&start_event, 0, sizeof(start_event));
    }
    u64 pid_tgid = bpf_get_current_pid_tgid();
    err = (int) bpf_map_update_elem(&start_event_map, &pid_tgid, &start_event, BPF_ANY);
    if (err < 0) {
        // this should never happen
        BPFLOGF(BPF_TRUE, "failed to update pftrace_start_event for pid_tgid = %lld", pid_tgid);
        return -1;
    }
    return 0;
}

static __always_inline
int read_modify_update_page_fault_stats(u32 type, u32 duration)
{
    const struct pf_stat_t *stat_ptr = bpf_map_lookup_elem(&pftrace_stats_map, &type);
    if (stat_ptr == NULL) {
        BPFLOGD(BPF_TRUE, "failed to lookup pftrace stat");
        return -1;
    }
    struct pf_stat_t stat;
    int err = (int) bpf_probe_read_kernel(&stat, sizeof(stat), stat_ptr);
    if (err < 0) {
        BPFLOGD(BPF_TRUE, "failed to read pftrace stat");
        return -1;
    }
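    // Incremental mean/variance update: count * dev_duration + tot_duration * avg_duration
    // equals the running sum of squared durations, so adding duration^2 and subtracting the
    // new tot * avg before dividing by (count + 1) yields the new deviation (E[x^2] - E[x]^2).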
    u32 tot_duration = stat.tot_duration + duration;
    u32 avg_duration = tot_duration / (stat.count + 1);
    stat.dev_duration = (duration * duration + stat.count * stat.dev_duration
                        + stat.tot_duration * stat.avg_duration - tot_duration * avg_duration)
                        / (stat.count + 1);
    ++stat.count;
    stat.tot_duration = tot_duration;
    stat.avg_duration = avg_duration;
    if (duration < stat.min_duration) {
        stat.min_duration = duration;
    } else if (duration > stat.max_duration) {
        stat.max_duration = duration;
    }
    err = (int) bpf_map_update_elem(&pftrace_stats_map, &type, &stat, BPF_ANY);
    if (err < 0) {
        BPFLOGD(BPF_TRUE, "failed to update pftrace stat");
        return -1;
    }

    return 0;
}

static __always_inline
int read_modify_update_current_type(u32 type)
{
    u64 pid_tgid = bpf_get_current_pid_tgid();
    const struct pftrace_start_event_t *se_ptr = bpf_map_lookup_elem(&start_event_map, &pid_tgid);
    struct pftrace_start_event_t start_event;
    int err = bpf_probe_read_kernel(&start_event, sizeof(start_event), se_ptr);
    if (err) {
        BPFLOGW(BPF_TRUE, "failed to read pftrace start event to update event type");
        return 0;
    }
    u32 old_type = start_event.type;
    if (type) {
        start_event.type = type;
    }
    return old_type;
}

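// handle_return_value() interprets the kretprobe return value according to the recorded
// event type and stores a success flag in cmplt_event->size: for a page-cache hit the
// probed function returns a struct file pointer (non-NULL on success), while for the
// other page fault types it returns a vm_fault_t where any VM_FAULT_ERROR bit marks a failure.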
static __always_inline
int handle_return_value(struct pftrace_cmplt_event_t* cmplt_event, long long retval)
{
    switch (read_modify_update_current_type(0)) {
        case PF_PAGE_CACHE_HIT: {
            struct file *fpin = (struct file *) retval;
            if (fpin == NULL) {
                cmplt_event->size = 0;
            } else {
                cmplt_event->size = 1;
            }
            break;
        }
        case PF_FILE_BACKED_IN:
        case PF_SWAP_FROM_ZRAM:
        case PF_SWAP_FROM_DISK:
        case PF_ZERO_FILL_PAGE:
        case PF_FAKE_ZERO_PAGE:
        case PF_COPY_ON_WRITE: {
            vm_fault_t vmf_flags = (vm_fault_t) retval;
            if (vmf_flags & VM_FAULT_ERROR) {
                cmplt_event->size = 0;
            } else {
                cmplt_event->size = 1;
            }
            break;
        }
        default: return -1;
    }
    return 0;
}

static __always_inline
int emit_pftrace_event(void* ctx, int64_t retval)
{
    // get timestamp as soon as possible
    u64 ctime = bpf_ktime_get_ns();
    u64 pid_tgid = bpf_get_current_pid_tgid();

    const u64 event_size = sizeof(struct pftrace_cmplt_event_t);
    struct pftrace_cmplt_event_t* cmplt_event = bpf_ringbuf_reserve(&bpf_ringbuf_map, event_size, 0);
    if (cmplt_event == NULL) {
        BPFLOGD(BPF_TRUE, "failed to reserve space for pftrace event from BPF ringbuffer");
        return -1;
    }
    __builtin_memset(cmplt_event, 0, event_size);

    const struct pftrace_start_event_t *se_ptr = bpf_map_lookup_elem(&start_event_map, &pid_tgid);
    int err = bpf_probe_read_kernel(&cmplt_event->start_event, sizeof(cmplt_event->start_event), se_ptr);
    if (err) {
        BPFLOGI(BPF_TRUE, "pftrace event discarded: failed to read pftrace start event");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }
    if (cmplt_event->start_event.type == 0) {
        BPFLOGI(BPF_TRUE, "pftrace event discarded: invalid pftrace start event");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }
    cmplt_event->tracer = PFTRACE;
    cmplt_event->ctime = ctime;
    cmplt_event->pid = (u32) pid_tgid;
    cmplt_event->tgid = (pid_tgid >> SHIFT_32);
    cmplt_event->uid = bpf_get_current_uid_gid();
    cmplt_event->gid = (bpf_get_current_uid_gid() >> SHIFT_32);
    err = bpf_get_current_comm(cmplt_event->comm, MAX_COMM_LEN);
    if (err < 0) {
        BPFLOGD(BPF_TRUE, "pftrace event discarded: failed to get process command");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }
    err = handle_return_value(cmplt_event, retval);
    if (err) {
        BPFLOGW(BPF_TRUE, "pftrace event discarded: failed to handle pftrace return value");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }

    // get user callchain
    if (unwind_stack()) {
        cmplt_event->nips =
            bpf_get_stack(ctx, cmplt_event->ips, g_stack_limit * sizeof(u64), BPF_F_USER_STACK) / sizeof(u64);
    }

    if (read_modify_update_page_fault_stats(cmplt_event->start_event.type,
                                            cmplt_event->ctime - cmplt_event->start_event.stime) != 0) {
        BPFLOGD(BPF_TRUE, "pftrace event discarded: failed to update pftrace stats");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }

    bpf_ringbuf_submit(cmplt_event, BPF_RB_FORCE_WAKEUP);
    return 0;
}

static __always_inline
int emit_event(void* ctx, int64_t retval, u32 tracer)
{
    switch (tracer) {
        case FSTRACE:   return emit_fstrace_event(ctx, retval);
        case PFTRACE:   return emit_pftrace_event(ctx, retval);
        default:
            BPFLOGD(BPF_TRUE, "unknown event source with id = %d", tracer);
            return -1;
    }
}

static __always_inline
int is_target_process(const char* target_comm, const size_t comm_size)
{
    char curr_comm[MAX_COMM_LEN] = "\0";
    long retval = bpf_get_current_comm(curr_comm, sizeof(curr_comm));
    if (retval == 0) {
        size_t min_comm = comm_size < sizeof(curr_comm) ? comm_size : sizeof(curr_comm);
        for (size_t j = 0; j <= min_comm; ++j) {
            if (j == min_comm) {
                char fmt[] = "current comm is %s\n";
                bpf_trace_printk(fmt, sizeof(fmt), curr_comm);
                return 0;
            }
            if (target_comm[j] != curr_comm[j]) {
                break;
            }
        }
    }
    return -1;
}

// if both the pid and tgid values are -1, check whether the event occurs in the current process
static __always_inline
int check_current_pid(const int32_t pid, const int32_t tgid)
{
    u32 index = 0;
    const int32_t* tracer_pid_ptr = bpf_map_lookup_elem(&config_var_map, &index);
    int32_t tracer_pid = -1;
    bpf_probe_read_kernel(&tracer_pid, sizeof(int32_t), tracer_pid_ptr);
    int32_t curr_pid = pid;
    if (curr_pid < 0) {
        curr_pid = (int32_t)bpf_get_current_pid_tgid();
    }
    int32_t curr_tgid = tgid;
    if (curr_tgid < 0) {
        curr_tgid = (int32_t)(bpf_get_current_pid_tgid() >> SHIFT_32);
    }
    if (curr_pid == tracer_pid || curr_tgid == tracer_pid) {
        // current process is the tracer itself, not a target process
        return -1;
    }
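    // target_pid_map layout (as used below): entry 0 is a trace-all flag, entries
    // 1..MAX_TARGET_PIDS hold the target pids, and a negative entry terminates the list.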
    const u32 trace_all_index = 0;
    int32_t target_pids[MAX_TARGET_PIDS + 1];
    __builtin_memset(target_pids, 0, sizeof(target_pids));
    const int32_t *target_pids_ptr = bpf_map_lookup_elem(&target_pid_map, &trace_all_index);
    if (target_pids_ptr == NULL) {
        BPFLOGW(BPF_TRUE, "failed to lookup target pid map, will trace all processes");
        return 0;
    }
    int err = bpf_probe_read_kernel(target_pids, sizeof(int32_t) * (MAX_TARGET_PIDS + 1), target_pids_ptr);
    if (err) {
        BPFLOGW(BPF_TRUE, "failed to read target pids, will trace all processes");
        return 0;
    }
    if (target_pids[trace_all_index] != 0) {
        return 0;
    }
    for (u32 i = 1; i != MAX_TARGET_PIDS; ++i) {
        if (target_pids[i] < 0) {
            break;
        }
        if (target_pids[i] == curr_pid || target_pids[i] == curr_tgid) {
            return 0;
        }
    }
    return -1;
}

static __always_inline
size_t get_mountpoint_by_inode(char *filename, int len, const struct inode *host)
{
    struct list_head *mountsHead = BPF_CORE_READ(host, i_sb, s_mounts.next);
    struct mount *mnt = NULL;
    mnt = list_entry(mountsHead, struct mount, mnt_instance);
    const u32 MAX_MOUNT_POINT = 5;
    size_t pos = 0;
    for (u32 cnt = MAX_MOUNT_POINT; cnt != 0; --cnt) {
        long name_len = 0;
        const u8 *name = BPF_CORE_READ(mnt, mnt_mountpoint, d_name.name);
        if (BPF_CORE_READ(mnt, mnt_mountpoint, d_name.len) <= 1) {
            break;
        }
        name_len = bpf_probe_read_kernel_str(filename + pos, MAX_DENTRY_NAME_LEN, name);
        if (name_len <= 1) {
            BPFLOGD(BPF_TRUE, "failed to read dentry name from kernel stack buffer");
            break;
        }
        pos += (size_t)name_len;
        filename[pos - 1] = '/';
        mnt = BPF_CORE_READ(mnt, mnt_parent);
    }
    return pos;
}

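// get_filename_by_inode() walks from an alias dentry of the inode up through its parents,
// appending one '/'-terminated component per step, and then appends the mountpoint path;
// the resulting buffer therefore stores the path components in leaf-to-root order.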
static __always_inline
int get_filename_by_inode(char *filename, const size_t len, const struct inode *host)
{
    int err = 0;
    struct hlist_node *curr_hlist_node = BPF_CORE_READ(host, i_dentry.first);
    if (curr_hlist_node == NULL) {
        BPFLOGD(BPF_TRUE, "failed to get alias dentries of the inode");
        return -1;
    }

    const struct dentry *curr_dentry = NULL;
    const u32 MAX_ALIAS_DENTRY = 100;
    for (u32 cnt = MAX_ALIAS_DENTRY; cnt != 0; --cnt) {
        curr_dentry = container_of(curr_hlist_node, struct dentry, d_u);
        struct inode *curr_inode = BPF_CORE_READ(curr_dentry, d_inode);
        if (curr_inode == NULL) {
            BPFLOGD(BPF_TRUE, "failed to get the current inode");
            return -1;
        }
        if (curr_inode == host) {
            break;
        }
        curr_hlist_node = BPF_CORE_READ(curr_hlist_node, next);
        if (curr_hlist_node == NULL) {
            BPFLOGD(BPF_TRUE, "failed to get the next hlist_node");
            break;
        }
    }

    unsigned int flags = BPF_CORE_READ(curr_dentry, d_flags);
    flags = (flags & 0xFFFFFF) >> FILE_TYPE_BITS;
    if (flags != DCACHE_DIRECTORY_TYPE && flags != DCACHE_REGULAR_TYPE) {
        return 0;
    }

    size_t pos = 0;
    const u32 MAX_BACKTRACE_DEPTH = 20;
    for (u32 cnt = MAX_BACKTRACE_DEPTH; cnt != 0; --cnt) {
        if (err || curr_dentry == NULL) {
            break;
        }
        unsigned int name_len = BPF_CORE_READ(curr_dentry, d_name.len);
        const u8 *name = BPF_CORE_READ(curr_dentry, d_name.name);
        if (name_len <= 1) {
            break;
        }
        long dentry_name_len = bpf_probe_read_kernel_str(filename + pos, MAX_DENTRY_NAME_LEN, name);
        if (dentry_name_len <= 1) {
            BPFLOGD(BPF_TRUE, "failed to read dentry name from kernel stack buffer");
            break;
        }
        pos += (size_t)dentry_name_len;
        filename[pos - 1] = '/';
        struct dentry *temp_dentry = BPF_CORE_READ(curr_dentry, d_parent);
        if (temp_dentry == curr_dentry || temp_dentry == NULL) {
            break;
        }
        curr_dentry = temp_dentry;
    }
    pos += get_mountpoint_by_inode(filename + pos, len - pos, host);
    return pos + 1;
}

static __always_inline
int get_filename_by_bio(char *filename, const size_t len, const struct bio *bio)
{
    if (filename == NULL || len == 0 || bio == NULL) {
        BPFLOGD(BPF_TRUE, "get_filename_by_bio() error: invalid argument");
        return -1;
    }
    struct inode *host = BPF_CORE_READ(bio, bi_io_vec, bv_page, mapping, host);
    if (host == NULL) {
        BPFLOGD(BPF_TRUE, "failed to get the bio associated inode");
        return -1;
    }

    return get_filename_by_inode(filename, len, host);
}

static __always_inline
struct file* get_file_by_fd(const struct files_struct *files, const unsigned int fd)
{
    if (files == NULL) {
        BPFLOGD(BPF_TRUE, "get_file_by_fd() error: invalid argument");
        return NULL;
    }
    if (fd >= BPF_CORE_READ(files, fdt, max_fds)) {
        BPFLOGD(BPF_TRUE, "get_file_by_fd() error: invalid argument");
        return NULL;
    }
    struct file **fd_array = BPF_CORE_READ(files, fdt, fd);
    if (fd_array == NULL) {
        BPFLOGD(BPF_TRUE, "failed to get fd array");
        return NULL;
    }
    struct file *filp = NULL;
    int err = bpf_probe_read_kernel(&filp, sizeof(filp), &fd_array[fd]);
    if (err || filp == NULL) {
        BPFLOGD(BPF_TRUE, "failed to get file");
        return NULL;
    }
    return filp;
}

static __always_inline
int get_filename_by_file(char *filename, const size_t len, const struct file *filp)
{
    if (filename == NULL || filp == NULL || len == 0) {
        BPFLOGD(BPF_TRUE, "get_filename_by_file() error: invalid argument");
        return -1;
    }
    struct inode *f_inode = BPF_CORE_READ(filp, f_inode);
    if (f_inode == NULL) {
        BPFLOGD(BPF_TRUE, "failed to get inode");
        return -1;
    }
    return get_filename_by_inode(filename, len, f_inode);
}

static __always_inline
int emit_strtrace_event(u64 stime, u32 type, const void *addr, u32 stracer)
{
    if (addr == NULL) {
        BPFLOGD(BPF_TRUE, "strtrace event discarded: invalid argument");
        return -1;
    }
    const u64 event_size = sizeof(struct strtrace_cmplt_event_t);
    struct strtrace_cmplt_event_t *cmplt_event = bpf_ringbuf_reserve(&bpf_ringbuf_map, event_size, 0);
    if (cmplt_event == NULL) {
        BPFLOGD(BPF_TRUE, "failed to reserve space for strtrace event from BPF ringbuffer");
        return -1;
    }
    __builtin_memset(cmplt_event, 0, event_size);
    cmplt_event->tracer = STRTRACE;
    cmplt_event->start_event.type = type;
    cmplt_event->start_event.stracer = stracer;
    cmplt_event->start_event.stime = stime;
    cmplt_event->start_event.addr = addr;
    cmplt_event->pid = bpf_get_current_pid_tgid();
    cmplt_event->tgid = (bpf_get_current_pid_tgid() >> SHIFT_32);
    int err = 0;
    switch (stracer) {
        case BIOTRACE: {
            err = get_filename_by_bio(cmplt_event->filename, MAX_FILENAME_LEN, addr);
            break;
        }
        case FSTRACE: {
            if (type == SYS_OPENAT2) {
                err = bpf_probe_read_user_str(cmplt_event->filename, MAX_FILENAME_LEN, addr);
                break;
            }
            if (type == SYS_CLOSE) {
                const struct sys_close_args_t *args = (const struct sys_close_args_t *) addr;
                const struct file *filp = get_file_by_fd(args->files, args->fd);
                err = get_filename_by_file(cmplt_event->filename, MAX_FILENAME_LEN, filp);
                break;
            }
            BPFLOGD(BPF_TRUE, "strtrace event discarded: bad source event type = %d of fstrace", type);
            bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
            return -1;
        }
        default: {
            BPFLOGD(BPF_TRUE, "strtrace event discarded: bad strtrace source tracer = %d", stracer);
            bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
            return -1;
        }
    }

    if (err <= 0) {
        BPFLOGD(BPF_TRUE, "strtrace event discarded: failed to read path for tracer = %d", stracer);
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }
    cmplt_event->len = (__u32)err;
    bpf_ringbuf_submit(cmplt_event, BPF_RB_FORCE_WAKEUP);
    return 0;
}

static __always_inline
u32 get_biotrace_event_type_by_flags(unsigned int cmd_flags)
{
    if (cmd_flags & REQ_META) {
        if ((cmd_flags & REQ_OP_MASK) == REQ_OP_READ) {
            return BIO_METADATA_READ;
        }
        if ((cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE) {
            return BIO_METADATA_WRITE;
        }
        return 0;
    }
    if (cmd_flags & REQ_SWAP) {
        if ((cmd_flags & REQ_OP_MASK) == REQ_OP_READ) {
            return BIO_PAGE_IN;
        }
        if ((cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE) {
            return BIO_PAGE_OUT;
        }
        return 0;
    }
    if ((cmd_flags & REQ_OP_MASK) == REQ_OP_READ) {
        return BIO_DATA_READ;
    }
    if ((cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE) {
        return BIO_DATA_WRITE;
    }
    return 0;
}
/***************************** inline funcs END **********************************/

/***************************** pftrace BPF progs BEGIN *****************************/
SEC("kprobe/__do_fault")
int BPF_KPROBE(__do_fault_entry, struct vm_fault *vmf)
{
    if (check_current_pid(-1, -1) != 0) {
        // not one of the target processes, skip it
        return 0;
    }
    return handle_pftrace_start_event(vmf, (u32) PF_FILE_BACKED_IN);
}

SEC("kretprobe/__do_fault")
int BPF_KRETPROBE(__do_fault_exit, int64_t vmf_flags)
{
    if (check_current_pid(-1, -1) != 0) {
        // not one of the target processes, skip it
        return 0;
    }
    return emit_event(ctx, vmf_flags, PFTRACE);
}

SEC("kprobe/do_swap_page")
int BPF_KPROBE(do_swap_page_entry, struct vm_fault *vmf)
{
    if (check_current_pid(-1, -1) != 0) {
        // not one of the target processes, skip it
        return 0;
    }
    return handle_pftrace_start_event(vmf, (u32) PF_SWAP_FROM_DISK);
}

SEC("kretprobe/do_swap_page")
int BPF_KRETPROBE(do_swap_page_exit, int64_t vmf_flags)
{
    if (check_current_pid(-1, -1) != 0) {
        // not one of the target processes, skip it
        return 0;
    }
    return emit_event(ctx, vmf_flags, PFTRACE);
}

SEC("kprobe/do_wp_page")
int BPF_KPROBE(do_wp_page_entry, struct vm_fault *vmf)
{
    if (check_current_pid(-1, -1) != 0) {
        // not one of the target processes, skip it
        return 0;
    }
    return handle_pftrace_start_event(vmf, (u32) PF_COPY_ON_WRITE);
}

SEC("kretprobe/do_wp_page")
int BPF_KRETPROBE(do_wp_page_exit, int64_t vmf_flags)
{
    if (check_current_pid(-1, -1) != 0) {
        // not one of the target processes, skip it
        return 0;
    }
    return emit_event(ctx, vmf_flags, PFTRACE);
}
/*************************** pftrace BPF progs END *******************************/

/**************************** bio BPF progs BEGIN *******************************/
static __always_inline
int handle_blk_issue(struct request *rq)
{
    if (rq == NULL) {
        BPFLOGD(BPF_TRUE, "request is NULL");
        return 0;
    }
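    // The request pointer itself is used as the start_event_map key so that the
    // completion probe on blk_update_request() can find the matching start event.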
    const u64 start_event_map_key = (const u64)rq;
    struct start_event_t start_event = {};
    __builtin_memset(&start_event, 0, sizeof(start_event));
    struct biotrace_start_event_t *bio_se = &start_event.bio_se;
    bio_se->stime = bpf_ktime_get_ns();
    bio_se->type = get_biotrace_event_type_by_flags(BPF_CORE_READ(rq, cmd_flags));
    if (bio_se->type == 0) {
        BPFLOGD(BPF_TRUE, "failed to get biotrace event type");
        return 0;
    }
    emit_strtrace_event(bio_se->stime, bio_se->type, BPF_CORE_READ(rq, bio), BIOTRACE);
    u64 tgid_pid = bpf_get_current_pid_tgid();
    bio_se->pid = (u32) tgid_pid;
    bio_se->tgid = (u32) (tgid_pid >> SHIFT_32);
    bpf_get_current_comm(bio_se->comm, MAX_COMM_LEN);
    bio_se->size = BPF_CORE_READ(rq, bio, bi_iter.bi_size);
    bpf_map_update_elem(&start_event_map, &start_event_map_key, &start_event, BPF_ANY);
    return 0;
}

SEC("tp_btf/block_rq_issue")
int block_issue(u64 *ctx)
{
    if (check_current_pid(-1, -1) != 0) {
        return 0;
    }
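    // The block_rq_issue tracepoint dropped its request_queue argument in newer kernels,
    // so on older kernels the struct request pointer is arg 1 and afterwards it is arg 0;
    // the 5.10.137 cut-off presumably matches the kernel this object is built against.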
    if (LINUX_KERNEL_VERSION < KERNEL_VERSION(5, 10, 137)) {
        return handle_blk_issue((void *)ctx[1]);
    } else {
        return handle_blk_issue((void *)ctx[0]);
    }
}

SEC("kprobe/blk_update_request")
int BPF_PROG(blk_update_request, struct request *rq)
{
    if (check_current_pid(-1, -1) != 0) {
        return 0;
    }
    u64 ctime = bpf_ktime_get_ns();
    const u64 event_size = sizeof(struct biotrace_cmplt_event_t);
    struct biotrace_cmplt_event_t *cmplt_event = bpf_ringbuf_reserve(&bpf_ringbuf_map, event_size, 0);
    if (cmplt_event == NULL) {
        BPFLOGD(BPF_TRUE, "failed to reserve space for biotrace event from BPF ringbuffer");
        return 0;
    }
    __builtin_memset(cmplt_event, 0, event_size);
    const u64 start_event_map_key = (const u64)rq;
    const struct biotrace_start_event_t *start_event = bpf_map_lookup_elem(&start_event_map, &start_event_map_key);
    if (start_event == NULL) {
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return 0;
    }
    cmplt_event->start_event = *start_event;
    if (cmplt_event->start_event.type == 0) {
        BPFLOGI(BPF_TRUE, "biotrace event discarded: invalid biotrace start event");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return 0;
    }
    cmplt_event->tracer = BIOTRACE;
    cmplt_event->ctime = ctime;
    cmplt_event->prio = BPF_CORE_READ(rq, bio, bi_ioprio);
    cmplt_event->blkcnt = BPF_CORE_READ(rq, bio, bi_iter.bi_sector);
    if (unwind_stack()) {
        cmplt_event->nips = bpf_get_stack(ctx, cmplt_event->ips, g_stack_limit * sizeof(u64), 0) / sizeof(u64);
    }
    bpf_ringbuf_submit(cmplt_event, BPF_RB_FORCE_WAKEUP);
    return 0;
}
/*************************** bio BPF progs END *******************************/

/*************************** user BPF progs START ****************************/
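// The uretprobe on dlopen() below only emits a lightweight start event for the calling
// process; presumably the user-space tracer uses it as a cue to re-read that process's
// memory maps so newly loaded libraries can be symbolized.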
SEC("uretprobe//system/lib/ld-musl-aarch64.so.1:dlopen")
int BPF_KRETPROBE(uretprobe_dlopen, void *ret)
{
    if (check_current_pid(-1, -1) != 0) {
        return 0;
    }
    if (ret == NULL) {
        return 0;
    }

    const u64 event_size = sizeof(struct dlopen_trace_start_event_t);
    struct dlopen_trace_start_event_t *start_event = bpf_ringbuf_reserve(&bpf_ringbuf_map, event_size, 0);
    if (start_event == NULL) {
        BPFLOGD(BPF_TRUE, "failed to reserve space for dlopen trace event from BPF ringbuffer");
        return -1;
    }
    start_event->type = DLOPEN_TRACE;
    u64 tgid_pid = bpf_get_current_pid_tgid();
    start_event->tgid = (u32)(tgid_pid >> SHIFT_32);
    bpf_ringbuf_submit(start_event, BPF_RB_FORCE_WAKEUP);
    return 0;
}
/*************************** user BPF progs END ****************************/
#include "fstrace_progs.h"

char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;