/*
 * Copyright (c) 2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <linux/version.h>
#include "vmlinux.h"
#include "bpf_helpers.h"
#include "bpf_tracing.h"
#include "bpf_core_read.h"
#include "hiebpf_types.h"
#include "bpf_log_writer.h"

#ifndef VM_FAULT_ERROR
#define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | \
                        VM_FAULT_SIGSEGV | VM_FAULT_HWPOISON | \
                        VM_FAULT_HWPOISON_LARGE | VM_FAULT_FALLBACK)
#endif

extern int LINUX_KERNEL_VERSION __kconfig;
const volatile unsigned int g_stack_limit = 0;
static const int SHIFT_32 = 32;

// global configuration data
// const volatile int tracer_pid = -1;
// const volatile int bpf_log_level = BPF_LOG_DEBUG;
// const volatile int max_stack_depth = MAX_STACK_LIMIT;
// const volatile int target_pids[MAX_TARGET_PIDS];
/*********************************************************************************
 * use BPF maps to store global configuration data when global variables
 * are not available
 ********************/
struct {
    /*
     * config_var_map[0] = tracer_pid, tracer_pid != -1 means not tracing the tracer itself
     * config_var_map[1] = bpf_log_level
     * config_var_map[2] = max_stack_limit
     * config_var_map[3] = max_stack_depth
     * config_var_map[4] = unwind_stack, non-zero means unwind the user stack, otherwise do not unwind
     */
    __uint(type, BPF_MAP_TYPE_ARRAY);
    __uint(key_size, sizeof(u32));
    __uint(value_size, sizeof(u32));
    __uint(max_entries, NR_CONFIG_VARIABLES);
} config_var_map SEC(".maps");
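
/*
 * Illustrative sketch (an assumption, not part of this program): the user-space loader
 * is expected to fill config_var_map before attaching the programs, e.g. via the libbpf
 * map-update wrapper. The skeleton name used below is hypothetical.
 *
 *     u32 key = 0;                 // slot 0: tracer_pid
 *     u32 value = (u32)getpid();
 *     bpf_map_update_elem(bpf_map__fd(skel->maps.config_var_map), &key, &value, BPF_ANY);
 */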

struct {
    /*
     * target_pid_map[0] != 0 means tracing all processes
     */
    __uint(type, BPF_MAP_TYPE_ARRAY);
    __uint(key_size, sizeof(u32));
    __uint(value_size, sizeof(u32));
    __uint(max_entries, MAX_TARGET_PIDS + 1);
} target_pid_map SEC(".maps");
/**********************
 *************************************************************************************/

/******************************** BPF maps BEGIN *************************************/
/* start event map */
struct {
    /* Since syscalls of the same thread never overlap,
     * we can simply use the pid_tgid value as the identifier of the start of a syscall
     */
    __uint(type, BPF_MAP_TYPE_HASH);
    __uint(key_size, sizeof(u64));
    __uint(value_size, sizeof(struct start_event_t));
    __uint(max_entries, MAX_START_EVENTS_NUM);
} start_event_map SEC(".maps");
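
/*
 * Note: bpf_get_current_pid_tgid() packs the thread group id into the upper 32 bits and
 * the thread id into the lower 32 bits, so the u64 key above uniquely identifies the
 * calling thread. The biotrace programs reuse this map with the request pointer as the
 * key instead, since block I/O completion may run in a different task context.
 */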

/* pftrace stats map */
struct {
    __uint(type, BPF_MAP_TYPE_ARRAY);
    __uint(key_size, sizeof(u32));
    __uint(value_size, sizeof(struct pf_stat_t));
    __uint(max_entries, PF_MAX_EVENT_TYPE);
} pftrace_stats_map SEC(".maps");

/* bpf ring buffer */
struct {
    __uint(type, BPF_MAP_TYPE_RINGBUF);
    __uint(max_entries, BPF_RINGBUF_SIZE);
} bpf_ringbuf_map SEC(".maps");
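
/*
 * All tracers share this single ring buffer: an event is reserved with
 * bpf_ringbuf_reserve() and then either submitted or discarded. A typical libbpf
 * consumer (a sketch only, not necessarily this project's actual loader) would poll it
 * roughly like this:
 *
 *     struct ring_buffer *rb = ring_buffer__new(map_fd, handle_event, NULL, NULL);
 *     while (!exiting) {
 *         ring_buffer__poll(rb, 100); // timeout in milliseconds
 *     }
 */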

/********************************* BPF maps END *************************************/

/******************************** inline funcs BEGIN ********************************/
static __always_inline
int unwind_stack()
{
    u32 index = UNWIND_FLAG_INDEX;
    const u32 *unwind_ptr = bpf_map_lookup_elem(&config_var_map, &index);
    u32 unwind = 0;
    int err = bpf_probe_read_kernel(&unwind, sizeof(u32), unwind_ptr);
    if (err) {
        BPFLOGW(BPF_TRUE, "failed to read unwind configuration");
    }
    return unwind;
}

static __always_inline
int emit_fstrace_event(void* ctx, int64_t retval)
{
    // get exit timestamp as soon as possible
    u64 ctime = bpf_ktime_get_ns();
    u64 pid_tgid = bpf_get_current_pid_tgid();

    const u64 event_size = sizeof(struct fstrace_cmplt_event_t);
    struct fstrace_cmplt_event_t *cmplt_event = bpf_ringbuf_reserve(&bpf_ringbuf_map, event_size, 0);
    if (cmplt_event == NULL) {
        BPFLOGD(BPF_TRUE, "failed to reserve space for fstrace event from BPF ringbuffer");
        return -1;
    }
    __builtin_memset(cmplt_event, 0, event_size);

    const struct fstrace_start_event_t* start_event = bpf_map_lookup_elem(&start_event_map, &pid_tgid);
    int err = bpf_probe_read_kernel(&cmplt_event->start_event, sizeof(struct fstrace_start_event_t), start_event);
    if (err) {
        BPFLOGD(BPF_TRUE, "fstrace event discarded: failed to read fstrace_start_event");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }
    if (cmplt_event->start_event.type == 0) {
        BPFLOGI(BPF_TRUE, "fstrace event discarded: invalid fstrace start event");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }

    cmplt_event->ctime = ctime;
    cmplt_event->tracer = FSTRACE;
    cmplt_event->pid = (u32) pid_tgid;
    cmplt_event->tgid = (u32) (pid_tgid >> SHIFT_32);
    cmplt_event->uid = bpf_get_current_uid_gid();
    cmplt_event->gid = (bpf_get_current_uid_gid() >> SHIFT_32);
    cmplt_event->retval = retval;
    err = bpf_get_current_comm(cmplt_event->comm, MAX_COMM_LEN);
    if (err) {
        BPFLOGD(BPF_TRUE, "fstrace event discarded: failed to get process command");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }

    // get user callchain
    if (unwind_stack()) {
        cmplt_event->nips =
            bpf_get_stack(ctx, cmplt_event->ips, g_stack_limit * sizeof(u64), BPF_F_USER_STACK) / sizeof(u64);
    }

    // send the complete event data out through the BPF ring buffer
    bpf_ringbuf_submit(cmplt_event, BPF_RB_FORCE_WAKEUP);
    return 0;
}

static __always_inline
int handle_pftrace_start_event(struct vm_fault *vmf, u32 type)
{
    struct start_event_t start_event = {};
    __builtin_memset(&start_event, 0, sizeof(start_event));
    struct pftrace_start_event_t *pf_se = &start_event.pf_se;
    pf_se->stime = bpf_ktime_get_ns();
    pf_se->type = type;
    int err = bpf_probe_read_kernel(&pf_se->addr, sizeof(vmf->address), &vmf->address);
    if (err) {
        BPFLOGW(BPF_TRUE, "failed to read page fault address for pftrace start event");
        // clear the start event to indicate the error
        __builtin_memset(&start_event, 0, sizeof(start_event));
    }
    u64 pid_tgid = bpf_get_current_pid_tgid();
    err = (int) bpf_map_update_elem(&start_event_map, &pid_tgid, &start_event, BPF_ANY);
    if (err < 0) {
        // this should never happen
        BPFLOGF(BPF_TRUE, "failed to update pftrace_start_event for pid_tgid = %lld", pid_tgid);
        return -1;
    }
    return 0;
}

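/*
 * Maintains running statistics per page-fault type. avg_duration is the running mean;
 * dev_duration is kept as a running dispersion term of the form E[d^2] - mean^2
 * (a population-variance style estimate, truncated by integer division): the update
 * adds duration^2 to count * dev_duration + tot_duration * avg_duration (which
 * approximates the accumulated sum of squared durations) and subtracts the new
 * tot_duration * avg_duration before dividing by the new count.
 */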
static __always_inline
int read_modify_update_page_fault_stats(u32 type, u32 duration)
{
    const struct pf_stat_t *stat_ptr = bpf_map_lookup_elem(&pftrace_stats_map, &type);
    if (stat_ptr == NULL) {
        BPFLOGD(BPF_TRUE, "failed to lookup pftrace stat");
        return -1;
    }
    struct pf_stat_t stat;
    int err = (int) bpf_probe_read_kernel(&stat, sizeof(stat), stat_ptr);
    if (err < 0) {
        BPFLOGD(BPF_TRUE, "failed to read pftrace stat");
        return -1;
    }
    u32 tot_duration = stat.tot_duration + duration;
    u32 avg_duration = tot_duration / (stat.count + 1);
    stat.dev_duration = (duration * duration + stat.count * stat.dev_duration
                         + stat.tot_duration * stat.avg_duration - tot_duration * avg_duration)
                        / (stat.count + 1);
    ++stat.count;
    stat.tot_duration = tot_duration;
    stat.avg_duration = avg_duration;
    if (duration < stat.min_duration) {
        stat.min_duration = duration;
    } else if (duration > stat.max_duration) {
        stat.max_duration = duration;
    }
    err = (int) bpf_map_update_elem(&pftrace_stats_map, &type, &stat, BPF_ANY);
    if (err < 0) {
        BPFLOGD(BPF_TRUE, "failed to update pftrace stat");
        return -1;
    }

    return 0;
}

static __always_inline
int read_modify_update_current_type(u32 type)
{
    u64 pid_tgid = bpf_get_current_pid_tgid();
    const struct pftrace_start_event_t *se_ptr = bpf_map_lookup_elem(&start_event_map, &pid_tgid);
    struct pftrace_start_event_t start_event;
    int err = bpf_probe_read_kernel(&start_event, sizeof(start_event), se_ptr);
    if (err) {
        BPFLOGW(BPF_TRUE, "failed to read pftrace start event to update event type");
        return 0;
    }
    u32 old_type = start_event.type;
    if (type) {
        start_event.type = type;
    }
    return old_type;
}

static __always_inline
int handle_return_value(struct pftrace_cmplt_event_t* cmplt_event, long long retval)
{
    switch (read_modify_update_current_type(0)) {
        case PF_PAGE_CACHE_HIT: {
            struct file *fpin = (struct file *) retval;
            if (fpin == NULL) {
                cmplt_event->size = 0;
            } else {
                cmplt_event->size = 1;
            }
            break;
        }
        case PF_FILE_BACKED_IN:
        case PF_SWAP_FROM_ZRAM:
        case PF_SWAP_FROM_DISK:
        case PF_ZERO_FILL_PAGE:
        case PF_FAKE_ZERO_PAGE:
        case PF_COPY_ON_WRITE: {
            vm_fault_t vmf_flags = (vm_fault_t) retval;
            if (vmf_flags & VM_FAULT_ERROR) {
                cmplt_event->size = 0;
            } else {
                cmplt_event->size = 1;
            }
            break;
        }
        default: return -1;
    }
    return 0;
}

static __always_inline
int emit_pftrace_event(void* ctx, int64_t retval)
{
    // get timestamp as soon as possible
    u64 ctime = bpf_ktime_get_ns();
    u64 pid_tgid = bpf_get_current_pid_tgid();

    const u64 event_size = sizeof(struct pftrace_cmplt_event_t);
    struct pftrace_cmplt_event_t* cmplt_event = bpf_ringbuf_reserve(&bpf_ringbuf_map, event_size, 0);
    if (cmplt_event == NULL) {
        BPFLOGD(BPF_TRUE, "failed to reserve space for pftrace event from BPF ringbuffer");
        return -1;
    }
    __builtin_memset(cmplt_event, 0, event_size);

    const struct pftrace_start_event_t *se_ptr = bpf_map_lookup_elem(&start_event_map, &pid_tgid);
    int err = bpf_probe_read_kernel(&cmplt_event->start_event, sizeof(cmplt_event->start_event), se_ptr);
    if (err) {
        BPFLOGI(BPF_TRUE, "pftrace event discarded: failed to read pftrace start event");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }
    if (cmplt_event->start_event.type == 0) {
        BPFLOGI(BPF_TRUE, "pftrace event discarded: invalid pftrace start event");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }
    cmplt_event->tracer = PFTRACE;
    cmplt_event->ctime = ctime;
    cmplt_event->pid = (u32) pid_tgid;
    cmplt_event->tgid = (pid_tgid >> SHIFT_32);
    cmplt_event->uid = bpf_get_current_uid_gid();
    cmplt_event->gid = (bpf_get_current_uid_gid() >> SHIFT_32);
    err = bpf_get_current_comm(cmplt_event->comm, MAX_COMM_LEN);
    if (err < 0) {
        BPFLOGD(BPF_TRUE, "pftrace event discarded: failed to get process command");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }
    err = handle_return_value(cmplt_event, retval);
    if (err) {
        BPFLOGW(BPF_TRUE, "pftrace event discarded: failed to handle pftrace return value");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }

    // get user callchain
    if (unwind_stack()) {
        cmplt_event->nips =
            bpf_get_stack(ctx, cmplt_event->ips, g_stack_limit * sizeof(u64), BPF_F_USER_STACK) / sizeof(u64);
    }

    if (read_modify_update_page_fault_stats(cmplt_event->start_event.type,
                                            cmplt_event->ctime - cmplt_event->start_event.stime) != 0) {
        BPFLOGD(BPF_TRUE, "pftrace event discarded: failed to update pftrace stats");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }

    bpf_ringbuf_submit(cmplt_event, BPF_RB_FORCE_WAKEUP);
    return 0;
}

static __always_inline
int emit_event(void* ctx, int64_t retval, u32 tracer)
{
    switch (tracer) {
        case FSTRACE: return emit_fstrace_event(ctx, retval);
        case PFTRACE: return emit_pftrace_event(ctx, retval);
        default:
            BPFLOGD(BPF_TRUE, "unknown event source with id = %d", tracer);
            return -1;
    }
}

static __always_inline
int is_target_process(const char* target_comm, const size_t comm_size)
{
    char curr_comm[MAX_COMM_LEN] = "\0";
    long retval = bpf_get_current_comm(curr_comm, sizeof(curr_comm));
    if (retval == 0) {
        size_t min_comm = comm_size < sizeof(curr_comm) ? comm_size : sizeof(curr_comm);
        for (size_t j = 0; j <= min_comm; ++j) {
            if (j == min_comm) {
                char fmt[] = "current comm is %s\n";
                bpf_trace_printk(fmt, sizeof(fmt), curr_comm);
                return 0;
            }
            if (target_comm[j] != curr_comm[j]) {
                break;
            }
        }
    }
    return -1;
}

// if both the pid and tgid values are -1, check whether the event occurs in the current process
static __always_inline
int check_current_pid(const int32_t pid, const int32_t tgid)
{
    u32 index = 0;
    const int32_t* tracer_pid_ptr = bpf_map_lookup_elem(&config_var_map, &index);
    int32_t tracer_pid = -1;
    bpf_probe_read_kernel(&tracer_pid, sizeof(int32_t), tracer_pid_ptr);
    int32_t curr_pid = pid;
    if (curr_pid < 0) {
        curr_pid = (int32_t)bpf_get_current_pid_tgid();
    }
    int32_t curr_tgid = tgid;
    if (curr_tgid < 0) {
        curr_tgid = (int32_t)(bpf_get_current_pid_tgid() >> SHIFT_32);
    }
    if (curr_pid == tracer_pid || curr_tgid == tracer_pid) {
        // the current process is the tracer itself, not a target process
        return -1;
    }
    const u32 trace_all_index = 0;
    int32_t target_pids[MAX_TARGET_PIDS + 1];
    __builtin_memset(target_pids, 0, sizeof(target_pids));
    const int32_t *target_pids_ptr = bpf_map_lookup_elem(&target_pid_map, &trace_all_index);
    if (target_pids_ptr == NULL) {
        BPFLOGW(BPF_TRUE, "failed to lookup target pid map, will trace all processes");
        return 0;
    }
    int err = bpf_probe_read_kernel(target_pids, sizeof(int32_t) * (MAX_TARGET_PIDS + 1), target_pids_ptr);
    if (err) {
        BPFLOGW(BPF_TRUE, "failed to read target pids, will trace all processes");
        return 0;
    }
    if (target_pids[trace_all_index] != 0) {
        return 0;
    }
    // target pids occupy indices [1, MAX_TARGET_PIDS]
    for (u32 idx = 1; idx != MAX_TARGET_PIDS + 1; ++idx) {
        if (target_pids[idx] < 0) {
            break;
        }
        if (target_pids[idx] == curr_pid || target_pids[idx] == curr_tgid) {
            return 0;
        }
    }
    return -1;
}

static __always_inline
size_t get_mountpoint_by_inode(char *filename, int len, const struct inode *host)
{
    struct list_head *mountsHead = BPF_CORE_READ(host, i_sb, s_mounts.next);
    struct mount *mnt = NULL;
    mnt = list_entry(mountsHead, struct mount, mnt_instance);
    const u32 MAX_MOUNT_POINT = 5;
    size_t pos = 0;
    for (u32 cnt = MAX_MOUNT_POINT; cnt != 0; --cnt) {
        long name_len = 0;
        const u8 *name = BPF_CORE_READ(mnt, mnt_mountpoint, d_name.name);
        if (BPF_CORE_READ(mnt, mnt_mountpoint, d_name.len) <= 1) {
            break;
        }
        name_len = bpf_probe_read_kernel_str(filename + pos, MAX_DENTRY_NAME_LEN, name);
        if (name_len <= 1) {
            BPFLOGD(BPF_TRUE, "failed to read dentry name from kernel stack buffer");
            break;
        }
        pos += (size_t)name_len;
        filename[pos - 1] = '/';
        mnt = BPF_CORE_READ(mnt, mnt_parent);
    }
    return pos;
}

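/*
 * Walks the dentry chain from the inode's alias dentry up to the filesystem root, then
 * appends the mount-point components via get_mountpoint_by_inode(). Each component is
 * copied leaf-first and its terminating NUL is overwritten with '/', so the buffer
 * holds the path components in reverse order; turning them into a conventional
 * root-to-leaf path is presumably left to the user-space consumer.
 */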
static __always_inline
int get_filename_by_inode(char *filename, const size_t len, const struct inode *host)
{
    int err = 0;
    struct hlist_node *curr_hlist_node = BPF_CORE_READ(host, i_dentry.first);
    if (curr_hlist_node == NULL) {
        BPFLOGD(BPF_TRUE, "failed to get alias dentries of the inode");
        return -1;
    }

    const struct dentry *curr_dentry = NULL;
    const u32 MAX_ALIAS_DENTRY = 100;
    for (u32 cnt = MAX_ALIAS_DENTRY; cnt != 0; --cnt) {
        curr_dentry = container_of(curr_hlist_node, struct dentry, d_u);
        struct inode *curr_inode = BPF_CORE_READ(curr_dentry, d_inode);
        if (curr_inode == NULL) {
            BPFLOGD(BPF_TRUE, "failed to get the current inode");
            return -1;
        }
        if (curr_inode == host) {
            break;
        }
        curr_hlist_node = BPF_CORE_READ(curr_hlist_node, next);
        if (curr_hlist_node == NULL) {
            BPFLOGD(BPF_TRUE, "failed to get the next hlist_node");
            break;
        }
    }

    unsigned int flags = BPF_CORE_READ(curr_dentry, d_flags);
    flags = (flags & 0xFFFFFF) >> FILE_TYPE_BITS;
    if (flags != DCACHE_DIRECTORY_TYPE && flags != DCACHE_REGULAR_TYPE) {
        return 0;
    }

    size_t pos = 0;
    const u32 MAX_BACKTRACE_DEPTH = 20;
    for (u32 cnt = MAX_BACKTRACE_DEPTH; cnt != 0; --cnt) {
        if (err || curr_dentry == NULL) {
            break;
        }
        unsigned int name_len = BPF_CORE_READ(curr_dentry, d_name.len);
        const u8 *name = BPF_CORE_READ(curr_dentry, d_name.name);
        if (name_len <= 1) {
            break;
        }
        long dentry_name_len = bpf_probe_read_kernel_str(filename + pos, MAX_DENTRY_NAME_LEN, name);
        if (dentry_name_len <= 1) {
            BPFLOGD(BPF_TRUE, "failed to read dentry name from kernel stack buffer");
            break;
        }
        pos += (size_t)dentry_name_len;
        filename[pos - 1] = '/';
        struct dentry *temp_dentry = BPF_CORE_READ(curr_dentry, d_parent);
        if (temp_dentry == curr_dentry || temp_dentry == NULL) {
            break;
        }
        curr_dentry = temp_dentry;
    }
    pos += get_mountpoint_by_inode(filename + pos, len - pos, host);
    return pos + 1;
}

static __always_inline
int get_filename_by_bio(char *filename, const size_t len, const struct bio *bio)
{
    if (filename == NULL || len == 0 || bio == NULL) {
        BPFLOGD(BPF_TRUE, "get_filename_by_bio() error: invalid argument");
        return -1;
    }
    struct inode *host = BPF_CORE_READ(bio, bi_io_vec, bv_page, mapping, host);
    if (host == NULL) {
        BPFLOGD(BPF_TRUE, "failed to get the bio associated inode");
        return -1;
    }

    return get_filename_by_inode(filename, len, host);
}

static __always_inline
struct file* get_file_by_fd(const struct files_struct *files, const unsigned int fd)
{
    if (files == NULL) {
        BPFLOGD(BPF_TRUE, "get_file_by_fd() error: invalid argument");
        return NULL;
    }
    if (fd >= BPF_CORE_READ(files, fdt, max_fds)) {
        BPFLOGD(BPF_TRUE, "get_file_by_fd() error: invalid argument");
        return NULL;
    }
    struct file **fd_array = BPF_CORE_READ(files, fdt, fd);
    if (fd_array == NULL) {
        BPFLOGD(BPF_TRUE, "failed to get fd array");
        return NULL;
    }
    struct file *filp = NULL;
    int err = bpf_probe_read_kernel(&filp, sizeof(filp), &fd_array[fd]);
    if (err || filp == NULL) {
        BPFLOGD(BPF_TRUE, "failed to get file");
        return NULL;
    }
    return filp;
}

static __always_inline
int get_filename_by_file(char *filename, const size_t len, const struct file *filp)
{
    if (filename == NULL || filp == NULL || len == 0) {
        BPFLOGD(BPF_TRUE, "get_filename_by_file() error: invalid argument");
        return -1;
    }
    struct inode *f_inode = BPF_CORE_READ(filp, f_inode);
    if (f_inode == NULL) {
        BPFLOGD(BPF_TRUE, "failed to get inode");
        return -1;
    }
    return get_filename_by_inode(filename, len, f_inode);
}

static __always_inline
int emit_strtrace_event(u64 stime, u32 type, const void *addr, u32 stracer)
{
    if (addr == NULL) {
        BPFLOGD(BPF_TRUE, "strtrace event discarded: invalid argument");
        return -1;
    }
    const u64 event_size = sizeof(struct strtrace_cmplt_event_t);
    struct strtrace_cmplt_event_t *cmplt_event = bpf_ringbuf_reserve(&bpf_ringbuf_map, event_size, 0);
    if (cmplt_event == NULL) {
        BPFLOGD(BPF_TRUE, "failed to reserve space for strtrace event from BPF ringbuffer");
        return -1;
    }
    __builtin_memset(cmplt_event, 0, event_size);
    cmplt_event->tracer = STRTRACE;
    cmplt_event->start_event.type = type;
    cmplt_event->start_event.stracer = stracer;
    cmplt_event->start_event.stime = stime;
    cmplt_event->start_event.addr = addr;
    cmplt_event->pid = bpf_get_current_pid_tgid();
    cmplt_event->tgid = (bpf_get_current_pid_tgid() >> SHIFT_32);
    int err = 0;
    switch (stracer) {
        case BIOTRACE: {
            err = get_filename_by_bio(cmplt_event->filename, MAX_FILENAME_LEN, addr);
            break;
        }
        case FSTRACE: {
            if (type == SYS_OPENAT2) {
                err = bpf_probe_read_user_str(cmplt_event->filename, MAX_FILENAME_LEN, addr);
                break;
            }
            if (type == SYS_CLOSE) {
                const struct sys_close_args_t *args = (const struct sys_close_args_t *) addr;
                const struct file *filp = get_file_by_fd(args->files, args->fd);
                err = get_filename_by_file(cmplt_event->filename, MAX_FILENAME_LEN, filp);
                break;
            }
            BPFLOGD(BPF_TRUE, "strtrace event discarded: bad source event type = %d of fstrace", type);
            bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
            return -1;
        }
        default: {
            BPFLOGD(BPF_TRUE, "strtrace event discarded: bad strtrace source tracer = %d", stracer);
            bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
            return -1;
        }
    }

    if (err <= 0) {
        BPFLOGD(BPF_TRUE, "strtrace event discarded: failed to read path for tracer = %d", stracer);
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return -1;
    }
    cmplt_event->len = (__u32)err;
    bpf_ringbuf_submit(cmplt_event, BPF_RB_FORCE_WAKEUP);
    return 0;
}

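/*
 * Maps block-layer request flags to biotrace event types. REQ_OP_MASK selects the
 * operation (read/write); REQ_META and REQ_SWAP are modifier flags, so metadata and
 * swap requests are classified before plain data I/O. Returns 0 for operations this
 * tracer does not record.
 */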
static __always_inline
u32 get_biotrace_event_type_by_flags(unsigned int cmd_flags)
{
    if (cmd_flags & REQ_META) {
        if ((cmd_flags & REQ_OP_MASK) == REQ_OP_READ) {
            return BIO_METADATA_READ;
        }
        if ((cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE) {
            return BIO_METADATA_WRITE;
        }
        return 0;
    }
    if (cmd_flags & REQ_SWAP) {
        if ((cmd_flags & REQ_OP_MASK) == REQ_OP_READ) {
            return BIO_PAGE_IN;
        }
        if ((cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE) {
            return BIO_PAGE_OUT;
        }
        return 0;
    }
    if ((cmd_flags & REQ_OP_MASK) == REQ_OP_READ) {
        return BIO_DATA_READ;
    }
    if ((cmd_flags & REQ_OP_MASK) == REQ_OP_WRITE) {
        return BIO_DATA_WRITE;
    }
    return 0;
}
/***************************** inline funcs END **********************************/

/***************************** pftrace BPF progs BEGIN *****************************/
SEC("kprobe/__do_fault")
int BPF_KPROBE(__do_fault_entry, struct vm_fault *vmf)
{
    if (check_current_pid(-1, -1) != 0) {
        // not one of the target processes, skip it
        return 0;
    }
    return handle_pftrace_start_event(vmf, (u32) PF_FILE_BACKED_IN);
}

SEC("kretprobe/__do_fault")
int BPF_KRETPROBE(__do_fault_exit, int64_t vmf_flags)
{
    if (check_current_pid(-1, -1) != 0) {
        // not one of the target processes, skip it
        return 0;
    }
    return emit_event(ctx, vmf_flags, PFTRACE);
}

SEC("kprobe/do_swap_page")
int BPF_KPROBE(do_swap_page_entry, struct vm_fault *vmf)
{
    if (check_current_pid(-1, -1) != 0) {
        // not one of the target processes, skip it
        return 0;
    }
    return handle_pftrace_start_event(vmf, (u32) PF_SWAP_FROM_DISK);
}

SEC("kretprobe/do_swap_page")
int BPF_KRETPROBE(do_swap_page_exit, int64_t vmf_flags)
{
    if (check_current_pid(-1, -1) != 0) {
        // not one of the target processes, skip it
        return 0;
    }
    return emit_event(ctx, vmf_flags, PFTRACE);
}

SEC("kprobe/do_wp_page")
int BPF_KPROBE(do_wp_page_entry, struct vm_fault *vmf)
{
    if (check_current_pid(-1, -1) != 0) {
        // not one of the target processes, skip it
        return 0;
    }
    return handle_pftrace_start_event(vmf, (u32) PF_COPY_ON_WRITE);
}

SEC("kretprobe/do_wp_page")
int BPF_KRETPROBE(do_wp_page_exit, int64_t vmf_flags)
{
    if (check_current_pid(-1, -1) != 0) {
        // not one of the target processes, skip it
        return 0;
    }
    return emit_event(ctx, vmf_flags, PFTRACE);
}
/*************************** pftrace BPF progs END *******************************/

/**************************** bio BPF progs BEGIN *******************************/
static __always_inline
int handle_blk_issue(struct request *rq)
{
    if (rq == NULL) {
        BPFLOGD(BPF_TRUE, "request is NULL");
        return 0;
    }
    const u64 start_event_map_key = (const u64)rq;
    struct start_event_t start_event = {};
    __builtin_memset(&start_event, 0, sizeof(start_event));
    struct biotrace_start_event_t *bio_se = &start_event.bio_se;
    bio_se->stime = bpf_ktime_get_ns();
    bio_se->type = get_biotrace_event_type_by_flags(BPF_CORE_READ(rq, cmd_flags));
    if (bio_se->type == 0) {
        BPFLOGD(BPF_TRUE, "failed to get biotrace event type");
        return 0;
    }
    emit_strtrace_event(bio_se->stime, bio_se->type, BPF_CORE_READ(rq, bio), BIOTRACE);
    u64 tgid_pid = bpf_get_current_pid_tgid();
    bio_se->pid = (u32) tgid_pid;
    bio_se->tgid = (u32) (tgid_pid >> SHIFT_32);
    bpf_get_current_comm(bio_se->comm, MAX_COMM_LEN);
    bio_se->size = BPF_CORE_READ(rq, bio, bi_iter.bi_size);
    bpf_map_update_elem(&start_event_map, &start_event_map_key, &start_event, BPF_ANY);
    return 0;
}

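/*
 * The block_rq_issue tracepoint lost its struct request_queue * argument in later
 * kernels, so the struct request * moved from the second to the first tracepoint
 * argument; the kernel-version check below (the threshold looks tuned to a 5.10 LTS
 * backport) selects the matching ctx slot.
 */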
SEC("tp_btf/block_rq_issue")
int block_issue(u64 *ctx)
{
    if (check_current_pid(-1, -1) != 0) {
        return 0;
    }
    if (LINUX_KERNEL_VERSION < KERNEL_VERSION(5, 10, 137)) {
        return handle_blk_issue((void *)ctx[1]);
    } else {
        return handle_blk_issue((void *)ctx[0]);
    }
}

SEC("kprobe/blk_update_request")
int BPF_PROG(blk_update_request, struct request *rq)
{
    if (check_current_pid(-1, -1) != 0) {
        return 0;
    }
    u64 ctime = bpf_ktime_get_ns();
    const u64 event_size = sizeof(struct biotrace_cmplt_event_t);
    struct biotrace_cmplt_event_t *cmplt_event = bpf_ringbuf_reserve(&bpf_ringbuf_map, event_size, 0);
    if (cmplt_event == NULL) {
        BPFLOGD(BPF_TRUE, "failed to reserve space for biotrace event from BPF ringbuffer");
        return 0;
    }
    __builtin_memset(cmplt_event, 0, event_size);
    const u64 start_event_map_key = (const u64)rq;
    const struct biotrace_start_event_t *start_event = bpf_map_lookup_elem(&start_event_map, &start_event_map_key);
    if (start_event == NULL) {
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return 0;
    }
    cmplt_event->start_event = *start_event;
    if (cmplt_event->start_event.type == 0) {
        BPFLOGI(BPF_TRUE, "biotrace event discarded: invalid biotrace start event");
        bpf_ringbuf_discard(cmplt_event, BPF_RB_NO_WAKEUP);
        return 0;
    }
    cmplt_event->tracer = BIOTRACE;
    cmplt_event->ctime = ctime;
    cmplt_event->prio = BPF_CORE_READ(rq, bio, bi_ioprio);
    cmplt_event->blkcnt = BPF_CORE_READ(rq, bio, bi_iter.bi_sector);
    if (unwind_stack()) {
        cmplt_event->nips = bpf_get_stack(ctx, cmplt_event->ips, g_stack_limit * sizeof(u64), 0) / sizeof(u64);
    }
    bpf_ringbuf_submit(cmplt_event, BPF_RB_FORCE_WAKEUP);
    return 0;
}
/*************************** bio BPF progs END *******************************/

/*************************** user BPF progs START ****************************/
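/*
 * The double slash in the SEC name below is expected: libbpf's uprobe section format is
 * "uretprobe/BINARY:FUNCTION", and here BINARY is an absolute path starting with '/'.
 * (This reading of the section name is an assumption based on common libbpf usage.)
 */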
SEC("uretprobe//system/lib/ld-musl-aarch64.so.1:dlopen")
int BPF_KRETPROBE(uretprobe_dlopen, void *ret)
{
    if (check_current_pid(-1, -1) != 0) {
        return 0;
    }
    if (ret == NULL) {
        return 0;
    }

    const u64 event_size = sizeof(struct dlopen_trace_start_event_t);
    struct dlopen_trace_start_event_t *start_event = bpf_ringbuf_reserve(&bpf_ringbuf_map, event_size, 0);
    if (start_event == NULL) {
        BPFLOGD(BPF_TRUE, "failed to reserve space for dlopen event from BPF ringbuffer");
        return -1;
    }
    start_event->type = DLOPEN_TRACE;
    u64 tgid_pid = bpf_get_current_pid_tgid();
    start_event->tgid = (u32)(tgid_pid >> SHIFT_32);
    bpf_ringbuf_submit(start_event, BPF_RB_FORCE_WAKEUP);
    return 0;
}
/*************************** user BPF progs END ****************************/
#include "fstrace_progs.h"

char _license[] SEC("license") = "GPL";
u32 _version SEC("version") = LINUX_VERSION_CODE;