// SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2020 Wenbo Zhang #include #include #include #include #include "biosnoop.h" #define MAX_ENTRIES 10240 const volatile bool filter_cg = false; const volatile bool targ_queued = false; const volatile bool filter_dev = false; const volatile __u32 targ_dev = 0; extern __u32 LINUX_KERNEL_VERSION __kconfig; struct { __uint(type, BPF_MAP_TYPE_CGROUP_ARRAY); __type(key, u32); __type(value, u32); __uint(max_entries, 1); } cgroup_map SEC(".maps"); struct piddata { char comm[TASK_COMM_LEN]; u32 pid; }; struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(max_entries, MAX_ENTRIES); __type(key, struct request *); __type(value, struct piddata); __uint(map_flags, BPF_F_NO_PREALLOC); } infobyreq SEC(".maps"); struct stage { u64 insert; u64 issue; __u32 dev; }; struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(max_entries, MAX_ENTRIES); __type(key, struct request *); __type(value, struct stage); } start SEC(".maps"); struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); __uint(key_size, sizeof(u32)); __uint(value_size, sizeof(u32)); } events SEC(".maps"); static __always_inline int trace_pid(struct request *rq) { u64 id = bpf_get_current_pid_tgid(); struct piddata piddata = {}; piddata.pid = id >> 32; bpf_get_current_comm(&piddata.comm, sizeof(&piddata.comm)); bpf_map_update_elem(&infobyreq, &rq, &piddata, 0); return 0; } SEC("fentry/blk_account_io_start") int BPF_PROG(blk_account_io_start, struct request *rq) { if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0)) return 0; return trace_pid(rq); } SEC("kprobe/blk_account_io_merge_bio") int BPF_KPROBE(blk_account_io_merge_bio, struct request *rq) { if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0)) return 0; return trace_pid(rq); } static __always_inline int trace_rq_start(struct request *rq, bool insert) { struct stage *stagep, stage = {}; u64 ts = bpf_ktime_get_ns(); stagep = bpf_map_lookup_elem(&start, &rq); if (!stagep) { struct gendisk *disk = BPF_CORE_READ(rq, rq_disk); stage.dev = disk ? MKDEV(BPF_CORE_READ(disk, major), BPF_CORE_READ(disk, first_minor)) : 0; if (filter_dev && targ_dev != stage.dev) return 0; stagep = &stage; } if (insert) stagep->insert = ts; else stagep->issue = ts; if (stagep == &stage) bpf_map_update_elem(&start, &rq, stagep, 0); return 0; } SEC("tp_btf/block_rq_insert") int BPF_PROG(block_rq_insert) { if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0)) return 0; /** * commit a54895fa (v5.11-rc1) changed tracepoint argument list * from TP_PROTO(struct request_queue *q, struct request *rq) * to TP_PROTO(struct request *rq) */ if (LINUX_KERNEL_VERSION > KERNEL_VERSION(5, 10, 0)) return trace_rq_start((void *)ctx[0], true); else return trace_rq_start((void *)ctx[1], true); } SEC("tp_btf/block_rq_issue") int BPF_PROG(block_rq_issue) { if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0)) return 0; /** * commit a54895fa (v5.11-rc1) changed tracepoint argument list * from TP_PROTO(struct request_queue *q, struct request *rq) * to TP_PROTO(struct request *rq) */ if (LINUX_KERNEL_VERSION > KERNEL_VERSION(5, 10, 0)) return trace_rq_start((void *)ctx[0], false); else return trace_rq_start((void *)ctx[1], false); } SEC("tp_btf/block_rq_complete") int BPF_PROG(block_rq_complete, struct request *rq, int error, unsigned int nr_bytes) { if (filter_cg && !bpf_current_task_under_cgroup(&cgroup_map, 0)) return 0; u64 ts = bpf_ktime_get_ns(); struct piddata *piddatap; struct event event = {}; struct stage *stagep; s64 delta; stagep = bpf_map_lookup_elem(&start, &rq); if (!stagep) return 0; delta = (s64)(ts - stagep->issue); if (delta < 0) goto cleanup; piddatap = bpf_map_lookup_elem(&infobyreq, &rq); if (!piddatap) { event.comm[0] = '?'; } else { __builtin_memcpy(&event.comm, piddatap->comm, sizeof(event.comm)); event.pid = piddatap->pid; } event.delta = delta; if (targ_queued && BPF_CORE_READ(rq, q, elevator)) { if (!stagep->insert) event.qdelta = -1; /* missed or don't insert entry */ else event.qdelta = stagep->issue - stagep->insert; } event.ts = ts; event.sector = rq->__sector; event.len = rq->__data_len; event.cmd_flags = rq->cmd_flags; event.dev = stagep->dev; bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, &event, sizeof(event)); cleanup: bpf_map_delete_elem(&start, &rq); bpf_map_delete_elem(&infobyreq, &rq); return 0; } char LICENSE[] SEC("license") = "GPL";