/*
 * time_in_state eBPF program
 *
 * Copyright (C) 2018 Google
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */

#include <bpf_helpers.h>
#include <bpf_timeinstate.h>

DEFINE_BPF_MAP_GRW(total_time_in_state_map, PERCPU_ARRAY, uint32_t, uint64_t, MAX_FREQS_FOR_TOTAL,
                   AID_SYSTEM)

DEFINE_BPF_MAP_GRW(uid_time_in_state_map, PERCPU_HASH, time_key_t, tis_val_t, 1024, AID_SYSTEM)

DEFINE_BPF_MAP_GRW(uid_concurrent_times_map, PERCPU_HASH, time_key_t, concurrent_val_t, 1024,
                   AID_SYSTEM)
DEFINE_BPF_MAP_GRW(uid_last_update_map, HASH, uint32_t, uint64_t, 1024, AID_SYSTEM)

DEFINE_BPF_MAP_GWO(cpu_last_update_map, PERCPU_ARRAY, uint32_t, uint64_t, 1, AID_SYSTEM)

DEFINE_BPF_MAP_GWO(cpu_policy_map, ARRAY, uint32_t, uint32_t, 1024, AID_SYSTEM)
DEFINE_BPF_MAP_GWO(policy_freq_idx_map, ARRAY, uint32_t, uint8_t, 1024, AID_SYSTEM)

DEFINE_BPF_MAP_GWO(freq_to_idx_map, HASH, freq_idx_key_t, uint8_t, 2048, AID_SYSTEM)

DEFINE_BPF_MAP_GWO(nr_active_map, ARRAY, uint32_t, uint32_t, 1, AID_SYSTEM)
DEFINE_BPF_MAP_GWO(policy_nr_active_map, ARRAY, uint32_t, uint32_t, 1024, AID_SYSTEM)

DEFINE_BPF_MAP_GWO(pid_tracked_hash_map, HASH, uint32_t, pid_t, MAX_TRACKED_PIDS, AID_SYSTEM)
DEFINE_BPF_MAP_GWO(pid_tracked_map, ARRAY, uint32_t, tracked_pid_t, MAX_TRACKED_PIDS, AID_SYSTEM)
DEFINE_BPF_MAP_GWO(pid_task_aggregation_map, HASH, pid_t, uint16_t, 1024, AID_SYSTEM)
DEFINE_BPF_MAP_GRO(pid_time_in_state_map, PERCPU_HASH, aggregated_task_tis_key_t, tis_val_t, 1024,
                   AID_SYSTEM)

// The format of the sched/sched_switch event is described in
// adb shell cat /d/tracing/events/sched/sched_switch/format
struct switch_args {
    unsigned long long ignore;
    char prev_comm[16];
    int prev_pid;
    int prev_prio;
    long long prev_state;
    char next_comm[16];
    int next_pid;
    int next_prio;
};

DEFINE_BPF_PROG("tracepoint/sched/sched_switch", AID_ROOT, AID_SYSTEM, tp_sched_switch)
(struct switch_args* args) {
    const int ALLOW = 1;  // return 1 to avoid blocking simpleperf from receiving events.
    uint32_t zero = 0;
    uint64_t* last = bpf_cpu_last_update_map_lookup_elem(&zero);
    if (!last) return ALLOW;
    uint64_t old_last = *last;
    uint64_t time = bpf_ktime_get_ns();
    *last = time;

    uint32_t* active = bpf_nr_active_map_lookup_elem(&zero);
    if (!active) return ALLOW;

    uint32_t cpu = bpf_get_smp_processor_id();
    uint32_t* policyp = bpf_cpu_policy_map_lookup_elem(&cpu);
    if (!policyp) return ALLOW;
    uint32_t policy = *policyp;

    uint32_t* policy_active = bpf_policy_nr_active_map_lookup_elem(&policy);
    if (!policy_active) return ALLOW;

    uint32_t nactive = *active - 1;
    uint32_t policy_nactive = *policy_active - 1;

    if (!args->prev_pid || (!old_last && args->next_pid)) {
        __sync_fetch_and_add(active, 1);
        __sync_fetch_and_add(policy_active, 1);
    }

    // Return here in 2 scenarios:
    // 1) prev_pid == 0, so we're exiting idle. No UID stats need updating, and active CPUs
    //    can't be decreasing.
    // 2) old_last == 0, so this is the first time we've seen this CPU. Any delta will be
    //    invalid, and our active CPU counts don't include this CPU yet so we shouldn't
    //    decrement them even if we're going idle.
    if (!args->prev_pid || !old_last) return ALLOW;
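
    // From this point on, prev_pid != 0 and old_last != 0, so time - old_last is a valid delta
    // for the outgoing thread. A zero next_pid means this CPU is entering idle and therefore
    // leaves the active set.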
    if (!args->next_pid) {
        __sync_fetch_and_add(active, -1);
        __sync_fetch_and_add(policy_active, -1);
    }

    uint8_t* freq_idxp = bpf_policy_freq_idx_map_lookup_elem(&policy);
    if (!freq_idxp || !*freq_idxp) return ALLOW;
    // freq_to_idx_map uses 1 as its minimum index so that *freq_idxp == 0 only when uninitialized
    uint8_t freq_idx = *freq_idxp - 1;

    uint32_t uid = bpf_get_current_uid_gid();
    time_key_t key = {.uid = uid, .bucket = freq_idx / FREQS_PER_ENTRY};
    tis_val_t* val = bpf_uid_time_in_state_map_lookup_elem(&key);
    if (!val) {
        tis_val_t zero_val = {.ar = {0}};
        bpf_uid_time_in_state_map_update_elem(&key, &zero_val, BPF_NOEXIST);
        val = bpf_uid_time_in_state_map_lookup_elem(&key);
    }
    uint64_t delta = time - old_last;
    if (val) val->ar[freq_idx % FREQS_PER_ENTRY] += delta;

    // Add delta to total.
    const uint32_t total_freq_idx =
            freq_idx < MAX_FREQS_FOR_TOTAL ? freq_idx : MAX_FREQS_FOR_TOTAL - 1;
    uint64_t* total = bpf_total_time_in_state_map_lookup_elem(&total_freq_idx);
    if (total) *total += delta;

    const int pid = args->prev_pid;
    const pid_t tgid = bpf_get_current_pid_tgid() >> 32;
    bool is_tgid_tracked = false;
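
    // Check whether the outgoing thread's process (TGID) was registered for per-task tracking by
    // scanning the dense pid_tracked_map table; the first UNUSED slot marks the end of the list.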
    // eBPF verifier does not currently allow loops.
    // Instruct the C compiler to unroll the loop into a series of steps.
#pragma unroll
    for (uint32_t index = 0; index < MAX_TRACKED_PIDS; index++) {
        const uint32_t key = index;
        tracked_pid_t* tracked_pid = bpf_pid_tracked_map_lookup_elem(&key);
        if (!tracked_pid) continue;
        if (tracked_pid->state == TRACKED_PID_STATE_UNUSED) {
            // Reached the end of the list
            break;
        }

        if (tracked_pid->state == TRACKED_PID_STATE_ACTIVE && tracked_pid->pid == tgid) {
            is_tgid_tracked = true;
            break;
        }
    }

    if (is_tgid_tracked) {
        // If this process is marked for time-in-state tracking, aggregate the CPU time-in-state
        // with other threads sharing the same TGID and aggregation key.
        uint16_t* aggregation_key = bpf_pid_task_aggregation_map_lookup_elem(&pid);
        aggregated_task_tis_key_t task_key = {
                .tgid = tgid,
                .aggregation_key = aggregation_key ? *aggregation_key : 0,
                .bucket = freq_idx / FREQS_PER_ENTRY};
        tis_val_t* task_val = bpf_pid_time_in_state_map_lookup_elem(&task_key);
        if (!task_val) {
            tis_val_t zero_val = {.ar = {0}};
            bpf_pid_time_in_state_map_update_elem(&task_key, &zero_val, BPF_NOEXIST);
            task_val = bpf_pid_time_in_state_map_lookup_elem(&task_key);
        }
        if (task_val) task_val->ar[freq_idx % FREQS_PER_ENTRY] += delta;
    }

    // Charge the same delta to the UID's concurrent-usage counters, bucketed by how many CPUs
    // were active overall and within this CPU's policy. 'key' still holds the UID from above.
    key.bucket = nactive / CPUS_PER_ENTRY;
    concurrent_val_t* ct = bpf_uid_concurrent_times_map_lookup_elem(&key);
    if (!ct) {
        concurrent_val_t zero_val = {.active = {0}, .policy = {0}};
        bpf_uid_concurrent_times_map_update_elem(&key, &zero_val, BPF_NOEXIST);
        ct = bpf_uid_concurrent_times_map_lookup_elem(&key);
    }
    if (ct) ct->active[nactive % CPUS_PER_ENTRY] += delta;

    if (policy_nactive / CPUS_PER_ENTRY != key.bucket) {
        key.bucket = policy_nactive / CPUS_PER_ENTRY;
        ct = bpf_uid_concurrent_times_map_lookup_elem(&key);
        if (!ct) {
            concurrent_val_t zero_val = {.active = {0}, .policy = {0}};
            bpf_uid_concurrent_times_map_update_elem(&key, &zero_val, BPF_NOEXIST);
            ct = bpf_uid_concurrent_times_map_lookup_elem(&key);
        }
    }
    if (ct) ct->policy[policy_nactive % CPUS_PER_ENTRY] += delta;

    uint64_t* uid_last_update = bpf_uid_last_update_map_lookup_elem(&uid);
    if (uid_last_update) {
        *uid_last_update = time;
    } else {
        bpf_uid_last_update_map_update_elem(&uid, &time, BPF_NOEXIST);
    }
    return ALLOW;
}

struct cpufreq_args {
    unsigned long long ignore;
    unsigned int state;
    unsigned int cpu_id;
};

// Record the current frequency index for a policy whenever one of its CPUs changes frequency,
// so tp_sched_switch can attribute deltas to the right frequency bucket.
DEFINE_BPF_PROG("tracepoint/power/cpu_frequency", AID_ROOT, AID_SYSTEM, tp_cpufreq)
(struct cpufreq_args* args) {
    uint32_t cpu = args->cpu_id;
    unsigned int new = args->state;
    uint32_t* policyp = bpf_cpu_policy_map_lookup_elem(&cpu);
    if (!policyp) return 0;
    uint32_t policy = *policyp;
    freq_idx_key_t key = {.policy = policy, .freq = new};
    uint8_t* idxp = bpf_freq_to_idx_map_lookup_elem(&key);
    if (!idxp) return 0;
    uint8_t idx = *idxp;
    bpf_policy_freq_idx_map_update_elem(&policy, &idx, BPF_ANY);
    return 0;
}

// The format of the sched/sched_process_free event is described in
// adb shell cat /d/tracing/events/sched/sched_process_free/format
struct sched_process_free_args {
    unsigned long long ignore;
    char comm[16];
    pid_t pid;
    int prio;
};

DEFINE_BPF_PROG("tracepoint/sched/sched_process_free", AID_ROOT, AID_SYSTEM, tp_sched_process_free)
(struct sched_process_free_args* args) {
    const int ALLOW = 1;

    int pid = args->pid;
    bool is_last = true;

    // eBPF verifier does not currently allow loops.
    // Instruct the C compiler to unroll the loop into a series of steps.
    // Scan from the end of the table so we can tell whether any still-active entry follows this
    // pid: if none does, the slot returns to UNUSED; otherwise it is only marked EXITED.
#pragma unroll
    for (uint32_t index = 0; index < MAX_TRACKED_PIDS; index++) {
        const uint32_t key = MAX_TRACKED_PIDS - index - 1;
        tracked_pid_t* tracked_pid = bpf_pid_tracked_map_lookup_elem(&key);
        if (!tracked_pid) continue;
        if (tracked_pid->pid == pid) {
            tracked_pid->pid = 0;
            tracked_pid->state = is_last ? TRACKED_PID_STATE_UNUSED : TRACKED_PID_STATE_EXITED;
            bpf_pid_tracked_hash_map_delete_elem(&key);
            break;
        }
        if (tracked_pid->state == TRACKED_PID_STATE_ACTIVE) {
            is_last = false;
        }
    }

    bpf_pid_task_aggregation_map_delete_elem(&pid);
    return ALLOW;
}

LICENSE("GPL");
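
// Illustrative userspace reader — a minimal sketch, not part of this BPF program (kept inside a
// comment so it is not compiled into the object). Assumptions: libbpf is available to the reader,
// and the map is pinned at the bpfloader's usual "map_<file>_<mapname>" path, which may differ
// between builds.
//
//   #include <stdint.h>
//   #include <bpf/bpf.h>          // bpf_obj_get(), bpf_map_lookup_elem()
//   #include <bpf/libbpf.h>       // libbpf_num_possible_cpus()
//   #include <bpf_timeinstate.h>  // time_key_t, tis_val_t, FREQS_PER_ENTRY
//
//   // Total ns a UID has run at frequency index freq_idx. fd comes from bpf_obj_get() on the
//   // pinned map, e.g. (path is an assumption):
//   //   int fd = bpf_obj_get("/sys/fs/bpf/map_timeInState_uid_time_in_state_map");
//   static uint64_t uid_time_at_freq_idx(int fd, uint32_t uid, uint32_t freq_idx) {
//       time_key_t key = {.uid = uid, .bucket = freq_idx / FREQS_PER_ENTRY};
//       int ncpus = libbpf_num_possible_cpus();
//       if (ncpus < 1) return 0;
//       tis_val_t vals[ncpus];  // PERCPU_HASH: lookup fills one tis_val_t per possible CPU
//       if (bpf_map_lookup_elem(fd, &key, vals)) return 0;
//       uint64_t total = 0;
//       for (int cpu = 0; cpu < ncpus; cpu++) total += vals[cpu].ar[freq_idx % FREQS_PER_ENTRY];
//       return total;
//   }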