/*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17 #include "src/trace_processor/importers/ftrace/sched_event_tracker.h"
18
19 #include <math.h>
20
21 #include "perfetto/ext/base/utils.h"
22 #include "src/trace_processor/importers/common/args_tracker.h"
23 #include "src/trace_processor/importers/common/event_tracker.h"
24 #include "src/trace_processor/importers/common/process_tracker.h"
25 #include "src/trace_processor/importers/common/system_info_tracker.h"
26 #include "src/trace_processor/importers/ftrace/ftrace_descriptors.h"
27 #include "src/trace_processor/importers/ftrace/thread_state_tracker.h"
28 #include "src/trace_processor/storage/stats.h"
29 #include "src/trace_processor/types/task_state.h"
30 #include "src/trace_processor/types/trace_processor_context.h"
31 #include "src/trace_processor/types/variadic.h"
32
33 #include "protos/perfetto/trace/ftrace/ftrace_event.pbzero.h"
34 #include "protos/perfetto/trace/ftrace/sched.pbzero.h"
35
36 namespace perfetto {
37 namespace trace_processor {
38
SchedEventTracker(TraceProcessorContext * context)39 SchedEventTracker::SchedEventTracker(TraceProcessorContext* context)
40 : waker_utid_id_(context->storage->InternString("waker_utid")),
41 context_(context) {
42 // pre-parse sched_switch
43 auto* switch_descriptor = GetMessageDescriptorForId(
44 protos::pbzero::FtraceEvent::kSchedSwitchFieldNumber);
45 PERFETTO_CHECK(switch_descriptor->max_field_id == kSchedSwitchMaxFieldId);
46
47 for (size_t i = 1; i <= kSchedSwitchMaxFieldId; i++) {
48 sched_switch_field_ids_[i] =
49 context->storage->InternString(switch_descriptor->fields[i].name);
50 }
51 sched_switch_id_ = context->storage->InternString(switch_descriptor->name);
52
53 // pre-parse sched_waking
54 auto* waking_descriptor = GetMessageDescriptorForId(
55 protos::pbzero::FtraceEvent::kSchedWakingFieldNumber);
56 PERFETTO_CHECK(waking_descriptor->max_field_id == kSchedWakingMaxFieldId);
57
58 for (size_t i = 1; i <= kSchedWakingMaxFieldId; i++) {
59 sched_waking_field_ids_[i] =
60 context->storage->InternString(waking_descriptor->fields[i].name);
61 }
62 sched_waking_id_ = context->storage->InternString(waking_descriptor->name);
63
64 // Pre-allocate space for 128 CPUs, which should be enough for most hosts.
65 // It's OK if this number is too small, the vector will be grown on-demand.
66 pending_sched_per_cpu_.reserve(128);
67 }
68
69 SchedEventTracker::~SchedEventTracker() = default;
70
// Handles a full (non-compact) sched_switch ftrace event on |cpu| at |ts|:
// closes the pending scheduling slice for the outgoing task (if one is open),
// opens a new slice for |next_pid|, and forwards the transition to the
// ThreadState tracker. Out-of-order events are counted and dropped.
void SchedEventTracker::PushSchedSwitch(uint32_t cpu,
                                        int64_t ts,
                                        uint32_t prev_pid,
                                        base::StringView prev_comm,
                                        int32_t prev_prio,
                                        int64_t prev_state,
                                        uint32_t next_pid,
                                        base::StringView next_comm,
                                        int32_t next_prio) {
  // At this stage all events should be globally timestamp ordered.
  if (ts < context_->event_tracker->max_timestamp()) {
    PERFETTO_ELOG(
        "sched_switch event out of order by %.4f ms, skipping",
        static_cast<double>(context_->event_tracker->max_timestamp() - ts) /
            1e6);
    context_->storage->IncrementStats(stats::sched_switch_out_of_order);
    return;
  }
  context_->event_tracker->UpdateMaxTimestamp(ts);

  StringId next_comm_id = context_->storage->InternString(next_comm);
  UniqueTid next_utid = context_->process_tracker->UpdateThreadName(
      next_pid, next_comm_id, ThreadNamePriority::kFtrace);

  // First use this data to close the previous slice.
  bool prev_pid_match_prev_next_pid = false;
  auto* pending_sched = PendingSchedByCPU(cpu);
  uint32_t pending_slice_idx = pending_sched->pending_slice_storage_idx;
  StringId prev_state_string_id = TaskStateToStringId(prev_state);
  if (prev_state_string_id == kNullStringId) {
    context_->storage->IncrementStats(stats::task_state_invalid);
  }
  // uint32_t max is the sentinel for "no slice currently open on this cpu".
  if (pending_slice_idx < std::numeric_limits<uint32_t>::max()) {
    prev_pid_match_prev_next_pid = prev_pid == pending_sched->last_pid;
    if (PERFETTO_LIKELY(prev_pid_match_prev_next_pid)) {
      ClosePendingSlice(pending_slice_idx, ts, prev_state_string_id);
    } else {
      // If the pids are not consistent, make a note of this.
      context_->storage->IncrementStats(stats::mismatched_sched_switch_tids);
    }
  }

  // We have to intern prev_comm again because our assumption that
  // this event's |prev_comm| == previous event's |next_comm| does not hold
  // if the thread changed its name while scheduled.
  StringId prev_comm_id = context_->storage->InternString(prev_comm);
  UniqueTid prev_utid = context_->process_tracker->UpdateThreadName(
      prev_pid, prev_comm_id, ThreadNamePriority::kFtrace);

  auto new_slice_idx = AddRawEventAndStartSlice(
      cpu, ts, prev_utid, prev_pid, prev_comm_id, prev_prio, prev_state,
      next_utid, next_pid, next_comm_id, next_prio);

  // Finally, update the info for the next sched switch on this CPU.
  pending_sched->pending_slice_storage_idx = new_slice_idx;
  pending_sched->last_pid = next_pid;
  pending_sched->last_utid = next_utid;
  pending_sched->last_prio = next_prio;

  // Update the ThreadState table.
  ThreadStateTracker::GetOrCreate(context_)->PushSchedSwitchEvent(
      ts, cpu, prev_utid, prev_state_string_id, next_utid);
}
134
// Handles a sched_switch decoded from the compact ftrace encoding. Only the
// "next_*" fields are carried by the event; the "prev_*" fields are inferred
// from the previous switch seen on the same cpu (so the first per-cpu event
// is consumed to seed that state and does not produce a slice).
void SchedEventTracker::PushSchedSwitchCompact(uint32_t cpu,
                                               int64_t ts,
                                               int64_t prev_state,
                                               uint32_t next_pid,
                                               int32_t next_prio,
                                               StringId next_comm_id) {
  // At this stage all events should be globally timestamp ordered.
  if (ts < context_->event_tracker->max_timestamp()) {
    PERFETTO_ELOG(
        "sched_switch event out of order by %.4f ms, skipping",
        static_cast<double>(context_->event_tracker->max_timestamp() - ts) /
            1e6);
    context_->storage->IncrementStats(stats::sched_switch_out_of_order);
    return;
  }
  context_->event_tracker->UpdateMaxTimestamp(ts);

  UniqueTid next_utid = context_->process_tracker->UpdateThreadName(
      next_pid, next_comm_id, ThreadNamePriority::kFtrace);

  auto* pending_sched = PendingSchedByCPU(cpu);

  // If we're processing the first compact event for this cpu, don't start a
  // slice since we're missing the "prev_*" fields. The successive events will
  // create slices as normal, but the first per-cpu switch is effectively
  // discarded.
  if (pending_sched->last_utid == std::numeric_limits<UniqueTid>::max()) {
    context_->storage->IncrementStats(stats::compact_sched_switch_skipped);

    pending_sched->last_pid = next_pid;
    pending_sched->last_utid = next_utid;
    pending_sched->last_prio = next_prio;
    // Note: no pending slice, so leave |pending_slice_storage_idx| in its
    // invalid state.
    return;
  }

  // Close the pending slice if any (we won't have one when processing the first
  // two compact events for a given cpu).
  uint32_t pending_slice_idx = pending_sched->pending_slice_storage_idx;
  StringId prev_state_string_id = TaskStateToStringId(prev_state);
  if (prev_state_string_id == kNullStringId) {
    context_->storage->IncrementStats(stats::task_state_invalid);
  }
  // uint32_t max is the sentinel for "no slice currently open on this cpu".
  if (pending_slice_idx < std::numeric_limits<uint32_t>::max())
    ClosePendingSlice(pending_slice_idx, ts, prev_state_string_id);

  // Use the previous event's values to infer this event's "prev_*" fields.
  // There are edge cases, but this assumption should still produce sensible
  // results in the absence of data loss.
  UniqueTid prev_utid = pending_sched->last_utid;
  uint32_t prev_pid = pending_sched->last_pid;
  int32_t prev_prio = pending_sched->last_prio;

  // Do a fresh task name lookup in case it was updated by a task_rename while
  // scheduled.
  StringId prev_comm_id =
      context_->storage->thread_table().name()[prev_utid].value_or(
          kNullStringId);

  auto new_slice_idx = AddRawEventAndStartSlice(
      cpu, ts, prev_utid, prev_pid, prev_comm_id, prev_prio, prev_state,
      next_utid, next_pid, next_comm_id, next_prio);

  // Finally, update the info for the next sched switch on this CPU.
  pending_sched->pending_slice_storage_idx = new_slice_idx;
  pending_sched->last_pid = next_pid;
  pending_sched->last_utid = next_utid;
  pending_sched->last_prio = next_prio;

  // Update the ThreadState table.
  ThreadStateTracker::GetOrCreate(context_)->PushSchedSwitchEvent(
      ts, cpu, prev_utid, prev_state_string_id, next_utid);
}
209
210 // Processes a sched_waking that was decoded from a compact representation,
211 // adding to the raw and instants tables.
PushSchedWakingCompact(uint32_t cpu,int64_t ts,uint32_t wakee_pid,int32_t target_cpu,int32_t prio,StringId comm_id)212 void SchedEventTracker::PushSchedWakingCompact(uint32_t cpu,
213 int64_t ts,
214 uint32_t wakee_pid,
215 int32_t target_cpu,
216 int32_t prio,
217 StringId comm_id) {
218 // At this stage all events should be globally timestamp ordered.
219 if (ts < context_->event_tracker->max_timestamp()) {
220 PERFETTO_ELOG(
221 "sched_waking event out of order by %.4f ms, skipping",
222 static_cast<double>(context_->event_tracker->max_timestamp() - ts) /
223 1e6);
224 context_->storage->IncrementStats(stats::sched_waking_out_of_order);
225 return;
226 }
227 context_->event_tracker->UpdateMaxTimestamp(ts);
228
229 // We infer the task that emitted the event (i.e. common_pid) from the
230 // scheduling slices. Drop the event if we haven't seen any sched_switch
231 // events for this cpu yet.
232 // Note that if sched_switch wasn't enabled, we will have to skip all
233 // compact waking events.
234 auto* pending_sched = PendingSchedByCPU(cpu);
235 if (pending_sched->last_utid == std::numeric_limits<UniqueTid>::max()) {
236 context_->storage->IncrementStats(stats::compact_sched_waking_skipped);
237 return;
238 }
239 auto curr_utid = pending_sched->last_utid;
240
241 if (PERFETTO_LIKELY(context_->config.ingest_ftrace_in_raw_table)) {
242 // Add an entry to the raw table.
243 RawId id = context_->storage->mutable_ftrace_event_table()
244 ->Insert({ts, sched_waking_id_, cpu, curr_utid})
245 .id;
246
247 using SW = protos::pbzero::SchedWakingFtraceEvent;
248 auto inserter = context_->args_tracker->AddArgsTo(id);
249 auto add_raw_arg = [this, &inserter](int field_num, Variadic var) {
250 StringId key = sched_waking_field_ids_[static_cast<size_t>(field_num)];
251 inserter.AddArg(key, var);
252 };
253 add_raw_arg(SW::kCommFieldNumber, Variadic::String(comm_id));
254 add_raw_arg(SW::kPidFieldNumber, Variadic::Integer(wakee_pid));
255 add_raw_arg(SW::kPrioFieldNumber, Variadic::Integer(prio));
256 add_raw_arg(SW::kTargetCpuFieldNumber, Variadic::Integer(target_cpu));
257 }
258
259 // Add a waking entry to the ThreadState table.
260 auto wakee_utid = context_->process_tracker->GetOrCreateThread(wakee_pid);
261 ThreadStateTracker::GetOrCreate(context_)->PushWakingEvent(ts, wakee_utid,
262 curr_utid);
263 }
264
265 PERFETTO_ALWAYS_INLINE
AddRawEventAndStartSlice(uint32_t cpu,int64_t ts,UniqueTid prev_utid,uint32_t prev_pid,StringId prev_comm_id,int32_t prev_prio,int64_t prev_state,UniqueTid next_utid,uint32_t next_pid,StringId next_comm_id,int32_t next_prio)266 uint32_t SchedEventTracker::AddRawEventAndStartSlice(uint32_t cpu,
267 int64_t ts,
268 UniqueTid prev_utid,
269 uint32_t prev_pid,
270 StringId prev_comm_id,
271 int32_t prev_prio,
272 int64_t prev_state,
273 UniqueTid next_utid,
274 uint32_t next_pid,
275 StringId next_comm_id,
276 int32_t next_prio) {
277 if (PERFETTO_LIKELY(context_->config.ingest_ftrace_in_raw_table)) {
278 // Push the raw event - this is done as the raw ftrace event codepath does
279 // not insert sched_switch.
280 RawId id = context_->storage->mutable_ftrace_event_table()
281 ->Insert({ts, sched_switch_id_, cpu, prev_utid})
282 .id;
283
284 // Note: this ordering is important. The events should be pushed in the same
285 // order as the order of fields in the proto; this is used by the raw table
286 // to index these events using the field ids.
287 using SS = protos::pbzero::SchedSwitchFtraceEvent;
288
289 auto inserter = context_->args_tracker->AddArgsTo(id);
290 auto add_raw_arg = [this, &inserter](int field_num, Variadic var) {
291 StringId key = sched_switch_field_ids_[static_cast<size_t>(field_num)];
292 inserter.AddArg(key, var);
293 };
294 add_raw_arg(SS::kPrevCommFieldNumber, Variadic::String(prev_comm_id));
295 add_raw_arg(SS::kPrevPidFieldNumber, Variadic::Integer(prev_pid));
296 add_raw_arg(SS::kPrevPrioFieldNumber, Variadic::Integer(prev_prio));
297 add_raw_arg(SS::kPrevStateFieldNumber, Variadic::Integer(prev_state));
298 add_raw_arg(SS::kNextCommFieldNumber, Variadic::String(next_comm_id));
299 add_raw_arg(SS::kNextPidFieldNumber, Variadic::Integer(next_pid));
300 add_raw_arg(SS::kNextPrioFieldNumber, Variadic::Integer(next_prio));
301 }
302
303 // Open a new scheduling slice, corresponding to the task that was
304 // just switched to. Set the duration to -1, to indicate that the event is not
305 // finished. Duration will be updated later after event finish.
306 auto* sched = context_->storage->mutable_sched_slice_table();
307 auto row_and_id = sched->Insert(
308 {ts, /* duration */ -1, cpu, next_utid, kNullStringId, next_prio});
309 SchedId sched_id = row_and_id.id;
310 return *sched->id().IndexOf(sched_id);
311 }
312
TaskStateToStringId(int64_t task_state_int)313 StringId SchedEventTracker::TaskStateToStringId(int64_t task_state_int) {
314 using ftrace_utils::TaskState;
315
316 std::optional<VersionNumber> kernel_version =
317 SystemInfoTracker::GetOrCreate(context_)->GetKernelVersion();
318 TaskState task_state = TaskState::FromRawPrevState(
319 static_cast<uint16_t>(task_state_int), kernel_version);
320 return task_state.is_valid()
321 ? context_->storage->InternString(task_state.ToString().data())
322 : kNullStringId;
323 }
324
325 PERFETTO_ALWAYS_INLINE
ClosePendingSlice(uint32_t pending_slice_idx,int64_t ts,StringId prev_state)326 void SchedEventTracker::ClosePendingSlice(uint32_t pending_slice_idx,
327 int64_t ts,
328 StringId prev_state) {
329 auto* slices = context_->storage->mutable_sched_slice_table();
330
331 int64_t duration = ts - slices->ts()[pending_slice_idx];
332 slices->mutable_dur()->Set(pending_slice_idx, duration);
333
334 // We store the state as a uint16 as we only consider values up to 2048
335 // when unpacking the information inside; this allows savings of 48 bits
336 // per slice.
337 slices->mutable_end_state()->Set(pending_slice_idx, prev_state);
338 }
339
340 } // namespace trace_processor
341 } // namespace perfetto
342