/*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/trace_processor/importers/ftrace/sched_event_tracker.h"

#include <math.h>

#include "perfetto/ext/base/utils.h"
#include "src/trace_processor/importers/common/args_tracker.h"
#include "src/trace_processor/importers/common/event_tracker.h"
#include "src/trace_processor/importers/common/process_tracker.h"
#include "src/trace_processor/importers/common/system_info_tracker.h"
#include "src/trace_processor/importers/ftrace/ftrace_descriptors.h"
#include "src/trace_processor/storage/stats.h"
#include "src/trace_processor/types/task_state.h"
#include "src/trace_processor/types/trace_processor_context.h"
#include "src/trace_processor/types/variadic.h"

#include "protos/perfetto/trace/ftrace/ftrace_event.pbzero.h"
#include "protos/perfetto/trace/ftrace/sched.pbzero.h"

namespace perfetto {
namespace trace_processor {

SchedEventTracker::SchedEventTracker(TraceProcessorContext* context)
    : waker_utid_id_(context->storage->InternString("waker_utid")),
      context_(context) {
  // pre-parse sched_switch
  auto* switch_descriptor = GetMessageDescriptorForId(
      protos::pbzero::FtraceEvent::kSchedSwitchFieldNumber);
  PERFETTO_CHECK(switch_descriptor->max_field_id == kSchedSwitchMaxFieldId);
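
  // Proto field ids are 1-based, so index 0 of these lookup arrays stays
  // unused. The names interned below become the arg keys attached to
  // raw-table rows (see AddRawEventAndStartSlice()).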
  for (size_t i = 1; i <= kSchedSwitchMaxFieldId; i++) {
    sched_switch_field_ids_[i] =
        context->storage->InternString(switch_descriptor->fields[i].name);
  }
  sched_switch_id_ = context->storage->InternString(switch_descriptor->name);

  // pre-parse sched_waking
  auto* waking_descriptor = GetMessageDescriptorForId(
      protos::pbzero::FtraceEvent::kSchedWakingFieldNumber);
  PERFETTO_CHECK(waking_descriptor->max_field_id == kSchedWakingMaxFieldId);

  for (size_t i = 1; i <= kSchedWakingMaxFieldId; i++) {
    sched_waking_field_ids_[i] =
        context->storage->InternString(waking_descriptor->fields[i].name);
  }
  sched_waking_id_ = context->storage->InternString(waking_descriptor->name);

  // Pre-allocate space for 128 CPUs, which should be enough for most hosts.
  // It's OK if this number is too small: the vector will grow on demand.
  pending_sched_per_cpu_.reserve(128);
}

SchedEventTracker::~SchedEventTracker() = default;

void SchedEventTracker::PushSchedSwitch(uint32_t cpu,
                                        int64_t ts,
                                        uint32_t prev_pid,
                                        base::StringView prev_comm,
                                        int32_t prev_prio,
                                        int64_t prev_state,
                                        uint32_t next_pid,
                                        base::StringView next_comm,
                                        int32_t next_prio) {
  // At this stage all events should be globally timestamp ordered.
  if (ts < context_->event_tracker->max_timestamp()) {
    PERFETTO_ELOG(
        "sched_switch event out of order by %.4f ms, skipping",
        static_cast<double>(context_->event_tracker->max_timestamp() - ts) /
            1e6);
    context_->storage->IncrementStats(stats::sched_switch_out_of_order);
    return;
  }
  context_->event_tracker->UpdateMaxTimestamp(ts);

  StringId next_comm_id = context_->storage->InternString(next_comm);
  UniqueTid next_utid = context_->process_tracker->UpdateThreadName(
      next_pid, next_comm_id, ThreadNamePriority::kFtrace);

  // First use this data to close the previous slice.
  bool prev_pid_match_prev_next_pid = false;
  auto* pending_sched = PendingSchedByCPU(cpu);
  uint32_t pending_slice_idx = pending_sched->pending_slice_storage_idx;
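  // A |pending_slice_storage_idx| equal to uint32_t::max() is the sentinel
  // for "no slice in flight on this cpu" (PendingSchedInfo's initial state).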
  if (pending_slice_idx < std::numeric_limits<uint32_t>::max()) {
    prev_pid_match_prev_next_pid = prev_pid == pending_sched->last_pid;
    if (PERFETTO_LIKELY(prev_pid_match_prev_next_pid)) {
      ClosePendingSlice(pending_slice_idx, ts, prev_state);
    } else {
      // If the pids are not consistent, make a note of this.
      context_->storage->IncrementStats(stats::mismatched_sched_switch_tids);
    }
  }

  // We have to intern prev_comm again because our assumption that
  // this event's |prev_comm| == previous event's |next_comm| does not hold
  // if the thread changed its name while scheduled.
  StringId prev_comm_id = context_->storage->InternString(prev_comm);
  UniqueTid prev_utid = context_->process_tracker->UpdateThreadName(
      prev_pid, prev_comm_id, ThreadNamePriority::kFtrace);

  auto new_slice_idx = AddRawEventAndStartSlice(
      cpu, ts, prev_utid, prev_pid, prev_comm_id, prev_prio, prev_state,
      next_utid, next_pid, next_comm_id, next_prio);

  // Finally, update the info for the next sched switch on this CPU.
  pending_sched->pending_slice_storage_idx = new_slice_idx;
  pending_sched->last_pid = next_pid;
  pending_sched->last_utid = next_utid;
  pending_sched->last_prio = next_prio;
}
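
// For illustration, a hypothetical raw sched_switch event such as
//   prev_comm=swapper/0 prev_pid=0 prev_prio=120 prev_state=R
//     ==> next_comm=foo next_pid=123 next_prio=120
// on cpu 0 at timestamp 100200300 would be pushed as:
//   PushSchedSwitch(/*cpu=*/0, /*ts=*/100200300, /*prev_pid=*/0, "swapper/0",
//                   /*prev_prio=*/120, /*prev_state=*/0, /*next_pid=*/123,
//                   "foo", /*next_prio=*/120);
// (prev_state "R" corresponds to the raw value 0, i.e. TASK_RUNNING).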

void SchedEventTracker::PushSchedSwitchCompact(uint32_t cpu,
                                               int64_t ts,
                                               int64_t prev_state,
                                               uint32_t next_pid,
                                               int32_t next_prio,
                                               StringId next_comm_id) {
  // At this stage all events should be globally timestamp ordered.
  if (ts < context_->event_tracker->max_timestamp()) {
    PERFETTO_ELOG(
        "sched_switch event out of order by %.4f ms, skipping",
        static_cast<double>(context_->event_tracker->max_timestamp() - ts) /
            1e6);
    context_->storage->IncrementStats(stats::sched_switch_out_of_order);
    return;
  }
  context_->event_tracker->UpdateMaxTimestamp(ts);

  UniqueTid next_utid = context_->process_tracker->UpdateThreadName(
      next_pid, next_comm_id, ThreadNamePriority::kFtrace);

  auto* pending_sched = PendingSchedByCPU(cpu);

  // If we're processing the first compact event for this cpu, don't start a
  // slice since we're missing the "prev_*" fields. The successive events will
  // create slices as normal, but the first per-cpu switch is effectively
  // discarded.
  if (pending_sched->last_utid == std::numeric_limits<UniqueTid>::max()) {
    context_->storage->IncrementStats(stats::compact_sched_switch_skipped);

    pending_sched->last_pid = next_pid;
    pending_sched->last_utid = next_utid;
    pending_sched->last_prio = next_prio;
    // Note: no pending slice, so leave |pending_slice_storage_idx| in its
    // invalid state.
    return;
  }

  // Close the pending slice if any (we won't have one when processing the
  // first two compact events for a given cpu).
  uint32_t pending_slice_idx = pending_sched->pending_slice_storage_idx;
  if (pending_slice_idx < std::numeric_limits<uint32_t>::max())
    ClosePendingSlice(pending_slice_idx, ts, prev_state);

  // Use the previous event's values to infer this event's "prev_*" fields.
  // There are edge cases, but this assumption should still produce sensible
  // results in the absence of data loss.
  UniqueTid prev_utid = pending_sched->last_utid;
  uint32_t prev_pid = pending_sched->last_pid;
  int32_t prev_prio = pending_sched->last_prio;

  // Do a fresh task name lookup in case it was updated by a task_rename while
  // scheduled.
  StringId prev_comm_id =
      context_->storage->thread_table().name()[prev_utid].value_or(
          kNullStringId);

  auto new_slice_idx = AddRawEventAndStartSlice(
      cpu, ts, prev_utid, prev_pid, prev_comm_id, prev_prio, prev_state,
      next_utid, next_pid, next_comm_id, next_prio);

  // Finally, update the info for the next sched switch on this CPU.
  pending_sched->pending_slice_storage_idx = new_slice_idx;
  pending_sched->last_pid = next_pid;
  pending_sched->last_utid = next_utid;
  pending_sched->last_prio = next_prio;
}

PERFETTO_ALWAYS_INLINE
uint32_t SchedEventTracker::AddRawEventAndStartSlice(uint32_t cpu,
                                                     int64_t ts,
                                                     UniqueTid prev_utid,
                                                     uint32_t prev_pid,
                                                     StringId prev_comm_id,
                                                     int32_t prev_prio,
                                                     int64_t prev_state,
                                                     UniqueTid next_utid,
                                                     uint32_t next_pid,
                                                     StringId next_comm_id,
                                                     int32_t next_prio) {
  if (PERFETTO_LIKELY(context_->config.ingest_ftrace_in_raw_table)) {
    // Push the raw event - this is done as the raw ftrace event codepath does
    // not insert sched_switch.
    RawId id = context_->storage->mutable_raw_table()
                   ->Insert({ts, sched_switch_id_, cpu, prev_utid})
                   .id;

    // Note: this ordering is important. The events should be pushed in the
    // same order as the order of fields in the proto; this is used by the raw
    // table to index these events using the field ids.
    using SS = protos::pbzero::SchedSwitchFtraceEvent;

    auto inserter = context_->args_tracker->AddArgsTo(id);
    auto add_raw_arg = [this, &inserter](int field_num, Variadic var) {
      StringId key = sched_switch_field_ids_[static_cast<size_t>(field_num)];
      inserter.AddArg(key, var);
    };
    add_raw_arg(SS::kPrevCommFieldNumber, Variadic::String(prev_comm_id));
    add_raw_arg(SS::kPrevPidFieldNumber, Variadic::Integer(prev_pid));
    add_raw_arg(SS::kPrevPrioFieldNumber, Variadic::Integer(prev_prio));
    add_raw_arg(SS::kPrevStateFieldNumber, Variadic::Integer(prev_state));
    add_raw_arg(SS::kNextCommFieldNumber, Variadic::String(next_comm_id));
    add_raw_arg(SS::kNextPidFieldNumber, Variadic::Integer(next_pid));
    add_raw_arg(SS::kNextPrioFieldNumber, Variadic::Integer(next_prio));
  }

  // Open a new scheduling slice, corresponding to the task that was
  // just switched to.
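  // |dur| is inserted as 0 for now; it is patched to the real duration later
  // by ClosePendingSlice() (or by FlushPendingEvents() at the end of the
  // trace).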
  auto* sched = context_->storage->mutable_sched_slice_table();
  auto row_and_id = sched->Insert(
      {ts, 0 /* duration */, cpu, next_utid, kNullStringId, next_prio});
  SchedId sched_id = row_and_id.id;
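  // Return the row index rather than the id: callers stash it in
  // |pending_slice_storage_idx| and later use it to patch this row in place.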
  return *sched->id().IndexOf(sched_id);
}

PERFETTO_ALWAYS_INLINE
void SchedEventTracker::ClosePendingSlice(uint32_t pending_slice_idx,
                                          int64_t ts,
                                          int64_t prev_state) {
  auto* slices = context_->storage->mutable_sched_slice_table();

  int64_t duration = ts - slices->ts()[pending_slice_idx];
  slices->mutable_dur()->Set(pending_slice_idx, duration);

  // We store the state as a uint16 as we only consider values up to 2048
  // when unpacking the information inside; this allows savings of 48 bits
  // per slice.
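  // The meaning of the individual state bits has changed across kernel
  // versions, so decoding also needs the version of the kernel that emitted
  // the trace.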
  auto kernel_version =
      SystemInfoTracker::GetOrCreate(context_)->GetKernelVersion();
  auto task_state = ftrace_utils::TaskState(static_cast<uint16_t>(prev_state),
                                            kernel_version);
  if (!task_state.is_valid()) {
    context_->storage->IncrementStats(stats::task_state_invalid);
  }
  auto id = task_state.is_valid()
                ? context_->storage->InternString(task_state.ToString().data())
                : kNullStringId;
  slices->mutable_end_state()->Set(pending_slice_idx, id);
}

// Processes a sched_waking that was decoded from a compact representation,
// adding to the raw and instants tables.
void SchedEventTracker::PushSchedWakingCompact(uint32_t cpu,
                                               int64_t ts,
                                               uint32_t wakee_pid,
                                               int32_t target_cpu,
                                               int32_t prio,
                                               StringId comm_id) {
  // At this stage all events should be globally timestamp ordered.
  if (ts < context_->event_tracker->max_timestamp()) {
    PERFETTO_ELOG(
        "sched_waking event out of order by %.4f ms, skipping",
        static_cast<double>(context_->event_tracker->max_timestamp() - ts) /
            1e6);
    context_->storage->IncrementStats(stats::sched_waking_out_of_order);
    return;
  }
  context_->event_tracker->UpdateMaxTimestamp(ts);

  // We infer the task that emitted the event (i.e. common_pid) from the
  // scheduling slices. Drop the event if we haven't seen any sched_switch
  // events for this cpu yet.
  // Note that if sched_switch wasn't enabled, we will have to skip all
  // compact waking events.
  auto* pending_sched = PendingSchedByCPU(cpu);
  if (pending_sched->last_utid == std::numeric_limits<UniqueTid>::max()) {
    context_->storage->IncrementStats(stats::compact_sched_waking_skipped);
    return;
  }
  auto curr_utid = pending_sched->last_utid;

  if (PERFETTO_LIKELY(context_->config.ingest_ftrace_in_raw_table)) {
    // Add an entry to the raw table.
    RawId id = context_->storage->mutable_raw_table()
                   ->Insert({ts, sched_waking_id_, cpu, curr_utid})
                   .id;

    // "success" is hardcoded as always 1 by the kernel, with a TODO to remove
    // it.
    static constexpr int32_t kHardcodedSuccess = 1;
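
    // As with sched_switch above, args must be added in proto field order so
    // the raw table can index them by field id.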
    using SW = protos::pbzero::SchedWakingFtraceEvent;
    auto inserter = context_->args_tracker->AddArgsTo(id);
    auto add_raw_arg = [this, &inserter](int field_num, Variadic var) {
      StringId key = sched_waking_field_ids_[static_cast<size_t>(field_num)];
      inserter.AddArg(key, var);
    };
    add_raw_arg(SW::kCommFieldNumber, Variadic::String(comm_id));
    add_raw_arg(SW::kPidFieldNumber, Variadic::Integer(wakee_pid));
    add_raw_arg(SW::kPrioFieldNumber, Variadic::Integer(prio));
    add_raw_arg(SW::kSuccessFieldNumber, Variadic::Integer(kHardcodedSuccess));
    add_raw_arg(SW::kTargetCpuFieldNumber, Variadic::Integer(target_cpu));
  }

  // Add a waking entry to the instants.
  auto wakee_utid = context_->process_tracker->GetOrCreateThread(wakee_pid);
  auto* instants = context_->storage->mutable_instant_table();
  auto ref_type_id = context_->storage->InternString(
      GetRefTypeStringMap()[static_cast<size_t>(RefType::kRefUtid)]);
  tables::InstantTable::Id id =
      instants->Insert({ts, sched_waking_id_, wakee_utid, ref_type_id}).id;

  context_->args_tracker->AddArgsTo(id).AddArg(
      waker_utid_id_, Variadic::UnsignedInteger(curr_utid));
}

void SchedEventTracker::FlushPendingEvents() {
  // TODO(lalitm): the day this method is called before end of trace, don't
  // flush the sched events as they will probably be pushed in the next round
  // of ftrace events.
  int64_t end_ts = context_->storage->GetTraceTimestampBoundsNs().second;
  auto* slices = context_->storage->mutable_sched_slice_table();
  for (const auto& pending_sched : pending_sched_per_cpu_) {
    uint32_t row = pending_sched.pending_slice_storage_idx;
    if (row == std::numeric_limits<uint32_t>::max())
      continue;

    int64_t duration = end_ts - slices->ts()[row];
    slices->mutable_dur()->Set(row, duration);
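
    // The task was still on the cpu when the trace ended, so no real end
    // state exists; runnable ("R") is used as a stand-in.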
    auto state = ftrace_utils::TaskState(ftrace_utils::TaskState::kRunnable);
    auto id = context_->storage->InternString(state.ToString().data());
    slices->mutable_end_state()->Set(row, id);
  }

  // Re-initialize the pending_sched_per_cpu_ vector with default values; we
  // do this instead of calling .clear() to avoid frequently resizing the
  // vector.
  std::fill(pending_sched_per_cpu_.begin(), pending_sched_per_cpu_.end(),
            PendingSchedInfo{});
}

}  // namespace trace_processor
}  // namespace perfetto