/*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/trace_processor/importers/ftrace/sched_event_tracker.h"

#include <math.h>

#include "perfetto/ext/base/utils.h"
#include "src/trace_processor/importers/common/args_tracker.h"
#include "src/trace_processor/importers/common/event_tracker.h"
#include "src/trace_processor/importers/common/process_tracker.h"
#include "src/trace_processor/importers/common/system_info_tracker.h"
#include "src/trace_processor/importers/ftrace/ftrace_descriptors.h"
#include "src/trace_processor/storage/stats.h"
#include "src/trace_processor/types/task_state.h"
#include "src/trace_processor/types/trace_processor_context.h"
#include "src/trace_processor/types/variadic.h"

#include "protos/perfetto/trace/ftrace/ftrace_event.pbzero.h"
#include "protos/perfetto/trace/ftrace/sched.pbzero.h"

namespace perfetto {
namespace trace_processor {

SchedEventTracker::SchedEventTracker(TraceProcessorContext* context)
    : context_(context) {
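  // Intern the event and field name strings once at construction so that the
  // per-event code paths below can attach args using cheap StringIds instead
  // of re-interning the same strings for every event.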
  // pre-parse sched_switch
  auto* switch_descriptor = GetMessageDescriptorForId(
      protos::pbzero::FtraceEvent::kSchedSwitchFieldNumber);
  PERFETTO_CHECK(switch_descriptor->max_field_id == kSchedSwitchMaxFieldId);

  for (size_t i = 1; i <= kSchedSwitchMaxFieldId; i++) {
    sched_switch_field_ids_[i] =
        context->storage->InternString(switch_descriptor->fields[i].name);
  }
  sched_switch_id_ = context->storage->InternString(switch_descriptor->name);

  // pre-parse sched_waking
  auto* waking_descriptor = GetMessageDescriptorForId(
      protos::pbzero::FtraceEvent::kSchedWakingFieldNumber);
  PERFETTO_CHECK(waking_descriptor->max_field_id == kSchedWakingMaxFieldId);

  for (size_t i = 1; i <= kSchedWakingMaxFieldId; i++) {
    sched_waking_field_ids_[i] =
        context->storage->InternString(waking_descriptor->fields[i].name);
  }
  sched_waking_id_ = context->storage->InternString(waking_descriptor->name);
}

SchedEventTracker::~SchedEventTracker() = default;

void SchedEventTracker::PushSchedSwitch(uint32_t cpu,
                                        int64_t ts,
                                        uint32_t prev_pid,
                                        base::StringView prev_comm,
                                        int32_t prev_prio,
                                        int64_t prev_state,
                                        uint32_t next_pid,
                                        base::StringView next_comm,
                                        int32_t next_prio) {
  // At this stage all events should be globally timestamp ordered.
  if (ts < context_->event_tracker->max_timestamp()) {
    PERFETTO_ELOG("sched_switch event out of order by %.4f ms, skipping",
                  (context_->event_tracker->max_timestamp() - ts) / 1e6);
    context_->storage->IncrementStats(stats::sched_switch_out_of_order);
    return;
  }
  context_->event_tracker->UpdateMaxTimestamp(ts);
  PERFETTO_DCHECK(cpu < kMaxCpus);

  StringId next_comm_id = context_->storage->InternString(next_comm);
  UniqueTid next_utid =
      context_->process_tracker->UpdateThreadName(next_pid, next_comm_id);

  // First use this data to close the previous slice.
  bool prev_pid_match_prev_next_pid = false;
  auto* pending_sched = &pending_sched_per_cpu_[cpu];
  uint32_t pending_slice_idx = pending_sched->pending_slice_storage_idx;
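  // |pending_slice_storage_idx| uses the max uint32_t value as a sentinel
  // meaning "no slice is currently open on this cpu".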
  if (pending_slice_idx < std::numeric_limits<uint32_t>::max()) {
    prev_pid_match_prev_next_pid = prev_pid == pending_sched->last_pid;
    if (PERFETTO_LIKELY(prev_pid_match_prev_next_pid)) {
      ClosePendingSlice(pending_slice_idx, ts, prev_state);
    } else {
      // If the pids are not consistent, make a note of this.
      context_->storage->IncrementStats(stats::mismatched_sched_switch_tids);
    }
  }

  // We have to intern prev_comm again because our assumption that
  // this event's |prev_comm| == previous event's |next_comm| does not hold
  // if the thread changed its name while scheduled.
  StringId prev_comm_id = context_->storage->InternString(prev_comm);
  UniqueTid prev_utid =
      context_->process_tracker->UpdateThreadName(prev_pid, prev_comm_id);

  auto new_slice_idx = AddRawEventAndStartSlice(
      cpu, ts, prev_utid, prev_pid, prev_comm_id, prev_prio, prev_state,
      next_utid, next_pid, next_comm_id, next_prio);

  // Finally, update the info for the next sched switch on this CPU.
  pending_sched->pending_slice_storage_idx = new_slice_idx;
  pending_sched->last_pid = next_pid;
  pending_sched->last_utid = next_utid;
  pending_sched->last_prio = next_prio;
}

void SchedEventTracker::PushSchedSwitchCompact(uint32_t cpu,
                                               int64_t ts,
                                               int64_t prev_state,
                                               uint32_t next_pid,
                                               int32_t next_prio,
                                               StringId next_comm_id) {
  // At this stage all events should be globally timestamp ordered.
  if (ts < context_->event_tracker->max_timestamp()) {
    PERFETTO_ELOG("sched_switch event out of order by %.4f ms, skipping",
                  (context_->event_tracker->max_timestamp() - ts) / 1e6);
    context_->storage->IncrementStats(stats::sched_switch_out_of_order);
    return;
  }
  context_->event_tracker->UpdateMaxTimestamp(ts);
  PERFETTO_DCHECK(cpu < kMaxCpus);

  UniqueTid next_utid =
      context_->process_tracker->UpdateThreadName(next_pid, next_comm_id);

  auto* pending_sched = &pending_sched_per_cpu_[cpu];

  // If we're processing the first compact event for this cpu, don't start a
  // slice since we're missing the "prev_*" fields. The successive events will
  // create slices as normal, but the first per-cpu switch is effectively
  // discarded.
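  // |last_utid| is initialised to the max UniqueTid value as a sentinel
  // meaning "no sched_switch has been seen on this cpu yet".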
  if (pending_sched->last_utid == std::numeric_limits<UniqueTid>::max()) {
    context_->storage->IncrementStats(stats::compact_sched_switch_skipped);

    pending_sched->last_pid = next_pid;
    pending_sched->last_utid = next_utid;
    pending_sched->last_prio = next_prio;
    // Note: no pending slice, so leave |pending_slice_storage_idx| in its
    // invalid state.
    return;
  }

  // Close the pending slice if any (we won't have one when processing the
  // first two compact events for a given cpu).
  uint32_t pending_slice_idx = pending_sched->pending_slice_storage_idx;
  if (pending_slice_idx < std::numeric_limits<uint32_t>::max())
    ClosePendingSlice(pending_slice_idx, ts, prev_state);

  // Use the previous event's values to infer this event's "prev_*" fields.
  // There are edge cases, but this assumption should still produce sensible
  // results in the absence of data loss.
  UniqueTid prev_utid = pending_sched->last_utid;
  uint32_t prev_pid = pending_sched->last_pid;
  int32_t prev_prio = pending_sched->last_prio;

  // Do a fresh task name lookup in case it was updated by a task_rename while
  // scheduled.
  StringId prev_comm_id = context_->storage->thread_table().name()[prev_utid];

  auto new_slice_idx = AddRawEventAndStartSlice(
      cpu, ts, prev_utid, prev_pid, prev_comm_id, prev_prio, prev_state,
      next_utid, next_pid, next_comm_id, next_prio);

  // Finally, update the info for the next sched switch on this CPU.
  pending_sched->pending_slice_storage_idx = new_slice_idx;
  pending_sched->last_pid = next_pid;
  pending_sched->last_utid = next_utid;
  pending_sched->last_prio = next_prio;
}

PERFETTO_ALWAYS_INLINE
uint32_t SchedEventTracker::AddRawEventAndStartSlice(uint32_t cpu,
                                                     int64_t ts,
                                                     UniqueTid prev_utid,
                                                     uint32_t prev_pid,
                                                     StringId prev_comm_id,
                                                     int32_t prev_prio,
                                                     int64_t prev_state,
                                                     UniqueTid next_utid,
                                                     uint32_t next_pid,
                                                     StringId next_comm_id,
                                                     int32_t next_prio) {
  if (PERFETTO_LIKELY(context_->config.ingest_ftrace_in_raw_table)) {
    // Push the raw event - this is done as the raw ftrace event codepath does
    // not insert sched_switch.
    RawId id = context_->storage->mutable_raw_table()
                   ->Insert({ts, sched_switch_id_, cpu, prev_utid})
                   .id;

    // Note: this ordering is important. The args should be pushed in the same
    // order as the fields in the proto; this is used by the raw table to
    // index these events using the field ids.
    using SS = protos::pbzero::SchedSwitchFtraceEvent;

    auto inserter = context_->args_tracker->AddArgsTo(id);
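    // |add_raw_arg| looks up the pre-interned name of a proto field by its
    // field number and attaches the given value as an arg of the raw event.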
    auto add_raw_arg = [this, &inserter](int field_num, Variadic var) {
      StringId key = sched_switch_field_ids_[static_cast<size_t>(field_num)];
      inserter.AddArg(key, var);
    };
    add_raw_arg(SS::kPrevCommFieldNumber, Variadic::String(prev_comm_id));
    add_raw_arg(SS::kPrevPidFieldNumber, Variadic::Integer(prev_pid));
    add_raw_arg(SS::kPrevPrioFieldNumber, Variadic::Integer(prev_prio));
    add_raw_arg(SS::kPrevStateFieldNumber, Variadic::Integer(prev_state));
    add_raw_arg(SS::kNextCommFieldNumber, Variadic::String(next_comm_id));
    add_raw_arg(SS::kNextPidFieldNumber, Variadic::Integer(next_pid));
    add_raw_arg(SS::kNextPrioFieldNumber, Variadic::Integer(next_prio));
  }

  // Open a new scheduling slice, corresponding to the task that was
  // just switched to.
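  // The duration is initially zero; it is set by ClosePendingSlice (or by
  // FlushPendingEvents at the end of the trace) once the slice's end
  // timestamp is known.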
  auto* sched = context_->storage->mutable_sched_slice_table();
  auto row_and_id = sched->Insert(
      {ts, 0 /* duration */, cpu, next_utid, kNullStringId, next_prio});
  SchedId sched_id = row_and_id.id;
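  // Return the row index (rather than the id) as that is what the per-cpu
  // pending slice bookkeeping stores.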
  return *sched->id().IndexOf(sched_id);
}

PERFETTO_ALWAYS_INLINE
void SchedEventTracker::ClosePendingSlice(uint32_t pending_slice_idx,
                                          int64_t ts,
                                          int64_t prev_state) {
  auto* slices = context_->storage->mutable_sched_slice_table();

  int64_t duration = ts - slices->ts()[pending_slice_idx];
  slices->mutable_dur()->Set(pending_slice_idx, duration);

  // We store the state as a uint16 as we only consider values up to 2048
  // when unpacking the information inside; this allows savings of 48 bits
  // per slice.
  auto kernel_version =
      SystemInfoTracker::GetOrCreate(context_)->GetKernelVersion();
  auto task_state = ftrace_utils::TaskState(static_cast<uint16_t>(prev_state),
                                            kernel_version);
  if (!task_state.is_valid()) {
    context_->storage->IncrementStats(stats::task_state_invalid);
  }
  auto id = task_state.is_valid()
                ? context_->storage->InternString(task_state.ToString().data())
                : kNullStringId;
  slices->mutable_end_state()->Set(pending_slice_idx, id);
}

// Processes a sched_waking that was decoded from a compact representation,
// adding to the raw and instants tables.
void SchedEventTracker::PushSchedWakingCompact(uint32_t cpu,
                                               int64_t ts,
                                               uint32_t wakee_pid,
                                               int32_t target_cpu,
                                               int32_t prio,
                                               StringId comm_id) {
  // At this stage all events should be globally timestamp ordered.
  if (ts < context_->event_tracker->max_timestamp()) {
    PERFETTO_ELOG("sched_waking event out of order by %.4f ms, skipping",
                  (context_->event_tracker->max_timestamp() - ts) / 1e6);
    context_->storage->IncrementStats(stats::sched_waking_out_of_order);
    return;
  }
  context_->event_tracker->UpdateMaxTimestamp(ts);
  PERFETTO_DCHECK(cpu < kMaxCpus);

  // We infer the task that emitted the event (i.e. common_pid) from the
  // scheduling slices. Drop the event if we haven't seen any sched_switch
  // events for this cpu yet.
  // Note that if sched_switch wasn't enabled, we will have to skip all
  // compact waking events.
  auto* pending_sched = &pending_sched_per_cpu_[cpu];
  if (pending_sched->last_utid == std::numeric_limits<UniqueTid>::max()) {
    context_->storage->IncrementStats(stats::compact_sched_waking_skipped);
    return;
  }
  auto curr_utid = pending_sched->last_utid;

  if (PERFETTO_LIKELY(context_->config.ingest_ftrace_in_raw_table)) {
    // Add an entry to the raw table.
    RawId id = context_->storage->mutable_raw_table()
                   ->Insert({ts, sched_waking_id_, cpu, curr_utid})
                   .id;

    // "success" is hardcoded as always 1 by the kernel, with a TODO to remove
    // it.
    static constexpr int32_t kHardcodedSuccess = 1;

    using SW = protos::pbzero::SchedWakingFtraceEvent;
    auto inserter = context_->args_tracker->AddArgsTo(id);
    auto add_raw_arg = [this, &inserter](int field_num, Variadic var) {
      StringId key = sched_waking_field_ids_[static_cast<size_t>(field_num)];
      inserter.AddArg(key, var);
    };
    add_raw_arg(SW::kCommFieldNumber, Variadic::String(comm_id));
    add_raw_arg(SW::kPidFieldNumber, Variadic::Integer(wakee_pid));
    add_raw_arg(SW::kPrioFieldNumber, Variadic::Integer(prio));
    add_raw_arg(SW::kSuccessFieldNumber, Variadic::Integer(kHardcodedSuccess));
    add_raw_arg(SW::kTargetCpuFieldNumber, Variadic::Integer(target_cpu));
  }

  // Add a waking entry to the instants.
  auto wakee_utid = context_->process_tracker->GetOrCreateThread(wakee_pid);
  auto* instants = context_->storage->mutable_instant_table();
  auto ref_type_id = context_->storage->InternString(
      GetRefTypeStringMap()[static_cast<size_t>(RefType::kRefUtid)]);
  instants->Insert({ts, sched_waking_id_, wakee_utid, ref_type_id});
}

void SchedEventTracker::FlushPendingEvents() {
  // TODO(lalitm): the day this method is called before end of trace, don't
  // flush the sched events as they will probably be pushed in the next round
  // of ftrace events.
  int64_t end_ts = context_->storage->GetTraceTimestampBoundsNs().second;
  auto* slices = context_->storage->mutable_sched_slice_table();
  for (const auto& pending_sched : pending_sched_per_cpu_) {
    uint32_t row = pending_sched.pending_slice_storage_idx;
    if (row == std::numeric_limits<uint32_t>::max())
      continue;

    int64_t duration = end_ts - slices->ts()[row];
    slices->mutable_dur()->Set(row, duration);

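    // The task was still scheduled in when the trace ended, so its real end
    // state is unknown; it is recorded as runnable ("R").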
    auto state = ftrace_utils::TaskState(ftrace_utils::TaskState::kRunnable);
    auto id = context_->storage->InternString(state.ToString().data());
    slices->mutable_end_state()->Set(row, id);
  }

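  // Reset the per-cpu bookkeeping so every cpu is back to its initial
  // "nothing seen yet" state.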
  pending_sched_per_cpu_ = {};
}

}  // namespace trace_processor
}  // namespace perfetto