• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/trace_processor/importers/ftrace/sched_event_tracker.h"
18 
19 #include <math.h>
20 
21 #include "perfetto/ext/base/utils.h"
22 #include "src/trace_processor/importers/common/args_tracker.h"
23 #include "src/trace_processor/importers/common/event_tracker.h"
24 #include "src/trace_processor/importers/common/process_tracker.h"
25 #include "src/trace_processor/importers/common/system_info_tracker.h"
26 #include "src/trace_processor/importers/ftrace/ftrace_descriptors.h"
27 #include "src/trace_processor/storage/stats.h"
28 #include "src/trace_processor/types/task_state.h"
29 #include "src/trace_processor/types/trace_processor_context.h"
30 #include "src/trace_processor/types/variadic.h"
31 
32 #include "protos/perfetto/trace/ftrace/ftrace_event.pbzero.h"
33 #include "protos/perfetto/trace/ftrace/sched.pbzero.h"
34 
35 namespace perfetto {
36 namespace trace_processor {
37 
SchedEventTracker(TraceProcessorContext * context)38 SchedEventTracker::SchedEventTracker(TraceProcessorContext* context)
39     : waker_utid_id_(context->storage->InternString("waker_utid")),
40       context_(context) {
41   // pre-parse sched_switch
42   auto* switch_descriptor = GetMessageDescriptorForId(
43       protos::pbzero::FtraceEvent::kSchedSwitchFieldNumber);
44   PERFETTO_CHECK(switch_descriptor->max_field_id == kSchedSwitchMaxFieldId);
45 
46   for (size_t i = 1; i <= kSchedSwitchMaxFieldId; i++) {
47     sched_switch_field_ids_[i] =
48         context->storage->InternString(switch_descriptor->fields[i].name);
49   }
50   sched_switch_id_ = context->storage->InternString(switch_descriptor->name);
51 
52   // pre-parse sched_waking
53   auto* waking_descriptor = GetMessageDescriptorForId(
54       protos::pbzero::FtraceEvent::kSchedWakingFieldNumber);
55   PERFETTO_CHECK(waking_descriptor->max_field_id == kSchedWakingMaxFieldId);
56 
57   for (size_t i = 1; i <= kSchedWakingMaxFieldId; i++) {
58     sched_waking_field_ids_[i] =
59         context->storage->InternString(waking_descriptor->fields[i].name);
60   }
61   sched_waking_id_ = context->storage->InternString(waking_descriptor->name);
62 
63   // Pre-allocate space for 128 CPUs, which should be enough for most hosts.
64   // It's OK if this number is too small, the vector will be grown on-demand.
65   pending_sched_per_cpu_.reserve(128);
66 }
67 
68 SchedEventTracker::~SchedEventTracker() = default;
69 
PushSchedSwitch(uint32_t cpu,int64_t ts,uint32_t prev_pid,base::StringView prev_comm,int32_t prev_prio,int64_t prev_state,uint32_t next_pid,base::StringView next_comm,int32_t next_prio)70 void SchedEventTracker::PushSchedSwitch(uint32_t cpu,
71                                         int64_t ts,
72                                         uint32_t prev_pid,
73                                         base::StringView prev_comm,
74                                         int32_t prev_prio,
75                                         int64_t prev_state,
76                                         uint32_t next_pid,
77                                         base::StringView next_comm,
78                                         int32_t next_prio) {
79   // At this stage all events should be globally timestamp ordered.
80   if (ts < context_->event_tracker->max_timestamp()) {
81     PERFETTO_ELOG(
82         "sched_switch event out of order by %.4f ms, skipping",
83         static_cast<double>(context_->event_tracker->max_timestamp() - ts) /
84             1e6);
85     context_->storage->IncrementStats(stats::sched_switch_out_of_order);
86     return;
87   }
88   context_->event_tracker->UpdateMaxTimestamp(ts);
89 
90   StringId next_comm_id = context_->storage->InternString(next_comm);
91   UniqueTid next_utid = context_->process_tracker->UpdateThreadName(
92       next_pid, next_comm_id, ThreadNamePriority::kFtrace);
93 
94   // First use this data to close the previous slice.
95   bool prev_pid_match_prev_next_pid = false;
96   auto* pending_sched = PendingSchedByCPU(cpu);
97   uint32_t pending_slice_idx = pending_sched->pending_slice_storage_idx;
98   if (pending_slice_idx < std::numeric_limits<uint32_t>::max()) {
99     prev_pid_match_prev_next_pid = prev_pid == pending_sched->last_pid;
100     if (PERFETTO_LIKELY(prev_pid_match_prev_next_pid)) {
101       ClosePendingSlice(pending_slice_idx, ts, prev_state);
102     } else {
103       // If the pids are not consistent, make a note of this.
104       context_->storage->IncrementStats(stats::mismatched_sched_switch_tids);
105     }
106   }
107 
108   // We have to intern prev_comm again because our assumption that
109   // this event's |prev_comm| == previous event's |next_comm| does not hold
110   // if the thread changed its name while scheduled.
111   StringId prev_comm_id = context_->storage->InternString(prev_comm);
112   UniqueTid prev_utid = context_->process_tracker->UpdateThreadName(
113       prev_pid, prev_comm_id, ThreadNamePriority::kFtrace);
114 
115   auto new_slice_idx = AddRawEventAndStartSlice(
116       cpu, ts, prev_utid, prev_pid, prev_comm_id, prev_prio, prev_state,
117       next_utid, next_pid, next_comm_id, next_prio);
118 
119   // Finally, update the info for the next sched switch on this CPU.
120   pending_sched->pending_slice_storage_idx = new_slice_idx;
121   pending_sched->last_pid = next_pid;
122   pending_sched->last_utid = next_utid;
123   pending_sched->last_prio = next_prio;
124 }
125 
PushSchedSwitchCompact(uint32_t cpu,int64_t ts,int64_t prev_state,uint32_t next_pid,int32_t next_prio,StringId next_comm_id)126 void SchedEventTracker::PushSchedSwitchCompact(uint32_t cpu,
127                                                int64_t ts,
128                                                int64_t prev_state,
129                                                uint32_t next_pid,
130                                                int32_t next_prio,
131                                                StringId next_comm_id) {
132   // At this stage all events should be globally timestamp ordered.
133   if (ts < context_->event_tracker->max_timestamp()) {
134     PERFETTO_ELOG(
135         "sched_switch event out of order by %.4f ms, skipping",
136         static_cast<double>(context_->event_tracker->max_timestamp() - ts) /
137             1e6);
138     context_->storage->IncrementStats(stats::sched_switch_out_of_order);
139     return;
140   }
141   context_->event_tracker->UpdateMaxTimestamp(ts);
142 
143   UniqueTid next_utid = context_->process_tracker->UpdateThreadName(
144       next_pid, next_comm_id, ThreadNamePriority::kFtrace);
145 
146   auto* pending_sched = PendingSchedByCPU(cpu);
147 
148   // If we're processing the first compact event for this cpu, don't start a
149   // slice since we're missing the "prev_*" fields. The successive events will
150   // create slices as normal, but the first per-cpu switch is effectively
151   // discarded.
152   if (pending_sched->last_utid == std::numeric_limits<UniqueTid>::max()) {
153     context_->storage->IncrementStats(stats::compact_sched_switch_skipped);
154 
155     pending_sched->last_pid = next_pid;
156     pending_sched->last_utid = next_utid;
157     pending_sched->last_prio = next_prio;
158     // Note: no pending slice, so leave |pending_slice_storage_idx| in its
159     // invalid state.
160     return;
161   }
162 
163   // Close the pending slice if any (we won't have one when processing the first
164   // two compact events for a given cpu).
165   uint32_t pending_slice_idx = pending_sched->pending_slice_storage_idx;
166   if (pending_slice_idx < std::numeric_limits<uint32_t>::max())
167     ClosePendingSlice(pending_slice_idx, ts, prev_state);
168 
169   // Use the previous event's values to infer this event's "prev_*" fields.
170   // There are edge cases, but this assumption should still produce sensible
171   // results in the absence of data loss.
172   UniqueTid prev_utid = pending_sched->last_utid;
173   uint32_t prev_pid = pending_sched->last_pid;
174   int32_t prev_prio = pending_sched->last_prio;
175 
176   // Do a fresh task name lookup in case it was updated by a task_rename while
177   // scheduled.
178   StringId prev_comm_id =
179       context_->storage->thread_table().name()[prev_utid].value_or(
180           kNullStringId);
181 
182   auto new_slice_idx = AddRawEventAndStartSlice(
183       cpu, ts, prev_utid, prev_pid, prev_comm_id, prev_prio, prev_state,
184       next_utid, next_pid, next_comm_id, next_prio);
185 
186   // Finally, update the info for the next sched switch on this CPU.
187   pending_sched->pending_slice_storage_idx = new_slice_idx;
188   pending_sched->last_pid = next_pid;
189   pending_sched->last_utid = next_utid;
190   pending_sched->last_prio = next_prio;
191 }
192 
193 PERFETTO_ALWAYS_INLINE
AddRawEventAndStartSlice(uint32_t cpu,int64_t ts,UniqueTid prev_utid,uint32_t prev_pid,StringId prev_comm_id,int32_t prev_prio,int64_t prev_state,UniqueTid next_utid,uint32_t next_pid,StringId next_comm_id,int32_t next_prio)194 uint32_t SchedEventTracker::AddRawEventAndStartSlice(uint32_t cpu,
195                                                      int64_t ts,
196                                                      UniqueTid prev_utid,
197                                                      uint32_t prev_pid,
198                                                      StringId prev_comm_id,
199                                                      int32_t prev_prio,
200                                                      int64_t prev_state,
201                                                      UniqueTid next_utid,
202                                                      uint32_t next_pid,
203                                                      StringId next_comm_id,
204                                                      int32_t next_prio) {
205   if (PERFETTO_LIKELY(context_->config.ingest_ftrace_in_raw_table)) {
206     // Push the raw event - this is done as the raw ftrace event codepath does
207     // not insert sched_switch.
208     RawId id = context_->storage->mutable_raw_table()
209                    ->Insert({ts, sched_switch_id_, cpu, prev_utid})
210                    .id;
211 
212     // Note: this ordering is important. The events should be pushed in the same
213     // order as the order of fields in the proto; this is used by the raw table
214     // to index these events using the field ids.
215     using SS = protos::pbzero::SchedSwitchFtraceEvent;
216 
217     auto inserter = context_->args_tracker->AddArgsTo(id);
218     auto add_raw_arg = [this, &inserter](int field_num, Variadic var) {
219       StringId key = sched_switch_field_ids_[static_cast<size_t>(field_num)];
220       inserter.AddArg(key, var);
221     };
222     add_raw_arg(SS::kPrevCommFieldNumber, Variadic::String(prev_comm_id));
223     add_raw_arg(SS::kPrevPidFieldNumber, Variadic::Integer(prev_pid));
224     add_raw_arg(SS::kPrevPrioFieldNumber, Variadic::Integer(prev_prio));
225     add_raw_arg(SS::kPrevStateFieldNumber, Variadic::Integer(prev_state));
226     add_raw_arg(SS::kNextCommFieldNumber, Variadic::String(next_comm_id));
227     add_raw_arg(SS::kNextPidFieldNumber, Variadic::Integer(next_pid));
228     add_raw_arg(SS::kNextPrioFieldNumber, Variadic::Integer(next_prio));
229   }
230 
231   // Open a new scheduling slice, corresponding to the task that was
232   // just switched to.
233   auto* sched = context_->storage->mutable_sched_slice_table();
234   auto row_and_id = sched->Insert(
235       {ts, 0 /* duration */, cpu, next_utid, kNullStringId, next_prio});
236   SchedId sched_id = row_and_id.id;
237   return *sched->id().IndexOf(sched_id);
238 }
239 
240 PERFETTO_ALWAYS_INLINE
ClosePendingSlice(uint32_t pending_slice_idx,int64_t ts,int64_t prev_state)241 void SchedEventTracker::ClosePendingSlice(uint32_t pending_slice_idx,
242                                           int64_t ts,
243                                           int64_t prev_state) {
244   auto* slices = context_->storage->mutable_sched_slice_table();
245 
246   int64_t duration = ts - slices->ts()[pending_slice_idx];
247   slices->mutable_dur()->Set(pending_slice_idx, duration);
248 
249   // We store the state as a uint16 as we only consider values up to 2048
250   // when unpacking the information inside; this allows savings of 48 bits
251   // per slice.
252   auto kernel_version =
253       SystemInfoTracker::GetOrCreate(context_)->GetKernelVersion();
254   auto task_state = ftrace_utils::TaskState(static_cast<uint16_t>(prev_state),
255                                             kernel_version);
256   if (!task_state.is_valid()) {
257     context_->storage->IncrementStats(stats::task_state_invalid);
258   }
259   auto id = task_state.is_valid()
260                 ? context_->storage->InternString(task_state.ToString().data())
261                 : kNullStringId;
262   slices->mutable_end_state()->Set(pending_slice_idx, id);
263 }
264 
265 // Processes a sched_waking that was decoded from a compact representation,
266 // adding to the raw and instants tables.
PushSchedWakingCompact(uint32_t cpu,int64_t ts,uint32_t wakee_pid,int32_t target_cpu,int32_t prio,StringId comm_id)267 void SchedEventTracker::PushSchedWakingCompact(uint32_t cpu,
268                                                int64_t ts,
269                                                uint32_t wakee_pid,
270                                                int32_t target_cpu,
271                                                int32_t prio,
272                                                StringId comm_id) {
273   // At this stage all events should be globally timestamp ordered.
274   if (ts < context_->event_tracker->max_timestamp()) {
275     PERFETTO_ELOG(
276         "sched_waking event out of order by %.4f ms, skipping",
277         static_cast<double>(context_->event_tracker->max_timestamp() - ts) /
278             1e6);
279     context_->storage->IncrementStats(stats::sched_waking_out_of_order);
280     return;
281   }
282   context_->event_tracker->UpdateMaxTimestamp(ts);
283 
284   // We infer the task that emitted the event (i.e. common_pid) from the
285   // scheduling slices. Drop the event if we haven't seen any sched_switch
286   // events for this cpu yet.
287   // Note that if sched_switch wasn't enabled, we will have to skip all
288   // compact waking events.
289   auto* pending_sched = PendingSchedByCPU(cpu);
290   if (pending_sched->last_utid == std::numeric_limits<UniqueTid>::max()) {
291     context_->storage->IncrementStats(stats::compact_sched_waking_skipped);
292     return;
293   }
294   auto curr_utid = pending_sched->last_utid;
295 
296   if (PERFETTO_LIKELY(context_->config.ingest_ftrace_in_raw_table)) {
297     // Add an entry to the raw table.
298     RawId id = context_->storage->mutable_raw_table()
299                    ->Insert({ts, sched_waking_id_, cpu, curr_utid})
300                    .id;
301 
302     // "success" is hardcoded as always 1 by the kernel, with a TODO to remove
303     // it.
304     static constexpr int32_t kHardcodedSuccess = 1;
305 
306     using SW = protos::pbzero::SchedWakingFtraceEvent;
307     auto inserter = context_->args_tracker->AddArgsTo(id);
308     auto add_raw_arg = [this, &inserter](int field_num, Variadic var) {
309       StringId key = sched_waking_field_ids_[static_cast<size_t>(field_num)];
310       inserter.AddArg(key, var);
311     };
312     add_raw_arg(SW::kCommFieldNumber, Variadic::String(comm_id));
313     add_raw_arg(SW::kPidFieldNumber, Variadic::Integer(wakee_pid));
314     add_raw_arg(SW::kPrioFieldNumber, Variadic::Integer(prio));
315     add_raw_arg(SW::kSuccessFieldNumber, Variadic::Integer(kHardcodedSuccess));
316     add_raw_arg(SW::kTargetCpuFieldNumber, Variadic::Integer(target_cpu));
317   }
318 
319   // Add a waking entry to the instants.
320   auto wakee_utid = context_->process_tracker->GetOrCreateThread(wakee_pid);
321   auto* instants = context_->storage->mutable_instant_table();
322   auto ref_type_id = context_->storage->InternString(
323       GetRefTypeStringMap()[static_cast<size_t>(RefType::kRefUtid)]);
324   tables::InstantTable::Id id =
325       instants->Insert({ts, sched_waking_id_, wakee_utid, ref_type_id}).id;
326 
327   context_->args_tracker->AddArgsTo(id).AddArg(
328       waker_utid_id_, Variadic::UnsignedInteger(curr_utid));
329 }
330 
FlushPendingEvents()331 void SchedEventTracker::FlushPendingEvents() {
332   // TODO(lalitm): the day this method is called before end of trace, don't
333   // flush the sched events as they will probably be pushed in the next round
334   // of ftrace events.
335   int64_t end_ts = context_->storage->GetTraceTimestampBoundsNs().second;
336   auto* slices = context_->storage->mutable_sched_slice_table();
337   for (const auto& pending_sched : pending_sched_per_cpu_) {
338     uint32_t row = pending_sched.pending_slice_storage_idx;
339     if (row == std::numeric_limits<uint32_t>::max())
340       continue;
341 
342     int64_t duration = end_ts - slices->ts()[row];
343     slices->mutable_dur()->Set(row, duration);
344 
345     auto state = ftrace_utils::TaskState(ftrace_utils::TaskState::kRunnable);
346     auto id = context_->storage->InternString(state.ToString().data());
347     slices->mutable_end_state()->Set(row, id);
348   }
349 
350   // Re-initialize the pending_sched_per_cpu_ vector with default values, we do
351   // this instead of calling .clear() to avoid having to frequently resize the
352   // vector.
353   std::fill(pending_sched_per_cpu_.begin(), pending_sched_per_cpu_.end(),
354             PendingSchedInfo{});
355 }
356 
357 }  // namespace trace_processor
358 }  // namespace perfetto
359