• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_TRACED_PROBES_FTRACE_CPU_READER_H_
18 #define SRC_TRACED_PROBES_FTRACE_CPU_READER_H_
19 
20 #include <string.h>
21 #include <cstdint>
22 
23 #include <optional>
24 #include <set>
25 
26 #include "perfetto/ext/base/paged_memory.h"
27 #include "perfetto/ext/base/scoped_file.h"
28 #include "perfetto/ext/base/utils.h"
29 #include "perfetto/ext/traced/data_source_types.h"
30 #include "perfetto/ext/tracing/core/trace_writer.h"
31 #include "perfetto/protozero/message.h"
32 #include "perfetto/protozero/message_handle.h"
33 #include "src/traced/probes/ftrace/compact_sched.h"
34 #include "src/traced/probes/ftrace/ftrace_metadata.h"
35 
36 #include "protos/perfetto/trace/trace_packet.pbzero.h"
37 
38 namespace perfetto {
39 
40 class FtraceDataSource;
41 class LazyKernelSymbolizer;
42 class ProtoTranslationTable;
43 struct FtraceClockSnapshot;
44 struct FtraceDataSourceConfig;
45 
46 namespace protos {
47 namespace pbzero {
48 class FtraceEventBundle;
49 enum FtraceClock : int32_t;
50 enum FtraceParseStatus : int32_t;
51 }  // namespace pbzero
52 }  // namespace protos
53 
54 // Reads raw ftrace data for a cpu, parses it, and writes it into the perfetto
55 // tracing buffers.
56 class CpuReader {
57  public:
58   // Buffers used when parsing a chunk of ftrace data, allocated by
59   // FtraceController and repeatedly reused by all CpuReaders:
60   // * paged memory into which we read raw ftrace data.
61   // * buffers to accumulate and emit scheduling data in a structure-of-arrays
62   //   format (packed proto fields).
63   class ParsingBuffers {
64    public:
AllocateIfNeeded()65     void AllocateIfNeeded() {
66       // PagedMemory stays valid as long as it was allocated once.
67       if (!ftrace_data_.IsValid()) {
68         ftrace_data_ = base::PagedMemory::Allocate(base::GetSysPageSize() *
69                                                    kFtraceDataBufSizePages);
70       }
71       // Heap-allocated buffer gets freed and reallocated.
72       if (!compact_sched_) {
73         compact_sched_ = std::make_unique<CompactSchedBuffer>();
74       }
75     }
76 
Release()77     void Release() {
78       if (ftrace_data_.IsValid()) {
79         ftrace_data_.AdviseDontNeed(ftrace_data_.Get(), ftrace_data_.size());
80       }
81       compact_sched_.reset();
82     }
83 
84    private:
85     friend class CpuReader;
86     // When reading and parsing data for a particular cpu, we do it in batches
87     // of this many pages. In other words, we'll read up to
88     // |kFtraceDataBufSizePages| into memory, parse them, and then repeat if we
89     // still haven't caught up to the writer.
90     static constexpr size_t kFtraceDataBufSizePages = 32;
91 
ftrace_data_buf()92     uint8_t* ftrace_data_buf() const {
93       return reinterpret_cast<uint8_t*>(ftrace_data_.Get());
94     }
ftrace_data_buf_pages()95     size_t ftrace_data_buf_pages() const {
96       PERFETTO_DCHECK(ftrace_data_.size() ==
97                       base::GetSysPageSize() * kFtraceDataBufSizePages);
98       return kFtraceDataBufSizePages;
99     }
compact_sched_buf()100     CompactSchedBuffer* compact_sched_buf() const {
101       return compact_sched_.get();
102     }
103 
104     base::PagedMemory ftrace_data_;
105     std::unique_ptr<CompactSchedBuffer> compact_sched_;
106   };
107 
108   // Helper class to generate `TracePacket`s when needed. Public for testing.
109   class Bundler {
110    public:
Bundler(TraceWriter * trace_writer,FtraceMetadata * metadata,LazyKernelSymbolizer * symbolizer,size_t cpu,const FtraceClockSnapshot * ftrace_clock_snapshot,protos::pbzero::FtraceClock ftrace_clock,CompactSchedBuffer * compact_sched_buf,bool compact_sched_enabled,uint64_t last_read_event_ts)111     Bundler(TraceWriter* trace_writer,
112             FtraceMetadata* metadata,
113             LazyKernelSymbolizer* symbolizer,
114             size_t cpu,
115             const FtraceClockSnapshot* ftrace_clock_snapshot,
116             protos::pbzero::FtraceClock ftrace_clock,
117             CompactSchedBuffer* compact_sched_buf,
118             bool compact_sched_enabled,
119             uint64_t last_read_event_ts)
120         : trace_writer_(trace_writer),
121           metadata_(metadata),
122           symbolizer_(symbolizer),
123           cpu_(cpu),
124           ftrace_clock_snapshot_(ftrace_clock_snapshot),
125           ftrace_clock_(ftrace_clock),
126           compact_sched_enabled_(compact_sched_enabled),
127           compact_sched_buf_(compact_sched_buf),
128           initial_last_read_event_ts_(last_read_event_ts) {
129       if (compact_sched_enabled_)
130         compact_sched_buf_->Reset();
131     }
132 
~Bundler()133     ~Bundler() { FinalizeAndRunSymbolizer(); }
134 
GetOrCreateBundle()135     protos::pbzero::FtraceEventBundle* GetOrCreateBundle() {
136       if (!bundle_) {
137         StartNewPacket(false, initial_last_read_event_ts_);
138       }
139       return bundle_;
140     }
141 
142     // Forces the creation of a new TracePacket.
143     void StartNewPacket(bool lost_events, uint64_t last_read_event_timestamp);
144 
145     // This function is called after the contents of a FtraceBundle are written.
146     void FinalizeAndRunSymbolizer();
147 
compact_sched_buf()148     CompactSchedBuffer* compact_sched_buf() {
149       // FinalizeAndRunSymbolizer will only process the compact_sched_buf_ if
150       // there is an open bundle.
151       GetOrCreateBundle();
152       return compact_sched_buf_;
153     }
154 
155    private:
156     TraceWriter* const trace_writer_;         // Never nullptr.
157     FtraceMetadata* const metadata_;          // Never nullptr.
158     LazyKernelSymbolizer* const symbolizer_;  // Can be nullptr.
159     const size_t cpu_;
160     const FtraceClockSnapshot* const ftrace_clock_snapshot_;
161     protos::pbzero::FtraceClock const ftrace_clock_;
162     const bool compact_sched_enabled_;
163     CompactSchedBuffer* const compact_sched_buf_;
164     uint64_t initial_last_read_event_ts_;
165 
166     TraceWriter::TracePacketHandle packet_;
167     protos::pbzero::FtraceEventBundle* bundle_ = nullptr;
168   };
169 
170   struct PageHeader {
171     uint64_t timestamp;
172     uint64_t size;
173     bool lost_events;
174   };
175 
176   CpuReader(size_t cpu,
177             base::ScopedFile trace_fd,
178             const ProtoTranslationTable* table,
179             LazyKernelSymbolizer* symbolizer,
180             protos::pbzero::FtraceClock ftrace_clock,
181             const FtraceClockSnapshot* ftrace_clock_snapshot);
182   ~CpuReader();
183 
184   // move-only
185   CpuReader(const CpuReader&) = delete;
186   CpuReader& operator=(const CpuReader&) = delete;
187   CpuReader(CpuReader&&) = default;
188   CpuReader& operator=(CpuReader&&) = default;
189 
190   // Reads and parses all ftrace data for this cpu (in batches), until we catch
191   // up to the writer, or hit |max_pages|. Returns number of pages read.
192   size_t ReadCycle(ParsingBuffers* parsing_bufs,
193                    size_t max_pages,
194                    const std::set<FtraceDataSource*>& started_data_sources);
195 
196   template <typename T>
ReadAndAdvance(const uint8_t ** ptr,const uint8_t * end,T * out)197   static bool ReadAndAdvance(const uint8_t** ptr, const uint8_t* end, T* out) {
198     if (*ptr > end - sizeof(T))
199       return false;
200     memcpy(reinterpret_cast<void*>(out), reinterpret_cast<const void*>(*ptr),
201            sizeof(T));
202     *ptr += sizeof(T);
203     return true;
204   }
205 
206   // Caller must do the bounds check:
207   // [start + offset, start + offset + sizeof(T))
208   // Returns the raw value not the varint.
209   template <typename T>
ReadIntoVarInt(const uint8_t * start,uint32_t field_id,protozero::Message * out)210   static T ReadIntoVarInt(const uint8_t* start,
211                           uint32_t field_id,
212                           protozero::Message* out) {
213     T t;
214     memcpy(&t, reinterpret_cast<const void*>(start), sizeof(T));
215     out->AppendVarInt<T>(field_id, t);
216     return t;
217   }
218 
219   template <typename T>
ReadInode(const uint8_t * start,uint32_t field_id,protozero::Message * out,FtraceMetadata * metadata)220   static void ReadInode(const uint8_t* start,
221                         uint32_t field_id,
222                         protozero::Message* out,
223                         FtraceMetadata* metadata) {
224     T t = ReadIntoVarInt<T>(start, field_id, out);
225     metadata->AddInode(static_cast<Inode>(t));
226   }
227 
228   template <typename T>
ReadDevId(const uint8_t * start,uint32_t field_id,protozero::Message * out,FtraceMetadata * metadata)229   static void ReadDevId(const uint8_t* start,
230                         uint32_t field_id,
231                         protozero::Message* out,
232                         FtraceMetadata* metadata) {
233     T t;
234     memcpy(&t, reinterpret_cast<const void*>(start), sizeof(T));
235     BlockDeviceID dev_id = TranslateBlockDeviceIDToUserspace<T>(t);
236     out->AppendVarInt<BlockDeviceID>(field_id, dev_id);
237     metadata->AddDevice(dev_id);
238   }
239 
240   template <typename T>
ReadSymbolAddr(const uint8_t * start,uint32_t field_id,protozero::Message * out,FtraceMetadata * metadata)241   static void ReadSymbolAddr(const uint8_t* start,
242                              uint32_t field_id,
243                              protozero::Message* out,
244                              FtraceMetadata* metadata) {
245     // ReadSymbolAddr is a bit special. In order to not disclose KASLR layout
246     // via traces, we put in the trace only a mangled address (which really is
247     // the insertion order into metadata.kernel_addrs). We don't care about the
248     // actual symbol addesses. We just need to match that against the symbol
249     // name in the names in the FtraceEventBundle.KernelSymbols.
250     T full_addr;
251     memcpy(&full_addr, reinterpret_cast<const void*>(start), sizeof(T));
252     uint32_t interned_index = metadata->AddSymbolAddr(full_addr);
253     out->AppendVarInt(field_id, interned_index);
254   }
255 
ReadPid(const uint8_t * start,uint32_t field_id,protozero::Message * out,FtraceMetadata * metadata)256   static void ReadPid(const uint8_t* start,
257                       uint32_t field_id,
258                       protozero::Message* out,
259                       FtraceMetadata* metadata) {
260     int32_t pid = ReadIntoVarInt<int32_t>(start, field_id, out);
261     metadata->AddPid(pid);
262   }
263 
ReadCommonPid(const uint8_t * start,uint32_t field_id,protozero::Message * out,FtraceMetadata * metadata)264   static void ReadCommonPid(const uint8_t* start,
265                             uint32_t field_id,
266                             protozero::Message* out,
267                             FtraceMetadata* metadata) {
268     int32_t pid = ReadIntoVarInt<int32_t>(start, field_id, out);
269     metadata->AddCommonPid(pid);
270   }
271 
272   // Internally the kernel stores device ids in a different layout to that
273   // exposed to userspace via stat etc. There's no userspace function to convert
274   // between the formats so we have to do it ourselves.
275   template <typename T>
TranslateBlockDeviceIDToUserspace(T kernel_dev)276   static BlockDeviceID TranslateBlockDeviceIDToUserspace(T kernel_dev) {
277     // Provided search index s_dev from
278     // https://github.com/torvalds/linux/blob/v4.12/include/linux/fs.h#L404
279     // Convert to user space id using
280     // https://github.com/torvalds/linux/blob/v4.12/include/linux/kdev_t.h#L10
281     // TODO(azappone): see if this is the same on all platforms
282     uint64_t maj = static_cast<uint64_t>(kernel_dev) >> 20;
283     uint64_t min = static_cast<uint64_t>(kernel_dev) & ((1U << 20) - 1);
284     return static_cast<BlockDeviceID>(  // From makedev()
285         ((maj & 0xfffff000ULL) << 32) | ((maj & 0xfffULL) << 8) |
286         ((min & 0xffffff00ULL) << 12) | ((min & 0xffULL)));
287   }
288 
289   // Returns a parsed representation of the given raw ftrace page's header.
290   static std::optional<CpuReader::PageHeader> ParsePageHeader(
291       const uint8_t** ptr,
292       uint16_t page_header_size_len);
293 
294   // Parse the payload of a raw ftrace page, and write the events as protos
295   // into the provided bundle (and/or compact buffer).
296   // |table| contains the mix of compile time (e.g. proto field ids) and
297   // run time (e.g. field offset and size) information necessary to do this.
298   // The table is initialized once at start time by the ftrace controller
299   // which passes it to the CpuReader which passes it here.
300   // The caller is responsible for validating that the page_header->size stays
301   // within the current page.
302   static protos::pbzero::FtraceParseStatus ParsePagePayload(
303       const uint8_t* start_of_payload,
304       const PageHeader* page_header,
305       const ProtoTranslationTable* table,
306       const FtraceDataSourceConfig* ds_config,
307       Bundler* bundler,
308       FtraceMetadata* metadata,
309       uint64_t* last_read_event_ts);
310 
311   // Parse a single raw ftrace event beginning at |start| and ending at |end|
312   // and write it into the provided bundle as a proto.
313   // |table| contains the mix of compile time (e.g. proto field ids) and
314   // run time (e.g. field offset and size) information necessary to do this.
315   // The table is initialized once at start time by the ftrace controller
316   // which passes it to the CpuReader which passes it to ParsePage which
317   // passes it here.
318   static bool ParseEvent(uint16_t ftrace_event_id,
319                          const uint8_t* start,
320                          const uint8_t* end,
321                          const ProtoTranslationTable* table,
322                          const FtraceDataSourceConfig* ds_config,
323                          protozero::Message* message,
324                          FtraceMetadata* metadata);
325 
326   static bool ParseField(const Field& field,
327                          const uint8_t* start,
328                          const uint8_t* end,
329                          const ProtoTranslationTable* table,
330                          protozero::Message* message,
331                          FtraceMetadata* metadata);
332 
333   // Parse a sys_enter event according to the pre-validated expected format
334   static bool ParseSysEnter(const Event& info,
335                             const uint8_t* start,
336                             const uint8_t* end,
337                             protozero::Message* message,
338                             FtraceMetadata* metadata);
339 
340   // Parse a sys_exit event according to the pre-validated expected format
341   static bool ParseSysExit(const Event& info,
342                            const uint8_t* start,
343                            const uint8_t* end,
344                            const FtraceDataSourceConfig* ds_config,
345                            protozero::Message* message,
346                            FtraceMetadata* metadata);
347 
348   // Parse a sched_switch event according to pre-validated format, and buffer
349   // the individual fields in the given compact encoding batch.
350   static void ParseSchedSwitchCompact(const uint8_t* start,
351                                       uint64_t timestamp,
352                                       const CompactSchedSwitchFormat* format,
353                                       CompactSchedBuffer* compact_buf,
354                                       FtraceMetadata* metadata);
355 
356   // Parse a sched_waking event according to pre-validated format, and buffer
357   // the individual fields in the given compact encoding batch.
358   static void ParseSchedWakingCompact(const uint8_t* start,
359                                       uint64_t timestamp,
360                                       const CompactSchedWakingFormat* format,
361                                       CompactSchedBuffer* compact_buf,
362                                       FtraceMetadata* metadata);
363 
364   // Parses & encodes the given range of contiguous tracing pages. Called by
365   // |ReadAndProcessBatch| for each active data source.
366   //
367   // Returns true if all pages were parsed correctly. In case of parsing
368   // errors, they will be recorded in the FtraceEventBundle proto.
369   //
370   // public and static for testing
371   static bool ProcessPagesForDataSource(
372       TraceWriter* trace_writer,
373       FtraceMetadata* metadata,
374       size_t cpu,
375       const FtraceDataSourceConfig* ds_config,
376       base::FlatSet<protos::pbzero::FtraceParseStatus>* parse_errors,
377       uint64_t* last_read_event_ts,
378       const uint8_t* parsing_buf,
379       size_t pages_read,
380       CompactSchedBuffer* compact_sched_buf,
381       const ProtoTranslationTable* table,
382       LazyKernelSymbolizer* symbolizer,
383       const FtraceClockSnapshot* ftrace_clock_snapshot,
384       protos::pbzero::FtraceClock ftrace_clock);
385 
386   // For FtraceController, which manages poll callbacks on per-cpu buffer fds.
RawBufferFd()387   int RawBufferFd() const { return trace_fd_.get(); }
388 
389  private:
390   // Reads at most |max_pages| of ftrace data, parses it, and writes it
391   // into |started_data_sources|. Returns number of pages read.
392   // See comment on ftrace_controller.cc:kMaxParsingWorkingSetPages for
393   // rationale behind the batching.
394   size_t ReadAndProcessBatch(
395       uint8_t* parsing_buf,
396       size_t max_pages,
397       bool first_batch_in_cycle,
398       CompactSchedBuffer* compact_sched_buf,
399       const std::set<FtraceDataSource*>& started_data_sources);
400 
401   size_t cpu_;
402   const ProtoTranslationTable* table_;
403   LazyKernelSymbolizer* symbolizer_;
404   base::ScopedFile trace_fd_;
405   uint64_t last_read_event_ts_ = 0;
406   protos::pbzero::FtraceClock ftrace_clock_{};
407   const FtraceClockSnapshot* ftrace_clock_snapshot_;
408 };
409 
410 }  // namespace perfetto
411 
412 #endif  // SRC_TRACED_PROBES_FTRACE_CPU_READER_H_
413