• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_TRACED_PROBES_FTRACE_CPU_READER_H_
18 #define SRC_TRACED_PROBES_FTRACE_CPU_READER_H_
19 
20 #include <stdint.h>
21 #include <string.h>
22 
23 #include <array>
24 #include <atomic>
25 #include <memory>
26 #include <optional>
27 #include <set>
28 #include <thread>
29 
30 #include "perfetto/ext/base/paged_memory.h"
31 #include "perfetto/ext/base/pipe.h"
32 #include "perfetto/ext/base/scoped_file.h"
33 #include "perfetto/ext/base/thread_checker.h"
34 #include "perfetto/ext/traced/data_source_types.h"
35 #include "perfetto/ext/tracing/core/trace_writer.h"
36 #include "perfetto/protozero/message.h"
37 #include "perfetto/protozero/message_handle.h"
38 #include "protos/perfetto/trace/interned_data/interned_data.pbzero.h"
39 #include "protos/perfetto/trace/profiling/profile_common.pbzero.h"
40 #include "protos/perfetto/trace/trace_packet.pbzero.h"
41 #include "src/kallsyms/kernel_symbol_map.h"
42 #include "src/kallsyms/lazy_kernel_symbolizer.h"
43 #include "src/traced/probes/ftrace/compact_sched.h"
44 #include "src/traced/probes/ftrace/ftrace_metadata.h"
45 #include "src/traced/probes/ftrace/proto_translation_table.h"
46 
47 namespace perfetto {
48 
49 class FtraceDataSource;
50 class LazyKernelSymbolizer;
51 class ProtoTranslationTable;
52 struct FtraceClockSnapshot;
53 struct FtraceDataSourceConfig;
54 
55 namespace protos {
56 namespace pbzero {
57 class FtraceEventBundle;
58 enum FtraceClock : int32_t;
59 }  // namespace pbzero
60 }  // namespace protos
61 
62 // Reads raw ftrace data for a cpu, parses it, and writes it into the perfetto
63 // tracing buffers.
64 class CpuReader {
65  public:
66   using FtraceEventBundle = protos::pbzero::FtraceEventBundle;
67 
68   // Helper class to generate `TracePacket`s when needed. Public for testing.
69   class Bundler {
70    public:
Bundler(TraceWriter * trace_writer,FtraceMetadata * metadata,LazyKernelSymbolizer * symbolizer,size_t cpu,const FtraceClockSnapshot * ftrace_clock_snapshot,protos::pbzero::FtraceClock ftrace_clock,bool compact_sched_enabled)71     Bundler(TraceWriter* trace_writer,
72             FtraceMetadata* metadata,
73             LazyKernelSymbolizer* symbolizer,
74             size_t cpu,
75             const FtraceClockSnapshot* ftrace_clock_snapshot,
76             protos::pbzero::FtraceClock ftrace_clock,
77             bool compact_sched_enabled)
78         : trace_writer_(trace_writer),
79           metadata_(metadata),
80           symbolizer_(symbolizer),
81           cpu_(cpu),
82           ftrace_clock_snapshot_(ftrace_clock_snapshot),
83           ftrace_clock_(ftrace_clock),
84           compact_sched_enabled_(compact_sched_enabled) {}
85 
~Bundler()86     ~Bundler() { FinalizeAndRunSymbolizer(); }
87 
GetOrCreateBundle()88     protos::pbzero::FtraceEventBundle* GetOrCreateBundle() {
89       if (!bundle_) {
90         StartNewPacket(false);
91       }
92       return bundle_;
93     }
94 
95     // Forces the creation of a new TracePacket.
96     void StartNewPacket(bool lost_events);
97 
98     // This function is called after the contents of a FtraceBundle are written.
99     void FinalizeAndRunSymbolizer();
100 
compact_sched_buffer()101     CompactSchedBuffer* compact_sched_buffer() {
102       // FinalizeAndRunSymbolizer will only process the compact_sched_buffer_ if
103       // there is an open bundle.
104       GetOrCreateBundle();
105       return &compact_sched_buffer_;
106     }
107 
108    private:
109     TraceWriter* const trace_writer_;         // Never nullptr.
110     FtraceMetadata* const metadata_;          // Never nullptr.
111     LazyKernelSymbolizer* const symbolizer_;  // Can be nullptr.
112     const size_t cpu_;
113     const FtraceClockSnapshot* const ftrace_clock_snapshot_;
114     protos::pbzero::FtraceClock const ftrace_clock_;
115     const bool compact_sched_enabled_;
116 
117     TraceWriter::TracePacketHandle packet_;
118     protos::pbzero::FtraceEventBundle* bundle_ = nullptr;
119     // Allocate the buffer for compact scheduler events (which will be unused if
120     // the compact option isn't enabled).
121     CompactSchedBuffer compact_sched_buffer_;
122   };
123 
// Parsed form of a raw ftrace page header.
// Every member has a default initializer so that a default-constructed
// PageHeader holds well-defined values instead of indeterminate ones. The
// struct remains an aggregate, so `PageHeader{ts, size, lost}` still works.
struct PageHeader {
  uint64_t timestamp = 0;
  uint64_t size = 0;
  bool lost_events = false;
};
129 
      // Constructs a reader for the given |cpu|. |trace_fd| is taken by value as
      // a base::ScopedFile. The table, symbolizer and clock snapshot arrive as
      // raw pointers.
      // NOTE(review): the raw pointers are presumably non-owning and expected to
      // outlive the reader - confirm against the definition.
130   CpuReader(size_t cpu,
131             const ProtoTranslationTable* table,
132             LazyKernelSymbolizer* symbolizer,
133             const FtraceClockSnapshot*,
134             base::ScopedFile trace_fd);
135   ~CpuReader();
136 
137   // Reads and parses all ftrace data for this cpu (in batches), until we catch
138   // up to the writer, or hit |max_pages|. Returns number of pages read.
      // NOTE(review): |parsing_buf| / |parsing_buf_size_pages| look like a
      // caller-provided scratch buffer whose size is given in pages - confirm.
139   size_t ReadCycle(uint8_t* parsing_buf,
140                    size_t parsing_buf_size_pages,
141                    size_t max_pages,
142                    const std::set<FtraceDataSource*>& started_data_sources);
143 
// Copies sizeof(T) bytes from |*ptr| into |*out| and advances |*ptr| past
// them. Returns false, leaving |*ptr| and |*out| untouched, when fewer than
// sizeof(T) bytes remain before |end| (or |*ptr| is already past |end|).
template <typename T>
static bool ReadAndAdvance(const uint8_t** ptr, const uint8_t* end, T* out) {
  // Compare the number of remaining bytes instead of computing
  // `end - sizeof(T)`: that expression forms a pointer before the start of
  // the buffer (undefined behaviour) whenever the buffer is shorter than
  // sizeof(T).
  if (*ptr > end || static_cast<size_t>(end - *ptr) < sizeof(T))
    return false;
  memcpy(out, *ptr, sizeof(T));
  *ptr += sizeof(T);
  return true;
}
153 
154   // Caller must do the bounds check:
155   // [start + offset, start + offset + sizeof(T))
156   // Returns the raw value not the varint.
157   template <typename T>
ReadIntoVarInt(const uint8_t * start,uint32_t field_id,protozero::Message * out)158   static T ReadIntoVarInt(const uint8_t* start,
159                           uint32_t field_id,
160                           protozero::Message* out) {
161     T t;
162     memcpy(&t, reinterpret_cast<const void*>(start), sizeof(T));
163     out->AppendVarInt<T>(field_id, t);
164     return t;
165   }
166 
167   template <typename T>
ReadInode(const uint8_t * start,uint32_t field_id,protozero::Message * out,FtraceMetadata * metadata)168   static void ReadInode(const uint8_t* start,
169                         uint32_t field_id,
170                         protozero::Message* out,
171                         FtraceMetadata* metadata) {
172     T t = ReadIntoVarInt<T>(start, field_id, out);
173     metadata->AddInode(static_cast<Inode>(t));
174   }
175 
176   template <typename T>
ReadDevId(const uint8_t * start,uint32_t field_id,protozero::Message * out,FtraceMetadata * metadata)177   static void ReadDevId(const uint8_t* start,
178                         uint32_t field_id,
179                         protozero::Message* out,
180                         FtraceMetadata* metadata) {
181     T t;
182     memcpy(&t, reinterpret_cast<const void*>(start), sizeof(T));
183     BlockDeviceID dev_id = TranslateBlockDeviceIDToUserspace<T>(t);
184     out->AppendVarInt<BlockDeviceID>(field_id, dev_id);
185     metadata->AddDevice(dev_id);
186   }
187 
188   template <typename T>
ReadSymbolAddr(const uint8_t * start,uint32_t field_id,protozero::Message * out,FtraceMetadata * metadata)189   static void ReadSymbolAddr(const uint8_t* start,
190                              uint32_t field_id,
191                              protozero::Message* out,
192                              FtraceMetadata* metadata) {
193     // ReadSymbolAddr is a bit special. In order to not disclose KASLR layout
194     // via traces, we put in the trace only a mangled address (which really is
195     // the insertion order into metadata.kernel_addrs). We don't care about the
196     // actual symbol addesses. We just need to match that against the symbol
197     // name in the names in the FtraceEventBundle.KernelSymbols.
198     T full_addr;
199     memcpy(&full_addr, reinterpret_cast<const void*>(start), sizeof(T));
200     uint32_t interned_index = metadata->AddSymbolAddr(full_addr);
201     out->AppendVarInt(field_id, interned_index);
202   }
203 
ReadPid(const uint8_t * start,uint32_t field_id,protozero::Message * out,FtraceMetadata * metadata)204   static void ReadPid(const uint8_t* start,
205                       uint32_t field_id,
206                       protozero::Message* out,
207                       FtraceMetadata* metadata) {
208     int32_t pid = ReadIntoVarInt<int32_t>(start, field_id, out);
209     metadata->AddPid(pid);
210   }
211 
ReadCommonPid(const uint8_t * start,uint32_t field_id,protozero::Message * out,FtraceMetadata * metadata)212   static void ReadCommonPid(const uint8_t* start,
213                             uint32_t field_id,
214                             protozero::Message* out,
215                             FtraceMetadata* metadata) {
216     int32_t pid = ReadIntoVarInt<int32_t>(start, field_id, out);
217     metadata->AddCommonPid(pid);
218   }
219 
220   // Internally the kernel stores device ids in a different layout to that
221   // exposed to userspace via stat etc. There's no userspace function to convert
222   // between the formats so we have to do it ourselves.
223   template <typename T>
TranslateBlockDeviceIDToUserspace(T kernel_dev)224   static BlockDeviceID TranslateBlockDeviceIDToUserspace(T kernel_dev) {
225     // Provided search index s_dev from
226     // https://github.com/torvalds/linux/blob/v4.12/include/linux/fs.h#L404
227     // Convert to user space id using
228     // https://github.com/torvalds/linux/blob/v4.12/include/linux/kdev_t.h#L10
229     // TODO(azappone): see if this is the same on all platforms
230     uint64_t maj = static_cast<uint64_t>(kernel_dev) >> 20;
231     uint64_t min = static_cast<uint64_t>(kernel_dev) & ((1U << 20) - 1);
232     return static_cast<BlockDeviceID>(  // From makedev()
233         ((maj & 0xfffff000ULL) << 32) | ((maj & 0xfffULL) << 8) |
234         ((min & 0xffffff00ULL) << 12) | ((min & 0xffULL)));
235   }
236 
237   // Returns a parsed representation of the given raw ftrace page's header.
      // NOTE(review): |ptr| is a pointer-to-pointer, so it is presumably
      // advanced past the header, and an empty optional presumably signals a
      // header that could not be parsed - confirm against the definition.
238   static std::optional<CpuReader::PageHeader> ParsePageHeader(
239       const uint8_t** ptr,
240       uint16_t page_header_size_len);
241 
242   // Parse the payload of a raw ftrace page, and write the events as protos
243   // into the provided bundle (and/or compact buffer).
244   // |table| contains the mix of compile time (e.g. proto field ids) and
245   // run time (e.g. field offset and size) information necessary to do this.
246   // The table is initialized once at start time by the ftrace controller
247   // which passes it to the CpuReader which passes it here.
248   // The caller is responsible for validating that the page_header->size stays
249   // within the current page.
      // NOTE(review): the meaning of the returned size_t is not stated in this
      // header - confirm against the definition.
250   static size_t ParsePagePayload(const uint8_t* start_of_payload,
251                                  const PageHeader* page_header,
252                                  const ProtoTranslationTable* table,
253                                  const FtraceDataSourceConfig* ds_config,
254                                  Bundler* bundler,
255                                  FtraceMetadata* metadata);
256 
257   // Parse a single raw ftrace event beginning at |start| and ending at |end|
258   // and write it into the provided bundle as a proto.
259   // |table| contains the mix of compile time (e.g. proto field ids) and
260   // run time (e.g. field offset and size) information necessary to do this.
261   // The table is initialized once at start time by the ftrace controller
262   // which passes it to the CpuReader which passes it to ParsePage which
263   // passes it here.
      // NOTE(review): the bool result presumably reports whether parsing
      // succeeded - confirm against the definition.
264   static bool ParseEvent(uint16_t ftrace_event_id,
265                          const uint8_t* start,
266                          const uint8_t* end,
267                          const ProtoTranslationTable* table,
268                          const FtraceDataSourceConfig* ds_config,
269                          protozero::Message* message,
270                          FtraceMetadata* metadata);
271 
      // NOTE(review): undocumented in the original. Judging by the signature
      // and the sibling ParseEvent, this handles the single |field| of the raw
      // event spanning |start|..|end| and writes it into |message| - confirm
      // against the definition.
272   static bool ParseField(const Field& field,
273                          const uint8_t* start,
274                          const uint8_t* end,
275                          const ProtoTranslationTable* table,
276                          protozero::Message* message,
277                          FtraceMetadata* metadata);
278 
279   // Parse a sys_enter event according to the pre-validated expected format
280   static bool ParseSysEnter(const Event& info,
281                             const uint8_t* start,
282                             const uint8_t* end,
283                             protozero::Message* message,
284                             FtraceMetadata* metadata);
285 
286   // Parse a sys_exit event according to the pre-validated expected format
      // Unlike ParseSysEnter, this overload also receives the data source
      // config (|ds_config|).
287   static bool ParseSysExit(const Event& info,
288                            const uint8_t* start,
289                            const uint8_t* end,
290                            const FtraceDataSourceConfig* ds_config,
291                            protozero::Message* message,
292                            FtraceMetadata* metadata);
293 
294   // Parse a sched_switch event according to pre-validated format, and buffer
295   // the individual fields in the given compact encoding batch.
      // Output goes into |compact_buf| instead of a protozero message, and the
      // function returns void (no error is reported to the caller).
296   static void ParseSchedSwitchCompact(const uint8_t* start,
297                                       uint64_t timestamp,
298                                       const CompactSchedSwitchFormat* format,
299                                       CompactSchedBuffer* compact_buf,
300                                       FtraceMetadata* metadata);
301 
302   // Parse a sched_waking event according to pre-validated format, and buffer
303   // the individual fields in the given compact encoding batch.
      // Same shape as ParseSchedSwitchCompact, but driven by a
      // CompactSchedWakingFormat.
304   static void ParseSchedWakingCompact(const uint8_t* start,
305                                       uint64_t timestamp,
306                                       const CompactSchedWakingFormat* format,
307                                       CompactSchedBuffer* compact_buf,
308                                       FtraceMetadata* metadata);
309 
310   // Parses & encodes the given range of contiguous tracing pages. Called by
311   // |ReadAndProcessBatch| for each active data source.
312   //
313   // Returns the number of correctly processed pages. If the return value is
314   // equal to |pages_read|, there was no error. Otherwise, the return value
315   // points to the first page that contains an error.
316   //
      // The two trailing unnamed parameters are the ftrace clock snapshot and
      // the ftrace clock, as their types indicate.
      //
317   // public and static for testing
318   static size_t ProcessPagesForDataSource(
319       TraceWriter* trace_writer,
320       FtraceMetadata* metadata,
321       size_t cpu,
322       const FtraceDataSourceConfig* ds_config,
323       const uint8_t* parsing_buf,
324       const size_t pages_read,
325       const ProtoTranslationTable* table,
326       LazyKernelSymbolizer* symbolizer,
327       const FtraceClockSnapshot*,
328       protos::pbzero::FtraceClock);
329 
set_ftrace_clock(protos::pbzero::FtraceClock clock)330   void set_ftrace_clock(protos::pbzero::FtraceClock clock) {
331     ftrace_clock_ = clock;
332   }
333 
334  private:
      // Copy construction and copy assignment are disabled.
335   CpuReader(const CpuReader&) = delete;
336   CpuReader& operator=(const CpuReader&) = delete;
337 
338   // Reads at most |max_pages| of ftrace data, parses it, and writes it
339   // into |started_data_sources|. Returns number of pages read.
340   // See comment on ftrace_controller.cc:kMaxParsingWorkingSetPages for
341   // rationale behind the batching.
342   size_t ReadAndProcessBatch(
343       uint8_t* parsing_buf,
344       size_t max_pages,
345       bool first_batch_in_cycle,
346       const std::set<FtraceDataSource*>& started_data_sources);
347 
      // |cpu_| and the three pointers below are const, so they cannot be
      // re-seated after construction.
      // NOTE(review): the raw pointers are presumably non-owning - confirm.
348   const size_t cpu_;
349   const ProtoTranslationTable* const table_;
350   LazyKernelSymbolizer* const symbolizer_;
351   const FtraceClockSnapshot* const ftrace_clock_snapshot_;
      // File handle held as a base::ScopedFile member.
352   base::ScopedFile trace_fd_;
      // Value-initialized here; overwritten through set_ftrace_clock().
353   protos::pbzero::FtraceClock ftrace_clock_{};
354 };
355 
356 }  // namespace perfetto
357 
358 #endif  // SRC_TRACED_PROBES_FTRACE_CPU_READER_H_
359