1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef SRC_TRACED_PROBES_FTRACE_CPU_READER_H_ 18 #define SRC_TRACED_PROBES_FTRACE_CPU_READER_H_ 19 20 #include <stdint.h> 21 #include <string.h> 22 23 #include <array> 24 #include <atomic> 25 #include <memory> 26 #include <optional> 27 #include <set> 28 #include <thread> 29 30 #include "perfetto/ext/base/paged_memory.h" 31 #include "perfetto/ext/base/pipe.h" 32 #include "perfetto/ext/base/scoped_file.h" 33 #include "perfetto/ext/base/thread_checker.h" 34 #include "perfetto/ext/traced/data_source_types.h" 35 #include "perfetto/ext/tracing/core/trace_writer.h" 36 #include "perfetto/protozero/message.h" 37 #include "perfetto/protozero/message_handle.h" 38 #include "protos/perfetto/trace/interned_data/interned_data.pbzero.h" 39 #include "protos/perfetto/trace/profiling/profile_common.pbzero.h" 40 #include "protos/perfetto/trace/trace_packet.pbzero.h" 41 #include "src/kallsyms/kernel_symbol_map.h" 42 #include "src/kallsyms/lazy_kernel_symbolizer.h" 43 #include "src/traced/probes/ftrace/compact_sched.h" 44 #include "src/traced/probes/ftrace/ftrace_metadata.h" 45 #include "src/traced/probes/ftrace/proto_translation_table.h" 46 47 namespace perfetto { 48 49 class FtraceDataSource; 50 class LazyKernelSymbolizer; 51 class ProtoTranslationTable; 52 struct FtraceClockSnapshot; 53 struct FtraceDataSourceConfig; 54 55 namespace protos { 56 namespace pbzero { 57 class FtraceEventBundle; 58 enum FtraceClock : int32_t; 59 } // namespace pbzero 60 } // namespace protos 61 62 // Reads raw ftrace data for a cpu, parses it, and writes it into the perfetto 63 // tracing buffers. 64 class CpuReader { 65 public: 66 using FtraceEventBundle = protos::pbzero::FtraceEventBundle; 67 68 // Helper class to generate `TracePacket`s when needed. Public for testing. 69 class Bundler { 70 public: Bundler(TraceWriter * trace_writer,FtraceMetadata * metadata,LazyKernelSymbolizer * symbolizer,size_t cpu,const FtraceClockSnapshot * ftrace_clock_snapshot,protos::pbzero::FtraceClock ftrace_clock,bool compact_sched_enabled)71 Bundler(TraceWriter* trace_writer, 72 FtraceMetadata* metadata, 73 LazyKernelSymbolizer* symbolizer, 74 size_t cpu, 75 const FtraceClockSnapshot* ftrace_clock_snapshot, 76 protos::pbzero::FtraceClock ftrace_clock, 77 bool compact_sched_enabled) 78 : trace_writer_(trace_writer), 79 metadata_(metadata), 80 symbolizer_(symbolizer), 81 cpu_(cpu), 82 ftrace_clock_snapshot_(ftrace_clock_snapshot), 83 ftrace_clock_(ftrace_clock), 84 compact_sched_enabled_(compact_sched_enabled) {} 85 ~Bundler()86 ~Bundler() { FinalizeAndRunSymbolizer(); } 87 GetOrCreateBundle()88 protos::pbzero::FtraceEventBundle* GetOrCreateBundle() { 89 if (!bundle_) { 90 StartNewPacket(false); 91 } 92 return bundle_; 93 } 94 95 // Forces the creation of a new TracePacket. 96 void StartNewPacket(bool lost_events); 97 98 // This function is called after the contents of a FtraceBundle are written. 99 void FinalizeAndRunSymbolizer(); 100 compact_sched_buffer()101 CompactSchedBuffer* compact_sched_buffer() { 102 // FinalizeAndRunSymbolizer will only process the compact_sched_buffer_ if 103 // there is an open bundle. 104 GetOrCreateBundle(); 105 return &compact_sched_buffer_; 106 } 107 108 private: 109 TraceWriter* const trace_writer_; // Never nullptr. 110 FtraceMetadata* const metadata_; // Never nullptr. 111 LazyKernelSymbolizer* const symbolizer_; // Can be nullptr. 112 const size_t cpu_; 113 const FtraceClockSnapshot* const ftrace_clock_snapshot_; 114 protos::pbzero::FtraceClock const ftrace_clock_; 115 const bool compact_sched_enabled_; 116 117 TraceWriter::TracePacketHandle packet_; 118 protos::pbzero::FtraceEventBundle* bundle_ = nullptr; 119 // Allocate the buffer for compact scheduler events (which will be unused if 120 // the compact option isn't enabled). 121 CompactSchedBuffer compact_sched_buffer_; 122 }; 123 124 struct PageHeader { 125 uint64_t timestamp; 126 uint64_t size; 127 bool lost_events; 128 }; 129 130 CpuReader(size_t cpu, 131 const ProtoTranslationTable* table, 132 LazyKernelSymbolizer* symbolizer, 133 const FtraceClockSnapshot*, 134 base::ScopedFile trace_fd); 135 ~CpuReader(); 136 137 // Reads and parses all ftrace data for this cpu (in batches), until we catch 138 // up to the writer, or hit |max_pages|. Returns number of pages read. 139 size_t ReadCycle(uint8_t* parsing_buf, 140 size_t parsing_buf_size_pages, 141 size_t max_pages, 142 const std::set<FtraceDataSource*>& started_data_sources); 143 144 template <typename T> ReadAndAdvance(const uint8_t ** ptr,const uint8_t * end,T * out)145 static bool ReadAndAdvance(const uint8_t** ptr, const uint8_t* end, T* out) { 146 if (*ptr > end - sizeof(T)) 147 return false; 148 memcpy(reinterpret_cast<void*>(out), reinterpret_cast<const void*>(*ptr), 149 sizeof(T)); 150 *ptr += sizeof(T); 151 return true; 152 } 153 154 // Caller must do the bounds check: 155 // [start + offset, start + offset + sizeof(T)) 156 // Returns the raw value not the varint. 157 template <typename T> ReadIntoVarInt(const uint8_t * start,uint32_t field_id,protozero::Message * out)158 static T ReadIntoVarInt(const uint8_t* start, 159 uint32_t field_id, 160 protozero::Message* out) { 161 T t; 162 memcpy(&t, reinterpret_cast<const void*>(start), sizeof(T)); 163 out->AppendVarInt<T>(field_id, t); 164 return t; 165 } 166 167 template <typename T> ReadInode(const uint8_t * start,uint32_t field_id,protozero::Message * out,FtraceMetadata * metadata)168 static void ReadInode(const uint8_t* start, 169 uint32_t field_id, 170 protozero::Message* out, 171 FtraceMetadata* metadata) { 172 T t = ReadIntoVarInt<T>(start, field_id, out); 173 metadata->AddInode(static_cast<Inode>(t)); 174 } 175 176 template <typename T> ReadDevId(const uint8_t * start,uint32_t field_id,protozero::Message * out,FtraceMetadata * metadata)177 static void ReadDevId(const uint8_t* start, 178 uint32_t field_id, 179 protozero::Message* out, 180 FtraceMetadata* metadata) { 181 T t; 182 memcpy(&t, reinterpret_cast<const void*>(start), sizeof(T)); 183 BlockDeviceID dev_id = TranslateBlockDeviceIDToUserspace<T>(t); 184 out->AppendVarInt<BlockDeviceID>(field_id, dev_id); 185 metadata->AddDevice(dev_id); 186 } 187 188 template <typename T> ReadSymbolAddr(const uint8_t * start,uint32_t field_id,protozero::Message * out,FtraceMetadata * metadata)189 static void ReadSymbolAddr(const uint8_t* start, 190 uint32_t field_id, 191 protozero::Message* out, 192 FtraceMetadata* metadata) { 193 // ReadSymbolAddr is a bit special. In order to not disclose KASLR layout 194 // via traces, we put in the trace only a mangled address (which really is 195 // the insertion order into metadata.kernel_addrs). We don't care about the 196 // actual symbol addesses. We just need to match that against the symbol 197 // name in the names in the FtraceEventBundle.KernelSymbols. 198 T full_addr; 199 memcpy(&full_addr, reinterpret_cast<const void*>(start), sizeof(T)); 200 uint32_t interned_index = metadata->AddSymbolAddr(full_addr); 201 out->AppendVarInt(field_id, interned_index); 202 } 203 ReadPid(const uint8_t * start,uint32_t field_id,protozero::Message * out,FtraceMetadata * metadata)204 static void ReadPid(const uint8_t* start, 205 uint32_t field_id, 206 protozero::Message* out, 207 FtraceMetadata* metadata) { 208 int32_t pid = ReadIntoVarInt<int32_t>(start, field_id, out); 209 metadata->AddPid(pid); 210 } 211 ReadCommonPid(const uint8_t * start,uint32_t field_id,protozero::Message * out,FtraceMetadata * metadata)212 static void ReadCommonPid(const uint8_t* start, 213 uint32_t field_id, 214 protozero::Message* out, 215 FtraceMetadata* metadata) { 216 int32_t pid = ReadIntoVarInt<int32_t>(start, field_id, out); 217 metadata->AddCommonPid(pid); 218 } 219 220 // Internally the kernel stores device ids in a different layout to that 221 // exposed to userspace via stat etc. There's no userspace function to convert 222 // between the formats so we have to do it ourselves. 223 template <typename T> TranslateBlockDeviceIDToUserspace(T kernel_dev)224 static BlockDeviceID TranslateBlockDeviceIDToUserspace(T kernel_dev) { 225 // Provided search index s_dev from 226 // https://github.com/torvalds/linux/blob/v4.12/include/linux/fs.h#L404 227 // Convert to user space id using 228 // https://github.com/torvalds/linux/blob/v4.12/include/linux/kdev_t.h#L10 229 // TODO(azappone): see if this is the same on all platforms 230 uint64_t maj = static_cast<uint64_t>(kernel_dev) >> 20; 231 uint64_t min = static_cast<uint64_t>(kernel_dev) & ((1U << 20) - 1); 232 return static_cast<BlockDeviceID>( // From makedev() 233 ((maj & 0xfffff000ULL) << 32) | ((maj & 0xfffULL) << 8) | 234 ((min & 0xffffff00ULL) << 12) | ((min & 0xffULL))); 235 } 236 237 // Returns a parsed representation of the given raw ftrace page's header. 238 static std::optional<CpuReader::PageHeader> ParsePageHeader( 239 const uint8_t** ptr, 240 uint16_t page_header_size_len); 241 242 // Parse the payload of a raw ftrace page, and write the events as protos 243 // into the provided bundle (and/or compact buffer). 244 // |table| contains the mix of compile time (e.g. proto field ids) and 245 // run time (e.g. field offset and size) information necessary to do this. 246 // The table is initialized once at start time by the ftrace controller 247 // which passes it to the CpuReader which passes it here. 248 // The caller is responsible for validating that the page_header->size stays 249 // within the current page. 250 static size_t ParsePagePayload(const uint8_t* start_of_payload, 251 const PageHeader* page_header, 252 const ProtoTranslationTable* table, 253 const FtraceDataSourceConfig* ds_config, 254 Bundler* bundler, 255 FtraceMetadata* metadata); 256 257 // Parse a single raw ftrace event beginning at |start| and ending at |end| 258 // and write it into the provided bundle as a proto. 259 // |table| contains the mix of compile time (e.g. proto field ids) and 260 // run time (e.g. field offset and size) information necessary to do this. 261 // The table is initialized once at start time by the ftrace controller 262 // which passes it to the CpuReader which passes it to ParsePage which 263 // passes it here. 264 static bool ParseEvent(uint16_t ftrace_event_id, 265 const uint8_t* start, 266 const uint8_t* end, 267 const ProtoTranslationTable* table, 268 const FtraceDataSourceConfig* ds_config, 269 protozero::Message* message, 270 FtraceMetadata* metadata); 271 272 static bool ParseField(const Field& field, 273 const uint8_t* start, 274 const uint8_t* end, 275 const ProtoTranslationTable* table, 276 protozero::Message* message, 277 FtraceMetadata* metadata); 278 279 // Parse a sys_enter event according to the pre-validated expected format 280 static bool ParseSysEnter(const Event& info, 281 const uint8_t* start, 282 const uint8_t* end, 283 protozero::Message* message, 284 FtraceMetadata* metadata); 285 286 // Parse a sys_exit event according to the pre-validated expected format 287 static bool ParseSysExit(const Event& info, 288 const uint8_t* start, 289 const uint8_t* end, 290 const FtraceDataSourceConfig* ds_config, 291 protozero::Message* message, 292 FtraceMetadata* metadata); 293 294 // Parse a sched_switch event according to pre-validated format, and buffer 295 // the individual fields in the given compact encoding batch. 296 static void ParseSchedSwitchCompact(const uint8_t* start, 297 uint64_t timestamp, 298 const CompactSchedSwitchFormat* format, 299 CompactSchedBuffer* compact_buf, 300 FtraceMetadata* metadata); 301 302 // Parse a sched_waking event according to pre-validated format, and buffer 303 // the individual fields in the given compact encoding batch. 304 static void ParseSchedWakingCompact(const uint8_t* start, 305 uint64_t timestamp, 306 const CompactSchedWakingFormat* format, 307 CompactSchedBuffer* compact_buf, 308 FtraceMetadata* metadata); 309 310 // Parses & encodes the given range of contiguous tracing pages. Called by 311 // |ReadAndProcessBatch| for each active data source. 312 // 313 // Returns the number of correctly processed pages. If the return value is 314 // equal to |pages_read|, there was no error. Otherwise, the return value 315 // points to the first page that contains an error. 316 // 317 // public and static for testing 318 static size_t ProcessPagesForDataSource( 319 TraceWriter* trace_writer, 320 FtraceMetadata* metadata, 321 size_t cpu, 322 const FtraceDataSourceConfig* ds_config, 323 const uint8_t* parsing_buf, 324 const size_t pages_read, 325 const ProtoTranslationTable* table, 326 LazyKernelSymbolizer* symbolizer, 327 const FtraceClockSnapshot*, 328 protos::pbzero::FtraceClock); 329 set_ftrace_clock(protos::pbzero::FtraceClock clock)330 void set_ftrace_clock(protos::pbzero::FtraceClock clock) { 331 ftrace_clock_ = clock; 332 } 333 334 private: 335 CpuReader(const CpuReader&) = delete; 336 CpuReader& operator=(const CpuReader&) = delete; 337 338 // Reads at most |max_pages| of ftrace data, parses it, and writes it 339 // into |started_data_sources|. Returns number of pages read. 340 // See comment on ftrace_controller.cc:kMaxParsingWorkingSetPages for 341 // rationale behind the batching. 342 size_t ReadAndProcessBatch( 343 uint8_t* parsing_buf, 344 size_t max_pages, 345 bool first_batch_in_cycle, 346 const std::set<FtraceDataSource*>& started_data_sources); 347 348 const size_t cpu_; 349 const ProtoTranslationTable* const table_; 350 LazyKernelSymbolizer* const symbolizer_; 351 const FtraceClockSnapshot* const ftrace_clock_snapshot_; 352 base::ScopedFile trace_fd_; 353 protos::pbzero::FtraceClock ftrace_clock_{}; 354 }; 355 356 } // namespace perfetto 357 358 #endif // SRC_TRACED_PROBES_FTRACE_CPU_READER_H_ 359