/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef SRC_TRACED_PROBES_FTRACE_CPU_READER_H_
#define SRC_TRACED_PROBES_FTRACE_CPU_READER_H_

#include <string.h>

#include <cstdint>
#include <memory>
#include <optional>
#include <set>

#include "perfetto/ext/base/paged_memory.h"
#include "perfetto/ext/base/scoped_file.h"
#include "perfetto/ext/base/utils.h"
#include "perfetto/ext/traced/data_source_types.h"
#include "perfetto/ext/tracing/core/trace_writer.h"
#include "perfetto/protozero/message.h"
#include "perfetto/protozero/message_handle.h"
#include "src/traced/probes/ftrace/compact_sched.h"
#include "src/traced/probes/ftrace/ftrace_metadata.h"

#include "protos/perfetto/trace/trace_packet.pbzero.h"

namespace perfetto {

class FtraceDataSource;
class LazyKernelSymbolizer;
class ProtoTranslationTable;
struct FtraceClockSnapshot;
struct FtraceDataSourceConfig;

namespace protos {
namespace pbzero {
class FtraceEventBundle;
enum FtraceClock : int32_t;
enum FtraceParseStatus : int32_t;
}  // namespace pbzero
}  // namespace protos

// Reads raw ftrace data for a cpu, parses it, and writes it into the perfetto
// tracing buffers.
class CpuReader {
 public:
  // Buffers used when parsing a chunk of ftrace data, allocated by
  // FtraceController and repeatedly reused by all CpuReaders:
  // * paged memory into which we read raw ftrace data.
  // * buffers to accumulate and emit scheduling data in a structure-of-arrays
  //   format (packed proto fields).
  class ParsingBuffers {
   public:
    void AllocateIfNeeded() {
      // PagedMemory stays valid as long as it was allocated once.
      if (!ftrace_data_.IsValid()) {
        ftrace_data_ = base::PagedMemory::Allocate(base::GetSysPageSize() *
                                                   kFtraceDataBufSizePages);
      }
      // The heap-allocated buffer, by contrast, is freed by Release() and
      // reallocated here.
      if (!compact_sched_) {
        compact_sched_ = std::make_unique<CompactSchedBuffer>();
      }
    }

    void Release() {
      if (ftrace_data_.IsValid()) {
        ftrace_data_.AdviseDontNeed(ftrace_data_.Get(), ftrace_data_.size());
      }
      compact_sched_.reset();
    }

   private:
    friend class CpuReader;
    // When reading and parsing data for a particular cpu, we do it in batches
    // of this many pages. In other words, we'll read up to
    // |kFtraceDataBufSizePages| into memory, parse them, and then repeat if we
    // still haven't caught up to the writer.
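    // Illustrative arithmetic (assuming the common 4 KiB page size, which is
    // not guaranteed on every platform): one batch covers up to
    // 32 * 4096 bytes = 128 KiB of raw ftrace data.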
    static constexpr size_t kFtraceDataBufSizePages = 32;

    uint8_t* ftrace_data_buf() const {
      return reinterpret_cast<uint8_t*>(ftrace_data_.Get());
    }
    size_t ftrace_data_buf_pages() const {
      PERFETTO_DCHECK(ftrace_data_.size() ==
                      base::GetSysPageSize() * kFtraceDataBufSizePages);
      return kFtraceDataBufSizePages;
    }
    CompactSchedBuffer* compact_sched_buf() const {
      return compact_sched_.get();
    }

    base::PagedMemory ftrace_data_;
    std::unique_ptr<CompactSchedBuffer> compact_sched_;
  };

  // Helper class to generate `TracePacket`s when needed. Public for testing.
  class Bundler {
   public:
    Bundler(TraceWriter* trace_writer,
            FtraceMetadata* metadata,
            LazyKernelSymbolizer* symbolizer,
            size_t cpu,
            const FtraceClockSnapshot* ftrace_clock_snapshot,
            protos::pbzero::FtraceClock ftrace_clock,
            CompactSchedBuffer* compact_sched_buf,
            bool compact_sched_enabled,
            uint64_t last_read_event_ts)
        : trace_writer_(trace_writer),
          metadata_(metadata),
          symbolizer_(symbolizer),
          cpu_(cpu),
          ftrace_clock_snapshot_(ftrace_clock_snapshot),
          ftrace_clock_(ftrace_clock),
          compact_sched_enabled_(compact_sched_enabled),
          compact_sched_buf_(compact_sched_buf),
          initial_last_read_event_ts_(last_read_event_ts) {
      if (compact_sched_enabled_)
        compact_sched_buf_->Reset();
    }

    ~Bundler() { FinalizeAndRunSymbolizer(); }

    protos::pbzero::FtraceEventBundle* GetOrCreateBundle() {
      if (!bundle_) {
        StartNewPacket(false, initial_last_read_event_ts_);
      }
      return bundle_;
    }

    // Forces the creation of a new TracePacket.
    void StartNewPacket(bool lost_events, uint64_t last_read_event_timestamp);

    // Called after the contents of a FtraceEventBundle have been written.
    void FinalizeAndRunSymbolizer();

    CompactSchedBuffer* compact_sched_buf() {
      // FinalizeAndRunSymbolizer will only process the compact_sched_buf_ if
      // there is an open bundle.
      GetOrCreateBundle();
      return compact_sched_buf_;
    }

   private:
    TraceWriter* const trace_writer_;         // Never nullptr.
    FtraceMetadata* const metadata_;          // Never nullptr.
    LazyKernelSymbolizer* const symbolizer_;  // Can be nullptr.
    const size_t cpu_;
    const FtraceClockSnapshot* const ftrace_clock_snapshot_;
    protos::pbzero::FtraceClock const ftrace_clock_;
    const bool compact_sched_enabled_;
    CompactSchedBuffer* const compact_sched_buf_;
    uint64_t initial_last_read_event_ts_;

    TraceWriter::TracePacketHandle packet_;
    protos::pbzero::FtraceEventBundle* bundle_ = nullptr;
  };
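
  // Illustrative Bundler lifetime (a hypothetical caller, simplified; the
  // variable names below are placeholders, not code from the parser): one
  // Bundler is created per parsed batch, events are written into the lazily
  // created bundle, and the destructor closes the packet.
  //
  //   {
  //     Bundler bundler(writer, &metadata, symbolizer, cpu, clock_snapshot,
  //                     clock, compact_buf, /*compact_sched_enabled=*/true,
  //                     /*last_read_event_ts=*/0);
  //     auto* bundle = bundler.GetOrCreateBundle();
  //     // ... fill |bundle| with parsed events ...
  //   }  // ~Bundler() finalizes the bundle and runs the symbolizer.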

  struct PageHeader {
    uint64_t timestamp;
    uint64_t size;
    bool lost_events;
  };

  CpuReader(size_t cpu,
            base::ScopedFile trace_fd,
            const ProtoTranslationTable* table,
            LazyKernelSymbolizer* symbolizer,
            protos::pbzero::FtraceClock ftrace_clock,
            const FtraceClockSnapshot* ftrace_clock_snapshot);
  ~CpuReader();

  // move-only
  CpuReader(const CpuReader&) = delete;
  CpuReader& operator=(const CpuReader&) = delete;
  CpuReader(CpuReader&&) = default;
  CpuReader& operator=(CpuReader&&) = default;

  // Reads and parses all ftrace data for this cpu (in batches), until we catch
  // up to the writer, or hit |max_pages|. Returns number of pages read.
  size_t ReadCycle(ParsingBuffers* parsing_bufs,
                   size_t max_pages,
                   const std::set<FtraceDataSource*>& started_data_sources);

  template <typename T>
  static bool ReadAndAdvance(const uint8_t** ptr, const uint8_t* end, T* out) {
    if (*ptr > end - sizeof(T))
      return false;
    memcpy(reinterpret_cast<void*>(out), reinterpret_cast<const void*>(*ptr),
           sizeof(T));
    *ptr += sizeof(T);
    return true;
  }
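
  // Illustrative use of ReadAndAdvance (a hypothetical caller; |record_start|,
  // |record_end|, |pid| and |flags| are placeholders, not code from the
  // parser): read two consecutive u32 fields, bailing out if the record is
  // truncated. On success |ptr| ends up past the bytes that were consumed.
  //
  //   const uint8_t* ptr = record_start;
  //   uint32_t pid = 0;
  //   uint32_t flags = 0;
  //   if (!ReadAndAdvance<uint32_t>(&ptr, record_end, &pid) ||
  //       !ReadAndAdvance<uint32_t>(&ptr, record_end, &flags)) {
  //     return false;  // Not enough bytes left in the record.
  //   }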

  // Caller must do the bounds check:
  // [start + offset, start + offset + sizeof(T))
  // Returns the raw value, not the varint.
  template <typename T>
  static T ReadIntoVarInt(const uint8_t* start,
                          uint32_t field_id,
                          protozero::Message* out) {
    T t;
    memcpy(&t, reinterpret_cast<const void*>(start), sizeof(T));
    out->AppendVarInt<T>(field_id, t);
    return t;
  }

  template <typename T>
  static void ReadInode(const uint8_t* start,
                        uint32_t field_id,
                        protozero::Message* out,
                        FtraceMetadata* metadata) {
    T t = ReadIntoVarInt<T>(start, field_id, out);
    metadata->AddInode(static_cast<Inode>(t));
  }

  template <typename T>
  static void ReadDevId(const uint8_t* start,
                        uint32_t field_id,
                        protozero::Message* out,
                        FtraceMetadata* metadata) {
    T t;
    memcpy(&t, reinterpret_cast<const void*>(start), sizeof(T));
    BlockDeviceID dev_id = TranslateBlockDeviceIDToUserspace<T>(t);
    out->AppendVarInt<BlockDeviceID>(field_id, dev_id);
    metadata->AddDevice(dev_id);
  }

  template <typename T>
  static void ReadSymbolAddr(const uint8_t* start,
                             uint32_t field_id,
                             protozero::Message* out,
                             FtraceMetadata* metadata) {
    // ReadSymbolAddr is a bit special. In order to not disclose the KASLR
    // layout via traces, we put in the trace only a mangled address (which
    // really is the insertion order into metadata.kernel_addrs). We don't care
    // about the actual symbol addresses; we just need to match them against
    // the symbol names in FtraceEventBundle.KernelSymbols.
    T full_addr;
    memcpy(&full_addr, reinterpret_cast<const void*>(start), sizeof(T));
    uint32_t interned_index = metadata->AddSymbolAddr(full_addr);
    out->AppendVarInt(field_id, interned_index);
  }

  static void ReadPid(const uint8_t* start,
                      uint32_t field_id,
                      protozero::Message* out,
                      FtraceMetadata* metadata) {
    int32_t pid = ReadIntoVarInt<int32_t>(start, field_id, out);
    metadata->AddPid(pid);
  }

  static void ReadCommonPid(const uint8_t* start,
                            uint32_t field_id,
                            protozero::Message* out,
                            FtraceMetadata* metadata) {
    int32_t pid = ReadIntoVarInt<int32_t>(start, field_id, out);
    metadata->AddCommonPid(pid);
  }

  // Internally the kernel stores device ids in a different layout from the one
  // exposed to userspace via stat() etc. There's no userspace function to
  // convert between the formats, so we have to do it ourselves.
  template <typename T>
  static BlockDeviceID TranslateBlockDeviceIDToUserspace(T kernel_dev) {
    // Takes the kernel-internal s_dev (see
    // https://github.com/torvalds/linux/blob/v4.12/include/linux/fs.h#L404)
    // and converts it to the userspace id using
    // https://github.com/torvalds/linux/blob/v4.12/include/linux/kdev_t.h#L10
    // TODO(azappone): see if this is the same on all platforms
    uint64_t maj = static_cast<uint64_t>(kernel_dev) >> 20;
    uint64_t min = static_cast<uint64_t>(kernel_dev) & ((1U << 20) - 1);
    return static_cast<BlockDeviceID>(  // From makedev()
        ((maj & 0xfffff000ULL) << 32) | ((maj & 0xfffULL) << 8) |
        ((min & 0xffffff00ULL) << 12) | ((min & 0xffULL)));
  }
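
  // Worked example for the translation above (illustrative numbers, assuming
  // the v4.12 encodings linked in the comment): the kernel encodes major 8,
  // minor 1 as (8 << 20) | 1 = 0x800001. Splitting that back out gives
  // maj = 8 and min = 1, and re-packing with the makedev() layout yields
  // (8 << 8) | 1 = 0x801, the value userspace sees in st_dev / st_rdev.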

  // Returns a parsed representation of the given raw ftrace page's header.
  static std::optional<CpuReader::PageHeader> ParsePageHeader(
      const uint8_t** ptr,
      uint16_t page_header_size_len);

  // Parses the payload of a raw ftrace page, and writes the events as protos
  // into the provided bundle (and/or compact buffer).
  // |table| contains the mix of compile time (e.g. proto field ids) and
  // run time (e.g. field offset and size) information necessary to do this.
  // The table is initialized once at start time by the ftrace controller,
  // which passes it to the CpuReader, which passes it here.
  // The caller is responsible for validating that page_header->size stays
  // within the current page.
  static protos::pbzero::FtraceParseStatus ParsePagePayload(
      const uint8_t* start_of_payload,
      const PageHeader* page_header,
      const ProtoTranslationTable* table,
      const FtraceDataSourceConfig* ds_config,
      Bundler* bundler,
      FtraceMetadata* metadata,
      uint64_t* last_read_event_ts);

  // Parses a single raw ftrace event beginning at |start| and ending at |end|,
  // and writes it into the provided bundle as a proto.
  // |table| contains the mix of compile time (e.g. proto field ids) and
  // run time (e.g. field offset and size) information necessary to do this.
  // The table is initialized once at start time by the ftrace controller,
  // which passes it to the CpuReader, which passes it to ParsePagePayload,
  // which passes it here.
  static bool ParseEvent(uint16_t ftrace_event_id,
                         const uint8_t* start,
                         const uint8_t* end,
                         const ProtoTranslationTable* table,
                         const FtraceDataSourceConfig* ds_config,
                         protozero::Message* message,
                         FtraceMetadata* metadata);

  static bool ParseField(const Field& field,
                         const uint8_t* start,
                         const uint8_t* end,
                         const ProtoTranslationTable* table,
                         protozero::Message* message,
                         FtraceMetadata* metadata);

  // Parses a sys_enter event according to the pre-validated expected format.
  static bool ParseSysEnter(const Event& info,
                            const uint8_t* start,
                            const uint8_t* end,
                            protozero::Message* message,
                            FtraceMetadata* metadata);

  // Parses a sys_exit event according to the pre-validated expected format.
  static bool ParseSysExit(const Event& info,
                           const uint8_t* start,
                           const uint8_t* end,
                           const FtraceDataSourceConfig* ds_config,
                           protozero::Message* message,
                           FtraceMetadata* metadata);

  // Parses a sched_switch event according to the pre-validated format, and
  // buffers the individual fields in the given compact encoding batch.
  static void ParseSchedSwitchCompact(const uint8_t* start,
                                      uint64_t timestamp,
                                      const CompactSchedSwitchFormat* format,
                                      CompactSchedBuffer* compact_buf,
                                      FtraceMetadata* metadata);

  // Parses a sched_waking event according to the pre-validated format, and
  // buffers the individual fields in the given compact encoding batch.
  static void ParseSchedWakingCompact(const uint8_t* start,
                                      uint64_t timestamp,
                                      const CompactSchedWakingFormat* format,
                                      CompactSchedBuffer* compact_buf,
                                      FtraceMetadata* metadata);

  // Parses & encodes the given range of contiguous tracing pages. Called by
  // |ReadAndProcessBatch| for each active data source.
  //
  // Returns true if all pages were parsed correctly. Parsing errors are
  // recorded in the FtraceEventBundle proto and in |parse_errors|.
  //
  // Public and static for testing.
  static bool ProcessPagesForDataSource(
      TraceWriter* trace_writer,
      FtraceMetadata* metadata,
      size_t cpu,
      const FtraceDataSourceConfig* ds_config,
      base::FlatSet<protos::pbzero::FtraceParseStatus>* parse_errors,
      uint64_t* last_read_event_ts,
      const uint8_t* parsing_buf,
      size_t pages_read,
      CompactSchedBuffer* compact_sched_buf,
      const ProtoTranslationTable* table,
      LazyKernelSymbolizer* symbolizer,
      const FtraceClockSnapshot* ftrace_clock_snapshot,
      protos::pbzero::FtraceClock ftrace_clock);

  // For FtraceController, which manages poll callbacks on the per-cpu buffer
  // fds.
  int RawBufferFd() const { return trace_fd_.get(); }

 private:
  // Reads at most |max_pages| of ftrace data, parses it, and writes it
  // into |started_data_sources|. Returns the number of pages read.
  // See the comment on ftrace_controller.cc:kMaxParsingWorkingSetPages for the
  // rationale behind the batching.
  size_t ReadAndProcessBatch(
      uint8_t* parsing_buf,
      size_t max_pages,
      bool first_batch_in_cycle,
      CompactSchedBuffer* compact_sched_buf,
      const std::set<FtraceDataSource*>& started_data_sources);

  size_t cpu_;
  const ProtoTranslationTable* table_;
  LazyKernelSymbolizer* symbolizer_;
  base::ScopedFile trace_fd_;
  uint64_t last_read_event_ts_ = 0;
  protos::pbzero::FtraceClock ftrace_clock_{};
  const FtraceClockSnapshot* ftrace_clock_snapshot_;
};

}  // namespace perfetto

#endif  // SRC_TRACED_PROBES_FTRACE_CPU_READER_H_