/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

syntax = "proto2";

import "protos/perfetto/common/perf_events.proto";
import "protos/perfetto/trace/profiling/profile_common.proto";

package perfetto.protos;

// This file contains a mixture of messages emitted by various sampling
// profilers:
//
// Memory allocator profiling
// ----------------
// ProfilePacket:
//   The packet emitted by heapprofd, which started off as a native heap
//   (malloc/free) profiler, but now supports custom allocators as well. Each
//   packet contains a preaggregated state of the heap at snapshot time, which
//   reports the total allocated/free bytes per callstack (plus other info such
//   as the number of samples).
// StreamingAllocation/StreamingFree:
//   Emitted by heapprofd when configured in streaming mode (i.e. when
//   stream_allocations = true). This is only for local testing, and doesn't
//   report callstacks (only address time and size of each alloc/free). It can
//   lead to enormous traces, as it contains the stream of each alloc/free call.
//
// Callstack sampling
// ------------------
// StreamingProfilePacket:
//   The packet emitted by the chromium in-process sampling profiler, which is
//   based on periodically sending a signal to itself, and unwinding the stack
//   in the signal handler. Each packet contains a series of individual stack
//   samples for a Chromium thread.
//
// Callstack and performance counter sampling
// ---------------------
// PerfSample:
//   The packet emitted by traced_perf sampling performance profiler based on
//   the perf_event_open syscall. Each packet represents an individual sample
//   of a performance counter (which might be a timer), and optionally a
//   callstack of the process that was scheduled at the time of the sample.
//

// The packet emitted by heapprofd for each heap snapshot. A snapshot can
// involve more than one ProfilePacket if the snapshot is big (when |continued|
// is true). The cardinality and grouping is as follows:
// A ProfilePacket contains:
//  - 1+ per-process heap snapshots (ProcessHeapSamples). Normally there is only
//    one heap per process (the main malloc/free heap), but there can be more if
//    the process is using the heapprofd API to profile custom allocators.
//  - Globally interned strings, mappings and frames (to allow de-duplicating
//    frames/mapping in common between different processes).
// A ProcessHeapSamples contains:
//  - The process and heap identifier.
//  - A number of HeapSample, one for each callsite that had some alloc/frees.
//  - Statistics about heapprofd internals (e.g., sampling/unwinding timings).
// A HeapSample contains statistics about callsites:
//  - Total number of bytes allocated and freed from that callsite.
//  - Total number of alloc/free calls sampled.
//  - Stats at the local maximum when dump_at_max = true.
// See https://perfetto.dev/docs/data-sources/native-heap-profiler for more.
message ProfilePacket {
  // The following interning tables are only used in Android version Q.
  // In newer versions, these tables are in InternedData
  // (see protos/perfetto/trace/interned_data) and are shared across
  // multiple ProfilePackets.
  // For backwards compatibility, consumers need to first look up interned
  // data in the tables within the ProfilePacket, and then, if they are empty,
  // look up in the InternedData instead.
  repeated InternedString strings = 1;
  repeated Mapping mappings = 4;
  repeated Frame frames = 2;
  repeated Callstack callstacks = 3;

  // Per-callsite allocation statistics.
  // Field 7 is reserved (deprecated self_idle); 9 is the highest number in use.
  // Next ID: 10
  message HeapSample {
    // Identifier of the callstack these statistics refer to.
    // NOTE(review): presumably resolved through the interned callstack
    // tables described at the top of ProfilePacket - confirm.
    optional uint64 callstack_id = 1;
    // bytes allocated at this callstack.
    optional uint64 self_allocated = 2;
    // bytes allocated at this callstack that have been freed.
    optional uint64 self_freed = 3;
    // deprecated self_idle.
    reserved 7;
    // Bytes allocated by this callstack but not freed at the time the malloc
    // heap usage of this process was maximal. This is only set if dump_at_max
    // is true in HeapprofdConfig. In that case, self_allocated, self_freed and
    // self_idle will not be set.
    optional uint64 self_max = 8;
    // Number of allocations that were sampled at this callstack but not freed
    // at the time the malloc heap usage of this process was maximal. This is
    // only set if dump_at_max is true in HeapprofdConfig. In that case,
    // self_allocated, self_freed and self_idle will not be set.
    optional uint64 self_max_count = 9;
    // timestamp [opt]
    optional uint64 timestamp = 4;
    // Number of allocations that were sampled at this callstack.
    optional uint64 alloc_count = 5;
    // Number of allocations that were sampled at this callstack that have been
    // freed.
    optional uint64 free_count = 6;
  }

  // A histogram expressed as a list of buckets.
  message Histogram {
    message Bucket {
      // This bucket counts values from the previous bucket's (or -infinity if
      // this is the first bucket) upper_limit (inclusive) to this upper_limit
      // (exclusive).
      optional uint64 upper_limit = 1;
      // This is the highest bucket. This is set instead of the upper_limit. Any
      // values larger or equal to the previous bucket's upper_limit are counted
      // in this bucket.
      optional bool max_bucket = 2;
      // Number of values that fall into this range.
      optional uint64 count = 3;
    }
    repeated Bucket buckets = 1;
  }

  // Statistics about heapprofd internals (e.g., sampling/unwinding timings)
  // for one profiled process.
  message ProcessStats {
    optional uint64 unwinding_errors = 1;
    optional uint64 heap_samples = 2;
    optional uint64 map_reparses = 3;
    optional Histogram unwinding_time_us = 4;
    optional uint64 total_unwinding_time_us = 5;
    optional uint64 client_spinlock_blocked_us = 6;
  }

  // One entry per (process, heap) snapshot carried by this packet.
  repeated ProcessHeapSamples process_dumps = 5;

  // Heap snapshot of a single process and heap: identifiers, status flags,
  // heapprofd statistics and the per-callsite samples.
  message ProcessHeapSamples {
    enum ClientError {
      CLIENT_ERROR_NONE = 0;
      CLIENT_ERROR_HIT_TIMEOUT = 1;
      CLIENT_ERROR_INVALID_STACK_BOUNDS = 2;
    }
    optional uint64 pid = 1;

    // This process was profiled from startup.
    // If false, this process was already running when profiling started.
    optional bool from_startup = 3;

    // This process was not profiled because a concurrent session was active.
    // If this is true, samples will be empty.
    optional bool rejected_concurrent = 4;

    // This process disconnected while it was profiled.
    // If false, the process outlived the profiling session.
    optional bool disconnected = 6;

    // If disconnected, this disconnect was caused by the client overrunning
    // the buffer.
    // Equivalent to client_error == CLIENT_ERROR_HIT_TIMEOUT
    // on new S builds.
    optional bool buffer_overran = 7;

    optional ClientError client_error = 14;

    // If disconnected, this disconnect was caused by the shared memory
    // buffer being corrupted. THIS IS ALWAYS A BUG IN HEAPPROFD OR CLIENT
    // MEMORY CORRUPTION.
    optional bool buffer_corrupted = 8;

    // If disconnected, this disconnect was caused by heapprofd exceeding
    // guardrails during this profiling session.
    optional bool hit_guardrail = 10;

    optional string heap_name = 11;
    optional uint64 sampling_interval_bytes = 12;
    optional uint64 orig_sampling_interval_bytes = 13;

    // Timestamp of the state of the target process that this dump represents.
    // This can be different to the timestamp of the TracePackets for various
    // reasons:
    // * If disconnected is set above, this is the timestamp of last state
    //   heapprofd had of the process before it disconnected.
    // * Otherwise, if the rate of events produced by the process is high,
    //   heapprofd might be behind.
    //
    // TODO(fmayer): This is MONOTONIC_COARSE. Refactor ClockSnapshot::Clock
    //               to have a type enum that we can reuse here.
    optional uint64 timestamp = 9;

    // Metadata about heapprofd.
    optional ProcessStats stats = 5;

    repeated HeapSample samples = 2;
  }

  // If this is true, the next ProfilePacket in this package_sequence_id is a
  // continuation of this one.
  // To get all samples for a process, accumulate its
  // ProcessHeapSamples.samples until you see continued=false.
  optional bool continued = 6;

  // Index of this ProfilePacket on its package_sequence_id. Can be used
  // to detect dropped data.
  // Verify these are consecutive.
  optional uint64 index = 7;
}

// Packet emitted by heapprofd when stream_allocations = true. Only for local
// testing. Doesn't report the callsite.
// NOTE(review): the repeated fields look like parallel arrays to be zipped by
// index (one entry per allocation) - confirm against the producer.
message StreamingAllocation {
  // TODO(fmayer): Add callstack.
  repeated uint64 address = 1;
  repeated uint64 size = 2;
  repeated uint64 sample_size = 3;
  repeated uint64 clock_monotonic_coarse_timestamp = 4;
  repeated uint32 heap_id = 5;
  repeated uint64 sequence_number = 6;
}

// Packet emitted by heapprofd when stream_allocations = true. Only for local
// testing. Doesn't report the callsite.
// NOTE(review): the repeated fields look like parallel arrays to be zipped by
// index (one entry per free) - confirm against the producer.
message StreamingFree {
  // TODO(fmayer): Add callstack.
  repeated uint64 address = 1;
  repeated uint32 heap_id = 2;
  repeated uint64 sequence_number = 3;
}

// Packet emitted by the chromium in-process signal-based callstack sampler.
// Represents a series of individual stack samples (sampled at discrete points
// in time), rather than aggregated over an interval.
message StreamingProfilePacket {
  // Index into InternedData.callstacks
  repeated uint64 callstack_iid = 1;
  // TODO(eseckler): ThreadDescriptor-based timestamps are deprecated. Replace
  // this with ClockSnapshot-based delta encoding instead.
  repeated int64 timestamp_delta_us = 2;
  optional int32 process_priority = 3;
}

// Namespace for the contained enums.
message Profiling {
  enum CpuMode {
    MODE_UNKNOWN = 0;
    MODE_KERNEL = 1;
    MODE_USER = 2;
    // The following values aren't expected, but included for completeness:
    MODE_HYPERVISOR = 3;
    MODE_GUEST_KERNEL = 4;
    MODE_GUEST_USER = 5;
  }

  // Enumeration of libunwindstack's error codes.
  // NB: the integral representations of the two enums are different.
  enum StackUnwindError {
    UNWIND_ERROR_UNKNOWN = 0;
    UNWIND_ERROR_NONE = 1;
    UNWIND_ERROR_MEMORY_INVALID = 2;
    UNWIND_ERROR_UNWIND_INFO = 3;
    UNWIND_ERROR_UNSUPPORTED = 4;
    UNWIND_ERROR_INVALID_MAP = 5;
    UNWIND_ERROR_MAX_FRAMES_EXCEEDED = 6;
    UNWIND_ERROR_REPEATED_FRAME = 7;
    UNWIND_ERROR_INVALID_ELF = 8;
    UNWIND_ERROR_SYSTEM_CALL = 9;
    UNWIND_ERROR_THREAD_TIMEOUT = 10;
    UNWIND_ERROR_THREAD_DOES_NOT_EXIST = 11;
    UNWIND_ERROR_BAD_ARCH = 12;
    UNWIND_ERROR_MAPS_PARSE = 13;
    UNWIND_ERROR_INVALID_PARAMETER = 14;
    UNWIND_ERROR_PTRACE_CALL = 15;
  }
}

// Packet emitted by the traced_perf sampling performance profiler, which
// gathers data via the perf_event_open syscall. Each packet contains an
// individual sample with a counter value, and optionally a
// callstack.
//
// Timestamps are within the root packet. The config can specify the clock, or
// the implementation will default to CLOCK_MONOTONIC_RAW. Within the Android R
// timeframe, the default was CLOCK_BOOTTIME.
//
// There are several distinct views of this message:
// * indication of kernel buffer data loss (kernel_records_lost set)
// * indication of skipped samples (sample_skipped_reason set)
// * notable event in the sampling implementation (producer_event set)
// * normal sample (timebase_count set, typically also callstack_iid)
message PerfSample {
  // NOTE(review): presumably the cpu, process and thread the sample was taken
  // on - confirm against the producer.
  optional uint32 cpu = 1;
  optional uint32 pid = 2;
  optional uint32 tid = 3;

  // Execution state that the process was sampled at.
  optional Profiling.CpuMode cpu_mode = 5;

  // Value of the timebase counter (since the event was configured, no deltas).
  optional uint64 timebase_count = 6;

  // Unwound callstack. Might be partial, in which case a synthetic "error"
  // frame is appended, and |unwind_error| is set accordingly.
  optional uint64 callstack_iid = 4;

  // If set, stack unwinding was incomplete due to an error.
  // Unset values should be treated as UNWIND_ERROR_NONE.
  oneof optional_unwind_error {
    Profiling.StackUnwindError unwind_error = 16;
  }

  // If set, indicates that this message is not a sample, but rather an
  // indication of data loss in the ring buffer allocated for |cpu|. Such data
  // loss occurs when the kernel has insufficient ring buffer capacity to write
  // a record (which gets discarded). A record in this context is an individual
  // ring buffer entry, and counts more than just sample records.
  //
  // The |timestamp| of the packet corresponds to the time that the producer
  // wrote the packet for trace-sorting purposes alone, and should not be
  // interpreted relative to the sample timestamps. This field is sufficient to
  // detect that *some* kernel data loss happened within the trace, but not the
  // specific time bounds of that loss (which would require tracking predecessor
  // & successor timestamps, which is not deemed necessary at the moment).
  optional uint64 kernel_records_lost = 17;

  // If set, indicates that the profiler encountered a sample that was relevant,
  // but was skipped.
  enum SampleSkipReason {
    PROFILER_SKIP_UNKNOWN = 0;
    PROFILER_SKIP_READ_STAGE = 1;
    PROFILER_SKIP_UNWIND_STAGE = 2;
    PROFILER_SKIP_UNWIND_ENQUEUE = 3;
  }
  oneof optional_sample_skipped_reason {
    SampleSkipReason sample_skipped_reason = 18;
  }

  // A notable event within the sampling implementation.
  message ProducerEvent {
    enum DataSourceStopReason {
      PROFILER_STOP_UNKNOWN = 0;
      PROFILER_STOP_GUARDRAIL = 1;
    }
    oneof optional_source_stop_reason {
      DataSourceStopReason source_stop_reason = 1;
    }
  }
  optional ProducerEvent producer_event = 19;
}

// Submessage for TracePacketDefaults.
message PerfSampleDefaults {
  // The sampling timebase. Might not be identical to the data source config if
  // the implementation decided to default/override some parameters.
  optional PerfEvents.Timebase timebase = 1;

  // If the config requested process sharding, report back the count and which
  // of those bins was selected. Never changes for the duration of a trace.
  optional uint32 process_shard_count = 2;
  optional uint32 chosen_process_shard = 3;
}