• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1/*
2 * Copyright (C) 2018 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17syntax = "proto2";
18
19import "protos/perfetto/common/perf_events.proto";
20import "protos/perfetto/trace/profiling/profile_common.proto";
21
22package perfetto.protos;
23
// This file contains a mixture of messages emitted by various sampling
// profilers:
//
// Memory allocator profiling
// --------------------------
// ProfilePacket:
//   The packet emitted by heapprofd, which started off as a native heap
//   (malloc/free) profiler, but now supports custom allocators as well. Each
//   packet contains a preaggregated state of the heap at snapshot time, which
//   reports the total allocated/freed bytes per callstack (plus other info
//   such as the number of samples).
// StreamingAllocation/StreamingFree:
//   Emitted by heapprofd when configured in streaming mode (i.e. when
//   stream_allocations = true). This is only for local testing, and doesn't
//   report callstacks (only the address, time and size of each alloc/free). It
//   can lead to enormous traces, as it contains the stream of each alloc/free
//   call.
//
// Callstack sampling
// ------------------
// StreamingProfilePacket:
//   The packet emitted by the chromium in-process sampling profiler, which is
//   based on periodically sending a signal to itself, and unwinding the stack
//   in the signal handler. Each packet contains a series of individual stack
//   samples for a Chromium thread.
//
// Callstack and performance counter sampling
// ------------------------------------------
// PerfSample:
//   The packet emitted by traced_perf sampling performance profiler based on
//   the perf_event_open syscall. Each packet represents an individual sample
//   of a performance counter (which might be a timer), and optionally a
//   callstack of the process that was scheduled at the time of the sample.
//
57
// The packet emitted by heapprofd for each heap snapshot. A snapshot can
// involve more than one ProfilePacket if the snapshot is big (when |continued|
// is true). The cardinality and grouping is as follows:
// A ProfilePacket contains:
//  - 1+ per-process heap snapshots (ProcessHeapSamples). Normally there is only
//    one heap per process (the main malloc/free heap), but there can be more if
//    the process is using the heapprofd API to profile custom allocators.
//  - Globally interned strings, mappings and frames (to allow de-duplicating
//    frames/mapping in common between different processes).
// A ProcessHeapSamples contains:
//  - The process and heap identifier.
//  - A number of HeapSample, one for each callsite that had some alloc/frees.
//  - Statistics about heapprofd internals (e.g., sampling/unwinding timings).
// A HeapSample contains statistics about callsites:
//  - Total number of bytes allocated and freed from that callsite.
//  - Total number of alloc/free calls sampled.
//  - Stats at the local maximum when dump_at_max = true.
// See https://perfetto.dev/docs/data-sources/native-heap-profiler for more.
message ProfilePacket {
  // The following interning tables are only used in Android version Q.
  // In newer versions, these tables are in InternedData
  // (see protos/perfetto/trace/interned_data) and are shared across
  // multiple ProfilePackets.
  // For backwards compatibility, consumers need to first look up interned
  // data in the tables within the ProfilePacket, and then, if they are empty,
  // look up in the InternedData instead.
  repeated InternedString strings = 1;
  repeated Mapping mappings = 4;
  repeated Frame frames = 2;
  repeated Callstack callstacks = 3;

  // Per-callsite allocation statistics.
  //
  // Field 7 is reserved and 9 is in use, so the next free field number is 10.
  // Next ID: 10
  message HeapSample {
    // Identifier of the callstack these statistics belong to.
    // NOTE(review): presumably an id into the Callstack interning table
    // (|callstacks| above or InternedData) -- confirm against the writer.
    optional uint64 callstack_id = 1;
    // Bytes allocated at this callstack.
    optional uint64 self_allocated = 2;
    // Bytes allocated at this callstack that have been freed.
    optional uint64 self_freed = 3;
    // Field 7 used to be self_idle, which is deprecated and has been removed.
    // The number stays reserved so that it is never reused.
    reserved 7;
    // Bytes allocated by this callstack but not freed at the time the malloc
    // heap usage of this process was maximal. This is only set if dump_at_max
    // is true in HeapprofdConfig. In that case, self_allocated and self_freed
    // (as well as the removed self_idle) will not be set.
    optional uint64 self_max = 8;
    // Number of allocations that were sampled at this callstack but not freed
    // at the time the malloc heap usage of this process was maximal. This is
    // only set if dump_at_max is true in HeapprofdConfig. In that case,
    // self_allocated and self_freed (as well as the removed self_idle) will
    // not be set.
    optional uint64 self_max_count = 9;
    // Timestamp [opt].
    // NOTE(review): clock domain and unit are not stated here -- confirm.
    optional uint64 timestamp = 4;
    // Number of allocations that were sampled at this callstack.
    optional uint64 alloc_count = 5;
    // Number of allocations that were sampled at this callstack that have been
    // freed.
    optional uint64 free_count = 6;
  }

  // A histogram expressed as an ordered list of buckets.
  message Histogram {
    message Bucket {
      // This bucket counts values from the previous bucket's (or -infinity if
      // this is the first bucket) upper_limit (inclusive) to this upper_limit
      // (exclusive).
      optional uint64 upper_limit = 1;
      // This is the highest bucket. This is set instead of the upper_limit. Any
      // values larger or equal to the previous bucket's upper_limit are counted
      // in this bucket.
      optional bool max_bucket = 2;
      // Number of values that fall into this range.
      optional uint64 count = 3;
    }
    // The buckets of the histogram. Each bucket's range is defined relative to
    // the bucket preceding it (see Bucket.upper_limit).
    repeated Bucket buckets = 1;
  }

  // Statistics about heapprofd internals for one profiled process (see
  // ProcessHeapSamples.stats).
  // NOTE(review): the individual counters are not documented in this file; the
  // "_us" suffixes suggest microseconds -- confirm against heapprofd.
  message ProcessStats {
    optional uint64 unwinding_errors = 1;
    optional uint64 heap_samples = 2;
    optional uint64 map_reparses = 3;
    optional Histogram unwinding_time_us = 4;
    optional uint64 total_unwinding_time_us = 5;
    optional uint64 client_spinlock_blocked_us = 6;
  }

  // The per-process heap snapshots carried by this packet.
  repeated ProcessHeapSamples process_dumps = 5;

  // The snapshot of one heap of one process.
  message ProcessHeapSamples {
    // Error reported for the profiled client, see |client_error|.
    enum ClientError {
      CLIENT_ERROR_NONE = 0;
      CLIENT_ERROR_HIT_TIMEOUT = 1;
      CLIENT_ERROR_INVALID_STACK_BOUNDS = 2;
    }

    // The process identifier.
    optional uint64 pid = 1;

    // This process was profiled from startup.
    // If false, this process was already running when profiling started.
    optional bool from_startup = 3;

    // This process was not profiled because a concurrent session was active.
    // If this is true, samples will be empty.
    optional bool rejected_concurrent = 4;

    // This process disconnected while it was profiled.
    // If false, the process outlived the profiling session.
    optional bool disconnected = 6;

    // If disconnected, this disconnect was caused by the client overrunning
    // the buffer.
    // Equivalent to client_error == CLIENT_ERROR_HIT_TIMEOUT
    // on new S builds.
    optional bool buffer_overran = 7;

    // See ClientError. CLIENT_ERROR_HIT_TIMEOUT corresponds to
    // |buffer_overran| above.
    optional ClientError client_error = 14;

    // If disconnected, this disconnect was caused by the shared memory
    // buffer being corrupted. THIS IS ALWAYS A BUG IN HEAPPROFD OR CLIENT
    // MEMORY CORRUPTION.
    optional bool buffer_corrupted = 8;

    // If disconnected, this disconnect was caused by heapprofd exceeding
    // guardrails during this profiling session.
    optional bool hit_guardrail = 10;

    // The heap identifier. A process has more than one heap only if it uses
    // the heapprofd API to profile custom allocators.
    optional string heap_name = 11;
    // NOTE(review): the relationship between these two intervals is not
    // documented in this file -- presumably the effective and the originally
    // requested sampling interval; confirm against heapprofd.
    optional uint64 sampling_interval_bytes = 12;
    optional uint64 orig_sampling_interval_bytes = 13;

    // Timestamp of the state of the target process that this dump represents.
    // This can be different to the timestamp of the TracePackets for various
    // reasons:
    // * If disconnected is set above, this is the timestamp of last state
    //   heapprofd had of the process before it disconnected.
    // * Otherwise, if the rate of events produced by the process is high,
    //   heapprofd might be behind.
    //
    // TODO(fmayer): This is MONOTONIC_COARSE. Refactor ClockSnapshot::Clock
    //               to have a type enum that we can reuse here.
    optional uint64 timestamp = 9;

    // Metadata about heapprofd.
    optional ProcessStats stats = 5;

    // One entry for each callsite that had some alloc/frees.
    repeated HeapSample samples = 2;
  }

  // If this is true, the next ProfilePacket in this package_sequence_id is a
  // continuation of this one.
  // To get all samples for a process, accumulate its
  // ProcessHeapSamples.samples until you see continued=false.
  optional bool continued = 6;

  // Index of this ProfilePacket on its package_sequence_id. Can be used
  // to detect dropped data.
  // Verify these are consecutive.
  optional uint64 index = 7;
}
213
// Packet emitted by heapprofd when stream_allocations = true. Only for local
// testing. Doesn't report the callsite.
//
// NOTE(review): all fields are repeated; presumably they are parallel arrays
// where index i of every field describes the same allocation -- confirm
// against the heapprofd writer.
message StreamingAllocation {
  // TODO(fmayer): Add callstack.
  repeated uint64 address = 1;
  repeated uint64 size = 2;
  repeated uint64 sample_size = 3;
  repeated uint64 clock_monotonic_coarse_timestamp = 4;
  repeated uint32 heap_id = 5;
  repeated uint64 sequence_number = 6;
}
225
// Packet emitted by heapprofd when stream_allocations = true. Only for local
// testing. Doesn't report the callsite.
//
// NOTE(review): all fields are repeated; presumably they are parallel arrays
// where index i of every field describes the same free -- confirm against the
// heapprofd writer.
message StreamingFree {
  // TODO(fmayer): Add callstack.
  repeated uint64 address = 1;
  repeated uint32 heap_id = 2;
  repeated uint64 sequence_number = 3;
}
234
// Output of the chromium in-process callstack sampler, which is driven by
// signals. One packet carries a run of individual stack samples, each taken at
// a discrete point in time; nothing in here is aggregated over an interval.
message StreamingProfilePacket {
  // One entry per sample: an index into InternedData.callstacks.
  repeated uint64 callstack_iid = 1;

  // TODO(eseckler): Timestamps based on ThreadDescriptor are deprecated. Switch
  // this field over to a ClockSnapshot-based delta encoding.
  repeated int64 timestamp_delta_us = 2;

  // NOTE(review): the meaning and scale of this value are not described in
  // this file -- confirm with the Chromium writer.
  optional int32 process_priority = 3;
}
246
// Carries no fields of its own: this message only scopes the enums declared
// inside it.
message Profiling {
  enum CpuMode {
    MODE_UNKNOWN = 0;
    MODE_KERNEL = 1;
    MODE_USER = 2;

    // Listed for completeness only; these values are not expected in practice.
    MODE_HYPERVISOR = 3;
    MODE_GUEST_KERNEL = 4;
    MODE_GUEST_USER = 5;
  }

  // Mirror of the error codes defined by libunwindstack.
  // NB: the two enums do not share the same integral representation, so values
  // must be translated rather than cast.
  enum StackUnwindError {
    UNWIND_ERROR_UNKNOWN = 0;
    UNWIND_ERROR_NONE = 1;
    UNWIND_ERROR_MEMORY_INVALID = 2;
    UNWIND_ERROR_UNWIND_INFO = 3;
    UNWIND_ERROR_UNSUPPORTED = 4;
    UNWIND_ERROR_INVALID_MAP = 5;
    UNWIND_ERROR_MAX_FRAMES_EXCEEDED = 6;
    UNWIND_ERROR_REPEATED_FRAME = 7;
    UNWIND_ERROR_INVALID_ELF = 8;
    UNWIND_ERROR_SYSTEM_CALL = 9;
    UNWIND_ERROR_THREAD_TIMEOUT = 10;
    UNWIND_ERROR_THREAD_DOES_NOT_EXIST = 11;
    UNWIND_ERROR_BAD_ARCH = 12;
    UNWIND_ERROR_MAPS_PARSE = 13;
    UNWIND_ERROR_INVALID_PARAMETER = 14;
    UNWIND_ERROR_PTRACE_CALL = 15;
  }
}
280
// Packet emitted by the traced_perf sampling performance profiler, which
// gathers data via the perf_event_open syscall. Each packet contains an
// individual sample with a counter value, and optionally a
// callstack.
//
// Timestamps are within the root packet. The config can specify the clock, or
// the implementation will default to CLOCK_MONOTONIC_RAW. Within the Android R
// timeframe, the default was CLOCK_BOOTTIME.
//
// There are several distinct views of this message:
// * indication of kernel buffer data loss (kernel_records_lost set)
// * indication of skipped samples (sample_skipped_reason set)
// * notable event in the sampling implementation (producer_event set)
// * normal sample (timebase_count set, typically also callstack_iid)
message PerfSample {
  // NOTE(review): presumably the cpu, process id and thread id that the sample
  // was taken on -- only |cpu| is referenced elsewhere in this file (as the
  // owner of the ring buffer for kernel_records_lost); confirm the others.
  optional uint32 cpu = 1;
  optional uint32 pid = 2;
  optional uint32 tid = 3;

  // Execution state that the process was sampled at.
  optional Profiling.CpuMode cpu_mode = 5;

  // Value of the timebase counter (since the event was configured, no deltas).
  optional uint64 timebase_count = 6;

  // Unwound callstack. Might be partial, in which case a synthetic "error"
  // frame is appended, and |unwind_error| is set accordingly.
  optional uint64 callstack_iid = 4;

  // If set, stack unwinding was incomplete due to an error.
  // Unset values should be treated as UNWIND_ERROR_NONE.
  oneof optional_unwind_error {
    Profiling.StackUnwindError unwind_error = 16;
  }

  // If set, indicates that this message is not a sample, but rather an
  // indication of data loss in the ring buffer allocated for |cpu|. Such data
  // loss occurs when the kernel has insufficient ring buffer capacity to write
  // a record (which gets discarded). A record in this context is an individual
  // ring buffer entry, and counts more than just sample records.
  //
  // The |timestamp| of the packet corresponds to the time that the producer
  // wrote the packet for trace-sorting purposes alone, and should not be
  // interpreted relative to the sample timestamps. This field is sufficient to
  // detect that *some* kernel data loss happened within the trace, but not the
  // specific time bounds of that loss (which would require tracking predecessor
  // & successor timestamps, which is not deemed necessary at the moment).
  optional uint64 kernel_records_lost = 17;

  // If set, indicates that the profiler encountered a sample that was relevant,
  // but was skipped.
  enum SampleSkipReason {
    PROFILER_SKIP_UNKNOWN = 0;
    PROFILER_SKIP_READ_STAGE = 1;
    PROFILER_SKIP_UNWIND_STAGE = 2;
    PROFILER_SKIP_UNWIND_ENQUEUE = 3;
  }
  // Wrapped in a oneof, like |unwind_error| above, so that "set" can be told
  // apart from the default enum value.
  oneof optional_sample_skipped_reason {
    SampleSkipReason sample_skipped_reason = 18;
  }

  // A notable event within the sampling implementation.
  message ProducerEvent {
    enum DataSourceStopReason {
      PROFILER_STOP_UNKNOWN = 0;
      PROFILER_STOP_GUARDRAIL = 1;
    }
    oneof optional_source_stop_reason {
      DataSourceStopReason source_stop_reason = 1;
    }
  }
  // If set, this message reports a producer event rather than a normal sample.
  optional ProducerEvent producer_event = 19;
}
352
// Embedded in TracePacketDefaults as a submessage.
message PerfSampleDefaults {
  // Timebase that the samples are taken against. The implementation may have
  // defaulted or overridden some parameters, so this is not guaranteed to match
  // what the data source config asked for.
  optional PerfEvents.Timebase timebase = 1;

  // Reported back when the config asked for process sharding: the number of
  // bins, and which one of them was picked. Both values stay constant for the
  // whole duration of a trace.
  optional uint32 process_shard_count = 2;
  optional uint32 chosen_process_shard = 3;
}
364