/*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef INCLUDE_PERFETTO_EXT_BASE_METATRACE_H_
#define INCLUDE_PERFETTO_EXT_BASE_METATRACE_H_

#include <array>
#include <atomic>
#include <functional>
#include <string>

#include "perfetto/base/logging.h"
#include "perfetto/base/thread_utils.h"
#include "perfetto/base/time.h"
#include "perfetto/ext/base/metatrace_events.h"
#include "perfetto/ext/base/thread_annotations.h"
#include "perfetto/ext/base/utils.h"
// A facility to trace execution of the perfetto codebase itself.
// The meta-tracing framework is organized into three layers:
//
// 1. A static ring-buffer in base/ (this file) that supports concurrent writes
//    and a single reader.
//    The responsibility of this layer is to store events and counters as
//    efficiently as possible without re-entering any tracing code.
//    This is a ring-buffer backed by static storage (a POD array).
//    This layer does NOT deal with serializing the meta-trace buffer.
//    It posts a task when it's half full and expects something outside of
//    base/ to drain the ring-buffer and serialize it, eventually writing it
//    into the trace itself, before it gets 100% full.
//
// 2. A class in tracing/core which takes care of serializing the meta-trace
//    buffer into the trace using a TraceWriter. See metatrace_writer.h .
//
// 3. A data source in traced_probes that, when enabled via the trace config,
//    injects metatrace events into the trace. See metatrace_data_source.h .
//
// The available events and tags are defined in metatrace_events.h .

namespace perfetto {

namespace base {
class TaskRunner;
}  // namespace base

namespace metatrace {

// Meta-tracing is organized in "tags" that can be selectively enabled. This is
// to enable meta-tracing only for one sub-system. This word has one "enabled"
// bit for each tag. 0 -> meta-tracing off.
extern std::atomic<uint32_t> g_enabled_tags;

// Time of the Enable() call. Used as a reference for keeping delta timestamps
// in Record.
extern std::atomic<uint64_t> g_enabled_timestamp;

// Enables meta-tracing for one or more tags. Once enabled, any further
// Enable() calls are ignored and return false until Disable() is called.
// |read_task| is a closure that will be enqueued on |task_runner| when the
// meta-tracing ring buffer is half full. The task is expected to read the ring
// buffer using RingBuffer::GetReadIterator() and serialize the contents onto a
// file or into the trace itself.
// Must be called on the |task_runner| passed.
// |task_runner| must have static lifetime.
bool Enable(std::function<void()> read_task, base::TaskRunner*, uint32_t tags);
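//
// Example usage (a minimal sketch; |DrainMetatrace| and |task_runner| are
// hypothetical names, and TAG_ANY is one of the tags from metatrace_events.h):
//
//   void DrainMetatrace() {
//     for (auto it = metatrace::RingBuffer::GetReadIterator(); it; ++it) {
//       // Serialize *it (an event or counter Record) into the trace.
//     }
//   }
//   metatrace::Enable(&DrainMetatrace, task_runner, metatrace::TAG_ANY);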

// Disables meta-tracing.
// Must be called on the same |task_runner| as Enable().
void Disable();

inline uint64_t TraceTimeNowNs() {
  return static_cast<uint64_t>(base::GetBootTimeNs().count());
}

// Returns a relaxed view of whether metatracing is enabled for the given tag.
// Useful for skipping unnecessary argument computation if metatracing is off.
inline bool IsEnabled(uint32_t tag) {
  auto enabled_tags = g_enabled_tags.load(std::memory_order_relaxed);
  if (PERFETTO_LIKELY((enabled_tags & tag) == 0))
    return false;
  else
    return true;
}
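//
// Example of the skip-if-off pattern (a sketch; MY_COUNTER and
// ComputeExpensiveValue() are illustrative names, not part of this API):
//
//   if (metatrace::IsEnabled(metatrace::TAG_ANY)) {
//     PERFETTO_METATRACE_COUNTER(TAG_ANY, MY_COUNTER, ComputeExpensiveValue());
//   }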

// Holds the data for a metatrace event or counter.
struct Record {
  static constexpr uint16_t kTypeMask = 0x8000;
  static constexpr uint16_t kTypeCounter = 0x8000;
  static constexpr uint16_t kTypeEvent = 0;

  uint64_t timestamp_ns() const {
    auto base_ns = g_enabled_timestamp.load(std::memory_order_relaxed);
    PERFETTO_DCHECK(base_ns);
    return base_ns + ((static_cast<uint64_t>(timestamp_ns_high) << 32) |
                      timestamp_ns_low);
  }

  void set_timestamp(uint64_t ts) {
    auto t_start = g_enabled_timestamp.load(std::memory_order_relaxed);
    uint64_t diff = ts - t_start;
    PERFETTO_DCHECK(diff < (1ull << 48));
    timestamp_ns_low = static_cast<uint32_t>(diff);
    timestamp_ns_high = static_cast<uint16_t>(diff >> 32);
  }

  // We can't just memset() this class because on MSVC std::atomic<> is not
  // trivially constructible anymore. Also, std::atomic<> has a deleted copy
  // constructor, so we can't just do "*this = Record()" either.
  // See http://bit.ly/339Jlzd .
  void clear() {
    this->~Record();
    new (this) Record();
  }

  // This field holds the type (counter vs event) in the MSB and the event ID
  // (as defined in metatrace_events.h) in the lowest 15 bits. It is also used
  // as a linearization point: it is always written after all the other fields
  // with a release-store, so that the reader can determine whether it can
  // safely process the other event fields after a load-acquire.
  std::atomic<uint16_t> type_and_id{};

  // The timestamp is stored as a 48-bit delta against g_enabled_timestamp.
  // This gives us roughly 78 hours from Enable().
  uint16_t timestamp_ns_high = 0;
  uint32_t timestamp_ns_low = 0;

  uint32_t thread_id = 0;

  union {
    // Only one of the two elements can be zero-initialized, clang complains
    // about "initializing multiple members of union" otherwise.
    uint32_t duration_ns = 0;  // If type == event.
    int32_t counter_value;     // If type == counter.
  };
};

// Holds the meta-tracing data in a statically allocated array.
// This class uses static storage (as opposed to being a singleton) to:
// - Have the guarantee of always-valid storage, so that meta-tracing can be
//   safely used in any part of the codebase, including base/ itself.
// - Avoid the barriers that thread-safe static locals would require.
class RingBuffer {
 public:
  static constexpr size_t kCapacity = 4096;  // 4096 * 16 bytes = 64K.

  // This iterator is not idempotent and will bump the read index in the buffer
  // at the end of the reads. There can be only one reader at any time.
  // Usage: for (auto it = RingBuffer::GetReadIterator(); it; ++it) { it->... }
  class ReadIterator {
   public:
    ReadIterator(ReadIterator&& other) {
      PERFETTO_DCHECK(other.valid_);
      cur_ = other.cur_;
      end_ = other.end_;
      valid_ = other.valid_;
      other.valid_ = false;
    }

    ~ReadIterator() {
      if (!valid_)
        return;
      PERFETTO_DCHECK(cur_ >= RingBuffer::rd_index_);
      PERFETTO_DCHECK(cur_ <= RingBuffer::wr_index_);
      RingBuffer::rd_index_.store(cur_, std::memory_order_release);
    }

    explicit operator bool() const { return cur_ < end_; }
    const Record* operator->() const { return RingBuffer::At(cur_); }
    const Record& operator*() const { return *operator->(); }

    // This is for ++it. it++ is deliberately not supported.
    ReadIterator& operator++() {
      PERFETTO_DCHECK(cur_ < end_);
      // Once a record has been read, mark it as free by clearing its
      // type_and_id, so that if we encounter it again in another read
      // iteration while it is being written we know it's not fully written
      // yet.
      // The memory_order_relaxed below is enough because:
      // - The reader is single-threaded and doesn't re-read the same records.
      // - Before starting a read batch, the reader has an acquire barrier on
      //   |rd_index_|.
      // - After terminating a read batch, the ~ReadIterator dtor updates the
      //   |rd_index_| with a release-store.
      // - Reader and writer are typically kCapacity/2 apart. So unless an
      //   overrun happens, a writer won't reuse a newly released record any
      //   time soon. If an overrun happens, everything is busted regardless.
      At(cur_)->type_and_id.store(0, std::memory_order_relaxed);
      ++cur_;
      return *this;
    }

   private:
    friend class RingBuffer;
    ReadIterator(uint64_t begin, uint64_t end)
        : cur_(begin), end_(end), valid_(true) {}
    ReadIterator& operator=(const ReadIterator&) = delete;
    ReadIterator(const ReadIterator&) = delete;

    uint64_t cur_;
    uint64_t end_;
    bool valid_;
  };

  static Record* At(uint64_t index) {
    // Doesn't really have to be pow2, but if not the compiler will emit
    // arithmetic operations to compute the modulo instead of a bitwise AND.
    static_assert(!(kCapacity & (kCapacity - 1)), "kCapacity must be pow2");
    PERFETTO_DCHECK(index >= rd_index_);
    PERFETTO_DCHECK(index <= wr_index_);
    return &records_[index % kCapacity];
  }

  // Must be called on the same task runner passed to Enable().
  static ReadIterator GetReadIterator() {
    PERFETTO_DCHECK(RingBuffer::IsOnValidTaskRunner());
    return ReadIterator(rd_index_.load(std::memory_order_acquire),
                        wr_index_.load(std::memory_order_acquire));
  }

  static Record* AppendNewRecord();
  static void Reset();

  static bool has_overruns() {
    return has_overruns_.load(std::memory_order_acquire);
  }

  // Can temporarily return a value >= kCapacity but is eventually consistent.
  // This would happen in case of overruns until threads hit the --wr_index_
  // in AppendNewRecord().
  static uint64_t GetSizeForTesting() {
    auto wr_index = wr_index_.load(std::memory_order_relaxed);
    auto rd_index = rd_index_.load(std::memory_order_relaxed);
    PERFETTO_DCHECK(wr_index >= rd_index);
    return wr_index - rd_index;
  }

 private:
  friend class ReadIterator;

  // Returns true if the caller is on the task runner passed to Enable().
  // Used only for DCHECKs.
  static bool IsOnValidTaskRunner();

  static std::array<Record, kCapacity> records_;
  static std::atomic<bool> read_task_queued_;
  static std::atomic<uint64_t> wr_index_;
  static std::atomic<uint64_t> rd_index_;
  static std::atomic<bool> has_overruns_;
  static Record bankruptcy_record_;  // Used in case of overruns.
};

inline void TraceCounter(uint32_t tag, uint16_t id, int32_t value) {
  // memory_order_relaxed is okay because the storage has static lifetime.
  // It is safe to accidentally log an event soon after disabling.
  auto enabled_tags = g_enabled_tags.load(std::memory_order_relaxed);
  if (PERFETTO_LIKELY((enabled_tags & tag) == 0))
    return;
  Record* record = RingBuffer::AppendNewRecord();
  record->thread_id = static_cast<uint32_t>(base::GetThreadId());
  record->set_timestamp(TraceTimeNowNs());
  record->counter_value = value;
  record->type_and_id.store(Record::kTypeCounter | id,
                            std::memory_order_release);
}

class ScopedEvent {
 public:
  ScopedEvent(uint32_t tag, uint16_t event_id) {
    auto enabled_tags = g_enabled_tags.load(std::memory_order_relaxed);
    if (PERFETTO_LIKELY((enabled_tags & tag) == 0))
      return;
    event_id_ = event_id;
    record_ = RingBuffer::AppendNewRecord();
    record_->thread_id = static_cast<uint32_t>(base::GetThreadId());
    record_->set_timestamp(TraceTimeNowNs());
  }

  ~ScopedEvent() {
    if (PERFETTO_LIKELY(!record_))
      return;
    auto now = TraceTimeNowNs();
    record_->duration_ns =
        static_cast<uint32_t>(now - record_->timestamp_ns());
    record_->type_and_id.store(Record::kTypeEvent | event_id_,
                               std::memory_order_release);
  }

 private:
  Record* record_ = nullptr;
  uint16_t event_id_ = 0;
  ScopedEvent(const ScopedEvent&) = delete;
  ScopedEvent& operator=(const ScopedEvent&) = delete;
};

// Boilerplate to derive a unique variable name for the event.
#define PERFETTO_METATRACE_UID2(a, b) a##b
#define PERFETTO_METATRACE_UID(x) PERFETTO_METATRACE_UID2(metatrace_, x)

#define PERFETTO_METATRACE_SCOPED(TAG, ID)                                \
  ::perfetto::metatrace::ScopedEvent PERFETTO_METATRACE_UID(__COUNTER__)( \
      ::perfetto::metatrace::TAG, ::perfetto::metatrace::ID)

#define PERFETTO_METATRACE_COUNTER(TAG, ID, VALUE)                \
  ::perfetto::metatrace::TraceCounter(::perfetto::metatrace::TAG, \
                                      ::perfetto::metatrace::ID,  \
                                      static_cast<int32_t>(VALUE))
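
// Example usage of the macros (a sketch; MY_EVENT_ID and MY_COUNTER_ID are
// illustrative names and must correspond to entries in metatrace_events.h):
//
//   void ReadTick(size_t num_bytes_read) {
//     PERFETTO_METATRACE_SCOPED(TAG_ANY, MY_EVENT_ID);
//     // ... do the work being traced ...
//     PERFETTO_METATRACE_COUNTER(TAG_ANY, MY_COUNTER_ID, num_bytes_read);
//   }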

}  // namespace metatrace
}  // namespace perfetto

#endif  // INCLUDE_PERFETTO_EXT_BASE_METATRACE_H_