/*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef INCLUDE_PERFETTO_EXT_BASE_METATRACE_H_
#define INCLUDE_PERFETTO_EXT_BASE_METATRACE_H_

#include <array>
#include <atomic>
#include <cstdint>
#include <functional>
#include <new>
#include <string>

#include "perfetto/base/logging.h"
#include "perfetto/base/thread_utils.h"
#include "perfetto/base/time.h"
#include "perfetto/ext/base/metatrace_events.h"
#include "perfetto/ext/base/thread_annotations.h"
#include "perfetto/ext/base/utils.h"

// A facility to trace execution of the perfetto codebase itself.
// The meta-tracing framework is organized into three layers:
//
// 1. A static ring-buffer in base/ (this file) that supports concurrent writes
//    and a single reader.
//    The responsibility of this layer is to store events and counters as
//    efficiently as possible without re-entering any tracing code.
//    This is a ring-buffer backed by a static POD array.
//    This layer does NOT deal with serializing the meta-trace buffer.
//    It posts a task when it's half full and expects something outside of
//    base/ to drain the ring-buffer and serialize it, eventually writing it
//    into the trace itself, before it gets 100% full.
//
// 2. A class in tracing/core that takes care of serializing the meta-trace
//    buffer into the trace using a TraceWriter. See metatrace_writer.h .
//
// 3. A data source in traced_probes that, when enabled via the trace config,
//    injects metatrace events into the trace. See metatrace_data_source.h .
//
// The available events and tags are defined in metatrace_events.h .

namespace perfetto {

namespace base {
class TaskRunner;
}  // namespace base

namespace metatrace {

// Meta-tracing is organized in "tags" that can be selectively enabled. This is
// to allow meta-tracing of only one sub-system. This word has one "enabled"
// bit for each tag. 0 -> meta-tracing off.
extern std::atomic<uint32_t> g_enabled_tags;

// Time of the Enable() call. Used as a reference for keeping delta timestamps
// in Record.
extern std::atomic<uint64_t> g_enabled_timestamp;

// Enables meta-tracing for one or more tags. Once enabled, it will discard any
// further Enable() calls and return false until disabled.
// |read_task| is a closure that will be enqueued on |task_runner| when the
// meta-tracing ring buffer is half full. The task is expected to read the ring
// buffer using RingBuffer::GetReadIterator() and serialize the contents onto a
// file or into the trace itself.
// Must be called on the |task_runner| passed.
// |task_runner| must have static lifetime.
bool Enable(std::function<void()> read_task, base::TaskRunner*, uint32_t tags);

// Disables meta-tracing.
// Must be called on the same |task_runner| as Enable().
void Disable();

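// Example usage (illustrative sketch, not part of the API; |my_task_runner| is
// a hypothetical base::TaskRunner* with static lifetime and |kTags| stands for
// one or more tag bits from metatrace_events.h):
//
//   metatrace::Enable(
//       [] { /* drain the RingBuffer, see the sketch below RingBuffer */ },
//       my_task_runner, kTags);
//   ...
//   metatrace::Disable();
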
inline uint64_t TraceTimeNowNs() {
  return static_cast<uint64_t>(base::GetBootTimeNs().count());
}

// Returns a relaxed view of whether metatracing is enabled for the given tag.
// Useful for skipping unnecessary argument computation if metatracing is off.
inline bool IsEnabled(uint32_t tag) {
  auto enabled_tags = g_enabled_tags.load(std::memory_order_relaxed);
  if (PERFETTO_LIKELY((enabled_tags & tag) == 0))
    return false;
  else
    return true;
}

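// For example (sketch; ComputeExpensiveValue() and the TAG_*/MY_* constants
// are hypothetical, real ones live in metatrace_events.h):
//
//   if (IsEnabled(TAG_MY_SUBSYSTEM))
//     PERFETTO_METATRACE_COUNTER(TAG_MY_SUBSYSTEM, MY_COUNTER,
//                                ComputeExpensiveValue());
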
// Holds the data for a metatrace event or counter.
struct Record {
  static constexpr uint16_t kTypeMask = 0x8000;
  static constexpr uint16_t kTypeCounter = 0x8000;
  static constexpr uint16_t kTypeEvent = 0;

  uint64_t timestamp_ns() const {
    auto base_ns = g_enabled_timestamp.load(std::memory_order_relaxed);
    PERFETTO_DCHECK(base_ns);
    return base_ns + ((static_cast<uint64_t>(timestamp_ns_high) << 32) |
                      timestamp_ns_low);
  }

  void set_timestamp(uint64_t ts) {
    auto t_start = g_enabled_timestamp.load(std::memory_order_relaxed);
    uint64_t diff = ts - t_start;
    PERFETTO_DCHECK(diff < (1ull << 48));
    timestamp_ns_low = static_cast<uint32_t>(diff);
    timestamp_ns_high = static_cast<uint16_t>(diff >> 32);
  }

  // We can't just memset() this class because on MSVC std::atomic<> is not
  // trivially constructible anymore. Also, std::atomic<> has a deleted copy
  // constructor, so we can't just do "*this = Record()" either.
  // See http://bit.ly/339Jlzd .
  void clear() {
    this->~Record();
    new (this) Record();
  }

  // This field holds the type (counter vs event) in the MSB and the event ID
  // (as defined in metatrace_events.h) in the lowest 15 bits. It is also used
  // as a linearization point: it is always written after all the other fields
  // with a release-store, so that the reader can determine whether it can
  // safely process the other event fields after a load-acquire.
  std::atomic<uint16_t> type_and_id{};

  // Timestamp is stored as a 48-bit delta against g_enabled_timestamp.
  // This gives us ~78 hours from Enable() (2^48 ns ~= 78.2 h).
  uint16_t timestamp_ns_high = 0;
  uint32_t timestamp_ns_low = 0;

  uint32_t thread_id = 0;

  union {
    // Only one of the two elements can be zero-initialized, clang complains
    // about "initializing multiple members of union" otherwise.
    uint32_t duration_ns = 0;  // If type == event.
    int32_t counter_value;     // If type == counter.
  };
};

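// Sketch of how a reader can decode a Record |rec| (illustrative; the real
// serialization logic lives in tracing/core, see metatrace_writer.h):
//
//   uint16_t type_and_id = rec.type_and_id.load(std::memory_order_acquire);
//   // type_and_id == 0 means the record is not fully written yet.
//   uint16_t id = type_and_id & ~Record::kTypeMask;
//   bool is_counter =
//       (type_and_id & Record::kTypeMask) == Record::kTypeCounter;
//   uint64_t ts = rec.timestamp_ns();
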
// Holds the meta-tracing data in a statically allocated array.
// This class uses static storage (as opposed to being a singleton) to:
// - Have the guarantee of always-valid storage, so that meta-tracing can be
//   safely used in any part of the codebase, including base/ itself.
// - Avoid the barriers that thread-safe static locals would require.
class RingBuffer {
 public:
  static constexpr size_t kCapacity = 4096;  // 4096 * 16 bytes = 64K.

  // This iterator is not idempotent and will bump the read index in the buffer
  // at the end of the reads. There can be only one reader at any time.
  // Usage: for (auto it = RingBuffer::GetReadIterator(); it; ++it) { it->... }
  class ReadIterator {
   public:
    ReadIterator(ReadIterator&& other) {
      PERFETTO_DCHECK(other.valid_);
      cur_ = other.cur_;
      end_ = other.end_;
      valid_ = other.valid_;
      other.valid_ = false;
    }

    ~ReadIterator() {
      if (!valid_)
        return;
      PERFETTO_DCHECK(cur_ >= RingBuffer::rd_index_);
      PERFETTO_DCHECK(cur_ <= RingBuffer::wr_index_);
      RingBuffer::rd_index_.store(cur_, std::memory_order_release);
    }

    explicit operator bool() const { return cur_ < end_; }
    const Record* operator->() const { return RingBuffer::At(cur_); }
    const Record& operator*() const { return *operator->(); }

    // This is for ++it. it++ is deliberately not supported.
    ReadIterator& operator++() {
      PERFETTO_DCHECK(cur_ < end_);
      // Once a record has been read, mark it as free by clearing its
      // type_and_id, so that if we encounter it in another read iteration
      // while it is being written we know it's not fully written yet.
      // The memory_order_relaxed below is enough because:
      // - The reader is single-threaded and doesn't re-read the same records.
      // - Before starting a read batch, the reader has an acquire barrier on
      //   |rd_index_|.
      // - After terminating a read batch, the ~ReadIterator dtor updates the
      //   |rd_index_| with a release-store.
      // - Reader and writer are typically kCapacity/2 apart. So unless an
      //   overrun happens, a writer won't reuse a newly released record any
      //   time soon. If an overrun happens, everything is busted regardless.
      At(cur_)->type_and_id.store(0, std::memory_order_relaxed);
      ++cur_;
      return *this;
    }

   private:
    friend class RingBuffer;
    ReadIterator(uint64_t begin, uint64_t end)
        : cur_(begin), end_(end), valid_(true) {}
    ReadIterator& operator=(const ReadIterator&) = delete;
    ReadIterator(const ReadIterator&) = delete;

    uint64_t cur_;
    uint64_t end_;
    bool valid_;
  };

  static Record* At(uint64_t index) {
    // Doesn't really have to be pow2, but if it isn't, the compiler will emit
    // arithmetic operations to compute the modulo instead of a bitwise AND.
    static_assert(!(kCapacity & (kCapacity - 1)), "kCapacity must be pow2");
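    // E.g. with kCapacity == 4096, the modulo below compiles down to
    // |index & 0xfff|.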
    PERFETTO_DCHECK(index >= rd_index_);
    PERFETTO_DCHECK(index <= wr_index_);
    return &records_[index % kCapacity];
  }

  // Must be called on the same task runner passed to Enable().
  static ReadIterator GetReadIterator() {
    PERFETTO_DCHECK(RingBuffer::IsOnValidTaskRunner());
    return ReadIterator(rd_index_.load(std::memory_order_acquire),
                        wr_index_.load(std::memory_order_acquire));
  }

  static Record* AppendNewRecord();
  static void Reset();

  static bool has_overruns() {
    return has_overruns_.load(std::memory_order_acquire);
  }

  // Can temporarily return a value >= kCapacity but is eventually consistent.
  // This would happen in case of overruns until threads hit the --wr_index_
  // in AppendNewRecord().
  static uint64_t GetSizeForTesting() {
    auto wr_index = wr_index_.load(std::memory_order_relaxed);
    auto rd_index = rd_index_.load(std::memory_order_relaxed);
    PERFETTO_DCHECK(wr_index >= rd_index);
    return wr_index - rd_index;
  }

 private:
  friend class ReadIterator;

  // Returns true if the caller is on the task runner passed to Enable().
  // Used only for DCHECKs.
  static bool IsOnValidTaskRunner();

  static std::array<Record, kCapacity> records_;
  static std::atomic<bool> read_task_queued_;
  static std::atomic<uint64_t> wr_index_;
  static std::atomic<uint64_t> rd_index_;
  static std::atomic<bool> has_overruns_;
  static Record bankruptcy_record_;  // Used in case of overruns.
};

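// Illustrative drain sketch, typically run as the |read_task| passed to
// Enable() (the real serializer lives in tracing/core, see
// metatrace_writer.h):
//
//   for (auto it = RingBuffer::GetReadIterator(); it; ++it) {
//     if (it->type_and_id.load(std::memory_order_acquire) == 0)
//       break;  // Reached a record that is not fully written yet.
//     // ... copy out |*it| ...
//   }
//   if (RingBuffer::has_overruns()) { /* note data loss */ }
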
inline void TraceCounter(uint32_t tag, uint16_t id, int32_t value) {
  // memory_order_relaxed is okay because the storage has static lifetime.
  // It is safe to accidentally log an event soon after disabling.
  auto enabled_tags = g_enabled_tags.load(std::memory_order_relaxed);
  if (PERFETTO_LIKELY((enabled_tags & tag) == 0))
    return;
  Record* record = RingBuffer::AppendNewRecord();
  record->thread_id = static_cast<uint32_t>(base::GetThreadId());
  record->set_timestamp(TraceTimeNowNs());
  record->counter_value = value;
  record->type_and_id.store(Record::kTypeCounter | id,
                            std::memory_order_release);
}

class ScopedEvent {
 public:
  ScopedEvent(uint32_t tag, uint16_t event_id) {
    auto enabled_tags = g_enabled_tags.load(std::memory_order_relaxed);
    if (PERFETTO_LIKELY((enabled_tags & tag) == 0))
      return;
    event_id_ = event_id;
    record_ = RingBuffer::AppendNewRecord();
    record_->thread_id = static_cast<uint32_t>(base::GetThreadId());
    record_->set_timestamp(TraceTimeNowNs());
  }

  ~ScopedEvent() {
    if (PERFETTO_LIKELY(!record_))
      return;
    auto now = TraceTimeNowNs();
    record_->duration_ns = static_cast<uint32_t>(now - record_->timestamp_ns());
    record_->type_and_id.store(Record::kTypeEvent | event_id_,
                               std::memory_order_release);
  }

 private:
  Record* record_ = nullptr;
  uint16_t event_id_ = 0;
  ScopedEvent(const ScopedEvent&) = delete;
  ScopedEvent& operator=(const ScopedEvent&) = delete;
};

// Boilerplate to derive a unique variable name for the event.
#define PERFETTO_METATRACE_UID2(a, b) a##b
#define PERFETTO_METATRACE_UID(x) PERFETTO_METATRACE_UID2(metatrace_, x)

#define PERFETTO_METATRACE_SCOPED(TAG, ID)                                \
  ::perfetto::metatrace::ScopedEvent PERFETTO_METATRACE_UID(__COUNTER__)( \
      ::perfetto::metatrace::TAG, ::perfetto::metatrace::ID)

#define PERFETTO_METATRACE_COUNTER(TAG, ID, VALUE)                \
  ::perfetto::metatrace::TraceCounter(::perfetto::metatrace::TAG, \
                                      ::perfetto::metatrace::ID,  \
                                      static_cast<int32_t>(VALUE))

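// Example usage of the two macros above (hypothetical tag/event/counter names;
// the real ones are defined in metatrace_events.h):
//
//   void HandleIpc(size_t queue_len) {
//     PERFETTO_METATRACE_SCOPED(TAG_MY_SUBSYSTEM, MY_EVENT_HANDLE_IPC);
//     PERFETTO_METATRACE_COUNTER(TAG_MY_SUBSYSTEM, MY_COUNTER_QUEUE_LEN,
//                                queue_len);
//     // The scoped event records its duration when this scope exits.
//   }
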
}  // namespace metatrace
}  // namespace perfetto

#endif  // INCLUDE_PERFETTO_EXT_BASE_METATRACE_H_