/*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef INCLUDE_PERFETTO_TRACING_DATA_SOURCE_H_
#define INCLUDE_PERFETTO_TRACING_DATA_SOURCE_H_

// This header contains the key class (DataSource) that a producer app should
// override in order to create a custom data source that gets tracing Start/Stop
// notifications and emits tracing data.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

#include <array>
#include <atomic>
#include <functional>
#include <memory>
#include <mutex>

#include "perfetto/base/compiler.h"
#include "perfetto/base/export.h"
#include "perfetto/protozero/message.h"
#include "perfetto/protozero/message_handle.h"
#include "perfetto/tracing/buffer_exhausted_policy.h"
#include "perfetto/tracing/core/forward_decls.h"
#include "perfetto/tracing/internal/basic_types.h"
#include "perfetto/tracing/internal/data_source_internal.h"
#include "perfetto/tracing/internal/tracing_muxer.h"
#include "perfetto/tracing/locked_handle.h"
#include "perfetto/tracing/trace_writer_base.h"

#include "protos/perfetto/trace/trace_packet.pbzero.h"

// PERFETTO_COMPONENT_EXPORT is used to mark symbols in Perfetto's headers
// (typically templates) that are defined by the user outside of Perfetto and
// should be made visible outside the current module (e.g., in Chrome's
// component build).
#if !defined(PERFETTO_COMPONENT_EXPORT)
#define PERFETTO_COMPONENT_EXPORT
#endif

namespace perfetto {
namespace internal {
class TracingMuxerImpl;
class TrackEventCategoryRegistry;
template <typename, const internal::TrackEventCategoryRegistry*>
class TrackEventDataSource;
}  // namespace internal

// Base class with the virtual methods to get start/stop notifications.
// Embedders are supposed to derive from the templated version below, not this
// one.
class PERFETTO_EXPORT DataSourceBase {
 public:
  virtual ~DataSourceBase();

  // TODO(primiano): change the const& args below to be pointers instead.
  // Taking const& makes it more awkward to handle output arguments and
  // requires mutable member(s). This requires synchronizing a breaking API
  // change for existing embedders.

  // OnSetup() is invoked when tracing is configured. In most cases this happens
  // just before starting the trace. In the case of deferred start (see
  // deferred_start in trace_config.proto) start might happen later.
  class SetupArgs {
   public:
    // This is valid only within the scope of the OnSetup() call and must not
    // be retained.
    const DataSourceConfig* config = nullptr;

    // The index of this data source instance (0..kMaxDataSourceInstances - 1).
    uint32_t internal_instance_index = 0;
  };
  virtual void OnSetup(const SetupArgs&);

  class StartArgs {
   public:
    // The index of this data source instance (0..kMaxDataSourceInstances - 1).
    uint32_t internal_instance_index = 0;
  };
  virtual void OnStart(const StartArgs&);

  class StopArgs {
   public:
    virtual ~StopArgs();

    // HandleStopAsynchronously() can optionally be called to defer the tracing
    // session stop and write tracing data just before stopping.
    // This function returns a closure that must be invoked after the last
    // trace events have been emitted. The returned closure can be called from
    // any thread. The caller also needs to explicitly call TraceContext.Flush()
    // from the last Trace() lambda invocation because no other implicit flushes
    // will happen after the stop signal.
    // When this function is called, the tracing service will defer the stop of
    // the tracing session until the returned closure is invoked.
    // However, the caller cannot hang onto this closure for too long. The
    // tracing service will forcefully stop the tracing session without waiting
    // for pending producers after TraceConfig.data_source_stop_timeout_ms
    // (default: 5s, can be overridden by Consumers when starting a trace).
    // If the closure is called after this timeout, an error will be logged and
    // the trace data emitted after it will not be present in the trace. No
    // other functional side effects (e.g. crashes or corruptions) will happen.
    // In other words, it is fine to accidentally hold onto this closure for too
    // long but, if that happens, some tracing data will be lost.
    virtual std::function<void()> HandleStopAsynchronously() const = 0;

    // The index of this data source instance (0..kMaxDataSourceInstances - 1).
    uint32_t internal_instance_index = 0;
  };
  virtual void OnStop(const StopArgs&);
};
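
// Example (illustrative sketch, not part of the API): an OnStop() override
// that defers the stop in order to flush its final events. "MyDataSource" is a
// hypothetical embedder-defined class deriving from the DataSource template
// declared below.
//
//   class MyDataSource : public perfetto::DataSource<MyDataSource> {
//    public:
//     void OnStop(const StopArgs& args) override {
//       // Defer the session stop until the last events have been written.
//       auto stop_closure = args.HandleStopAsynchronously();
//       MyDataSource::Trace([](MyDataSource::TraceContext ctx) {
//         // ... emit the final packets here ...
//         ctx.Flush();  // Required: no implicit flush happens after the stop.
//       });
//       stop_closure();  // Tells the service it can now stop the session.
//     }
//   };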

struct DefaultDataSourceTraits {
  // |IncrementalStateType| can optionally be used to store custom per-sequence
  // incremental data (e.g., interning tables). It should have a Clear() method
  // for when incremental state needs to be cleared. See
  // TraceContext::GetIncrementalState().
  using IncrementalStateType = void;

  // Allows overriding what type of thread-local state configuration the data
  // source uses. By default every data source gets independent thread-local
  // state, which means every instance uses separate trace writers and
  // incremental state even on the same thread. Some data sources (most notably
  // the track event data source) want to share trace writers and incremental
  // state on the same thread.
  static internal::DataSourceThreadLocalState* GetDataSourceTLS(
      internal::DataSourceStaticState* static_state,
      internal::TracingTLS* root_tls) {
    auto* ds_tls = &root_tls->data_sources_tls[static_state->index];
    // The per-type TLS is either zero-initialized or must have been initialized
    // for this specific data source type.
    assert(!ds_tls->static_state ||
           ds_tls->static_state->index == static_state->index);
    return ds_tls;
  }
};
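
// Example (illustrative sketch): custom traits that attach per-sequence
// incremental state to a data source. "MyIncrementalState",
// "MyDataSourceTraits" and "MyDataSource" are hypothetical embedder-defined
// types, not part of Perfetto.
//
//   struct MyIncrementalState {
//     std::map<std::string, uint64_t> interned_names;
//     void Clear() { interned_names.clear(); }  // Required by the contract.
//   };
//
//   struct MyDataSourceTraits : public perfetto::DefaultDataSourceTraits {
//     using IncrementalStateType = MyIncrementalState;
//   };
//
//   class MyDataSource
//       : public perfetto::DataSource<MyDataSource, MyDataSourceTraits> {
//     // ...
//   };
//
// Within a Trace() lambda, TraceContext::GetIncrementalState() then returns a
// MyIncrementalState* scoped to the current thread and data source instance,
// which can be Clear()-ed when the incremental state needs to be reset.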

// Templated base class meant to be derived by embedders to create a custom data
// source. DataSourceType must be the type of the derived class itself, e.g.:
// class MyDataSource : public DataSource<MyDataSource> {...}.
//
// |DataSourceTraits| allows customizing the behavior of the data source. See
// |DefaultDataSourceTraits|.
template <typename DataSourceType,
          typename DataSourceTraits = DefaultDataSourceTraits>
class DataSource : public DataSourceBase {
  struct DefaultTracePointTraits;

 public:
  // The BufferExhaustedPolicy to use for TraceWriters of this DataSource.
  // Override this in your DataSource class to change the default, which is to
  // drop data on shared memory overruns.
  constexpr static BufferExhaustedPolicy kBufferExhaustedPolicy =
      BufferExhaustedPolicy::kDrop;

  // Argument passed to the lambda function passed to Trace() (below).
  class TraceContext {
   public:
    using TracePacketHandle =
        ::protozero::MessageHandle<::perfetto::protos::pbzero::TracePacket>;

    TraceContext(TraceContext&&) noexcept = default;
    ~TraceContext() = default;

    TracePacketHandle NewTracePacket() {
      return tls_inst_->trace_writer->NewTracePacket();
    }

    // Forces a commit of the thread-local tracing data written so far to the
    // service. This is almost never required (tracing data is periodically
    // committed as trace pages are filled up) and has a non-negligible
    // performance hit (requires an IPC + refresh of the current thread-local
    // chunk). The only case when this should be used is when handling OnStop()
    // asynchronously, to ensure that the data is committed before the Stop
    // timeout expires.
    // The TracePacketHandle obtained by the last NewTracePacket() call must be
    // finalized before calling Flush() (either implicitly by going out of scope
    // or by explicitly calling Finalize()).
    // |cb| is an optional callback. When non-null it will request the
    // service to ACK the flush and will be invoked on an internal thread after
    // the service has acknowledged it. The callback might be NEVER INVOKED if
    // the service crashes or the IPC connection is dropped.
    void Flush(std::function<void()> cb = {}) {
      tls_inst_->trace_writer->Flush(cb);
    }

    // Returns the number of bytes written on the current thread by the current
    // data source since its creation.
    // This can be useful for splitting protos that might grow very large.
    uint64_t written() { return tls_inst_->trace_writer->written(); }

    // Returns a RAII handle to access the data source instance, guaranteeing
    // that it won't be deleted on another thread (because of trace stopping)
    // while accessing it from within the Trace() lambda.
    // The returned handle can be invalid (nullptr) if tracing is stopped
    // immediately before calling this. The caller is supposed to check for its
    // validity before using it. After checking, the handle is guaranteed to
    // remain valid until it goes out of scope.
    LockedHandle<DataSourceType> GetDataSourceLocked() {
      auto* internal_state = static_state_.TryGet(instance_index_);
      if (!internal_state)
        return LockedHandle<DataSourceType>();
      return LockedHandle<DataSourceType>(
          &internal_state->lock,
          static_cast<DataSourceType*>(internal_state->data_source.get()));
    }
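
    // Example (illustrative sketch): checking the handle before use, with a
    // hypothetical MyDataSource as in the class-level comment above.
    //
    //   MyDataSource::Trace([](MyDataSource::TraceContext ctx) {
    //     auto data_source = ctx.GetDataSourceLocked();
    //     if (!data_source)
    //       return;  // Tracing was stopped concurrently; nothing to do.
    //     // Members of the MyDataSource instance can be accessed safely here
    //     // via data_source->...
    //   });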

    typename DataSourceTraits::IncrementalStateType* GetIncrementalState() {
      return reinterpret_cast<typename DataSourceTraits::IncrementalStateType*>(
          tls_inst_->incremental_state.get());
    }

   private:
    friend class DataSource;
    template <typename, const internal::TrackEventCategoryRegistry*>
    friend class internal::TrackEventDataSource;
    TraceContext(internal::DataSourceInstanceThreadLocalState* tls_inst,
                 uint32_t instance_index)
        : tls_inst_(tls_inst), instance_index_(instance_index) {}
    TraceContext(const TraceContext&) = delete;
    TraceContext& operator=(const TraceContext&) = delete;

    internal::DataSourceInstanceThreadLocalState* const tls_inst_;
    uint32_t const instance_index_;
  };

  // The main tracing method. Tracing code should call this passing a lambda as
  // argument, with the following signature: void(TraceContext).
  // The lambda will be called synchronously (i.e., always before Trace()
  // returns) only if tracing is enabled and the data source has been enabled in
  // the tracing config.
  // The lambda can be called more than once per Trace() call, in the case of
  // concurrent tracing sessions (or even if the data source is instantiated
  // twice within the same trace config).
  template <typename Lambda>
  static void Trace(Lambda tracing_fn) {
    CallIfEnabled<DefaultTracePointTraits>([&tracing_fn](uint32_t instances) {
      TraceWithInstances<DefaultTracePointTraits>(instances,
                                                  std::move(tracing_fn));
    });
  }
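
  // Example (illustrative sketch): emitting a packet from a trace point,
  // assuming a hypothetical data source named MyDataSource. The fields set on
  // the packet are just examples; any TracePacket field can be written.
  //
  //   MyDataSource::Trace([](MyDataSource::TraceContext ctx) {
  //     auto packet = ctx.NewTracePacket();
  //     packet->set_timestamp(42);
  //     packet->set_for_testing()->set_str("hello");
  //   });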

  // An efficient trace point guard for checking if this data source is active.
  // |callback| is a function which will only be called if there are active
  // instances. It is given an instance state parameter, which should be passed
  // to TraceWithInstances() to actually record trace data.
  template <typename Traits = DefaultTracePointTraits, typename Callback>
  static void CallIfEnabled(Callback callback) PERFETTO_ALWAYS_INLINE {
    // |instances| is a per-class bitmap that tells:
    // 1. If the data source is enabled at all.
    // 2. The index of the slot within |static_state_| that holds the instance
    //    state. In turn this allows mapping the data source to the tracing
    //    session and buffers.
    // memory_order_relaxed is okay because:
    // - |instances| is re-read with an acquire barrier below if this succeeds.
    // - The code between this point and the acquire-load is based on static
    //   storage which has indefinite lifetime.
    uint32_t instances =
        Traits::GetActiveInstances()->load(std::memory_order_relaxed);

    // This is the tracing fast-path. Bail out immediately if tracing is not
    // enabled (or tracing is enabled but not for this data source).
    if (PERFETTO_LIKELY(!instances))
      return;
    callback(instances);
  }

  // The "lower half" of a trace point which actually performs tracing after
  // this data source has been determined to be active.
  // |instances| must be the instance state value retrieved through
  // CallIfEnabled().
  // |tracing_fn| will be called to record trace data as in Trace().
  //
  // TODO(primiano): all the stuff below should be outlined from the trace
  // point. Or at least we should have some compile-time traits like
  // kOptimizeBinarySize / kOptimizeTracingLatency.
  template <typename Traits = DefaultTracePointTraits, typename Lambda>
  static void TraceWithInstances(uint32_t instances, Lambda tracing_fn) {
    PERFETTO_DCHECK(instances);
    constexpr auto kMaxDataSourceInstances = internal::kMaxDataSourceInstances;

    // See tracing_muxer.h for the structure of the TLS.
    auto* tracing_impl = internal::TracingMuxer::Get();
    if (PERFETTO_UNLIKELY(!tls_state_))
      tls_state_ = GetOrCreateDataSourceTLS(&static_state_);

    // TracingTLS::generation is a global monotonic counter that is incremented
    // every time a tracing session is stopped. We use that as a signal to force
    // a slow-path garbage collection of all the trace writers for the current
    // thread and to destroy the ones that belong to tracing sessions that have
    // ended. This is to avoid having too many TraceWriter instances alive, each
    // holding onto one chunk of the shared memory buffer.
    // Rationale why memory_order_relaxed should be fine:
    // - The TraceWriter object that we use is always constructed and destructed
    //   on the current thread. There is no risk of accessing a half-initialized
    //   TraceWriter (which would be really bad).
    // - In the worst case, in the case of a race on the generation check, we
    //   might end up using a TraceWriter for the same data source that belongs
    //   to a stopped session. This is not really wrong, as we don't give any
    //   guarantee on the global atomicity of the stop. In the worst case the
    //   service will reject the data commit if this arrives too late.

    if (PERFETTO_UNLIKELY(
            tls_state_->root_tls->generation !=
            tracing_impl->generation(std::memory_order_relaxed))) {
      // Will update root_tls->generation.
      tracing_impl->DestroyStoppedTraceWritersForCurrentThread();
    }

    for (uint32_t i = 0; i < kMaxDataSourceInstances; i++) {
      internal::DataSourceState* instance_state =
          static_state_.TryGetCached(instances, i);
      if (!instance_state)
        continue;

      // Even if we passed the check above, the DataSourceInstance might still
      // be destroyed concurrently while this code runs. The code below is
      // designed to deal with such a race, as follows:
      // - We don't access the user-defined data source instance state. The only
      //   bits of state we use are |backend_id| and |buffer_id|.
      // - Beyond those two integers, we access only the TraceWriter here. The
      //   TraceWriter is always safe because it lives on the TLS.
      // - |instance_state| is backed by static storage, so the pointer is
      //   always valid, even after the data source instance is destroyed.
      // - In the case of a race-on-destruction, we'll still see the latest
      //   backend_id and buffer_id and in the worst case keep trying to write
      //   into the tracing shared memory buffer after it has been stopped. But
      //   this isn't really any worse than the case of the stop IPC being
      //   delayed by the kernel scheduler. The tracing service is robust
      //   against data commit attempts made after tracing is stopped.
      // There is a theoretical race that would cause the wrong behavior w.r.t.
      // writing data into the wrong buffer, but it's so rare that we ignore it:
      // if the data source is stopped and started kMaxDataSourceInstances
      // times (so that the same id is recycled) while we are in this function,
      // we might end up reusing the old data source's backend_id and buffer_id
      // for the new one, because we don't see the generation change past this
      // point. But stopping and starting tracing (even once) takes so much
      // handshaking that this is extremely unrealistic.

      auto& tls_inst = tls_state_->per_instance[i];
      if (PERFETTO_UNLIKELY(!tls_inst.trace_writer)) {
        // Here we need an acquire barrier, which matches the release-store made
        // by TracingMuxerImpl::SetupDataSource(), to ensure that the backend_id
        // and buffer_id are consistent.
        instances =
            Traits::GetActiveInstances()->load(std::memory_order_acquire);
        instance_state = static_state_.TryGetCached(instances, i);
        if (!instance_state || !instance_state->trace_lambda_enabled)
          return;
        tls_inst.backend_id = instance_state->backend_id;
        tls_inst.buffer_id = instance_state->buffer_id;
        tls_inst.trace_writer = tracing_impl->CreateTraceWriter(
            instance_state, DataSourceType::kBufferExhaustedPolicy);
        CreateIncrementalState(
            &tls_inst,
            static_cast<typename DataSourceTraits::IncrementalStateType*>(
                nullptr));

        // Even in the case of out-of-IDs, SharedMemoryArbiterImpl returns a
        // NullTraceWriter. The returned pointer should never be null.
        assert(tls_inst.trace_writer);
      }

      tracing_fn(TraceContext(&tls_inst, i));
    }
  }
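
  // Example (illustrative sketch): the two-step pattern that CallIfEnabled()
  // and TraceWithInstances() enable, functionally equivalent to calling
  // Trace() directly. MyDataSource is a hypothetical data source type.
  //
  //   MyDataSource::CallIfEnabled([](uint32_t instances) {
  //     MyDataSource::TraceWithInstances(
  //         instances, [](MyDataSource::TraceContext ctx) {
  //           ctx.NewTracePacket();  // ... fill in the packet ...
  //         });
  //   });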

  // Registers the data source on all tracing backends, including ones that
  // connect after the registration. Doing so enables the data source to receive
  // Setup/Start/Stop notifications and makes the Trace() method work when
  // tracing is enabled and the data source is selected.
  // This must be called after Tracing::Initialize().
  // Can return false to signal failure if attempting to register more than
  // kMaxDataSources (32) data source types or if tracing hasn't been
  // initialized.
  static bool Register(const DataSourceDescriptor& descriptor) {
    // Silences -Wunused-variable warning in case the trace method is not used
    // by the translation unit that declares the data source.
    (void)static_state_;
    (void)tls_state_;

    auto factory = [] {
      return std::unique_ptr<DataSourceBase>(new DataSourceType());
    };
    auto* tracing_impl = internal::TracingMuxer::Get();
    if (!tracing_impl)
      return false;
    return tracing_impl->RegisterDataSource(descriptor, factory,
                                            &static_state_);
  }
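
  // Example (illustrative sketch): registering a hypothetical MyDataSource
  // after initializing the tracing library. The data source name is an
  // example, not a name defined by Perfetto.
  //
  //   perfetto::TracingInitArgs args;
  //   args.backends = perfetto::kSystemBackend;
  //   perfetto::Tracing::Initialize(args);
  //
  //   perfetto::DataSourceDescriptor dsd;
  //   dsd.set_name("com.example.my_data_source");
  //   MyDataSource::Register(dsd);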

 private:
  // Traits for customizing the behavior of a specific trace point.
  struct DefaultTracePointTraits {
    // By default, every call to DataSource::Trace() will record trace events
    // for every active instance of that data source. A single trace point can,
    // however, use a custom set of enable flags for more fine-grained control
    // of when that trace point is active.
    //
    // DANGER: when doing this, the data source must use the appropriate memory
    // fences when changing the state of the bitmap.
    static constexpr std::atomic<uint32_t>* GetActiveInstances() {
      return &static_state_.valid_instances;
    }
  };

  // Creates the user-provided incremental state in the given thread-local
  // storage. Note: the second parameter here is used to specialize the case
  // where there is no incremental state type.
  template <typename T>
  static void CreateIncrementalState(
      internal::DataSourceInstanceThreadLocalState* tls_inst,
      const T*) {
    PERFETTO_DCHECK(!tls_inst->incremental_state);
    tls_inst->incremental_state =
        internal::DataSourceInstanceThreadLocalState::IncrementalStatePointer(
            reinterpret_cast<void*>(new T()),
            [](void* p) { delete reinterpret_cast<T*>(p); });
  }
  static void CreateIncrementalState(
      internal::DataSourceInstanceThreadLocalState*,
      const void*) {}

  // Note that the returned object is one per-thread per-data-source-type, NOT
  // per data-source *instance*.
  static internal::DataSourceThreadLocalState* GetOrCreateDataSourceTLS(
      internal::DataSourceStaticState* static_state) {
    auto* tracing_impl = internal::TracingMuxer::Get();
    internal::TracingTLS* root_tls = tracing_impl->GetOrCreateTracingTLS();
    internal::DataSourceThreadLocalState* ds_tls =
        DataSourceTraits::GetDataSourceTLS(static_state, root_tls);
    // We keep re-initializing as the initialization is idempotent and not worth
    // the code for extra checks.
    ds_tls->static_state = static_state;
    assert(!ds_tls->root_tls || ds_tls->root_tls == root_tls);
    ds_tls->root_tls = root_tls;
    return ds_tls;
  }

  // Static state. Accessed by the static Trace() method fastpaths.
  static internal::DataSourceStaticState static_state_;

  // This TLS object is a cached raw pointer and deliberately has no destructor.
  // The Platform implementation is supposed to create and manage the lifetime
  // of the Platform::ThreadLocalObject and take care of destroying it.
  // This is because non-POD thread_local variables have subtleties (global
  // destructors) that we need to defer to the embedder. In Chromium's platform
  // implementation, for instance, the TLS slot is implemented using
  // Chromium's base::ThreadLocalStorage.
  static thread_local internal::DataSourceThreadLocalState* tls_state_;
};

template <typename T, typename D>
internal::DataSourceStaticState DataSource<T, D>::static_state_;
template <typename T, typename D>
thread_local internal::DataSourceThreadLocalState* DataSource<T, D>::tls_state_;

}  // namespace perfetto

// If placed at the end of a macro declaration, eats the semicolon at the end of
// the macro invocation (e.g., "MACRO(...);") to avoid warnings about extra
// semicolons.
#define PERFETTO_INTERNAL_SWALLOW_SEMICOLON() \
  extern int perfetto_internal_unused

// This macro must be used once for each data source next to the data source's
// declaration.
#define PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(...)              \
  template <>                                                         \
  PERFETTO_COMPONENT_EXPORT perfetto::internal::DataSourceStaticState \
      perfetto::DataSource<__VA_ARGS__>::static_state_;               \
  template <>                                                         \
  PERFETTO_COMPONENT_EXPORT thread_local perfetto::internal::         \
      DataSourceThreadLocalState*                                     \
          perfetto::DataSource<__VA_ARGS__>::tls_state_

// This macro must be used once for each data source in one source file to
// allocate static storage for the data source's static state.
#define PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(...)               \
  template <>                                                         \
  PERFETTO_COMPONENT_EXPORT perfetto::internal::DataSourceStaticState \
      perfetto::DataSource<__VA_ARGS__>::static_state_{};             \
  template <>                                                         \
  PERFETTO_COMPONENT_EXPORT thread_local perfetto::internal::         \
      DataSourceThreadLocalState*                                     \
          perfetto::DataSource<__VA_ARGS__>::tls_state_ = nullptr
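
// Example (illustrative sketch): wiring up the static storage for a
// hypothetical MyDataSource, as in the examples above.
//
//   // In the header that declares MyDataSource:
//   PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(MyDataSource);
//
//   // In exactly one .cc file:
//   PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(MyDataSource);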

#endif  // INCLUDE_PERFETTO_TRACING_DATA_SOURCE_H_