1 /* 2 * Copyright (C) 2019 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef INCLUDE_PERFETTO_TRACING_DATA_SOURCE_H_ 18 #define INCLUDE_PERFETTO_TRACING_DATA_SOURCE_H_ 19 20 // This header contains the key class (DataSource) that a producer app should 21 // override in order to create a custom data source that gets tracing Start/Stop 22 // notifications and emits tracing data. 23 24 #include <assert.h> 25 #include <stddef.h> 26 #include <stdint.h> 27 28 #include <array> 29 #include <atomic> 30 #include <functional> 31 #include <memory> 32 #include <mutex> 33 34 #include "perfetto/base/compiler.h" 35 #include "perfetto/base/export.h" 36 #include "perfetto/protozero/message.h" 37 #include "perfetto/protozero/message_handle.h" 38 #include "perfetto/tracing/buffer_exhausted_policy.h" 39 #include "perfetto/tracing/core/forward_decls.h" 40 #include "perfetto/tracing/internal/basic_types.h" 41 #include "perfetto/tracing/internal/data_source_internal.h" 42 #include "perfetto/tracing/internal/tracing_muxer.h" 43 #include "perfetto/tracing/locked_handle.h" 44 #include "perfetto/tracing/trace_writer_base.h" 45 46 #include "protos/perfetto/trace/trace_packet.pbzero.h" 47 48 // PERFETTO_COMPONENT_EXPORT is used to mark symbols in Perfetto's headers 49 // (typically templates) that are defined by the user outside of Perfetto and 50 // should be made visible outside the current module. (e.g., in Chrome's 51 // component build). 52 #if !defined(PERFETTO_COMPONENT_EXPORT) 53 #define PERFETTO_COMPONENT_EXPORT 54 #endif 55 56 namespace perfetto { 57 namespace internal { 58 class TracingMuxerImpl; 59 class TrackEventCategoryRegistry; 60 template <typename, const internal::TrackEventCategoryRegistry*> 61 class TrackEventDataSource; 62 } // namespace internal 63 64 // Base class with the virtual methods to get start/stop notifications. 65 // Embedders are supposed to derive the templated version below, not this one. 66 class PERFETTO_EXPORT DataSourceBase { 67 public: 68 virtual ~DataSourceBase(); 69 70 // TODO(primiano): change the const& args below to be pointers instead. It 71 // makes it more awkward to handle output arguments and require mutable(s). 72 // This requires synchronizing a breaking API change for existing embedders. 73 74 // OnSetup() is invoked when tracing is configured. In most cases this happens 75 // just before starting the trace. In the case of deferred start (see 76 // deferred_start in trace_config.proto) start might happen later. 77 class SetupArgs { 78 public: 79 // This is valid only within the scope of the OnSetup() call and must not 80 // be retained. 81 const DataSourceConfig* config = nullptr; 82 83 // The index of this data source instance (0..kMaxDataSourceInstances - 1). 84 uint32_t internal_instance_index = 0; 85 }; 86 virtual void OnSetup(const SetupArgs&); 87 88 class StartArgs { 89 public: 90 // The index of this data source instance (0..kMaxDataSourceInstances - 1). 91 uint32_t internal_instance_index = 0; 92 }; 93 virtual void OnStart(const StartArgs&); 94 95 class StopArgs { 96 public: 97 virtual ~StopArgs(); 98 99 // HandleAsynchronously() can optionally be called to defer the tracing 100 // session stop and write tracing data just before stopping. 101 // This function returns a closure that must be invoked after the last 102 // trace events have been emitted. The returned closure can be called from 103 // any thread. The caller also needs to explicitly call TraceContext.Flush() 104 // from the last Trace() lambda invocation because no other implicit flushes 105 // will happen after the stop signal. 106 // When this function is called, the tracing service will defer the stop of 107 // the tracing session until the returned closure is invoked. 108 // However, the caller cannot hang onto this closure for too long. The 109 // tracing service will forcefully stop the tracing session without waiting 110 // for pending producers after TraceConfig.data_source_stop_timeout_ms 111 // (default: 5s, can be overridden by Consumers when starting a trace). 112 // If the closure is called after this timeout an error will be logged and 113 // the trace data emitted will not be present in the trace. No other 114 // functional side effects (e.g. crashes or corruptions) will happen. In 115 // other words, it is fine to accidentally hold onto this closure for too 116 // long but, if that happens, some tracing data will be lost. 117 virtual std::function<void()> HandleStopAsynchronously() const = 0; 118 119 // The index of this data source instance (0..kMaxDataSourceInstances - 1). 120 uint32_t internal_instance_index = 0; 121 }; 122 virtual void OnStop(const StopArgs&); 123 }; 124 125 struct DefaultDataSourceTraits { 126 // |IncrementalStateType| can optionally be used store custom per-sequence 127 // incremental data (e.g., interning tables). It should have a Clear() method 128 // for when incremental state needs to be cleared. See 129 // TraceContext::GetIncrementalState(). 130 using IncrementalStateType = void; 131 132 // Allows overriding what type of thread-local state configuration the data 133 // source uses. By default every data source gets independent thread-local 134 // state, which means every instance uses separate trace writers and 135 // incremental state even on the same thread. Some data sources (most notably 136 // the track event data source) want to share trace writers and incremental 137 // state on the same thread. GetDataSourceTLSDefaultDataSourceTraits138 static internal::DataSourceThreadLocalState* GetDataSourceTLS( 139 internal::DataSourceStaticState* static_state, 140 internal::TracingTLS* root_tls) { 141 auto* ds_tls = &root_tls->data_sources_tls[static_state->index]; 142 // The per-type TLS is either zero-initialized or must have been initialized 143 // for this specific data source type. 144 assert(!ds_tls->static_state || 145 ds_tls->static_state->index == static_state->index); 146 return ds_tls; 147 } 148 }; 149 150 // Templated base class meant to be derived by embedders to create a custom data 151 // source. DataSourceType must be the type of the derived class itself, e.g.: 152 // class MyDataSource : public DataSourceBase<MyDataSource> {...}. 153 // 154 // |DataSourceTraits| allows customizing the behavior of the data source. See 155 // |DefaultDataSourceTraits|. 156 template <typename DataSourceType, 157 typename DataSourceTraits = DefaultDataSourceTraits> 158 class DataSource : public DataSourceBase { 159 struct DefaultTracePointTraits; 160 161 public: 162 // The BufferExhaustedPolicy to use for TraceWriters of this DataSource. 163 // Override this in your DataSource class to change the default, which is to 164 // drop data on shared memory overruns. 165 constexpr static BufferExhaustedPolicy kBufferExhaustedPolicy = 166 BufferExhaustedPolicy::kDrop; 167 168 // Argument passed to the lambda function passed to Trace() (below). 169 class TraceContext { 170 public: 171 using TracePacketHandle = 172 ::protozero::MessageHandle<::perfetto::protos::pbzero::TracePacket>; 173 174 TraceContext(TraceContext&&) noexcept = default; 175 ~TraceContext() = default; 176 NewTracePacket()177 TracePacketHandle NewTracePacket() { 178 return tls_inst_->trace_writer->NewTracePacket(); 179 } 180 181 // Forces a commit of the thread-local tracing data written so far to the 182 // service. This is almost never required (tracing data is periodically 183 // committed as trace pages are filled up) and has a non-negligible 184 // performance hit (requires an IPC + refresh of the current thread-local 185 // chunk). The only case when this should be used is when handling OnStop() 186 // asynchronously, to ensure sure that the data is committed before the 187 // Stop timeout expires. 188 // The TracePacketHandle obtained by the last NewTracePacket() call must be 189 // finalized before calling Flush() (either implicitly by going out of scope 190 // or by explicitly calling Finalize()). 191 // |cb| is an optional callback. When non-null it will request the 192 // service to ACK the flush and will be invoked on an internal thread after 193 // the service has acknowledged it. The callback might be NEVER INVOKED if 194 // the service crashes or the IPC connection is dropped. 195 void Flush(std::function<void()> cb = {}) { 196 tls_inst_->trace_writer->Flush(cb); 197 } 198 199 // Returns the number of bytes written on the current thread by the current 200 // data-source since its creation. 201 // This can be useful for splitting protos that might grow very large. written()202 uint64_t written() { return tls_inst_->trace_writer->written(); } 203 204 // Returns a RAII handle to access the data source instance, guaranteeing 205 // that it won't be deleted on another thread (because of trace stopping) 206 // while accessing it from within the Trace() lambda. 207 // The returned handle can be invalid (nullptr) if tracing is stopped 208 // immediately before calling this. The caller is supposed to check for its 209 // validity before using it. After checking, the handle is guaranteed to 210 // remain valid until the handle goes out of scope. GetDataSourceLocked()211 LockedHandle<DataSourceType> GetDataSourceLocked() { 212 auto* internal_state = static_state_.TryGet(instance_index_); 213 if (!internal_state) 214 return LockedHandle<DataSourceType>(); 215 return LockedHandle<DataSourceType>( 216 &internal_state->lock, 217 static_cast<DataSourceType*>(internal_state->data_source.get())); 218 } 219 GetIncrementalState()220 typename DataSourceTraits::IncrementalStateType* GetIncrementalState() { 221 return reinterpret_cast<typename DataSourceTraits::IncrementalStateType*>( 222 tls_inst_->incremental_state.get()); 223 } 224 225 private: 226 friend class DataSource; 227 template <typename, const internal::TrackEventCategoryRegistry*> 228 friend class internal::TrackEventDataSource; TraceContext(internal::DataSourceInstanceThreadLocalState * tls_inst,uint32_t instance_index)229 TraceContext(internal::DataSourceInstanceThreadLocalState* tls_inst, 230 uint32_t instance_index) 231 : tls_inst_(tls_inst), instance_index_(instance_index) {} 232 TraceContext(const TraceContext&) = delete; 233 TraceContext& operator=(const TraceContext&) = delete; 234 235 internal::DataSourceInstanceThreadLocalState* const tls_inst_; 236 uint32_t const instance_index_; 237 }; 238 239 // The main tracing method. Tracing code should call this passing a lambda as 240 // argument, with the following signature: void(TraceContext). 241 // The lambda will be called synchronously (i.e., always before Trace() 242 // returns) only if tracing is enabled and the data source has been enabled in 243 // the tracing config. 244 // The lambda can be called more than once per Trace() call, in the case of 245 // concurrent tracing sessions (or even if the data source is instantiated 246 // twice within the same trace config). 247 template <typename Lambda> Trace(Lambda tracing_fn)248 static void Trace(Lambda tracing_fn) { 249 CallIfEnabled<DefaultTracePointTraits>([&tracing_fn](uint32_t instances) { 250 TraceWithInstances<DefaultTracePointTraits>(instances, 251 std::move(tracing_fn)); 252 }); 253 } 254 255 // An efficient trace point guard for checking if this data source is active. 256 // |callback| is a function which will only be called if there are active 257 // instances. It is given an instance state parameter, which should be passed 258 // to TraceWithInstances() to actually record trace data. 259 template <typename Traits = DefaultTracePointTraits, typename Callback> CallIfEnabled(Callback callback)260 static void CallIfEnabled(Callback callback) PERFETTO_ALWAYS_INLINE { 261 // |instances| is a per-class bitmap that tells: 262 // 1. If the data source is enabled at all. 263 // 2. The index of the slot within |static_state_| that holds the instance 264 // state. In turn this allows to map the data source to the tracing 265 // session and buffers. 266 // memory_order_relaxed is okay because: 267 // - |instances| is re-read with an acquire barrier below if this succeeds. 268 // - The code between this point and the acquire-load is based on static 269 // storage which has indefinite lifetime. 270 uint32_t instances = 271 Traits::GetActiveInstances()->load(std::memory_order_relaxed); 272 273 // This is the tracing fast-path. Bail out immediately if tracing is not 274 // enabled (or tracing is enabled but not for this data source). 275 if (PERFETTO_LIKELY(!instances)) 276 return; 277 callback(instances); 278 } 279 280 // The "lower half" of a trace point which actually performs tracing after 281 // this data source has been determined to be active. 282 // |instances| must be the instance state value retrieved through 283 // CallIfEnabled(). 284 // |tracing_fn| will be called to record trace data as in Trace(). 285 // 286 // TODO(primiano): all the stuff below should be outlined from the trace 287 // point. Or at least we should have some compile-time traits like 288 // kOptimizeBinarySize / kOptimizeTracingLatency. 289 template <typename Traits = DefaultTracePointTraits, typename Lambda> TraceWithInstances(uint32_t instances,Lambda tracing_fn)290 static void TraceWithInstances(uint32_t instances, Lambda tracing_fn) { 291 PERFETTO_DCHECK(instances); 292 constexpr auto kMaxDataSourceInstances = internal::kMaxDataSourceInstances; 293 294 // See tracing_muxer.h for the structure of the TLS. 295 auto* tracing_impl = internal::TracingMuxer::Get(); 296 if (PERFETTO_UNLIKELY(!tls_state_)) 297 tls_state_ = GetOrCreateDataSourceTLS(&static_state_); 298 299 // TracingTLS::generation is a global monotonic counter that is incremented 300 // every time a tracing session is stopped. We use that as a signal to force 301 // a slow-path garbage collection of all the trace writers for the current 302 // thread and to destroy the ones that belong to tracing sessions that have 303 // ended. This is to avoid having too many TraceWriter instances alive, each 304 // holding onto one chunk of the shared memory buffer. 305 // Rationale why memory_order_relaxed should be fine: 306 // - The TraceWriter object that we use is always constructed and destructed 307 // on the current thread. There is no risk of accessing a half-initialized 308 // TraceWriter (which would be really bad). 309 // - In the worst case, in the case of a race on the generation check, we 310 // might end up using a TraceWriter for the same data source that belongs 311 // to a stopped session. This is not really wrong, as we don't give any 312 // guarantee on the global atomicity of the stop. In the worst case the 313 // service will reject the data commit if this arrives too late. 314 315 if (PERFETTO_UNLIKELY( 316 tls_state_->root_tls->generation != 317 tracing_impl->generation(std::memory_order_relaxed))) { 318 // Will update root_tls->generation. 319 tracing_impl->DestroyStoppedTraceWritersForCurrentThread(); 320 } 321 322 for (uint32_t i = 0; i < kMaxDataSourceInstances; i++) { 323 internal::DataSourceState* instance_state = 324 static_state_.TryGetCached(instances, i); 325 if (!instance_state) 326 continue; 327 328 // Even if we passed the check above, the DataSourceInstance might be 329 // still destroyed concurrently while this code runs. The code below is 330 // designed to deal with such race, as follows: 331 // - We don't access the user-defined data source instance state. The only 332 // bits of state we use are |backend_id| and |buffer_id|. 333 // - Beyond those two integers, we access only the TraceWriter here. The 334 // TraceWriter is always safe because it lives on the TLS. 335 // - |instance_state| is backed by static storage, so the pointer is 336 // always valid, even after the data source instance is destroyed. 337 // - In the case of a race-on-destruction, we'll still see the latest 338 // backend_id and buffer_id and in the worst case keep trying writing 339 // into the tracing shared memory buffer after stopped. But this isn't 340 // really any worse than the case of the stop IPC being delayed by the 341 // kernel scheduler. The tracing service is robust against data commit 342 // attemps made after tracing is stopped. 343 // There is a theoretical race that would case the wrong behavior w.r.t 344 // writing data in the wrong buffer, but it's so rare that we ignore it: 345 // if the data source is stopped and started kMaxDataSourceInstances 346 // times (so that the same id is recycled) while we are in this function, 347 // we might end up reusing the old data source's backend_id and buffer_id 348 // for the new one, because we don't see the generation change past this 349 // point. But stopping and starting tracing (even once) takes so much 350 // handshaking to make this extremely unrealistic. 351 352 auto& tls_inst = tls_state_->per_instance[i]; 353 if (PERFETTO_UNLIKELY(!tls_inst.trace_writer)) { 354 // Here we need an acquire barrier, which matches the release-store made 355 // by TracingMuxerImpl::SetupDataSource(), to ensure that the backend_id 356 // and buffer_id are consistent. 357 instances = 358 Traits::GetActiveInstances()->load(std::memory_order_acquire); 359 instance_state = static_state_.TryGetCached(instances, i); 360 if (!instance_state || !instance_state->trace_lambda_enabled) 361 return; 362 tls_inst.backend_id = instance_state->backend_id; 363 tls_inst.buffer_id = instance_state->buffer_id; 364 tls_inst.trace_writer = tracing_impl->CreateTraceWriter( 365 instance_state, DataSourceType::kBufferExhaustedPolicy); 366 CreateIncrementalState( 367 &tls_inst, 368 static_cast<typename DataSourceTraits::IncrementalStateType*>( 369 nullptr)); 370 371 // Even in the case of out-of-IDs, SharedMemoryArbiterImpl returns a 372 // NullTraceWriter. The returned pointer should never be null. 373 assert(tls_inst.trace_writer); 374 } 375 376 tracing_fn(TraceContext(&tls_inst, i)); 377 } 378 } 379 380 // Registers the data source on all tracing backends, including ones that 381 // connect after the registration. Doing so enables the data source to receive 382 // Setup/Start/Stop notifications and makes the Trace() method work when 383 // tracing is enabled and the data source is selected. 384 // This must be called after Tracing::Initialize(). 385 // Can return false to signal failure if attemping to register more than 386 // kMaxDataSources (32) data sources types or if tracing hasn't been 387 // initialized. Register(const DataSourceDescriptor & descriptor)388 static bool Register(const DataSourceDescriptor& descriptor) { 389 // Silences -Wunused-variable warning in case the trace method is not used 390 // by the translation unit that declares the data source. 391 (void)static_state_; 392 (void)tls_state_; 393 394 auto factory = [] { 395 return std::unique_ptr<DataSourceBase>(new DataSourceType()); 396 }; 397 auto* tracing_impl = internal::TracingMuxer::Get(); 398 if (!tracing_impl) 399 return false; 400 return tracing_impl->RegisterDataSource(descriptor, factory, 401 &static_state_); 402 } 403 404 private: 405 // Traits for customizing the behavior of a specific trace point. 406 struct DefaultTracePointTraits { 407 // By default, every call to DataSource::Trace() will record trace events 408 // for every active instance of that data source. A single trace point can, 409 // however, use a custom set of enable flags for more fine grained control 410 // of when that trace point is active. 411 // 412 // DANGER: when doing this, the data source must use the appropriate memory 413 // fences when changing the state of the bitmap. GetActiveInstancesDefaultTracePointTraits414 static constexpr std::atomic<uint32_t>* GetActiveInstances() { 415 return &static_state_.valid_instances; 416 } 417 }; 418 419 // Create the user provided incremental state in the given thread-local 420 // storage. Note: The second parameter here is used to specialize the case 421 // where there is no incremental state type. 422 template <typename T> CreateIncrementalState(internal::DataSourceInstanceThreadLocalState * tls_inst,const T *)423 static void CreateIncrementalState( 424 internal::DataSourceInstanceThreadLocalState* tls_inst, 425 const T*) { 426 PERFETTO_DCHECK(!tls_inst->incremental_state); 427 tls_inst->incremental_state = 428 internal::DataSourceInstanceThreadLocalState::IncrementalStatePointer( 429 reinterpret_cast<void*>(new T()), 430 [](void* p) { delete reinterpret_cast<T*>(p); }); 431 } CreateIncrementalState(internal::DataSourceInstanceThreadLocalState *,const void *)432 static void CreateIncrementalState( 433 internal::DataSourceInstanceThreadLocalState*, 434 const void*) {} 435 436 // Note that the returned object is one per-thread per-data-source-type, NOT 437 // per data-source *instance*. GetOrCreateDataSourceTLS(internal::DataSourceStaticState * static_state)438 static internal::DataSourceThreadLocalState* GetOrCreateDataSourceTLS( 439 internal::DataSourceStaticState* static_state) { 440 auto* tracing_impl = internal::TracingMuxer::Get(); 441 internal::TracingTLS* root_tls = tracing_impl->GetOrCreateTracingTLS(); 442 internal::DataSourceThreadLocalState* ds_tls = 443 DataSourceTraits::GetDataSourceTLS(static_state, root_tls); 444 // We keep re-initializing as the initialization is idempotent and not worth 445 // the code for extra checks. 446 ds_tls->static_state = static_state; 447 assert(!ds_tls->root_tls || ds_tls->root_tls == root_tls); 448 ds_tls->root_tls = root_tls; 449 return ds_tls; 450 } 451 452 // Static state. Accessed by the static Trace() method fastpaths. 453 static internal::DataSourceStaticState static_state_; 454 455 // This TLS object is a cached raw pointer and has deliberately no destructor. 456 // The Platform implementation is supposed to create and manage the lifetime 457 // of the Platform::ThreadLocalObject and take care of destroying it. 458 // This is because non-POD thread_local variables have subtleties (global 459 // destructors) that we need to defer to the embedder. In chromium's platform 460 // implementation, for instance, the tls slot is implemented using 461 // chromium's base::ThreadLocalStorage. 462 static thread_local internal::DataSourceThreadLocalState* tls_state_; 463 }; 464 465 template <typename T, typename D> 466 internal::DataSourceStaticState DataSource<T, D>::static_state_; 467 template <typename T, typename D> 468 thread_local internal::DataSourceThreadLocalState* DataSource<T, D>::tls_state_; 469 470 } // namespace perfetto 471 472 // If placed at the end of a macro declaration, eats the semicolon at the end of 473 // the macro invocation (e.g., "MACRO(...);") to avoid warnings about extra 474 // semicolons. 475 #define PERFETTO_INTERNAL_SWALLOW_SEMICOLON() \ 476 extern int perfetto_internal_unused 477 478 // This macro must be used once for each data source next to the data source's 479 // declaration. 480 #define PERFETTO_DECLARE_DATA_SOURCE_STATIC_MEMBERS(...) \ 481 template <> \ 482 PERFETTO_COMPONENT_EXPORT perfetto::internal::DataSourceStaticState \ 483 perfetto::DataSource<__VA_ARGS__>::static_state_; \ 484 template <> \ 485 PERFETTO_COMPONENT_EXPORT thread_local perfetto::internal:: \ 486 DataSourceThreadLocalState* \ 487 perfetto::DataSource<__VA_ARGS__>::tls_state_ 488 489 // This macro must be used once for each data source in one source file to 490 // allocate static storage for the data source's static state. 491 #define PERFETTO_DEFINE_DATA_SOURCE_STATIC_MEMBERS(...) \ 492 template <> \ 493 PERFETTO_COMPONENT_EXPORT perfetto::internal::DataSourceStaticState \ 494 perfetto::DataSource<__VA_ARGS__>::static_state_{}; \ 495 template <> \ 496 PERFETTO_COMPONENT_EXPORT thread_local perfetto::internal:: \ 497 DataSourceThreadLocalState* \ 498 perfetto::DataSource<__VA_ARGS__>::tls_state_ = nullptr 499 500 #endif // INCLUDE_PERFETTO_TRACING_DATA_SOURCE_H_ 501