// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef BASE_TRACKED_OBJECTS_H_
#define BASE_TRACKED_OBJECTS_H_

#include <stdint.h>

#include <map>
#include <set>
#include <stack>
#include <string>
#include <utility>
#include <vector>

#include "base/allocator/features.h"
#include "base/atomicops.h"
#include "base/base_export.h"
#include "base/containers/hash_tables.h"
#include "base/debug/debugging_flags.h"
#include "base/debug/thread_heap_usage_tracker.h"
#include "base/gtest_prod_util.h"
#include "base/lazy_instance.h"
#include "base/location.h"
#include "base/macros.h"
#include "base/process/process_handle.h"
#include "base/profiler/tracked_time.h"
#include "base/synchronization/lock.h"
#include "base/threading/thread_checker.h"
#include "base/threading/thread_local_storage.h"

namespace base {
struct TrackingInfo;
}

// TrackedObjects provides a database of stats about objects (generally Tasks)
// that are tracked. Tracking means their birth, death, duration, birth thread,
// death thread, and birth place are recorded. This data is carefully spread
// across a series of objects so that the counts and times can be rapidly
// updated without (usually) having to lock the data, and hence there is usually
// very little contention caused by the tracking. The data can be viewed via
// the about:profiler URL, with a variety of sorting and filtering choices.
//
// These classes serve as the basis of a profiler of sorts for the Tasks system.
// As a result, design decisions were made to maximize speed, by minimizing
// recurring allocation/deallocation, lock contention and data copying. In the
// "stable" state, which is reached relatively quickly, there is no separate
// marginal allocation cost associated with construction or destruction of
// tracked objects, no locks are generally employed, and probably the largest
// computational cost is associated with obtaining start and stop times for
// instances as they are created and destroyed.
//
// The following describes the life cycle of tracking an instance.
//
// First off, when the instance is created, the FROM_HERE macro is expanded
// to specify the birth place (file, line, function) where the instance was
// created. That data is used to create a transient Location instance
// encapsulating the above triple of information. The strings (like __FILE__)
// are passed around by reference, with the assumption that they are static, and
// will never go away. This ensures that the strings can be dealt with as atoms
// with great efficiency (i.e., copying of strings is never needed, and
// comparisons for equality can be based on pointer comparisons).
//
// Next, a Births instance is constructed or found. A Births instance records
// (in a base class BirthOnThread) references to the static data provided in a
// Location instance, as well as a pointer to the ThreadData bound to the thread
// on which the birth takes place (see discussion on ThreadData below). There is
// at most one Births instance for each Location / ThreadData pair. The derived
// Births class contains slots for recording statistics about all instances born
// at the same location. Statistics currently include only the count of
// instances constructed.
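//
// For illustration (a sketch of a call site, not code from this header's
// implementation): the birth half of this life cycle reduces to a single
// call, where FROM_HERE supplies the Location triple:
//
//   tracked_objects::Births* births =
//       tracked_objects::ThreadData::TallyABirthIfActive(FROM_HERE);
//
// |births| is NULL when tracking is inactive; otherwise it is the (found or
// newly created) Births record for this Location on the current thread.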
//
// Since the base class BirthOnThread contains only constant data, it can be
// freely accessed by any thread at any time. The statistics must be handled
// more carefully; they are updated exclusively by the single thread to which
// the ThreadData is bound at a given time.
//
// For Tasks, having now either constructed or found the Births instance
// described above, a pointer to the Births instance is then recorded into the
// PendingTask structure. This fact alone is very useful in debugging, when
// there is a question of where an instance came from. In addition, the birth
// time is also recorded and used to later evaluate the lifetime duration of the
// whole Task. As a result of the above embedding, we can find out a Task's
// location of birth, and name of birth thread, without using any locks, as all
// that data is constant across the life of the process.
//
// The above work *could* also be done for any other object as well by calling
// TallyABirthIfActive() and TallyRunOnNamedThreadIfTracking() as appropriate.
//
// The upper bound for the amount of memory used in the above data structures is
// the product of the number of ThreadData instances and the number of
// Locations. Fortunately, Locations are often created on a single thread and
// the memory utilization is actually fairly restrained.
//
// Lastly, when an instance is deleted, the final tallies of statistics are
// carefully accumulated. That tallying writes into slots (members) in a
// collection of DeathData instances. For each Births / death ThreadData pair,
// there is a DeathData instance to record the additional death count, as well
// as to accumulate the run-time and queue-time durations for the instance as it
// is destroyed (dies). Since a ThreadData is bound to at most one thread at a
// time, there is no need to lock such DeathData instances (i.e., these
// accumulated stats in a DeathData instance are exclusively updated by the
// singular owning thread).
//
// With the above life cycle description complete, the major remaining detail is
// explaining how existing Births and DeathData instances are found to avoid
// redundant allocations.
//
// A ThreadData instance maintains maps of Births and DeathData instances. The
// Births map is indexed by Location and the DeathData map is indexed by
// Births*. As noted earlier, we can compare Locations very efficiently as we
// consider the underlying data (file, function, line) to be atoms, and hence
// pointer comparison is used rather than (slow) string comparisons.
//
// The first time that a thread calls ThreadData::InitializeThreadContext() or
// ThreadData::Get(), a ThreadData instance is bound to it and stored in TLS. If
// a ThreadData bound to a terminated thread with the same sanitized name (i.e.
// name without trailing digits) as the current thread is available, it is
// reused. Otherwise, a new ThreadData instance is instantiated. Since a
// ThreadData is bound to at most one thread at a time, there is no need to
// acquire a lock to access its maps. Over time, a ThreadData may be bound to
// different threads that share the same sanitized name.
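//
// For example (hypothetical thread names): threads named "PoolThread1" and
// "PoolThread7" share the same sanitized name, so if "PoolThread1"
// terminates, its retired ThreadData may later be bound to "PoolThread7" when
// that thread first calls:
//
//   tracked_objects::ThreadData::InitializeThreadContext("PoolThread7");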
//
// We maintain a list of all ThreadData instances for the current process. Each
// ThreadData instance has a pointer to the next one. A static member of
// ThreadData provides a pointer to the first item on this global list, and
// access via that all_thread_data_list_head_ item requires the use of the
// list_lock_.
//
// When new ThreadData instances are added to the global list, they are pre-
// pended, which ensures that any prior acquisition of the list is valid (i.e.,
// the holder can iterate over it without fear of it changing, or the necessity
// of using an additional lock). Iterations are actually pretty rare (used
// primarily for cleanup, or snapshotting data for display), so this lock has
// very little global performance impact.
//
// The above description tries to define the high performance (run time)
// portions of these classes. After gathering statistics, calls instigated
// by visiting about:profiler will assemble and aggregate data for display. The
// following data structures are used for producing such displays. They are
// not performance critical, and their only major constraint is that they should
// be able to run concurrently with ongoing augmentation of the birth and death
// data.
//
// This header also exports a collection of classes that provide "snapshotted"
// representations of the core tracked_objects:: classes. These snapshotted
// representations are designed for safe transmission of the tracked_objects::
// data across process boundaries. Each consists of:
// (1) a default constructor, to support the IPC serialization macros,
// (2) a constructor that extracts data from the type being snapshotted, and
// (3) the snapshotted data.
//
// For a given birth location, information about births is spread across data
// structures that are asynchronously changing on various threads. For
// serialization and display purposes, we need to construct TaskSnapshot
// instances for each combination of birth thread, death thread, and location,
// along with the count of such lifetimes. We gather such data into
// TaskSnapshot instances, so that such instances can be sorted and
// aggregated (and remain frozen during our processing).
//
// Profiling consists of phases. The concrete phase in the sequence of phases
// is identified by its 0-based index.
//
// The ProcessDataPhaseSnapshot struct is a serialized representation of the
// list of ThreadData objects for a process for a concrete profiling phase. It
// holds a set of TaskSnapshots. The statistics in a snapshot are gathered
// asynchronously relative to their ongoing updates.
// It is possible, though highly unlikely, that stats could be incorrectly
// recorded by this process (all data is held in 32 bit ints, but we are not
// atomically collecting all data, so we could have a count that does not, for
// example, match with the number of durations we accumulated). The advantage
// to having fast (non-atomic) updates of the data outweighs the minimal risk of
// a singular corrupt statistic snapshot (only the snapshot could be corrupt,
// not the underlying and ongoing statistic). In contrast, pointer data that
// is accessed during snapshotting is completely invariant, and hence is
// perfectly acquired (i.e., no potential corruption, and no risk of a bad
// memory reference).
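//
// Concretely (an illustrative scenario, not a distinct failure mode): a
// snapshot may read count_ after a death incremented it, but before the
// matching addition to run_duration_sum_ landed, so the snapshotted sum can
// correspond to one fewer death than the snapshotted count. The discrepancy
// is confined to that one snapshot.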
//
// TODO(jar): We can implement a Snapshot system that *tries* to grab the
// snapshots on the source threads *when* they have SingleThreadTaskRunners
// available (worker threads don't have SingleThreadTaskRunners, and hence
// gathering from them will continue to be asynchronous). We had an
// implementation of this in the past, but the difficulty is dealing with
// threads being terminated. We can *try* to post a task to threads that have a
// SingleThreadTaskRunner and check if that succeeds (will fail if the thread
// has been terminated). This *might* be valuable when we are collecting data
// for upload via UMA (where correctness of data may be more significant than
// for a single screen of about:profiler).
//
// TODO(jar): We need to store DataCollections, and provide facilities for
// taking the difference between two gathered DataCollections. For now, we're
// just adding a hack that Reset()s to zero all counts and stats. This is also
// done in a slightly thread-unsafe fashion, as the resetting is done
// asynchronously relative to ongoing updates (but all data is 32 bit in size).
// For basic profiling, this will work "most of the time," and should be
// sufficient... but storing away DataCollections is the "right way" to do this.
// We'll accomplish this via JavaScript storage of snapshots, and then we'll
// remove the Reset() methods. We may also need a short-term-max value in
// DeathData that is reset (as synchronously as possible) during each snapshot.
// This will facilitate displaying a max value for each snapshot period.

namespace tracked_objects {

//------------------------------------------------------------------------------
// For a specific thread, and a specific birth place, the collection of all
// death info (with tallies for each death thread, to prevent access conflicts).
class ThreadData;
class BASE_EXPORT BirthOnThread {
 public:
  BirthOnThread(const Location& location, const ThreadData& current);

  const Location& location() const { return location_; }
  const ThreadData* birth_thread() const { return birth_thread_; }

 private:
  // File/lineno of birth. This defines the essence of the task, as the context
  // of the birth (construction) often tells what the item is for. This field
  // is const, and hence safe to access from any thread.
  const Location location_;

  // The thread that records births into this object. Only this thread is
  // allowed to update birth_count_ (which changes over time).
  const ThreadData* const birth_thread_;

  DISALLOW_COPY_AND_ASSIGN(BirthOnThread);
};

//------------------------------------------------------------------------------
// A "snapshotted" representation of the BirthOnThread class.

struct BASE_EXPORT BirthOnThreadSnapshot {
  BirthOnThreadSnapshot();
  explicit BirthOnThreadSnapshot(const BirthOnThread& birth);
  ~BirthOnThreadSnapshot();

  LocationSnapshot location;
  std::string sanitized_thread_name;
};
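
// For illustration (a sketch, not code from this header's implementation):
// given a reference |birth| to a live BirthOnThread, the extracting
// constructor yields a self-contained value that is safe to ship over IPC:
//
//   BirthOnThreadSnapshot snapshot(birth);
//
// The snapshot holds only a LocationSnapshot and a std::string, so copying it
// never touches the live tracking structures.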

//------------------------------------------------------------------------------
// A class for accumulating counts of births (without bothering with a map<>).

class BASE_EXPORT Births: public BirthOnThread {
 public:
  Births(const Location& location, const ThreadData& current);

  int birth_count() const;

  // When we have a birth we update the count for this birthplace.
  void RecordBirth();

 private:
  // The number of births on this thread for our location_.
  int birth_count_;

  DISALLOW_COPY_AND_ASSIGN(Births);
};

class DeathData;

//------------------------------------------------------------------------------
// A "snapshotted" representation of the DeathData class.

struct BASE_EXPORT DeathDataSnapshot {
  DeathDataSnapshot();

  // Constructs the snapshot from individual values.
  // The alternative would be taking a DeathData parameter, but this would
  // create a loop since DeathData indirectly refers to DeathDataSnapshot.
  // Passing a wrapper structure as a param or using an empty constructor for
  // snapshotting DeathData would be less efficient.
  DeathDataSnapshot(int count,
                    int32_t run_duration_sum,
                    int32_t run_duration_max,
                    int32_t run_duration_sample,
                    int32_t queue_duration_sum,
                    int32_t queue_duration_max,
                    int32_t queue_duration_sample,
                    int32_t alloc_ops,
                    int32_t free_ops,
                    int32_t allocated_bytes,
                    int32_t freed_bytes,
                    int32_t alloc_overhead_bytes,
                    int32_t max_allocated_bytes);
  DeathDataSnapshot(const DeathData& death_data);
  DeathDataSnapshot(const DeathDataSnapshot& other);
  ~DeathDataSnapshot();

  // Calculates and returns the delta between this snapshot and an earlier
  // snapshot of the same task |older|.
  DeathDataSnapshot Delta(const DeathDataSnapshot& older) const;

  int count;
  int32_t run_duration_sum;
  int32_t run_duration_max;
  int32_t run_duration_sample;
  int32_t queue_duration_sum;
  int32_t queue_duration_max;
  int32_t queue_duration_sample;

  int32_t alloc_ops;
  int32_t free_ops;
  int32_t allocated_bytes;
  int32_t freed_bytes;
  int32_t alloc_overhead_bytes;
  int32_t max_allocated_bytes;
};

//------------------------------------------------------------------------------
// A "snapshotted" representation of the DeathData for a particular profiling
// phase. Used as an element of the list of phase snapshots owned by DeathData.

struct DeathDataPhaseSnapshot {
  DeathDataPhaseSnapshot(int profiling_phase,
                         const DeathData& death_data,
                         const DeathDataPhaseSnapshot* prev);

  // Profiling phase at whose completion this snapshot was taken.
  int profiling_phase;

  // Death data snapshot.
  DeathDataSnapshot death_data;

  // Pointer to a snapshot from the previous phase.
  const DeathDataPhaseSnapshot* prev;
};
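
// For illustration, per-phase numbers are recovered by differencing two
// snapshots of the same task (a sketch; |current| and |previous| are assumed
// to be DeathDataSnapshot values taken at consecutive phase boundaries):
//
//   DeathDataSnapshot phase_delta = current.Delta(previous);
//   // phase_delta.count holds only the deaths that occurred in that phase.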

//------------------------------------------------------------------------------
// Information about deaths of a task on a given thread, called "death thread".
// Access to members of this class is never protected by a lock. The fields
// are accessed in such a way that corruptions resulting from race conditions
// are not significant, and don't accumulate as a result of multiple accesses.
// All invocations of DeathData::OnProfilingPhaseCompleted and
// ThreadData::SnapshotMaps (which takes a DeathData snapshot) in a given
// process must be called from the same thread. It doesn't matter what thread
// it is, but it's important that the same thread is used as the snapshot
// thread during the whole process lifetime. All fields except
// sample_probability_count_ can be snapshotted.

class BASE_EXPORT DeathData {
 public:
  DeathData();
  DeathData(const DeathData& other);
  ~DeathData();

  // Update stats for a task destruction (death) that had a Run() time of
  // |run_duration|, and has had a queueing delay of |queue_duration|.
  void RecordDurations(const int32_t queue_duration,
                       const int32_t run_duration,
                       const uint32_t random_number);

  // Update stats for a task destruction that performed |alloc_ops|
  // allocations, |free_ops| frees, allocated |allocated_bytes| bytes, freed
  // |freed_bytes|, where an estimated |alloc_overhead_bytes| went to heap
  // overhead, and where at most |max_allocated_bytes| were outstanding at any
  // one time.
  // Note that |alloc_overhead_bytes|/|alloc_ops| yields the average estimated
  // heap overhead of allocations in the task, and |allocated_bytes|/|alloc_ops|
  // yields the average size of allocation.
  // Note also that |allocated_bytes|-|freed_bytes| yields the net heap memory
  // usage of the task, which can be negative.
  void RecordAllocations(const uint32_t alloc_ops,
                         const uint32_t free_ops,
                         const uint32_t allocated_bytes,
                         const uint32_t freed_bytes,
                         const uint32_t alloc_overhead_bytes,
                         const uint32_t max_allocated_bytes);

  // Metrics and past snapshots accessors, used only for serialization and in
  // tests.
  int count() const { return base::subtle::NoBarrier_Load(&count_); }
  int32_t run_duration_sum() const {
    return base::subtle::NoBarrier_Load(&run_duration_sum_);
  }
  int32_t run_duration_max() const {
    return base::subtle::NoBarrier_Load(&run_duration_max_);
  }
  int32_t run_duration_sample() const {
    return base::subtle::NoBarrier_Load(&run_duration_sample_);
  }
  int32_t queue_duration_sum() const {
    return base::subtle::NoBarrier_Load(&queue_duration_sum_);
  }
  int32_t queue_duration_max() const {
    return base::subtle::NoBarrier_Load(&queue_duration_max_);
  }
  int32_t queue_duration_sample() const {
    return base::subtle::NoBarrier_Load(&queue_duration_sample_);
  }
  int32_t alloc_ops() const {
    return base::subtle::NoBarrier_Load(&alloc_ops_);
  }
  int32_t free_ops() const { return base::subtle::NoBarrier_Load(&free_ops_); }
  int32_t allocated_bytes() const {
    return base::subtle::NoBarrier_Load(&allocated_bytes_);
  }
  int32_t freed_bytes() const {
    return base::subtle::NoBarrier_Load(&freed_bytes_);
  }
  int32_t alloc_overhead_bytes() const {
    return base::subtle::NoBarrier_Load(&alloc_overhead_bytes_);
  }
  int32_t max_allocated_bytes() const {
    return base::subtle::NoBarrier_Load(&max_allocated_bytes_);
  }
  const DeathDataPhaseSnapshot* last_phase_snapshot() const {
    return last_phase_snapshot_;
  }

  // Called when the current profiling phase, identified by |profiling_phase|,
  // ends.
  // Must be called only on the snapshot thread.
  void OnProfilingPhaseCompleted(int profiling_phase);

 private:
  // A saturating addition operation for member variables. This elides the
  // use of atomic-primitive reads for members that are only written on the
  // owning thread.
  static void SaturatingMemberAdd(const uint32_t addend,
                                  base::subtle::Atomic32* sum);

  // Members are ordered from most regularly read and updated, to least
  // frequently used. This might help a bit with cache lines.
  // Number of runs seen (divisor for calculating averages).
  // Can be incremented only on the death thread.
  base::subtle::Atomic32 count_;

  // Count used in determining probability of selecting exec/queue times from a
  // recorded death as samples.
  // Gets incremented only on the death thread, but can be set to 0 by
  // OnProfilingPhaseCompleted() on the snapshot thread.
  base::subtle::Atomic32 sample_probability_count_;

  // Basic tallies, used to compute averages. Can be incremented only on the
  // death thread.
  base::subtle::Atomic32 run_duration_sum_;
  base::subtle::Atomic32 queue_duration_sum_;
  // Max values, used by local visualization routines. These are often read,
  // but rarely updated. The max values get assigned only on the death thread,
  // but these fields can be set to 0 by OnProfilingPhaseCompleted() on the
  // snapshot thread.
  base::subtle::Atomic32 run_duration_max_;
  base::subtle::Atomic32 queue_duration_max_;

  // The cumulative number of allocation and free operations.
  base::subtle::Atomic32 alloc_ops_;
  base::subtle::Atomic32 free_ops_;

  // The number of bytes allocated by the task.
  base::subtle::Atomic32 allocated_bytes_;

  // The number of bytes freed by the task.
  base::subtle::Atomic32 freed_bytes_;

  // The cumulative number of overhead bytes. Where available this yields an
  // estimate of the heap overhead for allocations.
  base::subtle::Atomic32 alloc_overhead_bytes_;

  // The high-watermark for the number of outstanding heap allocated bytes.
  base::subtle::Atomic32 max_allocated_bytes_;

  // Samples, used by crowd sourcing gatherers. These are almost never read,
  // and rarely updated. They can be modified only on the death thread.
  base::subtle::Atomic32 run_duration_sample_;
  base::subtle::Atomic32 queue_duration_sample_;

  // Snapshot of this death data made at the last profiling phase completion,
  // if any. DeathData owns the whole list starting with this pointer.
  // Can be accessed only on the snapshot thread.
  const DeathDataPhaseSnapshot* last_phase_snapshot_;

  DISALLOW_ASSIGN(DeathData);
};
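
// For illustration, the death-thread update path amounts to the following
// calls on the DeathData for a given (Births, death thread) pair (a sketch;
// the argument values are hypothetical locals supplied by the owning
// ThreadData, not declarations from this header):
//
//   death_data->RecordDurations(queue_duration_ms, run_duration_ms,
//                               random_number);
//   death_data->RecordAllocations(alloc_ops, free_ops, allocated_bytes,
//                                 freed_bytes, alloc_overhead_bytes,
//                                 max_allocated_bytes);
//
// Neither call acquires a lock; correctness relies on the single-death-thread
// discipline described above.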

//------------------------------------------------------------------------------
// A temporary collection of data that can be sorted and summarized. It is
// gathered (carefully) from many threads. Instances are held in arrays and
// processed, filtered, and rendered.
// The source of this data was collected on many threads, and is asynchronously
// changing. The data in this instance is not asynchronously changing.

struct BASE_EXPORT TaskSnapshot {
  TaskSnapshot();
  TaskSnapshot(const BirthOnThreadSnapshot& birth,
               const DeathDataSnapshot& death_data,
               const std::string& death_sanitized_thread_name);
  ~TaskSnapshot();

  BirthOnThreadSnapshot birth;
  // Delta between death data for a thread for a certain profiling phase and
  // the snapshot for the previous phase, if any. Otherwise, just a snapshot.
  DeathDataSnapshot death_data;
  std::string death_sanitized_thread_name;
};

//------------------------------------------------------------------------------
// For each thread, we have a ThreadData that stores all tracking info generated
// on this thread. This prevents the need for locking as data accumulates.
// We use ThreadLocalStorage to quickly identify the current ThreadData context.
// We also have a linked list of ThreadData instances, and that list is used to
// harvest data from all existing instances.

struct ProcessDataPhaseSnapshot;
struct ProcessDataSnapshot;
class BASE_EXPORT TaskStopwatch;

// Map from profiling phase number to the process-wide snapshotted
// representation of the list of ThreadData objects that died during the given
// phase.
typedef std::map<int, ProcessDataPhaseSnapshot> PhasedProcessDataSnapshotMap;

class BASE_EXPORT ThreadData {
 public:
  // Current allowable states of the tracking system. The states can vary
  // between PROFILING_ACTIVE and DEACTIVATED, but can never go back to
  // UNINITIALIZED.
  enum Status {
    UNINITIALIZED,         // Pristine, link-time state before running.
    DORMANT_DURING_TESTS,  // Only used during testing.
    DEACTIVATED,           // No longer recording profiling.
    PROFILING_ACTIVE,      // Recording profiles.
    STATUS_LAST = PROFILING_ACTIVE
  };

  typedef base::hash_map<Location, Births*, Location::Hash> BirthMap;
  typedef std::map<const Births*, DeathData> DeathMap;

  // Initialize the current thread context with a new instance of ThreadData.
  // This is used by all threads that have names, and should be explicitly
  // set *before* any births on the threads have taken place.
  static void InitializeThreadContext(const std::string& thread_name);

  // Using Thread Local Store, find the current instance for collecting data.
  // If an instance does not exist, construct one (and remember it for use on
  // this thread).
  // This may return NULL if the system is disabled for any reason.
  static ThreadData* Get();

  // Fills |process_data_snapshot| with phased snapshots of all profiling
  // phases, including the current one, identified by |current_profiling_phase|.
  // |current_profiling_phase| is necessary because a child process can start
  // after several phase-changing events, so it needs to receive the current
  // phase number from the browser process to fill the correct entry for the
  // current phase in the |process_data_snapshot| map.
  static void Snapshot(int current_profiling_phase,
                       ProcessDataSnapshot* process_data_snapshot);
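  //
  // For illustration, a collector can gather all phases in one call (a
  // sketch; |current_phase| is assumed to be supplied by the embedder's
  // phase-tracking logic):
  //
  //   ProcessDataSnapshot snapshot;
  //   ThreadData::Snapshot(current_phase, &snapshot);
  //   // snapshot.phased_snapshots maps phase number to the tasks that died
  //   // during that phase.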

  // Called when the current profiling phase, identified by |profiling_phase|,
  // ends.
  // |profiling_phase| is necessary because a child process can start after
  // several phase-changing events, so it needs to receive the phase number from
  // the browser process to fill the correct entry in the
  // completed_phases_snapshots_ map.
  static void OnProfilingPhaseCompleted(int profiling_phase);

  // Finds (or creates) a place to count births from the given location in this
  // thread, and increments that tally.
  // TallyABirthIfActive will return NULL if the birth cannot be tallied.
  static Births* TallyABirthIfActive(const Location& location);

  // Records the end of a timed run of an object. The |completed_task| contains
  // a pointer to a Births, the time_posted, and a delayed_start_time if any.
  // The |start_of_run| indicates when we started to perform the run of the
  // task. The delayed_start_time is non-null for tasks that were posted as
  // delayed tasks, and it indicates when the task should have run (i.e., when
  // it should have posted out of the timer queue, and into the work queue).
  // The |end_of_run| was just obtained by a call to Now() (just after the task
  // finished). It is provided as an argument to help with testing.
  static void TallyRunOnNamedThreadIfTracking(
      const base::TrackingInfo& completed_task,
      const TaskStopwatch& stopwatch);

  // Record the end of a timed run of an object. The |births| is the record for
  // the instance, the |time_posted| records that instant, which is presumed to
  // be when the task was posted into a queue to run on a worker thread.
  // The |start_of_run| is when the worker thread started to perform the run of
  // the task.
  // The |end_of_run| was just obtained by a call to Now() (just after the task
  // finished).
  static void TallyRunOnWorkerThreadIfTracking(const Births* births,
                                               const TrackedTime& time_posted,
                                               const TaskStopwatch& stopwatch);

  // Record the end of execution in a region, generally corresponding to a
  // scope being exited.
  static void TallyRunInAScopedRegionIfTracking(const Births* births,
                                                const TaskStopwatch& stopwatch);

  const std::string& sanitized_thread_name() const {
    return sanitized_thread_name_;
  }

  // Initializes all statics if needed (this initialization call should be made
  // while we are single threaded).
  static void EnsureTlsInitialization();

  // Sets internal status_ to |status|.
  static void InitializeAndSetTrackingStatus(Status status);

  static Status status();

  // Indicate if any sort of profiling is being done (i.e., we are more than
  // DEACTIVATED).
  static bool TrackingStatus();

  // Enables profiler timing.
  static void EnableProfilerTiming();

  // Provide a time function that does nothing (runs fast) when we don't have
  // the profiler enabled. It will generally be optimized away when it is
  // ifdef'ed to be small enough (allowing the profiler to be "compiled out" of
  // the code).
  static TrackedTime Now();

  // This function can be called at process termination to validate that thread
  // cleanup routines have been called for at least some number of named
  // threads.
  static void EnsureCleanupWasCalled(int major_threads_shutdown_count);
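
  // For illustration, a typical single-threaded startup sequence that turns
  // tracking on looks like this (a sketch, not a required protocol):
  //
  //   ThreadData::InitializeAndSetTrackingStatus(ThreadData::PROFILING_ACTIVE);
  //   DCHECK(ThreadData::TrackingStatus());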

 private:
  friend class TaskStopwatch;
  // Allow only tests to call ShutdownSingleThreadedCleanup. We NEVER call it
  // in production code.
  // TODO(jar): Make this a friend in DEBUG only, so that the optimizer has a
  // better chance of optimizing (inlining? etc.) private methods (knowing that
  // there will be no need for an external entry point).
  friend class TrackedObjectsTest;
  FRIEND_TEST_ALL_PREFIXES(TrackedObjectsTest, MinimalStartupShutdown);
  FRIEND_TEST_ALL_PREFIXES(TrackedObjectsTest, TinyStartupShutdown);

  // Type for an alternate timer function (testing only).
  typedef unsigned int NowFunction();

  typedef std::map<const BirthOnThread*, int> BirthCountMap;
  typedef std::vector<std::pair<const Births*, DeathDataPhaseSnapshot>>
      DeathsSnapshot;

  explicit ThreadData(const std::string& sanitized_thread_name);
  ~ThreadData();

  // Push this instance to the head of all_thread_data_list_head_, linking it to
  // the previous head. This is performed after each construction, and leaves
  // the instance permanently on that list.
  void PushToHeadOfList();

  // (Thread safe) Get start of list of all ThreadData instances using the lock.
  static ThreadData* first();

  // Iterate through the null terminated list of ThreadData instances.
  ThreadData* next() const;

  // In this thread's data, record a new birth.
  Births* TallyABirth(const Location& location);

  // Find a place to record a death on this thread.
  void TallyADeath(const Births& births,
                   int32_t queue_duration,
                   const TaskStopwatch& stopwatch);

  // Snapshots (under a lock) the profiled data for the tasks for this thread
  // and writes all of the executed tasks' data -- i.e. the data for all
  // profiling phases (including the current one: |current_profiling_phase|) for
  // the tasks with entries in the death_map_ -- into |phased_snapshots|.
  // Also updates the |birth_counts| tally for each task to keep track of the
  // number of living instances of the task -- that is, each task maps to the
  // number of births for the task that have not yet been balanced by a death.
  void SnapshotExecutedTasks(int current_profiling_phase,
                             PhasedProcessDataSnapshotMap* phased_snapshots,
                             BirthCountMap* birth_counts);

  // Using our lock, make a copy of the specified maps. This call may be made
  // on non-local threads, which necessitates the use of the lock to prevent
  // the map(s) from being reallocated while they are copied.
  void SnapshotMaps(int profiling_phase,
                    BirthMap* birth_map,
                    DeathsSnapshot* deaths);

  // Called for this thread when the current profiling phase, identified by
  // |profiling_phase|, ends.
  void OnProfilingPhaseCompletedOnThread(int profiling_phase);

  // This method is called by the TLS system when a thread terminates.
  // The argument may be NULL if this thread has never tracked a birth or death.
  static void OnThreadTermination(void* thread_data);

  // This method should be called when a worker thread terminates, so that we
  // can save all the thread data into a cache of reusable ThreadData instances.
  void OnThreadTerminationCleanup();

  // Cleans up data structures, and returns statics to near pristine (mostly
  // uninitialized) state. If there is any chance that other threads are still
  // using the data structures, then the |leak| argument should be passed in as
  // true, and the data structures (birth maps, death maps, ThreadData
  // instances, etc.) will be leaked and not deleted. If you have joined all
If you have joined all 698 // threads since the time that InitializeAndSetTrackingStatus() was called, 699 // then you can pass in a |leak| value of false, and this function will 700 // delete recursively all data structures, starting with the list of 701 // ThreadData instances. 702 static void ShutdownSingleThreadedCleanup(bool leak); 703 704 // Returns a ThreadData instance for a thread whose sanitized name is 705 // |sanitized_thread_name|. The returned instance may have been extracted from 706 // the list of retired ThreadData instances or newly allocated. 707 static ThreadData* GetRetiredOrCreateThreadData( 708 const std::string& sanitized_thread_name); 709 710 // When non-null, this specifies an external function that supplies monotone 711 // increasing time functcion. 712 static NowFunction* now_function_for_testing_; 713 714 // We use thread local store to identify which ThreadData to interact with. 715 static base::ThreadLocalStorage::StaticSlot tls_index_; 716 717 // Linked list of ThreadData instances that were associated with threads that 718 // have been terminated and that have not been associated with a new thread 719 // since then. This is only accessed while |list_lock_| is held. 720 static ThreadData* first_retired_thread_data_; 721 722 // Link to the most recently created instance (starts a null terminated list). 723 // The list is traversed by about:profiler when it needs to snapshot data. 724 // This is only accessed while list_lock_ is held. 725 static ThreadData* all_thread_data_list_head_; 726 727 // The number of times TLS has called us back to cleanup a ThreadData 728 // instance. This is only accessed while list_lock_ is held. 729 static int cleanup_count_; 730 731 // Incarnation sequence number, indicating how many times (during unittests) 732 // we've either transitioned out of UNINITIALIZED, or into that state. This 733 // value is only accessed while the list_lock_ is held. 734 static int incarnation_counter_; 735 736 // Protection for access to all_thread_data_list_head_, and to 737 // unregistered_thread_data_pool_. This lock is leaked at shutdown. 738 // The lock is very infrequently used, so we can afford to just make a lazy 739 // instance and be safe. 740 static base::LazyInstance<base::Lock>::Leaky list_lock_; 741 742 // We set status_ to SHUTDOWN when we shut down the tracking service. 743 static base::subtle::Atomic32 status_; 744 745 // Link to next instance (null terminated list). Used to globally track all 746 // registered instances (corresponds to all registered threads where we keep 747 // data). Only modified in the constructor. 748 ThreadData* next_; 749 750 // Pointer to another retired ThreadData instance. This value is nullptr if 751 // this is associated with an active thread. 752 ThreadData* next_retired_thread_data_; 753 754 // The name of the thread that is being recorded, with all trailing digits 755 // replaced with a single "*" character. 756 const std::string sanitized_thread_name_; 757 758 // A map used on each thread to keep track of Births on this thread. 759 // This map should only be accessed on the thread it was constructed on. 760 // When a snapshot is needed, this structure can be locked in place for the 761 // duration of the snapshotting activity. 762 BirthMap birth_map_; 763 764 // Similar to birth_map_, this records informations about death of tracked 765 // instances (i.e., when a tracked instance was destroyed on this thread). 
  // It is locked before changing, and hence other threads may access it by
  // locking before reading it.
  DeathMap death_map_;

  // Lock to protect *some* access to BirthMap and DeathMap. The maps are
  // regularly read and written on this thread, but may only be read from other
  // threads. To support this, we acquire this lock if we are writing from this
  // thread, or reading from another thread. For reading from this thread we
  // don't need a lock, as there is no potential for a conflict since the
  // writing is only done from this thread.
  mutable base::Lock map_lock_;

  // A random number that we use to decide which sample to keep as a
  // representative sample in each DeathData instance. We can't start off with
  // much randomness (because we can't call RandInt() on all our threads), so
  // we stir in more and more as we go.
  uint32_t random_number_;

  // Record of what the incarnation_counter_ was when this instance was created.
  // If the incarnation_counter_ has changed, then we avoid pushing into the
  // pool (this is only critical in tests which go through multiple
  // incarnations).
  int incarnation_count_for_pool_;

  // Most recently started (i.e. most nested) stopwatch on the current thread,
  // if it exists; NULL otherwise.
  TaskStopwatch* current_stopwatch_;

  DISALLOW_COPY_AND_ASSIGN(ThreadData);
};

//------------------------------------------------------------------------------
// Stopwatch to measure task run time or simply create a time interval that will
// be subtracted from the current most nested task's run time. Stopwatches
// coordinate with the stopwatches in which they are nested to avoid
// double-counting nested tasks' run times.
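//
// For illustration, a minimal use around running one task looks like this
// (a sketch; RunTheTask() is a hypothetical stand-in for the work being
// measured, and tracking is assumed to be initialized):
//
//   TaskStopwatch stopwatch;
//   stopwatch.Start();
//   RunTheTask();
//   stopwatch.Stop();
//   int32_t run_duration_ms = stopwatch.RunDurationMs();
//
// Any TaskStopwatch started and stopped on this thread while |stopwatch| is
// running has its wallclock duration excluded from |run_duration_ms|.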

class BASE_EXPORT TaskStopwatch {
 public:
  // Constructs the stopwatch in a created (not yet running) state.
  TaskStopwatch();
  ~TaskStopwatch();

  // Starts stopwatch.
  void Start();

  // Stops stopwatch.
  void Stop();

  // Returns the start time.
  TrackedTime StartTime() const;

  // Task's duration is calculated as the wallclock duration between starting
  // and stopping this stopwatch, minus the wallclock durations of any other
  // instances that are immediately nested in this one, started and stopped on
  // this thread during that period.
  int32_t RunDurationMs() const;

#if BUILDFLAG(ENABLE_MEMORY_TASK_PROFILER)
  const base::debug::ThreadHeapUsageTracker& heap_usage() const {
    return heap_usage_;
  }
  bool heap_tracking_enabled() const { return heap_tracking_enabled_; }
#endif

  // Returns tracking info for the current thread.
  ThreadData* GetThreadData() const;

 private:
  // Time when the stopwatch was started.
  TrackedTime start_time_;

#if BUILDFLAG(ENABLE_MEMORY_TASK_PROFILER)
  base::debug::ThreadHeapUsageTracker heap_usage_;
  bool heap_tracking_enabled_;
#endif

  // Wallclock duration of the task.
  int32_t wallclock_duration_ms_;

  // Tracking info for the current thread.
  ThreadData* current_thread_data_;

  // Sum of wallclock durations of all stopwatches that were directly nested in
  // this one.
  int32_t excluded_duration_ms_;

  // Stopwatch which was running on our thread when this stopwatch was started.
  // That preexisting stopwatch must be adjusted to exclude the wallclock
  // duration of this stopwatch.
  TaskStopwatch* parent_;

#if DCHECK_IS_ON()
  // State of the stopwatch. Stopwatch is first constructed in a created state,
  // then is optionally started/stopped, then destructed.
  enum { CREATED, RUNNING, STOPPED } state_;

  // Currently running stopwatch that is directly nested in this one, if such
  // stopwatch exists. NULL otherwise.
  TaskStopwatch* child_;
#endif
};

//------------------------------------------------------------------------------
// A snapshotted representation of the list of ThreadData objects for a process,
// for a single profiling phase.

struct BASE_EXPORT ProcessDataPhaseSnapshot {
 public:
  ProcessDataPhaseSnapshot();
  ProcessDataPhaseSnapshot(const ProcessDataPhaseSnapshot& other);
  ~ProcessDataPhaseSnapshot();

  std::vector<TaskSnapshot> tasks;
};

//------------------------------------------------------------------------------
// A snapshotted representation of the list of ThreadData objects for a process,
// for all profiling phases, including the current one.

struct BASE_EXPORT ProcessDataSnapshot {
 public:
  ProcessDataSnapshot();
  ProcessDataSnapshot(const ProcessDataSnapshot& other);
  ~ProcessDataSnapshot();

  PhasedProcessDataSnapshotMap phased_snapshots;
  base::ProcessId process_id;
};

}  // namespace tracked_objects

#endif  // BASE_TRACKED_OBJECTS_H_