1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/tracked_objects.h"
6 
7 #include <ctype.h>
8 #include <limits.h>
9 #include <stdlib.h>
10 
11 #include "base/atomicops.h"
12 #include "base/base_switches.h"
13 #include "base/command_line.h"
14 #include "base/compiler_specific.h"
15 #include "base/debug/leak_annotations.h"
16 #include "base/logging.h"
17 #include "base/metrics/histogram_macros.h"
18 #include "base/numerics/safe_conversions.h"
19 #include "base/numerics/safe_math.h"
20 #include "base/process/process_handle.h"
21 #include "base/third_party/valgrind/memcheck.h"
22 #include "base/threading/worker_pool.h"
23 #include "base/tracking_info.h"
24 #include "build/build_config.h"
25 
26 using base::TimeDelta;
27 
28 namespace base {
29 class TimeDelta;
30 }
31 
32 namespace tracked_objects {
33 
34 namespace {
35 
36 constexpr char kWorkerThreadSanitizedName[] = "WorkerThread-*";
37 
38 // When ThreadData is first initialized, should we start in an ACTIVE state to
39 // record all of the startup-time tasks, or should we start up DEACTIVATED, so
40 // that we only record after parsing the command line flag --enable-tracking.
41 // Note that the flag may force either state, so this really controls only the
42 // period of time up until that flag is parsed.  If there is no flag seen, then
43 // this state may prevail for much or all of the process lifetime.
44 const ThreadData::Status kInitialStartupState = ThreadData::PROFILING_ACTIVE;
45 
46 // Possible states of profiler timing.
47 enum {
48   UNDEFINED_TIMING,
49   ENABLED_TIMING,
50   DISABLED_TIMING,
51 };
52 
53 // Current state of profiler timing.
54 base::subtle::Atomic32 g_profiler_timing_enabled = UNDEFINED_TIMING;
55 
56 // Returns whether profiler timing is enabled.  The default is true, but this
57 // may be overridden by a command-line flag.  Some platforms may
58 // programmatically set this command-line flag to the "off" value if it's not
59 // specified.
60 // This in turn can be overridden by explicitly calling
61 // ThreadData::EnableProfilerTiming, say, based on a field trial.
62 inline bool IsProfilerTimingEnabled() {
63   // Reading |g_profiler_timing_enabled| is done without a barrier because
64   // multiple initialization is not an issue, while a barrier can be relatively
65   // costly given that this method is sometimes called in a tight loop.
66   base::subtle::Atomic32 current_timing_enabled =
67       base::subtle::NoBarrier_Load(&g_profiler_timing_enabled);
68   if (current_timing_enabled == UNDEFINED_TIMING) {
69     if (!base::CommandLine::InitializedForCurrentProcess())
70       return true;
71     current_timing_enabled =
72         (base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
73              switches::kProfilerTiming) ==
74          switches::kProfilerTimingDisabledValue)
75             ? DISABLED_TIMING
76             : ENABLED_TIMING;
77     base::subtle::NoBarrier_Store(&g_profiler_timing_enabled,
78                                   current_timing_enabled);
79   }
80   return current_timing_enabled == ENABLED_TIMING;
81 }
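
// The function above resolves the switch once and caches the answer in an
// atomic so that later calls in tight loops skip the CommandLine lookup.  A
// minimal standalone sketch of that cache-in-an-atomic pattern follows;
// ReadTimingFlagFromCommandLine() is a hypothetical stand-in for the switch
// lookup, not part of this file.  Guarded out so it is not compiled here.
#if 0
#include <atomic>

enum SketchTimingState { SKETCH_UNDEFINED, SKETCH_ENABLED, SKETCH_DISABLED };

// Hypothetical stand-in for parsing --profiler-timing.
bool ReadTimingFlagFromCommandLine() { return true; }

std::atomic<int> g_sketch_timing_state{SKETCH_UNDEFINED};

inline bool SketchTimingEnabled() {
  int state = g_sketch_timing_state.load(std::memory_order_relaxed);
  if (state == SKETCH_UNDEFINED) {
    // Racing first callers may both do the lookup, but they store the same
    // value, so relaxed ordering is good enough (mirroring the code above).
    state = ReadTimingFlagFromCommandLine() ? SKETCH_ENABLED : SKETCH_DISABLED;
    g_sketch_timing_state.store(state, std::memory_order_relaxed);
  }
  return state == SKETCH_ENABLED;
}
#endif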
82 
83 // Sanitize a thread name by replacing a trailing sequence of digits with "*".
84 // Examples:
85 // 1. "BrowserBlockingWorker1/23857" => "BrowserBlockingWorker1/*"
86 // 2. "Chrome_IOThread" => "Chrome_IOThread"
87 std::string SanitizeThreadName(const std::string& thread_name) {
88   size_t i = thread_name.length();
89 
90   while (i > 0 && isdigit(thread_name[i - 1]))
91     --i;
92 
93   if (i == thread_name.length())
94     return thread_name;
95 
96   return thread_name.substr(0, i) + '*';
97 }
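
// Expected behavior of SanitizeThreadName(), written as a standalone sketch
// with assumed example inputs; the assertions restate the examples in the
// comment above.  Guarded out so it is not compiled here.
#if 0
#include <cassert>
#include <cctype>
#include <string>

std::string SanitizeThreadNameSketch(const std::string& thread_name) {
  size_t i = thread_name.length();
  // Walk backwards over the trailing run of digits, if any.
  while (i > 0 && isdigit(static_cast<unsigned char>(thread_name[i - 1])))
    --i;
  if (i == thread_name.length())
    return thread_name;  // No trailing digits; name is returned unchanged.
  return thread_name.substr(0, i) + '*';
}

void SanitizeThreadNameExamples() {
  assert(SanitizeThreadNameSketch("BrowserBlockingWorker1/23857") ==
         "BrowserBlockingWorker1/*");
  assert(SanitizeThreadNameSketch("Chrome_IOThread") == "Chrome_IOThread");
  assert(SanitizeThreadNameSketch("Thread42") == "Thread*");
}
#endif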
98 
99 }  // namespace
100 
101 //------------------------------------------------------------------------------
102 // DeathData tallies durations when a death takes place.
103 
104 DeathData::DeathData()
105     : count_(0),
106       sample_probability_count_(0),
107       run_duration_sum_(0),
108       queue_duration_sum_(0),
109       run_duration_max_(0),
110       queue_duration_max_(0),
111       alloc_ops_(0),
112       free_ops_(0),
113       allocated_bytes_(0),
114       freed_bytes_(0),
115       alloc_overhead_bytes_(0),
116       max_allocated_bytes_(0),
117       run_duration_sample_(0),
118       queue_duration_sample_(0),
119       last_phase_snapshot_(nullptr) {}
120 
121 DeathData::DeathData(const DeathData& other)
122     : count_(other.count_),
123       sample_probability_count_(other.sample_probability_count_),
124       run_duration_sum_(other.run_duration_sum_),
125       queue_duration_sum_(other.queue_duration_sum_),
126       run_duration_max_(other.run_duration_max_),
127       queue_duration_max_(other.queue_duration_max_),
128       alloc_ops_(other.alloc_ops_),
129       free_ops_(other.free_ops_),
130       allocated_bytes_(other.allocated_bytes_),
131       freed_bytes_(other.freed_bytes_),
132       alloc_overhead_bytes_(other.alloc_overhead_bytes_),
133       max_allocated_bytes_(other.max_allocated_bytes_),
134       run_duration_sample_(other.run_duration_sample_),
135       queue_duration_sample_(other.queue_duration_sample_),
136       last_phase_snapshot_(nullptr) {
137   // This constructor will be used by std::map when adding new DeathData values
138   // to the map.  At that point, last_phase_snapshot_ is still NULL, so we don't
139   // need to worry about ownership transfer.
140   DCHECK(other.last_phase_snapshot_ == nullptr);
141 }
142 
143 DeathData::~DeathData() {
144   while (last_phase_snapshot_) {
145     const DeathDataPhaseSnapshot* snapshot = last_phase_snapshot_;
146     last_phase_snapshot_ = snapshot->prev;
147     delete snapshot;
148   }
149 }
150 
151 // TODO(jar): I need to see if this macro to optimize branching is worth using.
152 //
153 // This macro has no branching, so it is surely fast, and is equivalent to:
154 //             if (assign_it)
155 //               target = source;
156 // We use a macro rather than a template to force this to inline.
157 // Related code for calculating max is discussed on the web.
158 #define CONDITIONAL_ASSIGN(assign_it, target, source) \
159   ((target) ^= ((target) ^ (source)) & -static_cast<int32_t>(assign_it))
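
// How the branchless trick works: -static_cast<int32_t>(assign_it) is 0 when
// assign_it is false and all-ones (-1) when it is true, so the masked XOR
// either leaves |target| untouched or flips it all the way to |source|.  A
// standalone worked example, guarded out so it is not compiled here:
#if 0
#include <cassert>
#include <cstdint>

void ConditionalAssignExample() {
  int32_t target = 7;
  const int32_t source = 42;

  // assign_it == false: the mask is 0, so target ^= 0 leaves target at 7.
  target ^= (target ^ source) & -static_cast<int32_t>(false);
  assert(target == 7);

  // assign_it == true: the mask is all ones, so target ^= (target ^ source),
  // which cancels target and leaves source.
  target ^= (target ^ source) & -static_cast<int32_t>(true);
  assert(target == 42);
}
#endif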
160 
161 void DeathData::RecordDurations(const int32_t queue_duration,
162                                 const int32_t run_duration,
163                                 const uint32_t random_number) {
164   // We'll just clamp at INT_MAX, but we should note this in the UI as such.
165   if (count_ < INT_MAX)
166     base::subtle::NoBarrier_Store(&count_, count_ + 1);
167 
168   int sample_probability_count =
169       base::subtle::NoBarrier_Load(&sample_probability_count_);
170   if (sample_probability_count < INT_MAX)
171     ++sample_probability_count;
172   base::subtle::NoBarrier_Store(&sample_probability_count_,
173                                 sample_probability_count);
174 
175   base::subtle::NoBarrier_Store(&queue_duration_sum_,
176                                 queue_duration_sum_ + queue_duration);
177   base::subtle::NoBarrier_Store(&run_duration_sum_,
178                                 run_duration_sum_ + run_duration);
179 
180   if (queue_duration_max() < queue_duration)
181     base::subtle::NoBarrier_Store(&queue_duration_max_, queue_duration);
182   if (run_duration_max() < run_duration)
183     base::subtle::NoBarrier_Store(&run_duration_max_, run_duration);
184 
185   // Take a uniformly distributed sample over all durations ever supplied during
186   // the current profiling phase.
187   // The probability that we (instead) use this new sample is
188   // 1/sample_probability_count_. This results in a completely uniform selection
189   // of the sample (at least when we don't clamp sample_probability_count_...
190   // but that should be inconsequentially likely).  We ignore the fact that we
191   // correlated our selection of a sample to the run and queue times (i.e., we
192   // used them to generate random_number).
193   CHECK_GT(sample_probability_count, 0);
194   if (0 == (random_number % sample_probability_count)) {
195     base::subtle::NoBarrier_Store(&queue_duration_sample_, queue_duration);
196     base::subtle::NoBarrier_Store(&run_duration_sample_, run_duration);
197   }
198 }
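
// The sample selection above is a size-one "reservoir sample": the N-th value
// observed replaces the kept sample with probability 1/N, which leaves every
// value seen during the phase equally likely to be the one retained.  A
// standalone sketch of the same idea using <random> in place of the
// caller-supplied random_number; guarded out so it is not compiled here.
#if 0
#include <cstdint>
#include <random>

class ReservoirOfOne {
 public:
  void Offer(int32_t value) {
    ++count_;
    // Keep the new value with probability 1/count_.
    std::uniform_int_distribution<int> pick(0, count_ - 1);
    if (pick(rng_) == 0)
      sample_ = value;
  }
  int32_t sample() const { return sample_; }

 private:
  int count_ = 0;
  int32_t sample_ = 0;
  std::mt19937 rng_;
};
#endif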
199 
200 void DeathData::RecordAllocations(const uint32_t alloc_ops,
201                                   const uint32_t free_ops,
202                                   const uint32_t allocated_bytes,
203                                   const uint32_t freed_bytes,
204                                   const uint32_t alloc_overhead_bytes,
205                                   const uint32_t max_allocated_bytes) {
206   // Use saturating arithmetic.
207   SaturatingMemberAdd(alloc_ops, &alloc_ops_);
208   SaturatingMemberAdd(free_ops, &free_ops_);
209   SaturatingMemberAdd(allocated_bytes, &allocated_bytes_);
210   SaturatingMemberAdd(freed_bytes, &freed_bytes_);
211   SaturatingMemberAdd(alloc_overhead_bytes, &alloc_overhead_bytes_);
212 
213   int32_t max = base::saturated_cast<int32_t>(max_allocated_bytes);
214   if (max > max_allocated_bytes_)
215     base::subtle::NoBarrier_Store(&max_allocated_bytes_, max);
216 }
217 
218 void DeathData::OnProfilingPhaseCompleted(int profiling_phase) {
219   // Snapshotting and storing current state.
220   last_phase_snapshot_ =
221       new DeathDataPhaseSnapshot(profiling_phase, *this, last_phase_snapshot_);
222 
223   // Not touching fields for which a delta can be computed by comparing with a
224   // snapshot from the previous phase. Resetting other fields.  Sample values
225   // will be reset upon next death recording because sample_probability_count_
226   // is set to 0.
227   // We avoid resetting to 0 in favor of deltas whenever possible.  The reason
228   // is that for incrementable fields, resetting to 0 from the snapshot thread
229   // potentially in parallel with incrementing in the death thread may result in
230   // significant data corruption that has a potential to grow with time.  Not
231   // resetting incrementable fields and using deltas will cause any
232   // off-by-little corruptions to be likely fixed at the next snapshot.
233   // The max values are not incrementable, and cannot be deduced using deltas
234   // for a given phase. Hence, we have to reset them to 0.  But the potential
235   // damage is limited to getting the previous phase's max to apply for the next
236   // phase, and the error doesn't have a potential to keep growing with new
237   // resets.
238   // sample_probability_count_ is incrementable, but must be reset to 0 at the
239   // phase end, so that we start a new uniformly randomized sample selection
240   // after the reset. These fields are updated using atomics. However, race
241   // conditions are possible since these are updated individually and not
242   // together atomically, resulting in the values being mutually inconsistent.
243   // The damage is limited to selecting a wrong sample, which is not something
244   // that can cause accumulating or cascading effects.
245   // If there were no inconsistencies caused by race conditions, we never send a
246   // sample for the previous phase in the next phase's snapshot because
247   // ThreadData::SnapshotExecutedTasks doesn't send deltas with 0 count.
248   base::subtle::NoBarrier_Store(&sample_probability_count_, 0);
249   base::subtle::NoBarrier_Store(&run_duration_max_, 0);
250   base::subtle::NoBarrier_Store(&queue_duration_max_, 0);
251 }
252 
253 void DeathData::SaturatingMemberAdd(const uint32_t addend,
254                                     base::subtle::Atomic32* sum) {
255   // Bail out quickly if there is no work or the sum is already saturated.
256   if (addend == 0U || *sum == INT_MAX)
257     return;
258 
259   base::CheckedNumeric<int32_t> new_sum = *sum;
260   new_sum += addend;
261   base::subtle::NoBarrier_Store(sum, new_sum.ValueOrDefault(INT_MAX));
262 }
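
// base::CheckedNumeric detects the overflow and ValueOrDefault(INT_MAX) turns
// it into a clamp.  The same saturating-add semantics written without base/,
// as a standalone sketch; guarded out so it is not compiled here.
#if 0
#include <climits>
#include <cstdint>

int32_t SaturatingAddSketch(int32_t sum, uint32_t addend) {
  if (addend == 0u || sum == INT_MAX)
    return sum;  // Nothing to add, or already saturated.
  // Do the addition in 64 bits so the intermediate cannot overflow, then clamp.
  int64_t wide = static_cast<int64_t>(sum) + static_cast<int64_t>(addend);
  return wide > INT_MAX ? INT_MAX : static_cast<int32_t>(wide);
}
#endif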
263 
264 //------------------------------------------------------------------------------
265 DeathDataSnapshot::DeathDataSnapshot()
266     : count(-1),
267       run_duration_sum(-1),
268       run_duration_max(-1),
269       run_duration_sample(-1),
270       queue_duration_sum(-1),
271       queue_duration_max(-1),
272       queue_duration_sample(-1),
273       alloc_ops(-1),
274       free_ops(-1),
275       allocated_bytes(-1),
276       freed_bytes(-1),
277       alloc_overhead_bytes(-1),
278       max_allocated_bytes(-1) {}
279 
280 DeathDataSnapshot::DeathDataSnapshot(int count,
281                                      int32_t run_duration_sum,
282                                      int32_t run_duration_max,
283                                      int32_t run_duration_sample,
284                                      int32_t queue_duration_sum,
285                                      int32_t queue_duration_max,
286                                      int32_t queue_duration_sample,
287                                      int32_t alloc_ops,
288                                      int32_t free_ops,
289                                      int32_t allocated_bytes,
290                                      int32_t freed_bytes,
291                                      int32_t alloc_overhead_bytes,
292                                      int32_t max_allocated_bytes)
293     : count(count),
294       run_duration_sum(run_duration_sum),
295       run_duration_max(run_duration_max),
296       run_duration_sample(run_duration_sample),
297       queue_duration_sum(queue_duration_sum),
298       queue_duration_max(queue_duration_max),
299       queue_duration_sample(queue_duration_sample),
300       alloc_ops(alloc_ops),
301       free_ops(free_ops),
302       allocated_bytes(allocated_bytes),
303       freed_bytes(freed_bytes),
304       alloc_overhead_bytes(alloc_overhead_bytes),
305       max_allocated_bytes(max_allocated_bytes) {}
306 
307 DeathDataSnapshot::DeathDataSnapshot(const DeathData& death_data)
308     : count(death_data.count()),
309       run_duration_sum(death_data.run_duration_sum()),
310       run_duration_max(death_data.run_duration_max()),
311       run_duration_sample(death_data.run_duration_sample()),
312       queue_duration_sum(death_data.queue_duration_sum()),
313       queue_duration_max(death_data.queue_duration_max()),
314       queue_duration_sample(death_data.queue_duration_sample()),
315       alloc_ops(death_data.alloc_ops()),
316       free_ops(death_data.free_ops()),
317       allocated_bytes(death_data.allocated_bytes()),
318       freed_bytes(death_data.freed_bytes()),
319       alloc_overhead_bytes(death_data.alloc_overhead_bytes()),
320       max_allocated_bytes(death_data.max_allocated_bytes()) {}
321 
322 DeathDataSnapshot::DeathDataSnapshot(const DeathDataSnapshot& death_data) =
323     default;
324 
325 DeathDataSnapshot::~DeathDataSnapshot() {
326 }
327 
328 DeathDataSnapshot DeathDataSnapshot::Delta(
329     const DeathDataSnapshot& older) const {
330   return DeathDataSnapshot(
331       count - older.count, run_duration_sum - older.run_duration_sum,
332       run_duration_max, run_duration_sample,
333       queue_duration_sum - older.queue_duration_sum, queue_duration_max,
334       queue_duration_sample, alloc_ops - older.alloc_ops,
335       free_ops - older.free_ops, allocated_bytes - older.allocated_bytes,
336       freed_bytes - older.freed_bytes,
337       alloc_overhead_bytes - older.alloc_overhead_bytes, max_allocated_bytes);
338 }
339 
340 //------------------------------------------------------------------------------
341 BirthOnThread::BirthOnThread(const Location& location,
342                              const ThreadData& current)
343     : location_(location),
344       birth_thread_(&current) {
345 }
346 
347 //------------------------------------------------------------------------------
348 BirthOnThreadSnapshot::BirthOnThreadSnapshot() {
349 }
350 
351 BirthOnThreadSnapshot::BirthOnThreadSnapshot(const BirthOnThread& birth)
352     : location(birth.location()),
353       sanitized_thread_name(birth.birth_thread()->sanitized_thread_name()) {}
354 
355 BirthOnThreadSnapshot::~BirthOnThreadSnapshot() {
356 }
357 
358 //------------------------------------------------------------------------------
359 Births::Births(const Location& location, const ThreadData& current)
360     : BirthOnThread(location, current),
361       birth_count_(1) { }
362 
363 int Births::birth_count() const { return birth_count_; }
364 
365 void Births::RecordBirth() { ++birth_count_; }
366 
367 //------------------------------------------------------------------------------
368 // ThreadData maintains the central data for all births and deaths on a single
369 // thread.
370 
371 // TODO(jar): We should pull all these static vars together, into a struct, and
372 // optimize layout so that we benefit from locality of reference during accesses
373 // to them.
374 
375 // static
376 ThreadData::NowFunction* ThreadData::now_function_for_testing_ = NULL;
377 
378 // A TLS slot which points to the ThreadData instance for the current thread.
379 // We do a fake initialization here (zeroing out data), and then the real
380 // in-place construction happens when we call tls_index_.Initialize().
381 // static
382 base::ThreadLocalStorage::StaticSlot ThreadData::tls_index_ = TLS_INITIALIZER;
383 
384 // static
385 int ThreadData::cleanup_count_ = 0;
386 
387 // static
388 int ThreadData::incarnation_counter_ = 0;
389 
390 // static
391 ThreadData* ThreadData::all_thread_data_list_head_ = NULL;
392 
393 // static
394 ThreadData* ThreadData::first_retired_thread_data_ = NULL;
395 
396 // static
397 base::LazyInstance<base::Lock>::Leaky
398     ThreadData::list_lock_ = LAZY_INSTANCE_INITIALIZER;
399 
400 // static
401 base::subtle::Atomic32 ThreadData::status_ = ThreadData::UNINITIALIZED;
402 
403 ThreadData::ThreadData(const std::string& sanitized_thread_name)
404     : next_(NULL),
405       next_retired_thread_data_(NULL),
406       sanitized_thread_name_(sanitized_thread_name),
407       incarnation_count_for_pool_(-1),
408       current_stopwatch_(NULL) {
409   DCHECK(sanitized_thread_name_.empty() ||
410          !isdigit(sanitized_thread_name_.back()));
411   PushToHeadOfList();  // Which sets real incarnation_count_for_pool_.
412 }
413 
414 ThreadData::~ThreadData() {
415 }
416 
417 void ThreadData::PushToHeadOfList() {
418   // Toss in a hint of randomness (atop the uninitialized value).
419   (void)VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(&random_number_,
420                                                  sizeof(random_number_));
421   MSAN_UNPOISON(&random_number_, sizeof(random_number_));
422   random_number_ += static_cast<uint32_t>(this - static_cast<ThreadData*>(0));
423   random_number_ ^= (Now() - TrackedTime()).InMilliseconds();
424 
425   DCHECK(!next_);
426   base::AutoLock lock(*list_lock_.Pointer());
427   incarnation_count_for_pool_ = incarnation_counter_;
428   next_ = all_thread_data_list_head_;
429   all_thread_data_list_head_ = this;
430 }
431 
432 // static
433 ThreadData* ThreadData::first() {
434   base::AutoLock lock(*list_lock_.Pointer());
435   return all_thread_data_list_head_;
436 }
437 
438 ThreadData* ThreadData::next() const { return next_; }
439 
440 // static
441 void ThreadData::InitializeThreadContext(const std::string& thread_name) {
442   if (base::WorkerPool::RunsTasksOnCurrentThread())
443     return;
444   DCHECK_NE(thread_name, kWorkerThreadSanitizedName);
445   EnsureTlsInitialization();
446   ThreadData* current_thread_data =
447       reinterpret_cast<ThreadData*>(tls_index_.Get());
448   if (current_thread_data)
449     return;  // Browser tests instigate this.
450   current_thread_data =
451       GetRetiredOrCreateThreadData(SanitizeThreadName(thread_name));
452   tls_index_.Set(current_thread_data);
453 }
454 
455 // static
456 ThreadData* ThreadData::Get() {
457   if (!tls_index_.initialized())
458     return NULL;  // For unittests only.
459   ThreadData* registered = reinterpret_cast<ThreadData*>(tls_index_.Get());
460   if (registered)
461     return registered;
462 
463   // We must be a worker thread, since we didn't pre-register.
464   ThreadData* worker_thread_data =
465       GetRetiredOrCreateThreadData(kWorkerThreadSanitizedName);
466   tls_index_.Set(worker_thread_data);
467   return worker_thread_data;
468 }
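
// Get() is a get-or-create pattern on top of TLS: threads that called
// InitializeThreadContext() already have a ThreadData in the slot, and any
// other thread is lazily treated as a worker and registered under the shared
// sanitized worker name.  A simplified sketch of the same pattern using C++
// thread_local (this file uses base TLS instead so it can install the
// OnThreadTermination callback); guarded out so it is not compiled here.
#if 0
#include <string>

struct PerThreadRecord {
  explicit PerThreadRecord(const std::string& name) : thread_name(name) {}
  std::string thread_name;
};

PerThreadRecord* GetOrCreateRecord(const std::string& fallback_name) {
  thread_local PerThreadRecord* record = nullptr;
  if (!record)
    record = new PerThreadRecord(fallback_name);  // Intentionally leaked.
  return record;
}
#endif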
469 
470 // static
471 void ThreadData::OnThreadTermination(void* thread_data) {
472   DCHECK(thread_data);  // TLS should *never* call us with a NULL.
473   // We must NOT do any allocations during this callback.  There is a chance
474   // that the allocator is no longer active on this thread.
475   reinterpret_cast<ThreadData*>(thread_data)->OnThreadTerminationCleanup();
476 }
477 
478 void ThreadData::OnThreadTerminationCleanup() {
479   // We must NOT do any allocations during this callback. There is a chance that
480   // the allocator is no longer active on this thread.
481 
482   // The list_lock_ was created when we registered the callback, so it won't be
483   // allocated here despite the lazy reference.
484   base::AutoLock lock(*list_lock_.Pointer());
485   if (incarnation_counter_ != incarnation_count_for_pool_)
486     return;  // ThreadData was constructed in an earlier unit test.
487   ++cleanup_count_;
488 
489   // Add this ThreadData to a retired list so that it can be reused by a thread
490   // with the same sanitized name in the future.
491   // |next_retired_thread_data_| is expected to be nullptr for a ThreadData
492   // associated with an active thread.
493   DCHECK(!next_retired_thread_data_);
494   next_retired_thread_data_ = first_retired_thread_data_;
495   first_retired_thread_data_ = this;
496 }
497 
498 // static
499 void ThreadData::Snapshot(int current_profiling_phase,
500                           ProcessDataSnapshot* process_data_snapshot) {
501   // Get an unchanging copy of a ThreadData list.
502   ThreadData* my_list = ThreadData::first();
503 
504   // Gather data serially.
505   // This hackish approach *can* get some slightly corrupt tallies, as we are
506   // grabbing values without the protection of a lock, but it has the advantage
507   // of working even with threads that don't have message loops.  If a user
508   // sees any strangeness, they can always just run their stats gathering a
509   // second time.
510   BirthCountMap birth_counts;
511   for (ThreadData* thread_data = my_list; thread_data;
512        thread_data = thread_data->next()) {
513     thread_data->SnapshotExecutedTasks(current_profiling_phase,
514                                        &process_data_snapshot->phased_snapshots,
515                                        &birth_counts);
516   }
517 
518   // Add births that are still active -- i.e. objects that have tallied a birth,
519   // but have not yet tallied a matching death, and hence must be either
520   // running, queued up, or being held in limbo for future posting.
521   auto* current_phase_tasks =
522       &process_data_snapshot->phased_snapshots[current_profiling_phase].tasks;
523   for (const auto& birth_count : birth_counts) {
524     if (birth_count.second > 0) {
525       current_phase_tasks->push_back(
526           TaskSnapshot(BirthOnThreadSnapshot(*birth_count.first),
527                        DeathDataSnapshot(birth_count.second, 0, 0, 0, 0, 0, 0,
528                                          0, 0, 0, 0, 0, 0),
529                        "Still_Alive"));
530     }
531   }
532 }
533 
534 // static
535 void ThreadData::OnProfilingPhaseCompleted(int profiling_phase) {
536   // Get an unchanging copy of a ThreadData list.
537   ThreadData* my_list = ThreadData::first();
538 
539   // Add snapshots for all instances of death data in all threads serially.
540   // This hackish approach *can* get some slightly corrupt tallies, as we are
541   // grabbing values without the protection of a lock, but it has the advantage
542   // of working even with threads that don't have message loops.  Any corruption
543   // shouldn't cause "cascading damage" to anything else (in later phases).
544   for (ThreadData* thread_data = my_list; thread_data;
545        thread_data = thread_data->next()) {
546     thread_data->OnProfilingPhaseCompletedOnThread(profiling_phase);
547   }
548 }
549 
550 Births* ThreadData::TallyABirth(const Location& location) {
551   BirthMap::iterator it = birth_map_.find(location);
552   Births* child;
553   if (it != birth_map_.end()) {
554     child = it->second;
555     child->RecordBirth();
556   } else {
557     child = new Births(location, *this);  // Leak this.
558     // Lock since the map may get relocated now, and other threads sometimes
559     // snapshot it (but they lock before copying it).
560     base::AutoLock lock(map_lock_);
561     birth_map_[location] = child;
562   }
563 
564   return child;
565 }
566 
567 void ThreadData::TallyADeath(const Births& births,
568                              int32_t queue_duration,
569                              const TaskStopwatch& stopwatch) {
570   int32_t run_duration = stopwatch.RunDurationMs();
571 
572   // Stir in some randomness, plus add a constant in case durations are zero.
573   const uint32_t kSomePrimeNumber = 2147483647;
574   random_number_ += queue_duration + run_duration + kSomePrimeNumber;
575   // An address is going to have some randomness to it as well ;-).
576   random_number_ ^=
577       static_cast<uint32_t>(&births - reinterpret_cast<Births*>(0));
578 
579   DeathMap::iterator it = death_map_.find(&births);
580   DeathData* death_data;
581   if (it != death_map_.end()) {
582     death_data = &it->second;
583   } else {
584     base::AutoLock lock(map_lock_);  // Lock as the map may get relocated now.
585     death_data = &death_map_[&births];
586   }  // Release lock ASAP.
587   death_data->RecordDurations(queue_duration, run_duration, random_number_);
588 
589 #if BUILDFLAG(ENABLE_MEMORY_TASK_PROFILER)
590   if (stopwatch.heap_tracking_enabled()) {
591     base::debug::ThreadHeapUsage heap_usage = stopwatch.heap_usage().usage();
592     // Saturate the 64 bit counts on conversion to 32 bit storage.
593     death_data->RecordAllocations(
594         base::saturated_cast<int32_t>(heap_usage.alloc_ops),
595         base::saturated_cast<int32_t>(heap_usage.free_ops),
596         base::saturated_cast<int32_t>(heap_usage.alloc_bytes),
597         base::saturated_cast<int32_t>(heap_usage.free_bytes),
598         base::saturated_cast<int32_t>(heap_usage.alloc_overhead_bytes),
599         base::saturated_cast<int32_t>(heap_usage.max_allocated_bytes));
600   }
601 #endif
602 }
603 
604 // static
605 Births* ThreadData::TallyABirthIfActive(const Location& location) {
606   if (!TrackingStatus())
607     return NULL;
608   ThreadData* current_thread_data = Get();
609   if (!current_thread_data)
610     return NULL;
611   return current_thread_data->TallyABirth(location);
612 }
613 
614 // static
615 void ThreadData::TallyRunOnNamedThreadIfTracking(
616     const base::TrackingInfo& completed_task,
617     const TaskStopwatch& stopwatch) {
618   // Even if we have been DEACTIVATED, we will process any pending births so
619   // that our data structures (which counted the outstanding births) remain
620   // consistent.
621   const Births* births = completed_task.birth_tally;
622   if (!births)
623     return;
624   ThreadData* current_thread_data = stopwatch.GetThreadData();
625   if (!current_thread_data)
626     return;
627 
628   // Watch out for a race where status_ is changing, and hence one or both
629   // of start_of_run or end_of_run is zero.  In that case, we didn't bother to
630   // get a time value since we "weren't tracking" and we were trying to be
631   // efficient by not calling for a genuine time value.  For simplicity, we'll
632   // use a default zero duration when we can't calculate a true value.
633   TrackedTime start_of_run = stopwatch.StartTime();
634   int32_t queue_duration = 0;
635   if (!start_of_run.is_null()) {
636     queue_duration = (start_of_run - completed_task.EffectiveTimePosted())
637         .InMilliseconds();
638   }
639   current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
640 }
641 
642 // static
643 void ThreadData::TallyRunOnWorkerThreadIfTracking(
644     const Births* births,
645     const TrackedTime& time_posted,
646     const TaskStopwatch& stopwatch) {
647   // Even if we have been DEACTIVATED, we will process any pending births so
648   // that our data structures (which counted the outstanding births) remain
649   // consistent.
650   if (!births)
651     return;
652 
653   // TODO(jar): Support the option to coalesce all worker-thread activity under
654   // one ThreadData instance that uses locks to protect *all* access.  This will
655   // reduce memory (making it provably bounded), but run incrementally slower
656   // (since we'll use locks on TallyABirth and TallyADeath).  The good news is
657   // that the locks on TallyADeath will be *after* the worker thread has run,
658   // and hence nothing will be waiting for the completion (...  besides some
659   // other thread that might like to run).  Also, the worker threads' tasks are
660   // generally longer, and hence the cost of the lock may perchance be amortized
661   // over the long task's lifetime.
662   ThreadData* current_thread_data = stopwatch.GetThreadData();
663   if (!current_thread_data)
664     return;
665 
666   TrackedTime start_of_run = stopwatch.StartTime();
667   int32_t queue_duration = 0;
668   if (!start_of_run.is_null()) {
669     queue_duration = (start_of_run - time_posted).InMilliseconds();
670   }
671   current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
672 }
673 
674 // static
675 void ThreadData::TallyRunInAScopedRegionIfTracking(
676     const Births* births,
677     const TaskStopwatch& stopwatch) {
678   // Even if we have been DEACTIVATED, we will process any pending births so
679   // that our data structures (which counted the outstanding births) remain
680   // consistent.
681   if (!births)
682     return;
683 
684   ThreadData* current_thread_data = stopwatch.GetThreadData();
685   if (!current_thread_data)
686     return;
687 
688   int32_t queue_duration = 0;
689   current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
690 }
691 
692 void ThreadData::SnapshotExecutedTasks(
693     int current_profiling_phase,
694     PhasedProcessDataSnapshotMap* phased_snapshots,
695     BirthCountMap* birth_counts) {
696   // Get a copy of the data, so that it will not change during the iterations
697   // and processing.
698   BirthMap birth_map;
699   DeathsSnapshot deaths;
700   SnapshotMaps(current_profiling_phase, &birth_map, &deaths);
701 
702   for (const auto& birth : birth_map) {
703     (*birth_counts)[birth.second] += birth.second->birth_count();
704   }
705 
706   for (const auto& death : deaths) {
707     (*birth_counts)[death.first] -= death.first->birth_count();
708 
709     // For the current death data, walk through all its snapshots, starting from
710     // the current one, then from the previous profiling phase etc., and for
711     // each snapshot calculate the delta between the snapshot and the previous
712     // phase, if any.  Store the deltas in the result.
713     for (const DeathDataPhaseSnapshot* phase = &death.second; phase;
714          phase = phase->prev) {
715       const DeathDataSnapshot& death_data =
716           phase->prev ? phase->death_data.Delta(phase->prev->death_data)
717                       : phase->death_data;
718 
719       if (death_data.count > 0) {
720         (*phased_snapshots)[phase->profiling_phase].tasks.push_back(
721             TaskSnapshot(BirthOnThreadSnapshot(*death.first), death_data,
722                          sanitized_thread_name()));
723       }
724     }
725   }
726 }
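
// The walk above turns cumulative tallies into per-phase figures by chaining
// Delta() calls: each phase reports its snapshot minus the previous phase's
// snapshot, so cumulative counts of 10, 25 and 40 at three phase boundaries
// become per-phase counts of 10, 15 and 15.  A standalone sketch of that
// chaining over a simplified snapshot list; guarded out so it is not compiled
// here.
#if 0
#include <vector>

struct PhaseCount {
  int profiling_phase;
  int cumulative_count;  // Monotonically growing tally at the phase boundary.
};

// Returns per-phase counts, newest phase first, mirroring the walk from the
// current snapshot back through the |prev| pointers.
std::vector<int> PerPhaseCounts(const std::vector<PhaseCount>& snapshots) {
  std::vector<int> deltas;
  for (int i = static_cast<int>(snapshots.size()) - 1; i >= 0; --i) {
    int previous = (i > 0) ? snapshots[i - 1].cumulative_count : 0;
    deltas.push_back(snapshots[i].cumulative_count - previous);
  }
  return deltas;  // {10, 25, 40} yields {15, 15, 10}.
}
#endif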
727 
728 // This may be called from another thread.
729 void ThreadData::SnapshotMaps(int profiling_phase,
730                               BirthMap* birth_map,
731                               DeathsSnapshot* deaths) {
732   base::AutoLock lock(map_lock_);
733 
734   for (const auto& birth : birth_map_)
735     (*birth_map)[birth.first] = birth.second;
736 
737   for (const auto& death : death_map_) {
738     deaths->push_back(std::make_pair(
739         death.first,
740         DeathDataPhaseSnapshot(profiling_phase, death.second,
741                                death.second.last_phase_snapshot())));
742   }
743 }
744 
745 void ThreadData::OnProfilingPhaseCompletedOnThread(int profiling_phase) {
746   base::AutoLock lock(map_lock_);
747 
748   for (auto& death : death_map_) {
749     death.second.OnProfilingPhaseCompleted(profiling_phase);
750   }
751 }
752 
753 void ThreadData::EnsureTlsInitialization() {
754   if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
755     return;  // Someone else did the initialization.
756   // Due to racy lazy initialization in tests, we'll need to recheck status_
757   // after we acquire the lock.
758 
759   // Ensure that we don't double initialize tls.  We are called when single
760   // threaded in the product, but some tests may be racy and lazy about our
761   // initialization.
762   base::AutoLock lock(*list_lock_.Pointer());
763   if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
764     return;  // Someone raced in here and beat us.
765 
766   // Perform the "real" TLS initialization now, and leave it intact through
767   // process termination.
768   if (!tls_index_.initialized()) {  // Testing may have initialized this.
769     DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), UNINITIALIZED);
770     tls_index_.Initialize(&ThreadData::OnThreadTermination);
771     DCHECK(tls_index_.initialized());
772   } else {
773     // TLS was initialized for us earlier.
774     DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), DORMANT_DURING_TESTS);
775   }
776 
777   // Incarnation counter is only significant to testing, as it otherwise will
778   // never again change in this process.
779   ++incarnation_counter_;
780 
781   // The lock is not critical for setting status_, but it doesn't hurt.  It also
782   // ensures that if we have a racy initialization, that we'll bail as soon as
783   // we get the lock earlier in this method.
784   base::subtle::Release_Store(&status_, kInitialStartupState);
785   DCHECK(base::subtle::NoBarrier_Load(&status_) != UNINITIALIZED);
786 }
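
// EnsureTlsInitialization() is a double-checked initialization: a cheap
// acquire-load filters out the common already-initialized case, the lock
// serializes racing first calls, and the state is rechecked under the lock.
// A minimal standalone sketch of the pattern with std::atomic and std::mutex;
// guarded out so it is not compiled here.
#if 0
#include <atomic>
#include <mutex>

std::atomic<bool> g_sketch_initialized{false};
std::mutex g_sketch_init_lock;

// Hypothetical one-time setup work.
void DoOneTimeSetup() {}

void EnsureInitializedSketch() {
  if (g_sketch_initialized.load(std::memory_order_acquire))
    return;  // Fast path: somebody already finished initializing.
  std::lock_guard<std::mutex> lock(g_sketch_init_lock);
  if (g_sketch_initialized.load(std::memory_order_relaxed))
    return;  // A racing caller beat us while we waited for the lock.
  DoOneTimeSetup();
  g_sketch_initialized.store(true, std::memory_order_release);
}
#endif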
787 
788 // static
789 void ThreadData::InitializeAndSetTrackingStatus(Status status) {
790   DCHECK_GE(status, DEACTIVATED);
791   DCHECK_LE(status, PROFILING_ACTIVE);
792 
793   EnsureTlsInitialization();  // No-op if already initialized.
794 
795   if (status > DEACTIVATED)
796     status = PROFILING_ACTIVE;
797 
798   base::subtle::Release_Store(&status_, status);
799 }
800 
801 // static
802 ThreadData::Status ThreadData::status() {
803   return static_cast<ThreadData::Status>(base::subtle::Acquire_Load(&status_));
804 }
805 
806 // static
807 bool ThreadData::TrackingStatus() {
808   return base::subtle::Acquire_Load(&status_) > DEACTIVATED;
809 }
810 
811 // static
812 void ThreadData::EnableProfilerTiming() {
813   base::subtle::NoBarrier_Store(&g_profiler_timing_enabled, ENABLED_TIMING);
814 }
815 
816 // static
817 TrackedTime ThreadData::Now() {
818   if (now_function_for_testing_)
819     return TrackedTime::FromMilliseconds((*now_function_for_testing_)());
820   if (IsProfilerTimingEnabled() && TrackingStatus())
821     return TrackedTime::Now();
822   return TrackedTime();  // Super fast when disabled, or not compiled.
823 }
824 
825 // static
826 void ThreadData::EnsureCleanupWasCalled(int major_threads_shutdown_count) {
827   base::AutoLock lock(*list_lock_.Pointer());
828 
829   // TODO(jar): until this is working on XP, don't run the real test.
830 #if 0
831   // Verify that we've at least shut down/cleaned up the major named threads.  The
832   // caller should tell us how many thread shutdowns should have taken place by
833   // now.
834   CHECK_GT(cleanup_count_, major_threads_shutdown_count);
835 #endif
836 }
837 
838 // static
839 void ThreadData::ShutdownSingleThreadedCleanup(bool leak) {
840   // This is only called from test code, where we need to clean up so that
841   // additional tests can be run.
842   // We must be single threaded... but be careful anyway.
843   InitializeAndSetTrackingStatus(DEACTIVATED);
844 
845   ThreadData* thread_data_list;
846   {
847     base::AutoLock lock(*list_lock_.Pointer());
848     thread_data_list = all_thread_data_list_head_;
849     all_thread_data_list_head_ = NULL;
850     ++incarnation_counter_;
851     // To be clean, break apart the retired worker list (though we leak them).
852     while (first_retired_thread_data_) {
853       ThreadData* thread_data = first_retired_thread_data_;
854       first_retired_thread_data_ = thread_data->next_retired_thread_data_;
855       thread_data->next_retired_thread_data_ = nullptr;
856     }
857   }
858 
859   // Put most global statics back in pristine shape.
860   cleanup_count_ = 0;
861   tls_index_.Set(NULL);
862   // Almost UNINITIALIZED.
863   base::subtle::Release_Store(&status_, DORMANT_DURING_TESTS);
864 
865   // To avoid any chance of racing in unit tests, which is the only place we
866   // call this function, we may sometimes leak all the data structures we
867   // recovered, as they may still be in use on threads from prior tests!
868   if (leak) {
869     ThreadData* thread_data = thread_data_list;
870     while (thread_data) {
871       ANNOTATE_LEAKING_OBJECT_PTR(thread_data);
872       thread_data = thread_data->next();
873     }
874     return;
875   }
876 
877   // When we want to clean up (on a single thread), here is what we do.
878 
879   // Do actual recursive delete in all ThreadData instances.
880   while (thread_data_list) {
881     ThreadData* next_thread_data = thread_data_list;
882     thread_data_list = thread_data_list->next();
883 
884     for (BirthMap::iterator it = next_thread_data->birth_map_.begin();
885          next_thread_data->birth_map_.end() != it; ++it)
886       delete it->second;  // Delete the Birth Records.
887     delete next_thread_data;  // Includes all Death Records.
888   }
889 }
890 
891 // static
892 ThreadData* ThreadData::GetRetiredOrCreateThreadData(
893     const std::string& sanitized_thread_name) {
894   SCOPED_UMA_HISTOGRAM_TIMER("TrackedObjects.GetRetiredOrCreateThreadData");
895 
896   {
897     base::AutoLock lock(*list_lock_.Pointer());
898     ThreadData** pcursor = &first_retired_thread_data_;
899     ThreadData* cursor = first_retired_thread_data_;
900 
901     // Assuming that there aren't more than a few tens of retired ThreadData
902     // instances, this lookup should be quick compared to the thread creation
903     // time. Retired ThreadData instances cannot be stored in a map because
904     // insertions are done from OnThreadTerminationCleanup() where allocations
905     // are not allowed.
906     //
907     // Note: Test processes may have more than a few tens of retired ThreadData
908     // instances.
909     while (cursor) {
910       if (cursor->sanitized_thread_name() == sanitized_thread_name) {
911         DCHECK_EQ(*pcursor, cursor);
912         *pcursor = cursor->next_retired_thread_data_;
913         cursor->next_retired_thread_data_ = nullptr;
914         return cursor;
915       }
916       pcursor = &cursor->next_retired_thread_data_;
917       cursor = cursor->next_retired_thread_data_;
918     }
919   }
920 
921   return new ThreadData(sanitized_thread_name);
922 }
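
// The retired list is an intrusive singly linked list: termination pushes a
// ThreadData without allocating, and the scan above unlinks a name match with
// a pointer-to-pointer cursor.  A standalone sketch of that unlink idiom on a
// simplified node type; guarded out so it is not compiled here.
#if 0
#include <string>

struct RetiredNode {
  std::string name;
  RetiredNode* next = nullptr;
};

// Removes and returns the first node whose name matches, or nullptr.
RetiredNode* UnlinkByName(RetiredNode** head, const std::string& name) {
  for (RetiredNode** cursor = head; *cursor; cursor = &(*cursor)->next) {
    if ((*cursor)->name == name) {
      RetiredNode* found = *cursor;
      *cursor = found->next;  // Splice the node out of the list.
      found->next = nullptr;
      return found;
    }
  }
  return nullptr;
}
#endif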
923 
924 //------------------------------------------------------------------------------
925 TaskStopwatch::TaskStopwatch()
926     : wallclock_duration_ms_(0),
927       current_thread_data_(NULL),
928       excluded_duration_ms_(0),
929       parent_(NULL) {
930 #if DCHECK_IS_ON()
931   state_ = CREATED;
932   child_ = NULL;
933 #endif
934 #if BUILDFLAG(ENABLE_MEMORY_TASK_PROFILER)
935   heap_tracking_enabled_ =
936       base::debug::ThreadHeapUsageTracker::IsHeapTrackingEnabled();
937 #endif
938 }
939 
940 TaskStopwatch::~TaskStopwatch() {
941 #if DCHECK_IS_ON()
942   DCHECK(state_ != RUNNING);
943   DCHECK(child_ == NULL);
944 #endif
945 }
946 
947 void TaskStopwatch::Start() {
948 #if DCHECK_IS_ON()
949   DCHECK(state_ == CREATED);
950   state_ = RUNNING;
951 #endif
952 
953   start_time_ = ThreadData::Now();
954 #if BUILDFLAG(ENABLE_MEMORY_TASK_PROFILER)
955   if (heap_tracking_enabled_)
956     heap_usage_.Start();
957 #endif
958 
959   current_thread_data_ = ThreadData::Get();
960   if (!current_thread_data_)
961     return;
962 
963   parent_ = current_thread_data_->current_stopwatch_;
964 #if DCHECK_IS_ON()
965   if (parent_) {
966     DCHECK(parent_->state_ == RUNNING);
967     DCHECK(parent_->child_ == NULL);
968     parent_->child_ = this;
969   }
970 #endif
971   current_thread_data_->current_stopwatch_ = this;
972 }
973 
974 void TaskStopwatch::Stop() {
975   const TrackedTime end_time = ThreadData::Now();
976 #if DCHECK_IS_ON()
977   DCHECK(state_ == RUNNING);
978   state_ = STOPPED;
979   DCHECK(child_ == NULL);
980 #endif
981 #if BUILDFLAG(ENABLE_MEMORY_TASK_PROFILER)
982   if (heap_tracking_enabled_)
983     heap_usage_.Stop(true);
984 #endif
985 
986   if (!start_time_.is_null() && !end_time.is_null()) {
987     wallclock_duration_ms_ = (end_time - start_time_).InMilliseconds();
988   }
989 
990   if (!current_thread_data_)
991     return;
992 
993   DCHECK(current_thread_data_->current_stopwatch_ == this);
994   current_thread_data_->current_stopwatch_ = parent_;
995   if (!parent_)
996     return;
997 
998 #if DCHECK_IS_ON()
999   DCHECK(parent_->state_ == RUNNING);
1000   DCHECK(parent_->child_ == this);
1001   parent_->child_ = NULL;
1002 #endif
1003   parent_->excluded_duration_ms_ += wallclock_duration_ms_;
1004   parent_ = NULL;
1005 }
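
// Nested stopwatches subtract child time from the parent: when a child stops,
// its wallclock duration is added to the parent's excluded time, so the
// parent's RunDurationMs() reports exclusive ("self") time.  For example, a
// task spanning 50 ms of wallclock with a nested 20 ms task inside reports
// 50 - 20 = 30 ms.  A standalone sketch of that bookkeeping; guarded out so
// it is not compiled here.
#if 0
#include <cstdint>

struct StopwatchFrame {
  int32_t wallclock_ms = 0;
  int32_t excluded_ms = 0;
  StopwatchFrame* parent = nullptr;

  void StopWith(int32_t measured_wallclock_ms) {
    wallclock_ms = measured_wallclock_ms;
    if (parent)
      parent->excluded_ms += wallclock_ms;  // Charge the child to the parent.
  }
  int32_t ExclusiveMs() const { return wallclock_ms - excluded_ms; }
};

void NestedStopwatchExample() {
  StopwatchFrame outer, inner;
  inner.parent = &outer;
  inner.StopWith(20);  // Inner task took 20 ms of wallclock.
  outer.StopWith(50);  // Outer task spanned 50 ms of wallclock in total.
  // outer.ExclusiveMs() == 30; inner.ExclusiveMs() == 20.
}
#endif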
1006 
1007 TrackedTime TaskStopwatch::StartTime() const {
1008 #if DCHECK_IS_ON()
1009   DCHECK(state_ != CREATED);
1010 #endif
1011 
1012   return start_time_;
1013 }
1014 
1015 int32_t TaskStopwatch::RunDurationMs() const {
1016 #if DCHECK_IS_ON()
1017   DCHECK(state_ == STOPPED);
1018 #endif
1019 
1020   return wallclock_duration_ms_ - excluded_duration_ms_;
1021 }
1022 
1023 ThreadData* TaskStopwatch::GetThreadData() const {
1024 #if DCHECK_IS_ON()
1025   DCHECK(state_ != CREATED);
1026 #endif
1027 
1028   return current_thread_data_;
1029 }
1030 
1031 //------------------------------------------------------------------------------
1032 // DeathDataPhaseSnapshot
1033 
1034 DeathDataPhaseSnapshot::DeathDataPhaseSnapshot(
1035     int profiling_phase,
1036     const DeathData& death,
1037     const DeathDataPhaseSnapshot* prev)
1038     : profiling_phase(profiling_phase), death_data(death), prev(prev) {}
1039 
1040 //------------------------------------------------------------------------------
1041 // TaskSnapshot
1042 
1043 TaskSnapshot::TaskSnapshot() {
1044 }
1045 
1046 TaskSnapshot::TaskSnapshot(const BirthOnThreadSnapshot& birth,
1047                            const DeathDataSnapshot& death_data,
1048                            const std::string& death_sanitized_thread_name)
1049     : birth(birth),
1050       death_data(death_data),
1051       death_sanitized_thread_name(death_sanitized_thread_name) {}
1052 
1053 TaskSnapshot::~TaskSnapshot() {
1054 }
1055 
1056 //------------------------------------------------------------------------------
1057 // ProcessDataPhaseSnapshot
1058 
1059 ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot() {
1060 }
1061 
1062 ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot(
1063     const ProcessDataPhaseSnapshot& other) = default;
1064 
1065 ProcessDataPhaseSnapshot::~ProcessDataPhaseSnapshot() {
1066 }
1067 
1068 //------------------------------------------------------------------------------
1069 // ProcessDataSnapshot
1070 
1071 ProcessDataSnapshot::ProcessDataSnapshot()
1072 #if !defined(OS_NACL)
1073     : process_id(base::GetCurrentProcId()) {
1074 #else
1075     : process_id(base::kNullProcessId) {
1076 #endif
1077 }
1078 
1079 ProcessDataSnapshot::ProcessDataSnapshot(const ProcessDataSnapshot& other) =
1080     default;
1081 
1082 ProcessDataSnapshot::~ProcessDataSnapshot() {
1083 }
1084 
1085 }  // namespace tracked_objects
1086