// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/tracked_objects.h"

#include <ctype.h>
#include <limits.h>
#include <stdlib.h>

#include "base/atomicops.h"
#include "base/base_switches.h"
#include "base/command_line.h"
#include "base/compiler_specific.h"
#include "base/debug/leak_annotations.h"
#include "base/logging.h"
#include "base/metrics/histogram_macros.h"
#include "base/numerics/safe_conversions.h"
#include "base/numerics/safe_math.h"
#include "base/process/process_handle.h"
#include "base/third_party/valgrind/memcheck.h"
#include "base/threading/worker_pool.h"
#include "base/tracking_info.h"
#include "build/build_config.h"

using base::TimeDelta;

namespace base {
class TimeDelta;
}

namespace tracked_objects {

namespace {

constexpr char kWorkerThreadSanitizedName[] = "WorkerThread-*";

// When ThreadData is first initialized, should we start in an ACTIVE state to
// record all of the startup-time tasks, or should we start up DEACTIVATED, so
// that we only record after parsing the command line flag --enable-tracking.
// Note that the flag may force either state, so this really controls only the
// period of time up until that flag is parsed. If there is no flag seen, then
// this state may prevail for much or all of the process lifetime.
const ThreadData::Status kInitialStartupState = ThreadData::PROFILING_ACTIVE;

// Possible states of the profiler timing enabledness.
enum {
  UNDEFINED_TIMING,
  ENABLED_TIMING,
  DISABLED_TIMING,
};

// State of the profiler timing enabledness.
base::subtle::Atomic32 g_profiler_timing_enabled = UNDEFINED_TIMING;

// Returns whether profiler timing is enabled. The default is true, but this
// may be overridden by a command-line flag. Some platforms may
// programmatically set this command-line flag to the "off" value if it's not
// specified.
// This in turn can be overridden by explicitly calling
// ThreadData::EnableProfilerTiming, say, based on a field trial.
inline bool IsProfilerTimingEnabled() {
  // Reading |g_profiler_timing_enabled| is done without barrier because
  // multiple initialization is not an issue while the barrier can be relatively
  // costly given that this method is sometimes called in a tight loop.
  base::subtle::Atomic32 current_timing_enabled =
      base::subtle::NoBarrier_Load(&g_profiler_timing_enabled);
  if (current_timing_enabled == UNDEFINED_TIMING) {
    if (!base::CommandLine::InitializedForCurrentProcess())
      return true;
    current_timing_enabled =
        (base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
             switches::kProfilerTiming) ==
         switches::kProfilerTimingDisabledValue)
            ? DISABLED_TIMING
            : ENABLED_TIMING;
    base::subtle::NoBarrier_Store(&g_profiler_timing_enabled,
                                  current_timing_enabled);
  }
  return current_timing_enabled == ENABLED_TIMING;
}

// Sanitizes a thread name by replacing a trailing sequence of digits with "*".
// Examples:
// 1. "BrowserBlockingWorker1/23857" => "BrowserBlockingWorker1/*"
// 2. "Chrome_IOThread" => "Chrome_IOThread"
std::string SanitizeThreadName(const std::string& thread_name) {
  size_t i = thread_name.length();

  while (i > 0 && isdigit(thread_name[i - 1]))
    --i;

  if (i == thread_name.length())
    return thread_name;

  return thread_name.substr(0, i) + '*';
}
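
// Note that a name consisting entirely of digits sanitizes to just "*"; the
// ThreadData constructor DCHECKs that a sanitized name never ends in a digit.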

}  // namespace

//------------------------------------------------------------------------------
// DeathData tallies durations when a death takes place.

DeathData::DeathData()
    : count_(0),
      sample_probability_count_(0),
      run_duration_sum_(0),
      queue_duration_sum_(0),
      run_duration_max_(0),
      queue_duration_max_(0),
      alloc_ops_(0),
      free_ops_(0),
      allocated_bytes_(0),
      freed_bytes_(0),
      alloc_overhead_bytes_(0),
      max_allocated_bytes_(0),
      run_duration_sample_(0),
      queue_duration_sample_(0),
      last_phase_snapshot_(nullptr) {}

DeathData::DeathData(const DeathData& other)
    : count_(other.count_),
      sample_probability_count_(other.sample_probability_count_),
      run_duration_sum_(other.run_duration_sum_),
      queue_duration_sum_(other.queue_duration_sum_),
      run_duration_max_(other.run_duration_max_),
      queue_duration_max_(other.queue_duration_max_),
      alloc_ops_(other.alloc_ops_),
      free_ops_(other.free_ops_),
      allocated_bytes_(other.allocated_bytes_),
      freed_bytes_(other.freed_bytes_),
      alloc_overhead_bytes_(other.alloc_overhead_bytes_),
      max_allocated_bytes_(other.max_allocated_bytes_),
      run_duration_sample_(other.run_duration_sample_),
      queue_duration_sample_(other.queue_duration_sample_),
      last_phase_snapshot_(nullptr) {
  // This constructor will be used by std::map when adding new DeathData values
  // to the map. At that point, last_phase_snapshot_ is still NULL, so we don't
  // need to worry about ownership transfer.
  DCHECK(other.last_phase_snapshot_ == nullptr);
}

DeathData::~DeathData() {
  while (last_phase_snapshot_) {
    const DeathDataPhaseSnapshot* snapshot = last_phase_snapshot_;
    last_phase_snapshot_ = snapshot->prev;
    delete snapshot;
  }
}

// TODO(jar): I need to see if this macro to optimize branching is worth using.
//
// This macro has no branching, so it is surely fast, and is equivalent to:
//   if (assign_it)
//     target = source;
// We use a macro rather than a template to force this to inline.
// Related code for calculating max is discussed on the web.
#define CONDITIONAL_ASSIGN(assign_it, target, source) \
  ((target) ^= ((target) ^ (source)) & -static_cast<int32_t>(assign_it))
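// Illustrative expansion of the macro above: with assign_it == 1 the mask
// -static_cast<int32_t>(assign_it) is all ones, so target ^= (target ^ source)
// leaves target equal to source; with assign_it == 0 the mask is zero and
// target is unchanged.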

void DeathData::RecordDurations(const int32_t queue_duration,
                                const int32_t run_duration,
                                const uint32_t random_number) {
  // We'll just clamp at INT_MAX, but we should note this in the UI as such.
  if (count_ < INT_MAX)
    base::subtle::NoBarrier_Store(&count_, count_ + 1);

  int sample_probability_count =
      base::subtle::NoBarrier_Load(&sample_probability_count_);
  if (sample_probability_count < INT_MAX)
    ++sample_probability_count;
  base::subtle::NoBarrier_Store(&sample_probability_count_,
                                sample_probability_count);

  base::subtle::NoBarrier_Store(&queue_duration_sum_,
                                queue_duration_sum_ + queue_duration);
  base::subtle::NoBarrier_Store(&run_duration_sum_,
                                run_duration_sum_ + run_duration);

  if (queue_duration_max() < queue_duration)
    base::subtle::NoBarrier_Store(&queue_duration_max_, queue_duration);
  if (run_duration_max() < run_duration)
    base::subtle::NoBarrier_Store(&run_duration_max_, run_duration);

  // Take a uniformly distributed sample over all durations ever supplied during
  // the current profiling phase.
  // The probability that we (instead) use this new sample is
  // 1/sample_probability_count_. This results in a completely uniform selection
  // of the sample (at least when we don't clamp sample_probability_count_...
  // but that should be inconsequentially likely). We ignore the fact that we
  // correlated our selection of a sample to the run and queue times (i.e., we
  // used them to generate random_number).
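  // Illustrative math: after N durations have been recorded in a phase, each
  // one ends up as the stored sample with probability 1/N, since the k-th
  // duration is selected with probability 1/k and then survives the remaining
  // replacements with probability (k/(k+1)) * ... * ((N-1)/N) = k/N.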
  CHECK_GT(sample_probability_count, 0);
  if (0 == (random_number % sample_probability_count)) {
    base::subtle::NoBarrier_Store(&queue_duration_sample_, queue_duration);
    base::subtle::NoBarrier_Store(&run_duration_sample_, run_duration);
  }
}

void DeathData::RecordAllocations(const uint32_t alloc_ops,
                                  const uint32_t free_ops,
                                  const uint32_t allocated_bytes,
                                  const uint32_t freed_bytes,
                                  const uint32_t alloc_overhead_bytes,
                                  const uint32_t max_allocated_bytes) {
  // Use saturating arithmetic.
  SaturatingMemberAdd(alloc_ops, &alloc_ops_);
  SaturatingMemberAdd(free_ops, &free_ops_);
  SaturatingMemberAdd(allocated_bytes, &allocated_bytes_);
  SaturatingMemberAdd(freed_bytes, &freed_bytes_);
  SaturatingMemberAdd(alloc_overhead_bytes, &alloc_overhead_bytes_);

  int32_t max = base::saturated_cast<int32_t>(max_allocated_bytes);
  if (max > max_allocated_bytes_)
    base::subtle::NoBarrier_Store(&max_allocated_bytes_, max);
}

void DeathData::OnProfilingPhaseCompleted(int profiling_phase) {
  // Snapshotting and storing current state.
  last_phase_snapshot_ =
      new DeathDataPhaseSnapshot(profiling_phase, *this, last_phase_snapshot_);

  // Not touching fields for which a delta can be computed by comparing with a
  // snapshot from the previous phase. Resetting other fields. Sample values
  // will be reset upon next death recording because sample_probability_count_
  // is set to 0.
  // We avoid resetting to 0 in favor of deltas whenever possible. The reason
  // is that for incrementable fields, resetting to 0 from the snapshot thread
  // potentially in parallel with incrementing in the death thread may result in
  // significant data corruption that has a potential to grow with time. Not
  // resetting incrementable fields and using deltas will cause any
  // off-by-little corruptions to be likely fixed at the next snapshot.
  // The max values are not incrementable, and cannot be deduced using deltas
  // for a given phase. Hence, we have to reset them to 0. But the potential
  // damage is limited to getting the previous phase's max to apply for the next
  // phase, and the error doesn't have a potential to keep growing with new
  // resets.
  // sample_probability_count_ is incrementable, but must be reset to 0 at the
  // phase end, so that we start a new uniformly randomized sample selection
  // after the reset. These fields are updated using atomics. However, race
  // conditions are possible since these are updated individually and not
  // together atomically, resulting in the values being mutually inconsistent.
  // The damage is limited to selecting a wrong sample, which is not something
  // that can cause accumulating or cascading effects.
  // If there were no inconsistencies caused by race conditions, we never send a
  // sample for the previous phase in the next phase's snapshot because
  // ThreadData::SnapshotExecutedTasks doesn't send deltas with 0 count.
  base::subtle::NoBarrier_Store(&sample_probability_count_, 0);
  base::subtle::NoBarrier_Store(&run_duration_max_, 0);
  base::subtle::NoBarrier_Store(&queue_duration_max_, 0);
}

void DeathData::SaturatingMemberAdd(const uint32_t addend,
                                    base::subtle::Atomic32* sum) {
  // Bail quick if no work or already saturated.
  if (addend == 0U || *sum == INT_MAX)
    return;

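  // Saturate on overflow: for example, if |*sum| is INT_MAX - 5 and |addend|
  // is 10, the checked addition below overflows and ValueOrDefault() stores
  // INT_MAX.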
  base::CheckedNumeric<int32_t> new_sum = *sum;
  new_sum += addend;
  base::subtle::NoBarrier_Store(sum, new_sum.ValueOrDefault(INT_MAX));
}

//------------------------------------------------------------------------------
DeathDataSnapshot::DeathDataSnapshot()
    : count(-1),
      run_duration_sum(-1),
      run_duration_max(-1),
      run_duration_sample(-1),
      queue_duration_sum(-1),
      queue_duration_max(-1),
      queue_duration_sample(-1),
      alloc_ops(-1),
      free_ops(-1),
      allocated_bytes(-1),
      freed_bytes(-1),
      alloc_overhead_bytes(-1),
      max_allocated_bytes(-1) {}

DeathDataSnapshot::DeathDataSnapshot(int count,
                                     int32_t run_duration_sum,
                                     int32_t run_duration_max,
                                     int32_t run_duration_sample,
                                     int32_t queue_duration_sum,
                                     int32_t queue_duration_max,
                                     int32_t queue_duration_sample,
                                     int32_t alloc_ops,
                                     int32_t free_ops,
                                     int32_t allocated_bytes,
                                     int32_t freed_bytes,
                                     int32_t alloc_overhead_bytes,
                                     int32_t max_allocated_bytes)
    : count(count),
      run_duration_sum(run_duration_sum),
      run_duration_max(run_duration_max),
      run_duration_sample(run_duration_sample),
      queue_duration_sum(queue_duration_sum),
      queue_duration_max(queue_duration_max),
      queue_duration_sample(queue_duration_sample),
      alloc_ops(alloc_ops),
      free_ops(free_ops),
      allocated_bytes(allocated_bytes),
      freed_bytes(freed_bytes),
      alloc_overhead_bytes(alloc_overhead_bytes),
      max_allocated_bytes(max_allocated_bytes) {}

DeathDataSnapshot::DeathDataSnapshot(const DeathData& death_data)
    : count(death_data.count()),
      run_duration_sum(death_data.run_duration_sum()),
      run_duration_max(death_data.run_duration_max()),
      run_duration_sample(death_data.run_duration_sample()),
      queue_duration_sum(death_data.queue_duration_sum()),
      queue_duration_max(death_data.queue_duration_max()),
      queue_duration_sample(death_data.queue_duration_sample()),
      alloc_ops(death_data.alloc_ops()),
      free_ops(death_data.free_ops()),
      allocated_bytes(death_data.allocated_bytes()),
      freed_bytes(death_data.freed_bytes()),
      alloc_overhead_bytes(death_data.alloc_overhead_bytes()),
      max_allocated_bytes(death_data.max_allocated_bytes()) {}

DeathDataSnapshot::DeathDataSnapshot(const DeathDataSnapshot& death_data) =
    default;

DeathDataSnapshot::~DeathDataSnapshot() {
}

DeathDataSnapshot DeathDataSnapshot::Delta(
    const DeathDataSnapshot& older) const {
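  // The max and sample fields are copied rather than differenced: the max
  // values are reset at the end of each profiling phase and the samples are
  // restarted (see DeathData::OnProfilingPhaseCompleted), so the current
  // values already describe only the newer phase.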
  return DeathDataSnapshot(
      count - older.count, run_duration_sum - older.run_duration_sum,
      run_duration_max, run_duration_sample,
      queue_duration_sum - older.queue_duration_sum, queue_duration_max,
      queue_duration_sample, alloc_ops - older.alloc_ops,
      free_ops - older.free_ops, allocated_bytes - older.allocated_bytes,
      freed_bytes - older.freed_bytes,
      alloc_overhead_bytes - older.alloc_overhead_bytes, max_allocated_bytes);
}

//------------------------------------------------------------------------------
BirthOnThread::BirthOnThread(const Location& location,
                             const ThreadData& current)
    : location_(location),
      birth_thread_(&current) {
}

//------------------------------------------------------------------------------
BirthOnThreadSnapshot::BirthOnThreadSnapshot() {
}

BirthOnThreadSnapshot::BirthOnThreadSnapshot(const BirthOnThread& birth)
    : location(birth.location()),
      sanitized_thread_name(birth.birth_thread()->sanitized_thread_name()) {}

BirthOnThreadSnapshot::~BirthOnThreadSnapshot() {
}

//------------------------------------------------------------------------------
Births::Births(const Location& location, const ThreadData& current)
    : BirthOnThread(location, current),
      birth_count_(1) { }

int Births::birth_count() const { return birth_count_; }

void Births::RecordBirth() { ++birth_count_; }

//------------------------------------------------------------------------------
// ThreadData maintains the central data for all births and deaths on a single
// thread.

// TODO(jar): We should pull all these static vars together, into a struct, and
// optimize layout so that we benefit from locality of reference during accesses
// to them.

// static
ThreadData::NowFunction* ThreadData::now_function_for_testing_ = NULL;

// A TLS slot which points to the ThreadData instance for the current thread.
// We do a fake initialization here (zeroing out data), and then the real
// in-place construction happens when we call tls_index_.Initialize().
// static
base::ThreadLocalStorage::StaticSlot ThreadData::tls_index_ = TLS_INITIALIZER;

// static
int ThreadData::cleanup_count_ = 0;

// static
int ThreadData::incarnation_counter_ = 0;

// static
ThreadData* ThreadData::all_thread_data_list_head_ = NULL;

// static
ThreadData* ThreadData::first_retired_thread_data_ = NULL;

// static
base::LazyInstance<base::Lock>::Leaky
    ThreadData::list_lock_ = LAZY_INSTANCE_INITIALIZER;

// static
base::subtle::Atomic32 ThreadData::status_ = ThreadData::UNINITIALIZED;

ThreadData::ThreadData(const std::string& sanitized_thread_name)
    : next_(NULL),
      next_retired_thread_data_(NULL),
      sanitized_thread_name_(sanitized_thread_name),
      incarnation_count_for_pool_(-1),
      current_stopwatch_(NULL) {
  DCHECK(sanitized_thread_name_.empty() ||
         !isdigit(sanitized_thread_name_.back()));
  PushToHeadOfList();  // Which sets real incarnation_count_for_pool_.
}

ThreadData::~ThreadData() {
}

void ThreadData::PushToHeadOfList() {
  // Toss in a hint of randomness (atop the uninitialized value).
  (void)VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(&random_number_,
                                                 sizeof(random_number_));
  MSAN_UNPOISON(&random_number_, sizeof(random_number_));
  random_number_ += static_cast<uint32_t>(this - static_cast<ThreadData*>(0));
  random_number_ ^= (Now() - TrackedTime()).InMilliseconds();

  DCHECK(!next_);
  base::AutoLock lock(*list_lock_.Pointer());
  incarnation_count_for_pool_ = incarnation_counter_;
  next_ = all_thread_data_list_head_;
  all_thread_data_list_head_ = this;
}

// static
ThreadData* ThreadData::first() {
  base::AutoLock lock(*list_lock_.Pointer());
  return all_thread_data_list_head_;
}

ThreadData* ThreadData::next() const { return next_; }

// static
void ThreadData::InitializeThreadContext(const std::string& thread_name) {
  if (base::WorkerPool::RunsTasksOnCurrentThread())
    return;
  DCHECK_NE(thread_name, kWorkerThreadSanitizedName);
  EnsureTlsInitialization();
  ThreadData* current_thread_data =
      reinterpret_cast<ThreadData*>(tls_index_.Get());
  if (current_thread_data)
    return;  // Browser tests instigate this.
  current_thread_data =
      GetRetiredOrCreateThreadData(SanitizeThreadName(thread_name));
  tls_index_.Set(current_thread_data);
}

// static
ThreadData* ThreadData::Get() {
  if (!tls_index_.initialized())
    return NULL;  // For unittests only.
  ThreadData* registered = reinterpret_cast<ThreadData*>(tls_index_.Get());
  if (registered)
    return registered;

  // We must be a worker thread, since we didn't pre-register.
  ThreadData* worker_thread_data =
      GetRetiredOrCreateThreadData(kWorkerThreadSanitizedName);
  tls_index_.Set(worker_thread_data);
  return worker_thread_data;
}

// static
void ThreadData::OnThreadTermination(void* thread_data) {
  DCHECK(thread_data);  // TLS should *never* call us with a NULL.
  // We must NOT do any allocations during this callback. There is a chance
  // that the allocator is no longer active on this thread.
  reinterpret_cast<ThreadData*>(thread_data)->OnThreadTerminationCleanup();
}

void ThreadData::OnThreadTerminationCleanup() {
  // We must NOT do any allocations during this callback. There is a chance that
  // the allocator is no longer active on this thread.

  // The list_lock_ was created when we registered the callback, so it won't be
  // allocated here despite the lazy reference.
  base::AutoLock lock(*list_lock_.Pointer());
  if (incarnation_counter_ != incarnation_count_for_pool_)
    return;  // ThreadData was constructed in an earlier unit test.
  ++cleanup_count_;

  // Add this ThreadData to a retired list so that it can be reused by a thread
  // with the same sanitized name in the future.
  // |next_retired_thread_data_| is expected to be nullptr for a ThreadData
  // associated with an active thread.
  DCHECK(!next_retired_thread_data_);
  next_retired_thread_data_ = first_retired_thread_data_;
  first_retired_thread_data_ = this;
}

// static
void ThreadData::Snapshot(int current_profiling_phase,
                          ProcessDataSnapshot* process_data_snapshot) {
  // Get an unchanging copy of a ThreadData list.
  ThreadData* my_list = ThreadData::first();

  // Gather data serially.
  // This hackish approach *can* get some slightly corrupt tallies, as we are
  // grabbing values without the protection of a lock, but it has the advantage
  // of working even with threads that don't have message loops. If a user
  // sees any strangeness, they can always just run their stats gathering a
  // second time.
  BirthCountMap birth_counts;
  for (ThreadData* thread_data = my_list; thread_data;
       thread_data = thread_data->next()) {
    thread_data->SnapshotExecutedTasks(current_profiling_phase,
                                       &process_data_snapshot->phased_snapshots,
                                       &birth_counts);
  }

  // Add births that are still active -- i.e. objects that have tallied a birth,
  // but have not yet tallied a matching death, and hence must be either
  // running, queued up, or being held in limbo for future posting.
  auto* current_phase_tasks =
      &process_data_snapshot->phased_snapshots[current_profiling_phase].tasks;
  for (const auto& birth_count : birth_counts) {
    if (birth_count.second > 0) {
      current_phase_tasks->push_back(
          TaskSnapshot(BirthOnThreadSnapshot(*birth_count.first),
                       DeathDataSnapshot(birth_count.second, 0, 0, 0, 0, 0, 0,
                                         0, 0, 0, 0, 0, 0),
                       "Still_Alive"));
    }
  }
}

// static
void ThreadData::OnProfilingPhaseCompleted(int profiling_phase) {
  // Get an unchanging copy of a ThreadData list.
  ThreadData* my_list = ThreadData::first();

  // Add snapshots for all instances of death data in all threads serially.
  // This hackish approach *can* get some slightly corrupt tallies, as we are
  // grabbing values without the protection of a lock, but it has the advantage
  // of working even with threads that don't have message loops. Any corruption
  // shouldn't cause "cascading damage" to anything else (in later phases).
  for (ThreadData* thread_data = my_list; thread_data;
       thread_data = thread_data->next()) {
    thread_data->OnProfilingPhaseCompletedOnThread(profiling_phase);
  }
}

Births* ThreadData::TallyABirth(const Location& location) {
  BirthMap::iterator it = birth_map_.find(location);
  Births* child;
  if (it != birth_map_.end()) {
    child = it->second;
    child->RecordBirth();
  } else {
    child = new Births(location, *this);  // Leak this.
    // Lock since the map may get relocated now, and other threads sometimes
    // snapshot it (but they lock before copying it).
    base::AutoLock lock(map_lock_);
    birth_map_[location] = child;
  }

  return child;
}

void ThreadData::TallyADeath(const Births& births,
                             int32_t queue_duration,
                             const TaskStopwatch& stopwatch) {
  int32_t run_duration = stopwatch.RunDurationMs();

  // Stir in some randomness, plus add constant in case durations are zero.
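  // (2147483647 is 2^31 - 1, the Mersenne prime M31.)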
  const uint32_t kSomePrimeNumber = 2147483647;
  random_number_ += queue_duration + run_duration + kSomePrimeNumber;
  // An address is going to have some randomness to it as well ;-).
  random_number_ ^=
      static_cast<uint32_t>(&births - reinterpret_cast<Births*>(0));

  DeathMap::iterator it = death_map_.find(&births);
  DeathData* death_data;
  if (it != death_map_.end()) {
    death_data = &it->second;
  } else {
    base::AutoLock lock(map_lock_);  // Lock as the map may get relocated now.
    death_data = &death_map_[&births];
  }  // Release lock ASAP.
  death_data->RecordDurations(queue_duration, run_duration, random_number_);

#if BUILDFLAG(ENABLE_MEMORY_TASK_PROFILER)
  if (stopwatch.heap_tracking_enabled()) {
    base::debug::ThreadHeapUsage heap_usage = stopwatch.heap_usage().usage();
    // Saturate the 64 bit counts on conversion to 32 bit storage.
    death_data->RecordAllocations(
        base::saturated_cast<int32_t>(heap_usage.alloc_ops),
        base::saturated_cast<int32_t>(heap_usage.free_ops),
        base::saturated_cast<int32_t>(heap_usage.alloc_bytes),
        base::saturated_cast<int32_t>(heap_usage.free_bytes),
        base::saturated_cast<int32_t>(heap_usage.alloc_overhead_bytes),
        base::saturated_cast<int32_t>(heap_usage.max_allocated_bytes));
  }
#endif
}

// static
Births* ThreadData::TallyABirthIfActive(const Location& location) {
  if (!TrackingStatus())
    return NULL;
  ThreadData* current_thread_data = Get();
  if (!current_thread_data)
    return NULL;
  return current_thread_data->TallyABirth(location);
}

// static
void ThreadData::TallyRunOnNamedThreadIfTracking(
    const base::TrackingInfo& completed_task,
    const TaskStopwatch& stopwatch) {
  // Even if we have been DEACTIVATED, we will process any pending births so
  // that our data structures (which counted the outstanding births) remain
  // consistent.
  const Births* births = completed_task.birth_tally;
  if (!births)
    return;
  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  // Watch out for a race where status_ is changing, and hence one or both
  // of start_of_run or end_of_run is zero. In that case, we didn't bother to
  // get a time value since we "weren't tracking" and we were trying to be
  // efficient by not calling for a genuine time value. For simplicity, we'll
  // use a default zero duration when we can't calculate a true value.
  TrackedTime start_of_run = stopwatch.StartTime();
  int32_t queue_duration = 0;
  if (!start_of_run.is_null()) {
    queue_duration = (start_of_run - completed_task.EffectiveTimePosted())
                         .InMilliseconds();
  }
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}

// static
void ThreadData::TallyRunOnWorkerThreadIfTracking(
    const Births* births,
    const TrackedTime& time_posted,
    const TaskStopwatch& stopwatch) {
  // Even if we have been DEACTIVATED, we will process any pending births so
  // that our data structures (which counted the outstanding births) remain
  // consistent.
  if (!births)
    return;

  // TODO(jar): Support the option to coalesce all worker-thread activity under
  // one ThreadData instance that uses locks to protect *all* access. This will
  // reduce memory (making it provably bounded), but run incrementally slower
  // (since we'll use locks on TallyABirth and TallyADeath). The good news is
  // that the locks on TallyADeath will be *after* the worker thread has run,
  // and hence nothing will be waiting for the completion (... besides some
  // other thread that might like to run). Also, the worker threads' tasks are
  // generally longer, and hence the cost of the lock may perchance be amortized
  // over the long task's lifetime.
  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  TrackedTime start_of_run = stopwatch.StartTime();
  int32_t queue_duration = 0;
  if (!start_of_run.is_null()) {
    queue_duration = (start_of_run - time_posted).InMilliseconds();
  }
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}

// static
void ThreadData::TallyRunInAScopedRegionIfTracking(
    const Births* births,
    const TaskStopwatch& stopwatch) {
  // Even if we have been DEACTIVATED, we will process any pending births so
  // that our data structures (which counted the outstanding births) remain
  // consistent.
  if (!births)
    return;

  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  int32_t queue_duration = 0;
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}

void ThreadData::SnapshotExecutedTasks(
    int current_profiling_phase,
    PhasedProcessDataSnapshotMap* phased_snapshots,
    BirthCountMap* birth_counts) {
  // Get a copy of the data, so that it will not change during the iterations
  // and processing.
  BirthMap birth_map;
  DeathsSnapshot deaths;
  SnapshotMaps(current_profiling_phase, &birth_map, &deaths);

  for (const auto& birth : birth_map) {
    (*birth_counts)[birth.second] += birth.second->birth_count();
  }

  for (const auto& death : deaths) {
    (*birth_counts)[death.first] -= death.first->birth_count();

    // For the current death data, walk through all its snapshots, starting from
    // the current one, then from the previous profiling phase etc., and for
    // each snapshot calculate the delta between the snapshot and the previous
    // phase, if any. Store the deltas in the result.
    for (const DeathDataPhaseSnapshot* phase = &death.second; phase;
         phase = phase->prev) {
      const DeathDataSnapshot& death_data =
          phase->prev ? phase->death_data.Delta(phase->prev->death_data)
                      : phase->death_data;

      if (death_data.count > 0) {
        (*phased_snapshots)[phase->profiling_phase].tasks.push_back(
            TaskSnapshot(BirthOnThreadSnapshot(*death.first), death_data,
                         sanitized_thread_name()));
      }
    }
  }
}

// This may be called from another thread.
void ThreadData::SnapshotMaps(int profiling_phase,
                              BirthMap* birth_map,
                              DeathsSnapshot* deaths) {
  base::AutoLock lock(map_lock_);

  for (const auto& birth : birth_map_)
    (*birth_map)[birth.first] = birth.second;

  for (const auto& death : death_map_) {
    deaths->push_back(std::make_pair(
        death.first,
        DeathDataPhaseSnapshot(profiling_phase, death.second,
                               death.second.last_phase_snapshot())));
  }
}

void ThreadData::OnProfilingPhaseCompletedOnThread(int profiling_phase) {
  base::AutoLock lock(map_lock_);

  for (auto& death : death_map_) {
    death.second.OnProfilingPhaseCompleted(profiling_phase);
  }
}

void ThreadData::EnsureTlsInitialization() {
  if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
    return;  // Someone else did the initialization.
  // Due to racy lazy initialization in tests, we'll need to recheck status_
  // after we acquire the lock.

  // Ensure that we don't double initialize tls. We are called when single
  // threaded in the product, but some tests may be racy and lazy about our
  // initialization.
  base::AutoLock lock(*list_lock_.Pointer());
  if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
    return;  // Someone raced in here and beat us.

  // Perform the "real" TLS initialization now, and leave it intact through
  // process termination.
  if (!tls_index_.initialized()) {  // Testing may have initialized this.
    DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), UNINITIALIZED);
    tls_index_.Initialize(&ThreadData::OnThreadTermination);
    DCHECK(tls_index_.initialized());
  } else {
    // TLS was initialized for us earlier.
    DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), DORMANT_DURING_TESTS);
  }

  // Incarnation counter is only significant to testing, as it otherwise will
  // never again change in this process.
  ++incarnation_counter_;

  // The lock is not critical for setting status_, but it doesn't hurt. It also
  // ensures that if we have a racy initialization, that we'll bail as soon as
  // we get the lock earlier in this method.
  base::subtle::Release_Store(&status_, kInitialStartupState);
  DCHECK(base::subtle::NoBarrier_Load(&status_) != UNINITIALIZED);
}

// static
void ThreadData::InitializeAndSetTrackingStatus(Status status) {
  DCHECK_GE(status, DEACTIVATED);
  DCHECK_LE(status, PROFILING_ACTIVE);

  EnsureTlsInitialization();  // No-op if already initialized.

  if (status > DEACTIVATED)
    status = PROFILING_ACTIVE;

  base::subtle::Release_Store(&status_, status);
}

// static
ThreadData::Status ThreadData::status() {
  return static_cast<ThreadData::Status>(base::subtle::Acquire_Load(&status_));
}

// static
bool ThreadData::TrackingStatus() {
  return base::subtle::Acquire_Load(&status_) > DEACTIVATED;
}

// static
void ThreadData::EnableProfilerTiming() {
  base::subtle::NoBarrier_Store(&g_profiler_timing_enabled, ENABLED_TIMING);
}

// static
TrackedTime ThreadData::Now() {
  if (now_function_for_testing_)
    return TrackedTime::FromMilliseconds((*now_function_for_testing_)());
  if (IsProfilerTimingEnabled() && TrackingStatus())
    return TrackedTime::Now();
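  // The null TrackedTime returned below is what the TallyRun*IfTracking()
  // helpers detect (via is_null()) to fall back to a zero queue duration.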
  return TrackedTime();  // Super fast when disabled, or not compiled.
}

// static
void ThreadData::EnsureCleanupWasCalled(int major_threads_shutdown_count) {
  base::AutoLock lock(*list_lock_.Pointer());

  // TODO(jar): until this is working on XP, don't run the real test.
#if 0
  // Verify that we've at least shut down/cleaned up the major named threads.
  // The caller should tell us how many thread shutdowns should have taken
  // place by now.
  CHECK_GT(cleanup_count_, major_threads_shutdown_count);
#endif
}

// static
void ThreadData::ShutdownSingleThreadedCleanup(bool leak) {
  // This is only called from test code, where we need to cleanup so that
  // additional tests can be run.
  // We must be single threaded... but be careful anyway.
  InitializeAndSetTrackingStatus(DEACTIVATED);

  ThreadData* thread_data_list;
  {
    base::AutoLock lock(*list_lock_.Pointer());
    thread_data_list = all_thread_data_list_head_;
    all_thread_data_list_head_ = NULL;
    ++incarnation_counter_;
    // To be clean, break apart the retired worker list (though we leak them).
    while (first_retired_thread_data_) {
      ThreadData* thread_data = first_retired_thread_data_;
      first_retired_thread_data_ = thread_data->next_retired_thread_data_;
      thread_data->next_retired_thread_data_ = nullptr;
    }
  }

  // Put most global statics back in pristine shape.
  cleanup_count_ = 0;
  tls_index_.Set(NULL);
  // Almost UNINITIALIZED.
  base::subtle::Release_Store(&status_, DORMANT_DURING_TESTS);

  // To avoid any chance of racing in unit tests, which is the only place we
  // call this function, we may sometimes leak all the data structures we
  // recovered, as they may still be in use on threads from prior tests!
  if (leak) {
    ThreadData* thread_data = thread_data_list;
    while (thread_data) {
      ANNOTATE_LEAKING_OBJECT_PTR(thread_data);
      thread_data = thread_data->next();
    }
    return;
  }

  // When we want to cleanup (on a single thread), here is what we do.

  // Do actual recursive delete in all ThreadData instances.
  while (thread_data_list) {
    ThreadData* next_thread_data = thread_data_list;
    thread_data_list = thread_data_list->next();

    for (BirthMap::iterator it = next_thread_data->birth_map_.begin();
         next_thread_data->birth_map_.end() != it; ++it)
      delete it->second;  // Delete the Birth Records.
    delete next_thread_data;  // Includes all Death Records.
  }
}

// static
ThreadData* ThreadData::GetRetiredOrCreateThreadData(
    const std::string& sanitized_thread_name) {
  SCOPED_UMA_HISTOGRAM_TIMER("TrackedObjects.GetRetiredOrCreateThreadData");

  {
    base::AutoLock lock(*list_lock_.Pointer());
    ThreadData** pcursor = &first_retired_thread_data_;
    ThreadData* cursor = first_retired_thread_data_;

    // Assuming that there aren't more than a few tens of retired ThreadData
    // instances, this lookup should be quick compared to the thread creation
    // time. Retired ThreadData instances cannot be stored in a map because
    // insertions are done from OnThreadTerminationCleanup() where allocations
    // are not allowed.
    //
    // Note: Test processes may have more than a few tens of retired ThreadData
    // instances.
    while (cursor) {
      if (cursor->sanitized_thread_name() == sanitized_thread_name) {
        DCHECK_EQ(*pcursor, cursor);
        *pcursor = cursor->next_retired_thread_data_;
        cursor->next_retired_thread_data_ = nullptr;
        return cursor;
      }
      pcursor = &cursor->next_retired_thread_data_;
      cursor = cursor->next_retired_thread_data_;
    }
  }

  return new ThreadData(sanitized_thread_name);
}

//------------------------------------------------------------------------------
TaskStopwatch::TaskStopwatch()
    : wallclock_duration_ms_(0),
      current_thread_data_(NULL),
      excluded_duration_ms_(0),
      parent_(NULL) {
#if DCHECK_IS_ON()
  state_ = CREATED;
  child_ = NULL;
#endif
#if BUILDFLAG(ENABLE_MEMORY_TASK_PROFILER)
  heap_tracking_enabled_ =
      base::debug::ThreadHeapUsageTracker::IsHeapTrackingEnabled();
#endif
}

TaskStopwatch::~TaskStopwatch() {
#if DCHECK_IS_ON()
  DCHECK(state_ != RUNNING);
  DCHECK(child_ == NULL);
#endif
}

void TaskStopwatch::Start() {
#if DCHECK_IS_ON()
  DCHECK(state_ == CREATED);
  state_ = RUNNING;
#endif

  start_time_ = ThreadData::Now();
#if BUILDFLAG(ENABLE_MEMORY_TASK_PROFILER)
  if (heap_tracking_enabled_)
    heap_usage_.Start();
#endif

  current_thread_data_ = ThreadData::Get();
  if (!current_thread_data_)
    return;

  parent_ = current_thread_data_->current_stopwatch_;
#if DCHECK_IS_ON()
  if (parent_) {
    DCHECK(parent_->state_ == RUNNING);
    DCHECK(parent_->child_ == NULL);
    parent_->child_ = this;
  }
#endif
  current_thread_data_->current_stopwatch_ = this;
}

void TaskStopwatch::Stop() {
  const TrackedTime end_time = ThreadData::Now();
#if DCHECK_IS_ON()
  DCHECK(state_ == RUNNING);
  state_ = STOPPED;
  DCHECK(child_ == NULL);
#endif
#if BUILDFLAG(ENABLE_MEMORY_TASK_PROFILER)
  if (heap_tracking_enabled_)
    heap_usage_.Stop(true);
#endif

  if (!start_time_.is_null() && !end_time.is_null()) {
    wallclock_duration_ms_ = (end_time - start_time_).InMilliseconds();
  }

  if (!current_thread_data_)
    return;

  DCHECK(current_thread_data_->current_stopwatch_ == this);
  current_thread_data_->current_stopwatch_ = parent_;
  if (!parent_)
    return;

#if DCHECK_IS_ON()
  DCHECK(parent_->state_ == RUNNING);
  DCHECK(parent_->child_ == this);
  parent_->child_ = NULL;
#endif
  parent_->excluded_duration_ms_ += wallclock_duration_ms_;
  parent_ = NULL;
}

TrackedTime TaskStopwatch::StartTime() const {
#if DCHECK_IS_ON()
  DCHECK(state_ != CREATED);
#endif

  return start_time_;
}

int32_t TaskStopwatch::RunDurationMs() const {
#if DCHECK_IS_ON()
  DCHECK(state_ == STOPPED);
#endif

  return wallclock_duration_ms_ - excluded_duration_ms_;
}

ThreadData* TaskStopwatch::GetThreadData() const {
#if DCHECK_IS_ON()
  DCHECK(state_ != CREATED);
#endif

  return current_thread_data_;
}

//------------------------------------------------------------------------------
// DeathDataPhaseSnapshot

DeathDataPhaseSnapshot::DeathDataPhaseSnapshot(
    int profiling_phase,
    const DeathData& death,
    const DeathDataPhaseSnapshot* prev)
    : profiling_phase(profiling_phase), death_data(death), prev(prev) {}

//------------------------------------------------------------------------------
// TaskSnapshot

TaskSnapshot::TaskSnapshot() {
}

TaskSnapshot::TaskSnapshot(const BirthOnThreadSnapshot& birth,
                           const DeathDataSnapshot& death_data,
                           const std::string& death_sanitized_thread_name)
    : birth(birth),
      death_data(death_data),
      death_sanitized_thread_name(death_sanitized_thread_name) {}

TaskSnapshot::~TaskSnapshot() {
}

//------------------------------------------------------------------------------
// ProcessDataPhaseSnapshot

ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot() {
}

ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot(
    const ProcessDataPhaseSnapshot& other) = default;

ProcessDataPhaseSnapshot::~ProcessDataPhaseSnapshot() {
}

//------------------------------------------------------------------------------
// ProcessDataSnapshot

ProcessDataSnapshot::ProcessDataSnapshot()
#if !defined(OS_NACL)
    : process_id(base::GetCurrentProcId()) {
#else
    : process_id(base::kNullProcessId) {
#endif
}

ProcessDataSnapshot::ProcessDataSnapshot(const ProcessDataSnapshot& other) =
    default;

ProcessDataSnapshot::~ProcessDataSnapshot() {
}

}  // namespace tracked_objects