// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/tracked_objects.h"

#include <limits.h>
#include <stdlib.h>

#include "base/atomicops.h"
#include "base/base_switches.h"
#include "base/command_line.h"
#include "base/compiler_specific.h"
#include "base/debug/leak_annotations.h"
#include "base/logging.h"
#include "base/process/process_handle.h"
#include "base/strings/stringprintf.h"
#include "base/third_party/valgrind/memcheck.h"
#include "base/threading/worker_pool.h"
#include "base/tracking_info.h"
#include "build/build_config.h"

using base::TimeDelta;

namespace base {
class TimeDelta;
}

namespace tracked_objects {

namespace {
// When ThreadData is first initialized, should we start in an ACTIVE state to
// record all of the startup-time tasks, or should we start up DEACTIVATED, so
// that we only record after parsing the command line flag --enable-tracking.
// Note that the flag may force either state, so this really controls only the
// period of time up until that flag is parsed. If there is no flag seen, then
// this state may prevail for much or all of the process lifetime.
const ThreadData::Status kInitialStartupState = ThreadData::PROFILING_ACTIVE;

// Possible states of the profiler timing enabledness.
enum {
  UNDEFINED_TIMING,
  ENABLED_TIMING,
  DISABLED_TIMING,
};

// State of the profiler timing enabledness.
base::subtle::Atomic32 g_profiler_timing_enabled = UNDEFINED_TIMING;

// Returns whether profiler timing is enabled. The default is true, but this
// may be overridden by a command-line flag. Some platforms may
// programmatically set this command-line flag to the "off" value if it's not
// specified.
// This in turn can be overridden by explicitly calling
// ThreadData::EnableProfilerTiming, say, based on a field trial.
inline bool IsProfilerTimingEnabled() {
  // Reading |g_profiler_timing_enabled| is done without barrier because
  // multiple initialization is not an issue while the barrier can be
  // relatively costly given that this method is sometimes called in a tight
  // loop.
  base::subtle::Atomic32 current_timing_enabled =
      base::subtle::NoBarrier_Load(&g_profiler_timing_enabled);
  if (current_timing_enabled == UNDEFINED_TIMING) {
    if (!base::CommandLine::InitializedForCurrentProcess())
      return true;
    current_timing_enabled =
        (base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
             switches::kProfilerTiming) ==
         switches::kProfilerTimingDisabledValue)
            ? DISABLED_TIMING
            : ENABLED_TIMING;
    base::subtle::NoBarrier_Store(&g_profiler_timing_enabled,
                                  current_timing_enabled);
  }
  return current_timing_enabled == ENABLED_TIMING;
}

}  // namespace

//------------------------------------------------------------------------------
// DeathData tallies durations when a death takes place.

DeathData::DeathData()
    : count_(0),
      sample_probability_count_(0),
      run_duration_sum_(0),
      queue_duration_sum_(0),
      run_duration_max_(0),
      queue_duration_max_(0),
      run_duration_sample_(0),
      queue_duration_sample_(0),
      last_phase_snapshot_(nullptr) {
}

DeathData::DeathData(const DeathData& other)
    : count_(other.count_),
      sample_probability_count_(other.sample_probability_count_),
      run_duration_sum_(other.run_duration_sum_),
      queue_duration_sum_(other.queue_duration_sum_),
      run_duration_max_(other.run_duration_max_),
      queue_duration_max_(other.queue_duration_max_),
      run_duration_sample_(other.run_duration_sample_),
      queue_duration_sample_(other.queue_duration_sample_),
      last_phase_snapshot_(nullptr) {
  // This constructor will be used by std::map when adding new DeathData values
  // to the map. At that point, last_phase_snapshot_ is still NULL, so we don't
  // need to worry about ownership transfer.
  DCHECK(other.last_phase_snapshot_ == nullptr);
}

DeathData::~DeathData() {
  while (last_phase_snapshot_) {
    const DeathDataPhaseSnapshot* snapshot = last_phase_snapshot_;
    last_phase_snapshot_ = snapshot->prev;
    delete snapshot;
  }
}

// TODO(jar): I need to see if this macro to optimize branching is worth using.
//
// This macro has no branching, so it is surely fast, and is equivalent to:
//   if (assign_it)
//     target = source;
// We use a macro rather than a template to force this to inline.
// Related code for calculating max is discussed on the web.
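// For example, with assign_it == 1 the mask -static_cast<int32_t>(1) is all
// ones, so target ^= (target ^ source) leaves target equal to source; with
// assign_it == 0 the mask is zero and target is unchanged.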
#define CONDITIONAL_ASSIGN(assign_it, target, source) \
  ((target) ^= ((target) ^ (source)) & -static_cast<int32_t>(assign_it))

void DeathData::RecordDeath(const int32_t queue_duration,
                            const int32_t run_duration,
                            const uint32_t random_number) {
  // We'll just clamp at INT_MAX, but we should note this in the UI as such.
  if (count_ < INT_MAX)
    base::subtle::NoBarrier_Store(&count_, count_ + 1);

  int sample_probability_count =
      base::subtle::NoBarrier_Load(&sample_probability_count_);
  if (sample_probability_count < INT_MAX)
    ++sample_probability_count;
  base::subtle::NoBarrier_Store(&sample_probability_count_,
                                sample_probability_count);

  base::subtle::NoBarrier_Store(&queue_duration_sum_,
                                queue_duration_sum_ + queue_duration);
  base::subtle::NoBarrier_Store(&run_duration_sum_,
                                run_duration_sum_ + run_duration);

  if (queue_duration_max() < queue_duration)
    base::subtle::NoBarrier_Store(&queue_duration_max_, queue_duration);
  if (run_duration_max() < run_duration)
    base::subtle::NoBarrier_Store(&run_duration_max_, run_duration);

  // Take a uniformly distributed sample over all durations ever supplied
  // during the current profiling phase.
  // The probability that we (instead) use this new sample is
  // 1/sample_probability_count_. This results in a completely uniform
  // selection of the sample (at least when we don't clamp
  // sample_probability_count_... but that should be inconsequentially likely).
  // We ignore the fact that we correlated our selection of a sample to the run
  // and queue times (i.e., we used them to generate random_number).
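  // (In effect this is reservoir sampling with a reservoir of size one: the
  // i-th death recorded in a phase replaces the stored sample with
  // probability 1/i.)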
  CHECK_GT(sample_probability_count, 0);
  if (0 == (random_number % sample_probability_count)) {
    base::subtle::NoBarrier_Store(&queue_duration_sample_, queue_duration);
    base::subtle::NoBarrier_Store(&run_duration_sample_, run_duration);
  }
}

void DeathData::OnProfilingPhaseCompleted(int profiling_phase) {
  // Snapshotting and storing current state.
  last_phase_snapshot_ = new DeathDataPhaseSnapshot(
      profiling_phase, count(), run_duration_sum(), run_duration_max(),
      run_duration_sample(), queue_duration_sum(), queue_duration_max(),
      queue_duration_sample(), last_phase_snapshot_);

  // Not touching fields for which a delta can be computed by comparing with a
  // snapshot from the previous phase. Resetting other fields. Sample values
  // will be reset upon next death recording because sample_probability_count_
  // is set to 0.
  // We avoid resetting to 0 in favor of deltas whenever possible. The reason
  // is that for incrementable fields, resetting to 0 from the snapshot thread
  // potentially in parallel with incrementing in the death thread may result
  // in significant data corruption that has a potential to grow with time. Not
  // resetting incrementable fields and using deltas will cause any
  // off-by-little corruptions to be likely fixed at the next snapshot.
  // The max values are not incrementable, and cannot be deduced using deltas
  // for a given phase. Hence, we have to reset them to 0. But the potential
  // damage is limited to getting the previous phase's max to apply for the
  // next phase, and the error doesn't have a potential to keep growing with
  // new resets.
  // sample_probability_count_ is incrementable, but must be reset to 0 at the
  // phase end, so that we start a new uniformly randomized sample selection
  // after the reset. These fields are updated using atomics. However, race
  // conditions are possible since these are updated individually and not
  // together atomically, resulting in the values being mutually inconsistent.
  // The damage is limited to selecting a wrong sample, which is not something
  // that can cause accumulating or cascading effects.
  // If there were no inconsistencies caused by race conditions, we never send
  // a sample for the previous phase in the next phase's snapshot because
  // ThreadData::SnapshotExecutedTasks doesn't send deltas with 0 count.
  base::subtle::NoBarrier_Store(&sample_probability_count_, 0);
  base::subtle::NoBarrier_Store(&run_duration_max_, 0);
  base::subtle::NoBarrier_Store(&queue_duration_max_, 0);
}

//------------------------------------------------------------------------------
DeathDataSnapshot::DeathDataSnapshot()
    : count(-1),
      run_duration_sum(-1),
      run_duration_max(-1),
      run_duration_sample(-1),
      queue_duration_sum(-1),
      queue_duration_max(-1),
      queue_duration_sample(-1) {
}

DeathDataSnapshot::DeathDataSnapshot(int count,
                                     int32_t run_duration_sum,
                                     int32_t run_duration_max,
                                     int32_t run_duration_sample,
                                     int32_t queue_duration_sum,
                                     int32_t queue_duration_max,
                                     int32_t queue_duration_sample)
    : count(count),
      run_duration_sum(run_duration_sum),
      run_duration_max(run_duration_max),
      run_duration_sample(run_duration_sample),
      queue_duration_sum(queue_duration_sum),
      queue_duration_max(queue_duration_max),
      queue_duration_sample(queue_duration_sample) {}

DeathDataSnapshot::~DeathDataSnapshot() {
}

DeathDataSnapshot DeathDataSnapshot::Delta(
    const DeathDataSnapshot& older) const {
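  // Counts and sums accumulate across phases, so they are differenced against
  // the older snapshot; max and sample values are reset at each phase boundary
  // (see DeathData::OnProfilingPhaseCompleted), so they are reported as-is.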
  return DeathDataSnapshot(count - older.count,
                           run_duration_sum - older.run_duration_sum,
                           run_duration_max, run_duration_sample,
                           queue_duration_sum - older.queue_duration_sum,
                           queue_duration_max, queue_duration_sample);
}

//------------------------------------------------------------------------------
BirthOnThread::BirthOnThread(const Location& location,
                             const ThreadData& current)
    : location_(location),
      birth_thread_(&current) {
}

//------------------------------------------------------------------------------
BirthOnThreadSnapshot::BirthOnThreadSnapshot() {
}

BirthOnThreadSnapshot::BirthOnThreadSnapshot(const BirthOnThread& birth)
    : location(birth.location()),
      thread_name(birth.birth_thread()->thread_name()) {
}

BirthOnThreadSnapshot::~BirthOnThreadSnapshot() {
}

//------------------------------------------------------------------------------
Births::Births(const Location& location, const ThreadData& current)
    : BirthOnThread(location, current),
      birth_count_(1) { }

int Births::birth_count() const { return birth_count_; }

void Births::RecordBirth() { ++birth_count_; }

//------------------------------------------------------------------------------
// ThreadData maintains the central data for all births and deaths on a single
// thread.

// TODO(jar): We should pull all these static vars together, into a struct, and
// optimize layout so that we benefit from locality of reference during
// accesses to them.

// static
ThreadData::NowFunction* ThreadData::now_function_for_testing_ = NULL;

// A TLS slot which points to the ThreadData instance for the current thread.
// We do a fake initialization here (zeroing out data), and then the real
// in-place construction happens when we call tls_index_.Initialize().
// static
base::ThreadLocalStorage::StaticSlot ThreadData::tls_index_ = TLS_INITIALIZER;

// static
int ThreadData::worker_thread_data_creation_count_ = 0;

// static
int ThreadData::cleanup_count_ = 0;

// static
int ThreadData::incarnation_counter_ = 0;

// static
ThreadData* ThreadData::all_thread_data_list_head_ = NULL;

// static
ThreadData* ThreadData::first_retired_worker_ = NULL;

// static
base::LazyInstance<base::Lock>::Leaky
    ThreadData::list_lock_ = LAZY_INSTANCE_INITIALIZER;

// static
base::subtle::Atomic32 ThreadData::status_ = ThreadData::UNINITIALIZED;

ThreadData::ThreadData(const std::string& suggested_name)
    : next_(NULL),
      next_retired_worker_(NULL),
      worker_thread_number_(0),
      incarnation_count_for_pool_(-1),
      current_stopwatch_(NULL) {
  DCHECK_GE(suggested_name.size(), 0u);
  thread_name_ = suggested_name;
  PushToHeadOfList();  // Which sets real incarnation_count_for_pool_.
}

ThreadData::ThreadData(int thread_number)
    : next_(NULL),
      next_retired_worker_(NULL),
      worker_thread_number_(thread_number),
      incarnation_count_for_pool_(-1),
      current_stopwatch_(NULL) {
  CHECK_GT(thread_number, 0);
  base::StringAppendF(&thread_name_, "WorkerThread-%d", thread_number);
  PushToHeadOfList();  // Which sets real incarnation_count_for_pool_.
}

ThreadData::~ThreadData() {
}

void ThreadData::PushToHeadOfList() {
  // Toss in a hint of randomness (atop the uninitialized value).
  (void)VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(&random_number_,
                                                 sizeof(random_number_));
  MSAN_UNPOISON(&random_number_, sizeof(random_number_));
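  // In practice the pointer subtraction below folds this object's address
  // (scaled by sizeof(ThreadData)) into the seed without an explicit
  // pointer-to-integer cast.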
  random_number_ += static_cast<uint32_t>(this - static_cast<ThreadData*>(0));
  random_number_ ^= (Now() - TrackedTime()).InMilliseconds();

  DCHECK(!next_);
  base::AutoLock lock(*list_lock_.Pointer());
  incarnation_count_for_pool_ = incarnation_counter_;
  next_ = all_thread_data_list_head_;
  all_thread_data_list_head_ = this;
}

// static
ThreadData* ThreadData::first() {
  base::AutoLock lock(*list_lock_.Pointer());
  return all_thread_data_list_head_;
}

ThreadData* ThreadData::next() const { return next_; }

// static
void ThreadData::InitializeThreadContext(const std::string& suggested_name) {
  if (base::WorkerPool::RunsTasksOnCurrentThread())
    return;
  EnsureTlsInitialization();
  ThreadData* current_thread_data =
      reinterpret_cast<ThreadData*>(tls_index_.Get());
  if (current_thread_data)
    return;  // Browser tests instigate this.
  current_thread_data = new ThreadData(suggested_name);
  tls_index_.Set(current_thread_data);
}

// static
ThreadData* ThreadData::Get() {
  if (!tls_index_.initialized())
    return NULL;  // For unittests only.
  ThreadData* registered = reinterpret_cast<ThreadData*>(tls_index_.Get());
  if (registered)
    return registered;

  // We must be a worker thread, since we didn't pre-register.
  ThreadData* worker_thread_data = NULL;
  int worker_thread_number = 0;
  {
    base::AutoLock lock(*list_lock_.Pointer());
    if (first_retired_worker_) {
      worker_thread_data = first_retired_worker_;
      first_retired_worker_ = first_retired_worker_->next_retired_worker_;
      worker_thread_data->next_retired_worker_ = NULL;
    } else {
      worker_thread_number = ++worker_thread_data_creation_count_;
    }
  }

  // If we can't find a previously used instance, then we have to create one.
  if (!worker_thread_data) {
    DCHECK_GT(worker_thread_number, 0);
    worker_thread_data = new ThreadData(worker_thread_number);
  }
  DCHECK_GT(worker_thread_data->worker_thread_number_, 0);

  tls_index_.Set(worker_thread_data);
  return worker_thread_data;
}

// static
void ThreadData::OnThreadTermination(void* thread_data) {
  DCHECK(thread_data);  // TLS should *never* call us with a NULL.
  // We must NOT do any allocations during this callback. There is a chance
  // that the allocator is no longer active on this thread.
  reinterpret_cast<ThreadData*>(thread_data)->OnThreadTerminationCleanup();
}

void ThreadData::OnThreadTerminationCleanup() {
  // The list_lock_ was created when we registered the callback, so it won't be
  // allocated here despite the lazy reference.
  base::AutoLock lock(*list_lock_.Pointer());
  if (incarnation_counter_ != incarnation_count_for_pool_)
    return;  // ThreadData was constructed in an earlier unit test.
  ++cleanup_count_;
  // Only worker threads need to be retired and reused.
  if (!worker_thread_number_) {
    return;
  }
  // We must NOT do any allocations during this callback.
  // Using the simple linked lists avoids all allocations.
  DCHECK_EQ(this->next_retired_worker_, reinterpret_cast<ThreadData*>(NULL));
  this->next_retired_worker_ = first_retired_worker_;
  first_retired_worker_ = this;
}

// static
void ThreadData::Snapshot(int current_profiling_phase,
                          ProcessDataSnapshot* process_data_snapshot) {
  // Get an unchanging copy of a ThreadData list.
  ThreadData* my_list = ThreadData::first();

  // Gather data serially.
  // This hackish approach *can* get some slightly corrupt tallies, as we are
  // grabbing values without the protection of a lock, but it has the advantage
  // of working even with threads that don't have message loops. If a user
  // sees any strangeness, they can always just run their stats gathering a
  // second time.
  BirthCountMap birth_counts;
  for (ThreadData* thread_data = my_list; thread_data;
       thread_data = thread_data->next()) {
    thread_data->SnapshotExecutedTasks(current_profiling_phase,
                                       &process_data_snapshot->phased_snapshots,
                                       &birth_counts);
  }

  // Add births that are still active -- i.e. objects that have tallied a
  // birth, but have not yet tallied a matching death, and hence must be either
  // running, queued up, or being held in limbo for future posting.
  auto* current_phase_tasks =
      &process_data_snapshot->phased_snapshots[current_profiling_phase].tasks;
  for (const auto& birth_count : birth_counts) {
    if (birth_count.second > 0) {
      current_phase_tasks->push_back(
          TaskSnapshot(BirthOnThreadSnapshot(*birth_count.first),
                       DeathDataSnapshot(birth_count.second, 0, 0, 0, 0, 0, 0),
                       "Still_Alive"));
    }
  }
}

// static
void ThreadData::OnProfilingPhaseCompleted(int profiling_phase) {
  // Get an unchanging copy of a ThreadData list.
  ThreadData* my_list = ThreadData::first();

  // Add snapshots for all instances of death data in all threads serially.
  // This hackish approach *can* get some slightly corrupt tallies, as we are
  // grabbing values without the protection of a lock, but it has the advantage
  // of working even with threads that don't have message loops. Any corruption
  // shouldn't cause "cascading damage" to anything else (in later phases).
  for (ThreadData* thread_data = my_list; thread_data;
       thread_data = thread_data->next()) {
    thread_data->OnProfilingPhaseCompletedOnThread(profiling_phase);
  }
}

Births* ThreadData::TallyABirth(const Location& location) {
  BirthMap::iterator it = birth_map_.find(location);
  Births* child;
  if (it != birth_map_.end()) {
    child = it->second;
    child->RecordBirth();
  } else {
    child = new Births(location, *this);  // Leak this.
    // Lock since the map may get relocated now, and other threads sometimes
    // snapshot it (but they lock before copying it).
    base::AutoLock lock(map_lock_);
    birth_map_[location] = child;
  }

  return child;
}

void ThreadData::TallyADeath(const Births& births,
                             int32_t queue_duration,
                             const TaskStopwatch& stopwatch) {
  int32_t run_duration = stopwatch.RunDurationMs();

  // Stir in some randomness, plus add constant in case durations are zero.
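  // (2147483647 is 2^31 - 1, i.e., the Mersenne prime M31.)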
  const uint32_t kSomePrimeNumber = 2147483647;
  random_number_ += queue_duration + run_duration + kSomePrimeNumber;
  // An address is going to have some randomness to it as well ;-).
  random_number_ ^=
      static_cast<uint32_t>(&births - reinterpret_cast<Births*>(0));

  DeathMap::iterator it = death_map_.find(&births);
  DeathData* death_data;
  if (it != death_map_.end()) {
    death_data = &it->second;
  } else {
    base::AutoLock lock(map_lock_);  // Lock as the map may get relocated now.
    death_data = &death_map_[&births];
  }  // Release lock ASAP.
  death_data->RecordDeath(queue_duration, run_duration, random_number_);
}

// static
Births* ThreadData::TallyABirthIfActive(const Location& location) {
  if (!TrackingStatus())
    return NULL;
  ThreadData* current_thread_data = Get();
  if (!current_thread_data)
    return NULL;
  return current_thread_data->TallyABirth(location);
}

// static
void ThreadData::TallyRunOnNamedThreadIfTracking(
    const base::TrackingInfo& completed_task,
    const TaskStopwatch& stopwatch) {
  // Even if we have been DEACTIVATED, we will process any pending births so
  // that our data structures (which counted the outstanding births) remain
  // consistent.
  const Births* births = completed_task.birth_tally;
  if (!births)
    return;
  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  // Watch out for a race where status_ is changing, and hence one or both
  // of start_of_run or end_of_run is zero. In that case, we didn't bother to
  // get a time value since we "weren't tracking" and we were trying to be
  // efficient by not calling for a genuine time value. For simplicity, we'll
  // use a default zero duration when we can't calculate a true value.
  TrackedTime start_of_run = stopwatch.StartTime();
  int32_t queue_duration = 0;
  if (!start_of_run.is_null()) {
    queue_duration = (start_of_run - completed_task.EffectiveTimePosted())
                         .InMilliseconds();
  }
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}

// static
void ThreadData::TallyRunOnWorkerThreadIfTracking(
    const Births* births,
    const TrackedTime& time_posted,
    const TaskStopwatch& stopwatch) {
  // Even if we have been DEACTIVATED, we will process any pending births so
  // that our data structures (which counted the outstanding births) remain
  // consistent.
  if (!births)
    return;

  // TODO(jar): Support the option to coalesce all worker-thread activity under
  // one ThreadData instance that uses locks to protect *all* access. This will
  // reduce memory (making it provably bounded), but run incrementally slower
  // (since we'll use locks on TallyABirth and TallyADeath). The good news is
  // that the locks on TallyADeath will be *after* the worker thread has run,
  // and hence nothing will be waiting for the completion (... besides some
  // other thread that might like to run). Also, the worker threads tasks are
  // generally longer, and hence the cost of the lock may perchance be
  // amortized over the long task's lifetime.
  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  TrackedTime start_of_run = stopwatch.StartTime();
  int32_t queue_duration = 0;
  if (!start_of_run.is_null()) {
    queue_duration = (start_of_run - time_posted).InMilliseconds();
  }
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}

// static
void ThreadData::TallyRunInAScopedRegionIfTracking(
    const Births* births,
    const TaskStopwatch& stopwatch) {
  // Even if we have been DEACTIVATED, we will process any pending births so
  // that our data structures (which counted the outstanding births) remain
  // consistent.
  if (!births)
    return;

  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  int32_t queue_duration = 0;
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}

void ThreadData::SnapshotExecutedTasks(
    int current_profiling_phase,
    PhasedProcessDataSnapshotMap* phased_snapshots,
    BirthCountMap* birth_counts) {
  // Get copy of data, so that the data will not change during the iterations
  // and processing.
  BirthMap birth_map;
  DeathsSnapshot deaths;
  SnapshotMaps(current_profiling_phase, &birth_map, &deaths);

  for (const auto& birth : birth_map) {
    (*birth_counts)[birth.second] += birth.second->birth_count();
  }

  for (const auto& death : deaths) {
    (*birth_counts)[death.first] -= death.first->birth_count();

    // For the current death data, walk through all its snapshots, starting
    // from the current one, then from the previous profiling phase etc., and
    // for each snapshot calculate the delta between the snapshot and the
    // previous phase, if any. Store the deltas in the result.
    for (const DeathDataPhaseSnapshot* phase = &death.second; phase;
         phase = phase->prev) {
      const DeathDataSnapshot& death_data =
          phase->prev ? phase->death_data.Delta(phase->prev->death_data)
                      : phase->death_data;

      if (death_data.count > 0) {
        (*phased_snapshots)[phase->profiling_phase].tasks.push_back(
            TaskSnapshot(BirthOnThreadSnapshot(*death.first), death_data,
                         thread_name()));
      }
    }
  }
}

// This may be called from another thread.
void ThreadData::SnapshotMaps(int profiling_phase,
                              BirthMap* birth_map,
                              DeathsSnapshot* deaths) {
  base::AutoLock lock(map_lock_);

  for (const auto& birth : birth_map_)
    (*birth_map)[birth.first] = birth.second;

  for (const auto& death : death_map_) {
    deaths->push_back(std::make_pair(
        death.first,
        DeathDataPhaseSnapshot(profiling_phase, death.second.count(),
                               death.second.run_duration_sum(),
                               death.second.run_duration_max(),
                               death.second.run_duration_sample(),
                               death.second.queue_duration_sum(),
                               death.second.queue_duration_max(),
                               death.second.queue_duration_sample(),
                               death.second.last_phase_snapshot())));
  }
}

void ThreadData::OnProfilingPhaseCompletedOnThread(int profiling_phase) {
  base::AutoLock lock(map_lock_);

  for (auto& death : death_map_) {
    death.second.OnProfilingPhaseCompleted(profiling_phase);
  }
}

void ThreadData::EnsureTlsInitialization() {
  if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
    return;  // Someone else did the initialization.
  // Due to racy lazy initialization in tests, we'll need to recheck status_
  // after we acquire the lock.
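  // (This is the usual double-checked locking pattern: a cheap acquire-load
  // check above, then take the lock and recheck before initializing.)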

  // Ensure that we don't double initialize tls. We are called when single
  // threaded in the product, but some tests may be racy and lazy about our
  // initialization.
  base::AutoLock lock(*list_lock_.Pointer());
  if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
    return;  // Someone raced in here and beat us.

  // Perform the "real" TLS initialization now, and leave it intact through
  // process termination.
  if (!tls_index_.initialized()) {  // Testing may have initialized this.
    DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), UNINITIALIZED);
    tls_index_.Initialize(&ThreadData::OnThreadTermination);
    DCHECK(tls_index_.initialized());
  } else {
    // TLS was initialized for us earlier.
    DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), DORMANT_DURING_TESTS);
  }

  // Incarnation counter is only significant to testing, as it otherwise will
  // never again change in this process.
  ++incarnation_counter_;

  // The lock is not critical for setting status_, but it doesn't hurt. It also
  // ensures that if we have a racy initialization, we'll bail as soon as we
  // get the lock earlier in this method.
  base::subtle::Release_Store(&status_, kInitialStartupState);
  DCHECK(base::subtle::NoBarrier_Load(&status_) != UNINITIALIZED);
}

// static
void ThreadData::InitializeAndSetTrackingStatus(Status status) {
  DCHECK_GE(status, DEACTIVATED);
  DCHECK_LE(status, PROFILING_ACTIVE);

  EnsureTlsInitialization();  // No-op if already initialized.

  if (status > DEACTIVATED)
    status = PROFILING_ACTIVE;
  base::subtle::Release_Store(&status_, status);
}

// static
ThreadData::Status ThreadData::status() {
  return static_cast<ThreadData::Status>(base::subtle::Acquire_Load(&status_));
}

// static
bool ThreadData::TrackingStatus() {
  return base::subtle::Acquire_Load(&status_) > DEACTIVATED;
}

// static
void ThreadData::EnableProfilerTiming() {
  base::subtle::NoBarrier_Store(&g_profiler_timing_enabled, ENABLED_TIMING);
}

// static
TrackedTime ThreadData::Now() {
  if (now_function_for_testing_)
    return TrackedTime::FromMilliseconds((*now_function_for_testing_)());
  if (IsProfilerTimingEnabled() && TrackingStatus())
    return TrackedTime::Now();
  return TrackedTime();  // Super fast when disabled, or not compiled.
}

// static
void ThreadData::EnsureCleanupWasCalled(int /*major_threads_shutdown_count*/) {
  base::AutoLock lock(*list_lock_.Pointer());
  if (worker_thread_data_creation_count_ == 0)
    return;  // We haven't really run much, and couldn't have leaked.

  // TODO(jar): until this is working on XP, don't run the real test.
#if 0
  // Verify that we've at least shut down/cleaned up the major named threads.
  // The caller should tell us how many thread shutdowns should have taken
  // place by now.
  CHECK_GT(cleanup_count_, major_threads_shutdown_count);
#endif
}

// static
void ThreadData::ShutdownSingleThreadedCleanup(bool leak) {
  // This is only called from test code, where we need to cleanup so that
  // additional tests can be run.
  // We must be single threaded... but be careful anyway.
  InitializeAndSetTrackingStatus(DEACTIVATED);

  ThreadData* thread_data_list;
  {
    base::AutoLock lock(*list_lock_.Pointer());
    thread_data_list = all_thread_data_list_head_;
    all_thread_data_list_head_ = NULL;
    ++incarnation_counter_;
    // To be clean, break apart the retired worker list (though we leak them).
    while (first_retired_worker_) {
      ThreadData* worker = first_retired_worker_;
      CHECK_GT(worker->worker_thread_number_, 0);
      first_retired_worker_ = worker->next_retired_worker_;
      worker->next_retired_worker_ = NULL;
    }
  }

  // Put most global statics back in pristine shape.
  worker_thread_data_creation_count_ = 0;
  cleanup_count_ = 0;
  tls_index_.Set(NULL);
  // Almost UNINITIALIZED.
  base::subtle::Release_Store(&status_, DORMANT_DURING_TESTS);

  // To avoid any chance of racing in unit tests, which is the only place we
  // call this function, we may sometimes leak all the data structures we
  // recovered, as they may still be in use on threads from prior tests!
  if (leak) {
    ThreadData* thread_data = thread_data_list;
    while (thread_data) {
      ANNOTATE_LEAKING_OBJECT_PTR(thread_data);
      thread_data = thread_data->next();
    }
    return;
  }

  // When we want to cleanup (on a single thread), here is what we do.

  // Do actual recursive delete in all ThreadData instances.
  while (thread_data_list) {
    ThreadData* next_thread_data = thread_data_list;
    thread_data_list = thread_data_list->next();

    for (BirthMap::iterator it = next_thread_data->birth_map_.begin();
         next_thread_data->birth_map_.end() != it; ++it)
      delete it->second;  // Delete the Birth Records.
    delete next_thread_data;  // Includes all Death Records.
  }
}

//------------------------------------------------------------------------------
TaskStopwatch::TaskStopwatch()
    : wallclock_duration_ms_(0),
      current_thread_data_(NULL),
      excluded_duration_ms_(0),
      parent_(NULL) {
#if DCHECK_IS_ON()
  state_ = CREATED;
  child_ = NULL;
#endif
}

TaskStopwatch::~TaskStopwatch() {
#if DCHECK_IS_ON()
  DCHECK(state_ != RUNNING);
  DCHECK(child_ == NULL);
#endif
}

void TaskStopwatch::Start() {
#if DCHECK_IS_ON()
  DCHECK(state_ == CREATED);
  state_ = RUNNING;
#endif

  start_time_ = ThreadData::Now();

  current_thread_data_ = ThreadData::Get();
  if (!current_thread_data_)
    return;

  parent_ = current_thread_data_->current_stopwatch_;
#if DCHECK_IS_ON()
  if (parent_) {
    DCHECK(parent_->state_ == RUNNING);
    DCHECK(parent_->child_ == NULL);
    parent_->child_ = this;
  }
#endif
  current_thread_data_->current_stopwatch_ = this;
}

void TaskStopwatch::Stop() {
  const TrackedTime end_time = ThreadData::Now();
#if DCHECK_IS_ON()
  DCHECK(state_ == RUNNING);
  state_ = STOPPED;
  DCHECK(child_ == NULL);
#endif

  if (!start_time_.is_null() && !end_time.is_null()) {
    wallclock_duration_ms_ = (end_time - start_time_).InMilliseconds();
  }

  if (!current_thread_data_)
    return;

  DCHECK(current_thread_data_->current_stopwatch_ == this);
  current_thread_data_->current_stopwatch_ = parent_;
  if (!parent_)
    return;

#if DCHECK_IS_ON()
  DCHECK(parent_->state_ == RUNNING);
  DCHECK(parent_->child_ == this);
  parent_->child_ = NULL;
#endif
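  // Credit this stopwatch's wall-clock time to the parent's excluded time, so
  // that the parent's RunDurationMs() excludes time spent in nested tasks.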
  parent_->excluded_duration_ms_ += wallclock_duration_ms_;
  parent_ = NULL;
}

TrackedTime TaskStopwatch::StartTime() const {
#if DCHECK_IS_ON()
  DCHECK(state_ != CREATED);
#endif

  return start_time_;
}

int32_t TaskStopwatch::RunDurationMs() const {
#if DCHECK_IS_ON()
  DCHECK(state_ == STOPPED);
#endif

  return wallclock_duration_ms_ - excluded_duration_ms_;
}

ThreadData* TaskStopwatch::GetThreadData() const {
#if DCHECK_IS_ON()
  DCHECK(state_ != CREATED);
#endif

  return current_thread_data_;
}

//------------------------------------------------------------------------------
// DeathDataPhaseSnapshot

DeathDataPhaseSnapshot::DeathDataPhaseSnapshot(
    int profiling_phase,
    int count,
    int32_t run_duration_sum,
    int32_t run_duration_max,
    int32_t run_duration_sample,
    int32_t queue_duration_sum,
    int32_t queue_duration_max,
    int32_t queue_duration_sample,
    const DeathDataPhaseSnapshot* prev)
    : profiling_phase(profiling_phase),
      death_data(count,
                 run_duration_sum,
                 run_duration_max,
                 run_duration_sample,
                 queue_duration_sum,
                 queue_duration_max,
                 queue_duration_sample),
      prev(prev) {}

//------------------------------------------------------------------------------
// TaskSnapshot

TaskSnapshot::TaskSnapshot() {
}

TaskSnapshot::TaskSnapshot(const BirthOnThreadSnapshot& birth,
                           const DeathDataSnapshot& death_data,
                           const std::string& death_thread_name)
    : birth(birth),
      death_data(death_data),
      death_thread_name(death_thread_name) {
}

TaskSnapshot::~TaskSnapshot() {
}

//------------------------------------------------------------------------------
// ProcessDataPhaseSnapshot

ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot() {
}

ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot(
    const ProcessDataPhaseSnapshot& other) = default;

ProcessDataPhaseSnapshot::~ProcessDataPhaseSnapshot() {
}

//------------------------------------------------------------------------------
// ProcessDataSnapshot

ProcessDataSnapshot::ProcessDataSnapshot()
#if !defined(OS_NACL)
    : process_id(base::GetCurrentProcId()) {
#else
    : process_id(base::kNullProcessId) {
#endif
}

ProcessDataSnapshot::ProcessDataSnapshot(const ProcessDataSnapshot& other) =
    default;

ProcessDataSnapshot::~ProcessDataSnapshot() {
}

}  // namespace tracked_objects
