// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/tracked_objects.h"

#include <limits.h>
#include <stdlib.h>

#include "base/atomicops.h"
#include "base/base_switches.h"
#include "base/command_line.h"
#include "base/compiler_specific.h"
#include "base/logging.h"
#include "base/process/process_handle.h"
#include "base/profiler/alternate_timer.h"
#include "base/strings/stringprintf.h"
#include "base/tracking_info.h"
#include "build/build_config.h"
#include "third_party/valgrind/memcheck.h"

using base::TimeDelta;

namespace base {
class TimeDelta;
}

namespace tracked_objects {

namespace {
// When ThreadData is first initialized, should we start in an ACTIVE state to
// record all of the startup-time tasks, or should we start up DEACTIVATED, so
// that we only record after parsing the command line flag --enable-tracking.
// Note that the flag may force either state, so this really controls only the
// period of time up until that flag is parsed. If there is no flag seen, then
// this state may prevail for much or all of the process lifetime.
const ThreadData::Status kInitialStartupState = ThreadData::PROFILING_ACTIVE;

// Control whether an alternate time source (Now() function) is supported by
// the ThreadData class. This compile time flag should be set to true if we
// want other modules (such as a memory allocator, or a thread-specific CPU time
// clock) to be able to provide a thread-specific Now() function. Without this
// compile-time flag, the code will only support the wall-clock time. This flag
// can be flipped to efficiently disable this path (if there is a performance
// problem with its presence).
static const bool kAllowAlternateTimeSourceHandling = true;

// Possible states of the profiler timing enabledness.
enum {
  UNDEFINED_TIMING,
  ENABLED_TIMING,
  DISABLED_TIMING,
};

// State of the profiler timing enabledness.
base::subtle::Atomic32 g_profiler_timing_enabled = UNDEFINED_TIMING;

// Returns whether profiler timing is enabled. The default is true, but this
// may be overridden by a command-line flag. Some platforms may
// programmatically set this command-line flag to the "off" value if it's not
// specified.
// This in turn can be overridden by explicitly calling
// ThreadData::EnableProfilerTiming, say, based on a field trial.
inline bool IsProfilerTimingEnabled() {
  // Reading |g_profiler_timing_enabled| is done without barrier because
  // multiple initialization is not an issue while the barrier can be relatively
  // costly given that this method is sometimes called in a tight loop.
  base::subtle::Atomic32 current_timing_enabled =
      base::subtle::NoBarrier_Load(&g_profiler_timing_enabled);
  if (current_timing_enabled == UNDEFINED_TIMING) {
    if (!base::CommandLine::InitializedForCurrentProcess())
      return true;
    current_timing_enabled =
        (base::CommandLine::ForCurrentProcess()->GetSwitchValueASCII(
             switches::kProfilerTiming) ==
         switches::kProfilerTimingDisabledValue)
            ? DISABLED_TIMING
            : ENABLED_TIMING;
    base::subtle::NoBarrier_Store(&g_profiler_timing_enabled,
                                  current_timing_enabled);
  }
  return current_timing_enabled == ENABLED_TIMING;
}

}  // namespace

//------------------------------------------------------------------------------
// DeathData tallies durations when a death takes place.

DeathData::DeathData()
    : count_(0),
      sample_probability_count_(0),
      run_duration_sum_(0),
      queue_duration_sum_(0),
      run_duration_max_(0),
      queue_duration_max_(0),
      run_duration_sample_(0),
      queue_duration_sample_(0),
      last_phase_snapshot_(nullptr) {
}

DeathData::DeathData(const DeathData& other)
    : count_(other.count_),
      sample_probability_count_(other.sample_probability_count_),
      run_duration_sum_(other.run_duration_sum_),
      queue_duration_sum_(other.queue_duration_sum_),
      run_duration_max_(other.run_duration_max_),
      queue_duration_max_(other.queue_duration_max_),
      run_duration_sample_(other.run_duration_sample_),
      queue_duration_sample_(other.queue_duration_sample_),
      last_phase_snapshot_(nullptr) {
  // This constructor will be used by std::map when adding new DeathData values
  // to the map. At that point, last_phase_snapshot_ is still NULL, so we don't
  // need to worry about ownership transfer.
  DCHECK(other.last_phase_snapshot_ == nullptr);
}

DeathData::~DeathData() {
  while (last_phase_snapshot_) {
    const DeathDataPhaseSnapshot* snapshot = last_phase_snapshot_;
    last_phase_snapshot_ = snapshot->prev;
    delete snapshot;
  }
}

// TODO(jar): I need to see if this macro to optimize branching is worth using.
//
// This macro has no branching, so it is surely fast, and is equivalent to:
//   if (assign_it)
//     target = source;
// We use a macro rather than a template to force this to inline.
// Related code for calculating max is discussed on the web.
#define CONDITIONAL_ASSIGN(assign_it, target, source) \
  ((target) ^= ((target) ^ (source)) & -static_cast<int32_t>(assign_it))
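// For example, CONDITIONAL_ASSIGN(true, x, y) negates a cast of true to get an
// all-ones mask, so x ^= (x ^ y) leaves x equal to y; with a false first
// argument the mask is zero and x is left unchanged.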

void DeathData::RecordDeath(const int32_t queue_duration,
                            const int32_t run_duration,
                            const uint32_t random_number) {
  // We'll just clamp at INT_MAX, but we should note this in the UI as such.
  if (count_ < INT_MAX)
    base::subtle::NoBarrier_Store(&count_, count_ + 1);

  int sample_probability_count =
      base::subtle::NoBarrier_Load(&sample_probability_count_);
  if (sample_probability_count < INT_MAX)
    ++sample_probability_count;
  base::subtle::NoBarrier_Store(&sample_probability_count_,
                                sample_probability_count);

  base::subtle::NoBarrier_Store(&queue_duration_sum_,
                                queue_duration_sum_ + queue_duration);
  base::subtle::NoBarrier_Store(&run_duration_sum_,
                                run_duration_sum_ + run_duration);

  if (queue_duration_max() < queue_duration)
    base::subtle::NoBarrier_Store(&queue_duration_max_, queue_duration);
  if (run_duration_max() < run_duration)
    base::subtle::NoBarrier_Store(&run_duration_max_, run_duration);

  // Take a uniformly distributed sample over all durations ever supplied during
  // the current profiling phase.
  // The probability that we (instead) use this new sample is
  // 1/sample_probability_count_. This results in a completely uniform selection
  // of the sample (at least when we don't clamp sample_probability_count_...
  // but that should be inconsequentially likely). We ignore the fact that we
  // correlated our selection of a sample to the run and queue times (i.e., we
  // used them to generate random_number).
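  // Stated differently, this is reservoir sampling with a reservoir of size
  // one: the N-th death recorded in a phase replaces the stored sample with
  // probability 1/N, which (absent clamping) leaves each of the N observed
  // durations as the retained sample with equal probability 1/N.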
  CHECK_GT(sample_probability_count, 0);
  if (0 == (random_number % sample_probability_count)) {
    base::subtle::NoBarrier_Store(&queue_duration_sample_, queue_duration);
    base::subtle::NoBarrier_Store(&run_duration_sample_, run_duration);
  }
}

void DeathData::OnProfilingPhaseCompleted(int profiling_phase) {
  // Snapshotting and storing current state.
  last_phase_snapshot_ = new DeathDataPhaseSnapshot(
      profiling_phase, count(), run_duration_sum(), run_duration_max(),
      run_duration_sample(), queue_duration_sum(), queue_duration_max(),
      queue_duration_sample(), last_phase_snapshot_);

  // Not touching fields for which a delta can be computed by comparing with a
  // snapshot from the previous phase. Resetting other fields. Sample values
  // will be reset upon next death recording because sample_probability_count_
  // is set to 0.
  // We avoid resetting to 0 in favor of deltas whenever possible. The reason
  // is that for incrementable fields, resetting to 0 from the snapshot thread
  // potentially in parallel with incrementing in the death thread may result in
  // significant data corruption that has a potential to grow with time. Not
  // resetting incrementable fields and using deltas will cause any
  // off-by-little corruptions to be likely fixed at the next snapshot.
  // The max values are not incrementable, and cannot be deduced using deltas
  // for a given phase. Hence, we have to reset them to 0. But the potential
  // damage is limited to getting the previous phase's max to apply for the next
  // phase, and the error doesn't have a potential to keep growing with new
  // resets.
  // sample_probability_count_ is incrementable, but must be reset to 0 at the
  // phase end, so that we start a new uniformly randomized sample selection
  // after the reset. These fields are updated using atomics. However, race
  // conditions are possible since these are updated individually and not
  // together atomically, resulting in the values being mutually inconsistent.
  // The damage is limited to selecting a wrong sample, which is not something
  // that can cause accumulating or cascading effects.
  // If there were no inconsistencies caused by race conditions, we never send a
  // sample for the previous phase in the next phase's snapshot because
  // ThreadData::SnapshotExecutedTasks doesn't send deltas with 0 count.
  base::subtle::NoBarrier_Store(&sample_probability_count_, 0);
  base::subtle::NoBarrier_Store(&run_duration_max_, 0);
  base::subtle::NoBarrier_Store(&queue_duration_max_, 0);
}

//------------------------------------------------------------------------------
DeathDataSnapshot::DeathDataSnapshot()
    : count(-1),
      run_duration_sum(-1),
      run_duration_max(-1),
      run_duration_sample(-1),
      queue_duration_sum(-1),
      queue_duration_max(-1),
      queue_duration_sample(-1) {
}

DeathDataSnapshot::DeathDataSnapshot(int count,
                                     int32_t run_duration_sum,
                                     int32_t run_duration_max,
                                     int32_t run_duration_sample,
                                     int32_t queue_duration_sum,
                                     int32_t queue_duration_max,
                                     int32_t queue_duration_sample)
    : count(count),
      run_duration_sum(run_duration_sum),
      run_duration_max(run_duration_max),
      run_duration_sample(run_duration_sample),
      queue_duration_sum(queue_duration_sum),
      queue_duration_max(queue_duration_max),
      queue_duration_sample(queue_duration_sample) {}

DeathDataSnapshot::~DeathDataSnapshot() {
}

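// Note that only the additive fields (count and the duration sums) are
// differenced below. A max and a randomly selected sample are not additive
// across phases, so the newer snapshot's values are carried through unchanged;
// see the reset logic in DeathData::OnProfilingPhaseCompleted.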
DeathDataSnapshot DeathDataSnapshot::Delta(
    const DeathDataSnapshot& older) const {
  return DeathDataSnapshot(count - older.count,
                           run_duration_sum - older.run_duration_sum,
                           run_duration_max, run_duration_sample,
                           queue_duration_sum - older.queue_duration_sum,
                           queue_duration_max, queue_duration_sample);
}

//------------------------------------------------------------------------------
BirthOnThread::BirthOnThread(const Location& location,
                             const ThreadData& current)
    : location_(location),
      birth_thread_(&current) {
}

//------------------------------------------------------------------------------
BirthOnThreadSnapshot::BirthOnThreadSnapshot() {
}

BirthOnThreadSnapshot::BirthOnThreadSnapshot(const BirthOnThread& birth)
    : location(birth.location()),
      thread_name(birth.birth_thread()->thread_name()) {
}

BirthOnThreadSnapshot::~BirthOnThreadSnapshot() {
}

//------------------------------------------------------------------------------
Births::Births(const Location& location, const ThreadData& current)
    : BirthOnThread(location, current),
      birth_count_(1) { }

int Births::birth_count() const { return birth_count_; }

void Births::RecordBirth() { ++birth_count_; }

//------------------------------------------------------------------------------
// ThreadData maintains the central data for all births and deaths on a single
// thread.

// TODO(jar): We should pull all these static vars together, into a struct, and
// optimize layout so that we benefit from locality of reference during accesses
// to them.

// static
NowFunction* ThreadData::now_function_ = NULL;

// static
bool ThreadData::now_function_is_time_ = false;

// A TLS slot which points to the ThreadData instance for the current thread.
// We do a fake initialization here (zeroing out data), and then the real
// in-place construction happens when we call tls_index_.Initialize().
// static
base::ThreadLocalStorage::StaticSlot ThreadData::tls_index_ = TLS_INITIALIZER;

// static
int ThreadData::worker_thread_data_creation_count_ = 0;

// static
int ThreadData::cleanup_count_ = 0;

// static
int ThreadData::incarnation_counter_ = 0;

// static
ThreadData* ThreadData::all_thread_data_list_head_ = NULL;

// static
ThreadData* ThreadData::first_retired_worker_ = NULL;

// static
base::LazyInstance<base::Lock>::Leaky
    ThreadData::list_lock_ = LAZY_INSTANCE_INITIALIZER;

// static
base::subtle::Atomic32 ThreadData::status_ = ThreadData::UNINITIALIZED;

ThreadData::ThreadData(const std::string& suggested_name)
    : next_(NULL),
      next_retired_worker_(NULL),
      worker_thread_number_(0),
      incarnation_count_for_pool_(-1),
      current_stopwatch_(NULL) {
  DCHECK_GE(suggested_name.size(), 0u);
  thread_name_ = suggested_name;
  PushToHeadOfList();  // Which sets real incarnation_count_for_pool_.
}

ThreadData::ThreadData(int thread_number)
    : next_(NULL),
      next_retired_worker_(NULL),
      worker_thread_number_(thread_number),
      incarnation_count_for_pool_(-1),
      current_stopwatch_(NULL) {
  CHECK_GT(thread_number, 0);
  base::StringAppendF(&thread_name_, "WorkerThread-%d", thread_number);
  PushToHeadOfList();  // Which sets real incarnation_count_for_pool_.
}

ThreadData::~ThreadData() {
}

void ThreadData::PushToHeadOfList() {
  // Toss in a hint of randomness (atop the uninitialized value).
  (void)VALGRIND_MAKE_MEM_DEFINED_IF_ADDRESSABLE(&random_number_,
                                                 sizeof(random_number_));
  MSAN_UNPOISON(&random_number_, sizeof(random_number_));
  random_number_ += static_cast<uint32_t>(this - static_cast<ThreadData*>(0));
  random_number_ ^= (Now() - TrackedTime()).InMilliseconds();

  DCHECK(!next_);
  base::AutoLock lock(*list_lock_.Pointer());
  incarnation_count_for_pool_ = incarnation_counter_;
  next_ = all_thread_data_list_head_;
  all_thread_data_list_head_ = this;
}

// static
ThreadData* ThreadData::first() {
  base::AutoLock lock(*list_lock_.Pointer());
  return all_thread_data_list_head_;
}

ThreadData* ThreadData::next() const { return next_; }

// static
void ThreadData::InitializeThreadContext(const std::string& suggested_name) {
  Initialize();
  ThreadData* current_thread_data =
      reinterpret_cast<ThreadData*>(tls_index_.Get());
  if (current_thread_data)
    return;  // Browser tests instigate this.
  current_thread_data = new ThreadData(suggested_name);
  tls_index_.Set(current_thread_data);
}

// static
ThreadData* ThreadData::Get() {
  if (!tls_index_.initialized())
    return NULL;  // For unittests only.
  ThreadData* registered = reinterpret_cast<ThreadData*>(tls_index_.Get());
  if (registered)
    return registered;

  // We must be a worker thread, since we didn't pre-register.
  ThreadData* worker_thread_data = NULL;
  int worker_thread_number = 0;
  {
    base::AutoLock lock(*list_lock_.Pointer());
    if (first_retired_worker_) {
      worker_thread_data = first_retired_worker_;
      first_retired_worker_ = first_retired_worker_->next_retired_worker_;
      worker_thread_data->next_retired_worker_ = NULL;
    } else {
      worker_thread_number = ++worker_thread_data_creation_count_;
    }
  }

  // If we can't find a previously used instance, then we have to create one.
  if (!worker_thread_data) {
    DCHECK_GT(worker_thread_number, 0);
    worker_thread_data = new ThreadData(worker_thread_number);
  }
  DCHECK_GT(worker_thread_data->worker_thread_number_, 0);

  tls_index_.Set(worker_thread_data);
  return worker_thread_data;
}

// static
void ThreadData::OnThreadTermination(void* thread_data) {
  DCHECK(thread_data);  // TLS should *never* call us with a NULL.
  // We must NOT do any allocations during this callback. There is a chance
  // that the allocator is no longer active on this thread.
  reinterpret_cast<ThreadData*>(thread_data)->OnThreadTerminationCleanup();
}

void ThreadData::OnThreadTerminationCleanup() {
  // The list_lock_ was created when we registered the callback, so it won't be
  // allocated here despite the lazy reference.
  base::AutoLock lock(*list_lock_.Pointer());
  if (incarnation_counter_ != incarnation_count_for_pool_)
    return;  // ThreadData was constructed in an earlier unit test.
  ++cleanup_count_;
  // Only worker threads need to be retired and reused.
  if (!worker_thread_number_) {
    return;
  }
  // We must NOT do any allocations during this callback.
  // Using the simple linked lists avoids all allocations.
  DCHECK_EQ(this->next_retired_worker_, reinterpret_cast<ThreadData*>(NULL));
  this->next_retired_worker_ = first_retired_worker_;
  first_retired_worker_ = this;
}

// static
void ThreadData::Snapshot(int current_profiling_phase,
                          ProcessDataSnapshot* process_data_snapshot) {
  // Get an unchanging copy of a ThreadData list.
  ThreadData* my_list = ThreadData::first();

  // Gather data serially.
  // This hackish approach *can* get some slightly corrupt tallies, as we are
  // grabbing values without the protection of a lock, but it has the advantage
  // of working even with threads that don't have message loops. If a user
  // sees any strangeness, they can always just run their stats gathering a
  // second time.
  BirthCountMap birth_counts;
  for (ThreadData* thread_data = my_list; thread_data;
       thread_data = thread_data->next()) {
    thread_data->SnapshotExecutedTasks(current_profiling_phase,
                                       &process_data_snapshot->phased_snapshots,
                                       &birth_counts);
  }

  // Add births that are still active -- i.e. objects that have tallied a birth,
  // but have not yet tallied a matching death, and hence must be either
  // running, queued up, or being held in limbo for future posting.
  auto* current_phase_tasks =
      &process_data_snapshot->phased_snapshots[current_profiling_phase].tasks;
  for (const auto& birth_count : birth_counts) {
    if (birth_count.second > 0) {
      current_phase_tasks->push_back(
          TaskSnapshot(BirthOnThreadSnapshot(*birth_count.first),
                       DeathDataSnapshot(birth_count.second, 0, 0, 0, 0, 0, 0),
                       "Still_Alive"));
    }
  }
}

// static
void ThreadData::OnProfilingPhaseCompleted(int profiling_phase) {
  // Get an unchanging copy of a ThreadData list.
  ThreadData* my_list = ThreadData::first();

  // Add snapshots for all instances of death data in all threads serially.
  // This hackish approach *can* get some slightly corrupt tallies, as we are
  // grabbing values without the protection of a lock, but it has the advantage
  // of working even with threads that don't have message loops. Any corruption
  // shouldn't cause "cascading damage" to anything else (in later phases).
  for (ThreadData* thread_data = my_list; thread_data;
       thread_data = thread_data->next()) {
    thread_data->OnProfilingPhaseCompletedOnThread(profiling_phase);
  }
}

Births* ThreadData::TallyABirth(const Location& location) {
  BirthMap::iterator it = birth_map_.find(location);
  Births* child;
  if (it != birth_map_.end()) {
    child = it->second;
    child->RecordBirth();
  } else {
    child = new Births(location, *this);  // Leak this.
    // Lock since the map may get relocated now, and other threads sometimes
    // snapshot it (but they lock before copying it).
    base::AutoLock lock(map_lock_);
    birth_map_[location] = child;
  }

  return child;
}

void ThreadData::TallyADeath(const Births& births,
                             int32_t queue_duration,
                             const TaskStopwatch& stopwatch) {
  int32_t run_duration = stopwatch.RunDurationMs();

  // Stir in some randomness, plus add constant in case durations are zero.
  const uint32_t kSomePrimeNumber = 2147483647;
  random_number_ += queue_duration + run_duration + kSomePrimeNumber;
  // An address is going to have some randomness to it as well ;-).
  random_number_ ^=
      static_cast<uint32_t>(&births - reinterpret_cast<Births*>(0));

  // We don't have queue durations without OS timer. OS timer is automatically
  // used for task-post-timing, so the use of an alternate timer implies all
  // queue times are invalid, unless it was explicitly said that we can trust
  // the alternate timer.
  if (kAllowAlternateTimeSourceHandling &&
      now_function_ &&
      !now_function_is_time_) {
    queue_duration = 0;
  }

  DeathMap::iterator it = death_map_.find(&births);
  DeathData* death_data;
  if (it != death_map_.end()) {
    death_data = &it->second;
  } else {
    base::AutoLock lock(map_lock_);  // Lock as the map may get relocated now.
    death_data = &death_map_[&births];
  }  // Release lock ASAP.
  death_data->RecordDeath(queue_duration, run_duration, random_number_);
}

// static
Births* ThreadData::TallyABirthIfActive(const Location& location) {
  if (!TrackingStatus())
    return NULL;
  ThreadData* current_thread_data = Get();
  if (!current_thread_data)
    return NULL;
  return current_thread_data->TallyABirth(location);
}

// static
void ThreadData::TallyRunOnNamedThreadIfTracking(
    const base::TrackingInfo& completed_task,
    const TaskStopwatch& stopwatch) {
  // Even if we have been DEACTIVATED, we will process any pending births so
  // that our data structures (which counted the outstanding births) remain
  // consistent.
  const Births* births = completed_task.birth_tally;
  if (!births)
    return;
  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  // Watch out for a race where status_ is changing, and hence one or both
  // of start_of_run or end_of_run is zero. In that case, we didn't bother to
  // get a time value since we "weren't tracking" and we were trying to be
  // efficient by not calling for a genuine time value. For simplicity, we'll
  // use a default zero duration when we can't calculate a true value.
  TrackedTime start_of_run = stopwatch.StartTime();
  int32_t queue_duration = 0;
  if (!start_of_run.is_null()) {
    queue_duration = (start_of_run - completed_task.EffectiveTimePosted())
                         .InMilliseconds();
  }
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}

// static
void ThreadData::TallyRunOnWorkerThreadIfTracking(
    const Births* births,
    const TrackedTime& time_posted,
    const TaskStopwatch& stopwatch) {
  // Even if we have been DEACTIVATED, we will process any pending births so
  // that our data structures (which counted the outstanding births) remain
  // consistent.
  if (!births)
    return;

  // TODO(jar): Support the option to coalesce all worker-thread activity under
  // one ThreadData instance that uses locks to protect *all* access. This will
  // reduce memory (making it provably bounded), but run incrementally slower
  // (since we'll use locks on TallyABirth and TallyADeath). The good news is
  // that the locks on TallyADeath will be *after* the worker thread has run,
  // and hence nothing will be waiting for the completion (... besides some
  // other thread that might like to run). Also, the worker threads' tasks are
  // generally longer, and hence the cost of the lock may perchance be amortized
  // over the long task's lifetime.
  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  TrackedTime start_of_run = stopwatch.StartTime();
  int32_t queue_duration = 0;
  if (!start_of_run.is_null()) {
    queue_duration = (start_of_run - time_posted).InMilliseconds();
  }
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}

// static
void ThreadData::TallyRunInAScopedRegionIfTracking(
    const Births* births,
    const TaskStopwatch& stopwatch) {
  // Even if we have been DEACTIVATED, we will process any pending births so
  // that our data structures (which counted the outstanding births) remain
  // consistent.
  if (!births)
    return;

  ThreadData* current_thread_data = stopwatch.GetThreadData();
  if (!current_thread_data)
    return;

  int32_t queue_duration = 0;
  current_thread_data->TallyADeath(*births, queue_duration, stopwatch);
}

void ThreadData::SnapshotExecutedTasks(
    int current_profiling_phase,
    PhasedProcessDataSnapshotMap* phased_snapshots,
    BirthCountMap* birth_counts) {
  // Get copy of data, so that the data will not change during the iterations
  // and processing.
  BirthMap birth_map;
  DeathsSnapshot deaths;
  SnapshotMaps(current_profiling_phase, &birth_map, &deaths);

  for (const auto& birth : birth_map) {
    (*birth_counts)[birth.second] += birth.second->birth_count();
  }

  for (const auto& death : deaths) {
    (*birth_counts)[death.first] -= death.first->birth_count();

    // For the current death data, walk through all its snapshots, starting from
    // the current one, then from the previous profiling phase etc., and for
    // each snapshot calculate the delta between the snapshot and the previous
    // phase, if any. Store the deltas in the result.
    for (const DeathDataPhaseSnapshot* phase = &death.second; phase;
         phase = phase->prev) {
      const DeathDataSnapshot& death_data =
          phase->prev ? phase->death_data.Delta(phase->prev->death_data)
                      : phase->death_data;

      if (death_data.count > 0) {
        (*phased_snapshots)[phase->profiling_phase].tasks.push_back(
            TaskSnapshot(BirthOnThreadSnapshot(*death.first), death_data,
                         thread_name()));
      }
    }
  }
}

// This may be called from another thread.
void ThreadData::SnapshotMaps(int profiling_phase,
                              BirthMap* birth_map,
                              DeathsSnapshot* deaths) {
  base::AutoLock lock(map_lock_);

  for (const auto& birth : birth_map_)
    (*birth_map)[birth.first] = birth.second;

  for (const auto& death : death_map_) {
    deaths->push_back(std::make_pair(
        death.first,
        DeathDataPhaseSnapshot(profiling_phase, death.second.count(),
                               death.second.run_duration_sum(),
                               death.second.run_duration_max(),
                               death.second.run_duration_sample(),
                               death.second.queue_duration_sum(),
                               death.second.queue_duration_max(),
                               death.second.queue_duration_sample(),
                               death.second.last_phase_snapshot())));
  }
}

void ThreadData::OnProfilingPhaseCompletedOnThread(int profiling_phase) {
  base::AutoLock lock(map_lock_);

  for (auto& death : death_map_) {
    death.second.OnProfilingPhaseCompleted(profiling_phase);
  }
}

static void OptionallyInitializeAlternateTimer() {
  NowFunction* alternate_time_source = GetAlternateTimeSource();
  if (alternate_time_source)
    ThreadData::SetAlternateTimeSource(alternate_time_source);
}

void ThreadData::Initialize() {
  if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
    return;  // Someone else did the initialization.
  // Due to racy lazy initialization in tests, we'll need to recheck status_
  // after we acquire the lock.

  // Ensure that we don't double initialize tls. We are called when single
  // threaded in the product, but some tests may be racy and lazy about our
  // initialization.
  base::AutoLock lock(*list_lock_.Pointer());
  if (base::subtle::Acquire_Load(&status_) >= DEACTIVATED)
    return;  // Someone raced in here and beat us.

  // Put an alternate timer in place if the environment calls for it, such as
  // for tracking TCMalloc allocations. This insertion is idempotent, so we
  // don't mind if there is a race, and we'd prefer not to be in a lock while
  // doing this work.
  if (kAllowAlternateTimeSourceHandling)
    OptionallyInitializeAlternateTimer();

  // Perform the "real" TLS initialization now, and leave it intact through
  // process termination.
  if (!tls_index_.initialized()) {  // Testing may have initialized this.
    DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), UNINITIALIZED);
    tls_index_.Initialize(&ThreadData::OnThreadTermination);
    DCHECK(tls_index_.initialized());
  } else {
    // TLS was initialized for us earlier.
    DCHECK_EQ(base::subtle::NoBarrier_Load(&status_), DORMANT_DURING_TESTS);
  }

  // Incarnation counter is only significant to testing, as it otherwise will
  // never again change in this process.
  ++incarnation_counter_;

  // The lock is not critical for setting status_, but it doesn't hurt. It also
  // ensures that if we have a racy initialization, that we'll bail as soon as
  // we get the lock earlier in this method.
  base::subtle::Release_Store(&status_, kInitialStartupState);
  DCHECK(base::subtle::NoBarrier_Load(&status_) != UNINITIALIZED);
}

// static
void ThreadData::InitializeAndSetTrackingStatus(Status status) {
  DCHECK_GE(status, DEACTIVATED);
  DCHECK_LE(status, PROFILING_ACTIVE);

  Initialize();  // No-op if already initialized.

  if (status > DEACTIVATED)
    status = PROFILING_ACTIVE;
  base::subtle::Release_Store(&status_, status);
}

// static
ThreadData::Status ThreadData::status() {
  return static_cast<ThreadData::Status>(base::subtle::Acquire_Load(&status_));
}

// static
bool ThreadData::TrackingStatus() {
  return base::subtle::Acquire_Load(&status_) > DEACTIVATED;
}

// static
void ThreadData::SetAlternateTimeSource(NowFunction* now_function) {
  DCHECK(now_function);
  if (kAllowAlternateTimeSourceHandling)
    now_function_ = now_function;
}

// static
void ThreadData::EnableProfilerTiming() {
  base::subtle::NoBarrier_Store(&g_profiler_timing_enabled, ENABLED_TIMING);
}

// static
TrackedTime ThreadData::Now() {
  if (kAllowAlternateTimeSourceHandling && now_function_)
    return TrackedTime::FromMilliseconds((*now_function_)());
  if (IsProfilerTimingEnabled() && TrackingStatus())
    return TrackedTime::Now();
  return TrackedTime();  // Super fast when disabled, or not compiled.
}

// static
void ThreadData::EnsureCleanupWasCalled(
    int /* major_threads_shutdown_count */) {
  base::AutoLock lock(*list_lock_.Pointer());
  if (worker_thread_data_creation_count_ == 0)
    return;  // We haven't really run much, and couldn't have leaked.

  // TODO(jar): until this is working on XP, don't run the real test.
#if 0
  // Verify that we've at least shut down/cleaned up the major named threads.
  // The caller should tell us how many thread shutdowns should have taken
  // place by now.
  CHECK_GT(cleanup_count_, major_threads_shutdown_count);
#endif
}

// static
void ThreadData::ShutdownSingleThreadedCleanup(bool leak) {
  // This is only called from test code, where we need to clean up so that
  // additional tests can be run.
  // We must be single threaded... but be careful anyway.
  InitializeAndSetTrackingStatus(DEACTIVATED);

  ThreadData* thread_data_list;
  {
    base::AutoLock lock(*list_lock_.Pointer());
    thread_data_list = all_thread_data_list_head_;
    all_thread_data_list_head_ = NULL;
    ++incarnation_counter_;
    // To be clean, break apart the retired worker list (though we leak them).
    while (first_retired_worker_) {
      ThreadData* worker = first_retired_worker_;
      CHECK_GT(worker->worker_thread_number_, 0);
      first_retired_worker_ = worker->next_retired_worker_;
      worker->next_retired_worker_ = NULL;
    }
  }

  // Put most global statics back in pristine shape.
  worker_thread_data_creation_count_ = 0;
  cleanup_count_ = 0;
  tls_index_.Set(NULL);
  // Almost UNINITIALIZED.
  base::subtle::Release_Store(&status_, DORMANT_DURING_TESTS);

  // To avoid any chance of racing in unit tests, which is the only place we
  // call this function, we may sometimes leak all the data structures we
  // recovered, as they may still be in use on threads from prior tests!
  if (leak) {
    ThreadData* thread_data = thread_data_list;
    while (thread_data) {
      thread_data = thread_data->next();
    }
    return;
  }

  // When we want to cleanup (on a single thread), here is what we do.

  // Do actual recursive delete in all ThreadData instances.
  while (thread_data_list) {
    ThreadData* next_thread_data = thread_data_list;
    thread_data_list = thread_data_list->next();

    for (BirthMap::iterator it = next_thread_data->birth_map_.begin();
         next_thread_data->birth_map_.end() != it; ++it)
      delete it->second;  // Delete the Birth Records.
    delete next_thread_data;  // Includes all Death Records.
  }
}

//------------------------------------------------------------------------------
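// A rough sketch of how these pieces fit together in the task-running code
// (the actual call sites live outside this file): construct a TaskStopwatch,
// Start() it, run the task, Stop() it, and then pass the stopwatch to one of
// the ThreadData::TallyRun*IfTracking() methods above so the run and queue
// durations are recorded against the task's Births entry.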
TaskStopwatch::TaskStopwatch()
    : wallclock_duration_ms_(0),
      current_thread_data_(NULL),
      excluded_duration_ms_(0),
      parent_(NULL) {
#if DCHECK_IS_ON()
  state_ = CREATED;
  child_ = NULL;
#endif
}

TaskStopwatch::~TaskStopwatch() {
#if DCHECK_IS_ON()
  DCHECK(state_ != RUNNING);
  DCHECK(child_ == NULL);
#endif
}

void TaskStopwatch::Start() {
#if DCHECK_IS_ON()
  DCHECK(state_ == CREATED);
  state_ = RUNNING;
#endif

  start_time_ = ThreadData::Now();

  current_thread_data_ = ThreadData::Get();
  if (!current_thread_data_)
    return;

  parent_ = current_thread_data_->current_stopwatch_;
#if DCHECK_IS_ON()
  if (parent_) {
    DCHECK(parent_->state_ == RUNNING);
    DCHECK(parent_->child_ == NULL);
    parent_->child_ = this;
  }
#endif
  current_thread_data_->current_stopwatch_ = this;
}

void TaskStopwatch::Stop() {
  const TrackedTime end_time = ThreadData::Now();
#if DCHECK_IS_ON()
  DCHECK(state_ == RUNNING);
  state_ = STOPPED;
  DCHECK(child_ == NULL);
#endif

  if (!start_time_.is_null() && !end_time.is_null()) {
    wallclock_duration_ms_ = (end_time - start_time_).InMilliseconds();
  }

  if (!current_thread_data_)
    return;

  DCHECK(current_thread_data_->current_stopwatch_ == this);
  current_thread_data_->current_stopwatch_ = parent_;
  if (!parent_)
    return;

#if DCHECK_IS_ON()
  DCHECK(parent_->state_ == RUNNING);
  DCHECK(parent_->child_ == this);
  parent_->child_ = NULL;
#endif
  parent_->excluded_duration_ms_ += wallclock_duration_ms_;
  parent_ = NULL;
}

TrackedTime TaskStopwatch::StartTime() const {
#if DCHECK_IS_ON()
  DCHECK(state_ != CREATED);
#endif

  return start_time_;
}

int32_t TaskStopwatch::RunDurationMs() const {
#if DCHECK_IS_ON()
  DCHECK(state_ == STOPPED);
#endif

  return wallclock_duration_ms_ - excluded_duration_ms_;
}

ThreadData* TaskStopwatch::GetThreadData() const {
#if DCHECK_IS_ON()
  DCHECK(state_ != CREATED);
#endif

  return current_thread_data_;
}

//------------------------------------------------------------------------------
// DeathDataPhaseSnapshot

DeathDataPhaseSnapshot::DeathDataPhaseSnapshot(
    int profiling_phase,
    int count,
    int32_t run_duration_sum,
    int32_t run_duration_max,
    int32_t run_duration_sample,
    int32_t queue_duration_sum,
    int32_t queue_duration_max,
    int32_t queue_duration_sample,
    const DeathDataPhaseSnapshot* prev)
    : profiling_phase(profiling_phase),
      death_data(count,
                 run_duration_sum,
                 run_duration_max,
                 run_duration_sample,
                 queue_duration_sum,
                 queue_duration_max,
                 queue_duration_sample),
      prev(prev) {}

//------------------------------------------------------------------------------
// TaskSnapshot

TaskSnapshot::TaskSnapshot() {
}

TaskSnapshot::TaskSnapshot(const BirthOnThreadSnapshot& birth,
                           const DeathDataSnapshot& death_data,
                           const std::string& death_thread_name)
    : birth(birth),
      death_data(death_data),
      death_thread_name(death_thread_name) {
}

TaskSnapshot::~TaskSnapshot() {
}

//------------------------------------------------------------------------------
// ProcessDataPhaseSnapshot

ProcessDataPhaseSnapshot::ProcessDataPhaseSnapshot() {
}

ProcessDataPhaseSnapshot::~ProcessDataPhaseSnapshot() {
}

//------------------------------------------------------------------------------
// ProcessDataSnapshot

ProcessDataSnapshot::ProcessDataSnapshot()
#if !defined(OS_NACL)
    : process_id(base::GetCurrentProcId()) {
#else
    : process_id(base::kNullProcessId) {
#endif
}

ProcessDataSnapshot::~ProcessDataSnapshot() {
}

}  // namespace tracked_objects