1 /*
2 * Copyright (C) 2011 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "thread_list.h"
18
19 #include <dirent.h>
20 #include <nativehelper/scoped_local_ref.h>
21 #include <nativehelper/scoped_utf_chars.h>
22 #include <sys/resource.h> // For getpriority()
23 #include <sys/types.h>
24 #include <unistd.h>
25
26 #include <map>
27 #include <sstream>
28 #include <tuple>
29 #include <vector>
30
31 #include "android-base/properties.h"
32 #include "android-base/stringprintf.h"
33 #include "art_field-inl.h"
34 #include "base/aborting.h"
35 #include "base/histogram-inl.h"
36 #include "base/mutex-inl.h"
37 #include "base/systrace.h"
38 #include "base/time_utils.h"
39 #include "base/timing_logger.h"
40 #include "debugger.h"
41 #include "gc/collector/concurrent_copying.h"
42 #include "gc/gc_pause_listener.h"
43 #include "gc/heap.h"
44 #include "gc/reference_processor.h"
45 #include "gc_root.h"
46 #include "jni/jni_internal.h"
47 #include "lock_word.h"
48 #include "mirror/string.h"
49 #include "monitor.h"
50 #include "native_stack_dump.h"
51 #include "obj_ptr-inl.h"
52 #include "scoped_thread_state_change-inl.h"
53 #include "thread.h"
54 #include "trace.h"
55 #include "unwindstack/AndroidUnwinder.h"
56 #include "well_known_classes.h"
57
58 #if ART_USE_FUTEXES
59 #include <linux/futex.h>
60 #include <sys/syscall.h>
61 #endif // ART_USE_FUTEXES
62
63 namespace art HIDDEN {
64
65 using android::base::StringPrintf;
66
67 static constexpr uint64_t kLongThreadSuspendThreshold = MsToNs(5);
68
69 // Whether we should try to dump the native stack of unattached threads. See commit ed8b723 for
70 // some history.
71 static constexpr bool kDumpUnattachedThreadNativeStackForSigQuit = true;
72
73 ThreadList::ThreadList(uint64_t thread_suspend_timeout_ns)
74 : suspend_all_count_(0),
75 unregistering_count_(0),
76 suspend_all_histogram_("suspend all histogram", 16, 64),
77 long_suspend_(false),
78 shut_down_(false),
79 thread_suspend_timeout_ns_(thread_suspend_timeout_ns),
80 empty_checkpoint_barrier_(new Barrier(0)) {
81 CHECK(Monitor::IsValidLockWord(LockWord::FromThinLockId(kMaxThreadId, 1, 0U)));
82 }
83
84 ThreadList::~ThreadList() {
85 CHECK(shut_down_);
86 }
87
88 void ThreadList::ShutDown() {
89 ScopedTrace trace(__PRETTY_FUNCTION__);
90 // Detach the current thread if necessary. If we failed to start, there might not be any threads.
91 // We need to detach the current thread here in case there's another thread waiting to join with
92 // us.
93 bool contains = false;
94 Thread* self = Thread::Current();
95 {
96 MutexLock mu(self, *Locks::thread_list_lock_);
97 contains = Contains(self);
98 }
99 if (contains) {
100 Runtime::Current()->DetachCurrentThread();
101 }
102 WaitForOtherNonDaemonThreadsToExit();
103 // The only caller of this function, ~Runtime, has already disabled GC and
104 // ensured that the last GC is finished.
105 gc::Heap* const heap = Runtime::Current()->GetHeap();
106 CHECK(heap->IsGCDisabledForShutdown());
107
108 // TODO: there's an unaddressed race here where a thread may attach during shutdown, see
109 // Thread::Init.
110 SuspendAllDaemonThreadsForShutdown();
111
112 shut_down_ = true;
113 }
114
115 bool ThreadList::Contains(Thread* thread) {
116 return find(list_.begin(), list_.end(), thread) != list_.end();
117 }
118
119 pid_t ThreadList::GetLockOwner() {
120 return Locks::thread_list_lock_->GetExclusiveOwnerTid();
121 }
122
123 void ThreadList::DumpNativeStacks(std::ostream& os) {
124 MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
125 unwindstack::AndroidLocalUnwinder unwinder;
126 for (const auto& thread : list_) {
127 os << "DUMPING THREAD " << thread->GetTid() << "\n";
128 DumpNativeStack(os, unwinder, thread->GetTid(), "\t");
129 os << "\n";
130 }
131 }
132
133 void ThreadList::DumpForSigQuit(std::ostream& os) {
134 {
135 ScopedObjectAccess soa(Thread::Current());
136 // Only print if we have samples.
137 if (suspend_all_histogram_.SampleSize() > 0) {
138 Histogram<uint64_t>::CumulativeData data;
139 suspend_all_histogram_.CreateHistogram(&data);
140 suspend_all_histogram_.PrintConfidenceIntervals(os, 0.99, data); // Dump time to suspend.
141 }
142 }
143 bool dump_native_stack = Runtime::Current()->GetDumpNativeStackOnSigQuit();
144 Dump(os, dump_native_stack);
145 DumpUnattachedThreads(os, dump_native_stack && kDumpUnattachedThreadNativeStackForSigQuit);
146 }
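
// A minimal, standalone sketch of the kind of summary the histogram call above produces: pick a
// high percentile out of raw suspend-all durations. This is not ART's Histogram implementation;
// the helper name is illustrative and it assumes <algorithm> is available for std::sort.
namespace suspend_percentile_sketch {
inline uint64_t PercentileNs(std::vector<uint64_t> samples_ns, double fraction /* e.g. 0.99 */) {
  if (samples_ns.empty()) {
    return 0;
  }
  std::sort(samples_ns.begin(), samples_ns.end());
  // Index of the requested percentile, clamped to the last sample by construction.
  size_t idx = static_cast<size_t>(fraction * (samples_ns.size() - 1));
  return samples_ns[idx];
}
}  // namespace suspend_percentile_sketch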
147
148 static void DumpUnattachedThread(std::ostream& os, pid_t tid, bool dump_native_stack)
149 NO_THREAD_SAFETY_ANALYSIS {
150 // TODO: No thread safety analysis as DumpState with a null thread won't access fields, should
151 // refactor DumpState to avoid skipping analysis.
152 Thread::DumpState(os, nullptr, tid);
153 if (dump_native_stack) {
154 DumpNativeStack(os, tid, " native: ");
155 }
156 os << std::endl;
157 }
158
159 void ThreadList::DumpUnattachedThreads(std::ostream& os, bool dump_native_stack) {
160 DIR* d = opendir("/proc/self/task");
161 if (!d) {
162 return;
163 }
164
165 Thread* self = Thread::Current();
166 dirent* e;
167 while ((e = readdir(d)) != nullptr) {
168 char* end;
169 pid_t tid = strtol(e->d_name, &end, 10);
170 if (!*end) {
171 Thread* thread;
172 {
173 MutexLock mu(self, *Locks::thread_list_lock_);
174 thread = FindThreadByTid(tid);
175 }
176 if (thread == nullptr) {
177 DumpUnattachedThread(os, tid, dump_native_stack);
178 }
179 }
180 }
181 closedir(d);
182 }
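
// A standalone sketch of the /proc scan used above: enumerate every kernel tid in this process by
// listing /proc/self/task, mirroring the readdir()/strtol() loop in DumpUnattachedThreads(). It
// only relies on <dirent.h> and <vector>, which this file already includes; the function name is
// illustrative.
namespace tid_scan_sketch {
inline std::vector<pid_t> ListOwnTids() {
  std::vector<pid_t> tids;
  DIR* d = opendir("/proc/self/task");
  if (d == nullptr) {
    return tids;  // Nothing we can do; return an empty list.
  }
  dirent* e;
  while ((e = readdir(d)) != nullptr) {
    char* end;
    pid_t tid = strtol(e->d_name, &end, 10);
    if (*end == '\0') {  // Skip ".", ".." and any other non-numeric entry.
      tids.push_back(tid);
    }
  }
  closedir(d);
  return tids;
}
}  // namespace tid_scan_sketch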
183
184 // Dump checkpoint timeout in milliseconds. The timeout is larger on the target, since the
185 // device could be overloaded with ANR dumps.
186 static constexpr uint32_t kDumpWaitTimeout = kIsTargetBuild ? 100000 : 20000;
187
188 // A closure used by Thread::Dump.
189 class DumpCheckpoint final : public Closure {
190 public:
191   DumpCheckpoint(bool dump_native_stack)
192 : lock_("Dump checkpoint lock", kGenericBottomLock),
193 os_(),
194 // Avoid verifying count in case a thread doesn't end up passing through the barrier.
195 // This avoids a SIGABRT that would otherwise happen in the destructor.
196 barrier_(0, /*verify_count_on_shutdown=*/false),
197 unwinder_(std::vector<std::string>{}, std::vector<std::string> {"oat", "odex"}),
198 dump_native_stack_(dump_native_stack) {
199 }
200
201   void Run(Thread* thread) override {
202 // Note thread and self may not be equal if thread was already suspended at the point of the
203 // request.
204 Thread* self = Thread::Current();
205 CHECK(self != nullptr);
206 std::ostringstream local_os;
207 Locks::mutator_lock_->AssertSharedHeld(self);
208 Thread::DumpOrder dump_order = thread->Dump(local_os, unwinder_, dump_native_stack_);
209 {
210 MutexLock mu(self, lock_);
211 // Sort, so that the most interesting threads for ANR are printed first (ANRs can be trimmed).
212 std::pair<Thread::DumpOrder, uint32_t> sort_key(dump_order, thread->GetThreadId());
213 os_.emplace(sort_key, std::move(local_os));
214 }
215 barrier_.Pass(self);
216 }
217
218 // Called at the end to print all the dumps in sequential prioritized order.
219   void Dump(Thread* self, std::ostream& os) {
220 MutexLock mu(self, lock_);
221 for (const auto& it : os_) {
222 os << it.second.str() << std::endl;
223 }
224 }
225
226   void WaitForThreadsToRunThroughCheckpoint(size_t threads_running_checkpoint) {
227 Thread* self = Thread::Current();
228 ScopedThreadStateChange tsc(self, ThreadState::kWaitingForCheckPointsToRun);
229 bool timed_out = barrier_.Increment(self, threads_running_checkpoint, kDumpWaitTimeout);
230 if (timed_out) {
231 // Avoid a recursive abort.
232 LOG((kIsDebugBuild && (gAborting == 0)) ? ::android::base::FATAL : ::android::base::ERROR)
233 << "Unexpected time out during dump checkpoint.";
234 }
235 }
236
237 private:
238 // Storage for the per-thread dumps (guarded by lock since they are generated in parallel).
239 // Map is used to obtain sorted order. The key is unique, but use multimap just in case.
240 Mutex lock_;
241 std::multimap<std::pair<Thread::DumpOrder, uint32_t>, std::ostringstream> os_ GUARDED_BY(lock_);
242 // The barrier to be passed through and for the requestor to wait upon.
243 Barrier barrier_;
244 // A backtrace map, so that all threads use a shared info and don't reacquire/parse separately.
245 unwindstack::AndroidLocalUnwinder unwinder_;
246 // Whether we should dump the native stack.
247 const bool dump_native_stack_;
248 };
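
// A simplified, standalone model of what DumpCheckpoint does: each thread formats its own report,
// inserts it into a shared map under a private mutex (keyed so the most ANR-relevant entries sort
// first), and counts down; the requester waits with a timeout and then prints in sorted order.
// This is only a sketch with illustrative names; it assumes <mutex>, <condition_variable>,
// <chrono> and <string> are available, and it uses a plain counter instead of ART's Barrier.
namespace dump_checkpoint_sketch {
class DumpCollector {
 public:
  explicit DumpCollector(size_t expected) : remaining_(expected) {}

  // Called once on each participating thread.
  void Report(int priority_key, uint32_t thread_id, std::string text) {
    std::lock_guard<std::mutex> lg(lock_);
    reports_.emplace(std::make_pair(priority_key, thread_id), std::move(text));
    if (--remaining_ == 0) {
      done_.notify_all();
    }
  }

  // Called by the requester; returns false if not all threads reported in time.
  bool WaitAndDump(std::ostream& os, std::chrono::milliseconds timeout) {
    std::unique_lock<std::mutex> ul(lock_);
    bool ok = done_.wait_for(ul, timeout, [this] { return remaining_ == 0; });
    for (const auto& entry : reports_) {
      os << entry.second << "\n";
    }
    return ok;
  }

 private:
  std::mutex lock_;
  std::condition_variable done_;
  size_t remaining_;  // Guarded by lock_.
  std::multimap<std::pair<int, uint32_t>, std::string> reports_;  // Guarded by lock_.
};
}  // namespace dump_checkpoint_sketch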
249
250 void ThreadList::Dump(std::ostream& os, bool dump_native_stack) {
251 Thread* self = Thread::Current();
252 {
253 MutexLock mu(self, *Locks::thread_list_lock_);
254 os << "DALVIK THREADS (" << list_.size() << "):\n";
255 }
256 if (self != nullptr) {
257 // Dump() can be called in any mutator lock state.
258 bool mutator_lock_held = Locks::mutator_lock_->IsSharedHeld(self);
259 DumpCheckpoint checkpoint(dump_native_stack);
260 // Acquire mutator lock separately for each thread, to avoid long runnable code sequence
261 // without suspend checks.
262 size_t threads_running_checkpoint =
263 RunCheckpoint(&checkpoint,
264 nullptr,
265 true,
266 /* acquire_mutator_lock= */ !mutator_lock_held);
267 if (threads_running_checkpoint != 0) {
268 checkpoint.WaitForThreadsToRunThroughCheckpoint(threads_running_checkpoint);
269 }
270 checkpoint.Dump(self, os);
271 } else {
272 DumpUnattachedThreads(os, dump_native_stack);
273 }
274 }
275
276 void ThreadList::AssertOtherThreadsAreSuspended(Thread* self) {
277 MutexLock mu(self, *Locks::thread_list_lock_);
278 MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
279 for (const auto& thread : list_) {
280 if (thread != self) {
281 CHECK(thread->IsSuspended())
282 << "\nUnsuspended thread: <<" << *thread << "\n"
283 << "self: <<" << *Thread::Current();
284 }
285 }
286 }
287
288 #if HAVE_TIMED_RWLOCK
289 // Attempt to rectify locks so that we dump thread list with required locks before exiting.
290 NO_RETURN static void UnsafeLogFatalForThreadSuspendAllTimeout() {
291 // Increment gAborting before doing the thread list dump since we don't want any failures from
292 // AssertThreadSuspensionIsAllowable in cases where thread suspension is not allowed.
293 // See b/69044468.
294 ++gAborting;
295 Runtime* runtime = Runtime::Current();
296 std::ostringstream ss;
297 ss << "Thread suspend timeout\n";
298 Locks::mutator_lock_->Dump(ss);
299 ss << "\n";
300 runtime->GetThreadList()->Dump(ss);
301 --gAborting;
302 LOG(FATAL) << ss.str();
303 exit(0);
304 }
305 #endif
306
307 size_t ThreadList::RunCheckpoint(Closure* checkpoint_function,
308 Closure* callback,
309 bool allow_lock_checking,
310 bool acquire_mutator_lock) {
311 Thread* self = Thread::Current();
312 Locks::mutator_lock_->AssertNotExclusiveHeld(self);
313 Locks::thread_list_lock_->AssertNotHeld(self);
314 Locks::thread_suspend_count_lock_->AssertNotHeld(self);
315 if (kIsDebugBuild && allow_lock_checking && !acquire_mutator_lock) {
316 // TODO: Consider better checking with acquire_mutator_lock.
317 self->DisallowPreMonitorMutexes();
318 }
319
320 std::vector<Thread*> remaining_threads;
321 size_t count = 0;
322 bool mutator_lock_held = Locks::mutator_lock_->IsSharedHeld(self);
323 ThreadState old_thread_state = self->GetState();
324 DCHECK(!(mutator_lock_held && acquire_mutator_lock));
325
326 // Thread-safety analysis wants the lock state to always be the same at every program point.
327 // Allow us to pretend it is.
328 auto fake_mutator_lock = []() SHARED_LOCK_FUNCTION(Locks::mutator_lock_)
329 NO_THREAD_SAFETY_ANALYSIS {};
330 auto fake_mutator_unlock = []() UNLOCK_FUNCTION(Locks::mutator_lock_)
331 NO_THREAD_SAFETY_ANALYSIS {};
332
333 if (acquire_mutator_lock) {
334 self->TransitionFromSuspendedToRunnable();
335 } else {
336 fake_mutator_lock();
337 }
338 Locks::thread_list_lock_->Lock(self);
339 Locks::thread_suspend_count_lock_->Lock(self);
340
341 // First try to install checkpoint function in each thread. This will succeed only for
342 // runnable threads. Track others in remaining_threads.
343 count = list_.size();
344 for (const auto& thread : list_) {
345 if (thread != self) {
346 if (thread->RequestCheckpoint(checkpoint_function)) {
347 // This thread will run its checkpoint some time in the near future.
348 } else {
349 remaining_threads.push_back(thread);
350 }
351 }
352 // Thread either has honored or will honor the checkpoint, or it has been added to
353 // remaining_threads.
354 }
355
356 // ith entry corresponds to remaining_threads[i]:
357 std::unique_ptr<ThreadExitFlag[]> tefs(new ThreadExitFlag[remaining_threads.size()]);
358
359 // Register a ThreadExitFlag for each remaining thread.
360 for (size_t i = 0; i < remaining_threads.size(); ++i) {
361 remaining_threads[i]->NotifyOnThreadExit(&tefs[i]);
362 }
363
364 // Run the callback to be called inside this critical section.
365 if (callback != nullptr) {
366 callback->Run(self);
367 }
368
369 size_t nthreads = remaining_threads.size();
370 size_t starting_thread = 0;
371 size_t next_starting_thread; // First possible remaining non-null entry in remaining_threads.
372 // Run the checkpoint for the suspended threads.
373 do {
374 // We hold mutator_lock_ (if desired), thread_list_lock_, and suspend_count_lock_
375 next_starting_thread = nthreads;
376 for (size_t i = 0; i < nthreads; ++i) {
377 Thread* thread = remaining_threads[i];
378 if (thread == nullptr) {
379 continue;
380 }
381 if (tefs[i].HasExited()) {
382 remaining_threads[i] = nullptr;
383 --count;
384 continue;
385 }
386 bool was_runnable = thread->RequestCheckpoint(checkpoint_function);
387 if (was_runnable) {
388 // Thread became runnable, and will run the checkpoint; we're done.
389 thread->UnregisterThreadExitFlag(&tefs[i]);
390 remaining_threads[i] = nullptr;
391 continue;
392 }
393 // Thread was still suspended, as expected.
394 // We need to run the checkpoint ourselves. Suspend thread so it stays suspended.
395 thread->IncrementSuspendCount(self);
396 if (LIKELY(thread->IsSuspended())) {
397 // Run the checkpoint function ourselves.
398 // We need to run the checkpoint function without the thread_list and suspend_count locks.
399 Locks::thread_suspend_count_lock_->Unlock(self);
400 Locks::thread_list_lock_->Unlock(self);
401 if (mutator_lock_held || acquire_mutator_lock) {
402 // Make sure there is no pending flip function before running Java-heap-accessing
403 // checkpoint on behalf of thread.
404 Thread::EnsureFlipFunctionStarted(self, thread);
405 if (thread->GetStateAndFlags(std::memory_order_acquire)
406 .IsAnyOfFlagsSet(Thread::FlipFunctionFlags())) {
407 // There is another thread running the flip function for 'thread'.
408 // Instead of waiting for it to complete, move to the next thread.
409 // Retry this one later from scratch.
410 next_starting_thread = std::min(next_starting_thread, i);
411 Locks::thread_list_lock_->Lock(self);
412 Locks::thread_suspend_count_lock_->Lock(self);
413 thread->DecrementSuspendCount(self);
414 Thread::resume_cond_->Broadcast(self);
415 continue;
416 }
417 } // O.w. the checkpoint will not access Java data structures, and doesn't care whether
418 // the flip function has been called.
419 checkpoint_function->Run(thread);
420 if (acquire_mutator_lock) {
421 {
422 MutexLock mu3(self, *Locks::thread_suspend_count_lock_);
423 thread->DecrementSuspendCount(self);
424 // In the case of a thread waiting for IO or the like, there will be no waiters
425 // on resume_cond_, so Broadcast() will not enter the kernel, and thus be cheap.
426 Thread::resume_cond_->Broadcast(self);
427 }
428 {
429 // Allow us to run checkpoints, or be suspended between checkpoint invocations.
430 ScopedThreadSuspension sts(self, old_thread_state);
431 }
432 Locks::thread_list_lock_->Lock(self);
433 Locks::thread_suspend_count_lock_->Lock(self);
434 } else {
435 Locks::thread_list_lock_->Lock(self);
436 Locks::thread_suspend_count_lock_->Lock(self);
437 thread->DecrementSuspendCount(self);
438 Thread::resume_cond_->Broadcast(self);
439 }
440 thread->UnregisterThreadExitFlag(&tefs[i]);
441 remaining_threads[i] = nullptr;
442 } else {
443 // Thread may have become runnable between the time we last checked and
444 // the time we incremented the suspend count. We defer to the next attempt, rather than
445 // waiting for it to suspend. Note that this may still unnecessarily trigger a signal
446 // handler, but it should be exceedingly rare.
447 thread->DecrementSuspendCount(self);
448 Thread::resume_cond_->Broadcast(self);
449 next_starting_thread = std::min(next_starting_thread, i);
450 }
451 }
452 starting_thread = next_starting_thread;
453 } while (starting_thread != nthreads);
454
455 // Finally run the checkpoint on ourself. We will already have run the flip function, if we're
456 // runnable.
457 Locks::thread_list_lock_->Unlock(self);
458 Locks::thread_suspend_count_lock_->Unlock(self);
459 checkpoint_function->Run(self);
460
461 if (acquire_mutator_lock) {
462 self->TransitionFromRunnableToSuspended(old_thread_state);
463 } else {
464 fake_mutator_unlock();
465 }
466
467 DCHECK(std::all_of(remaining_threads.cbegin(), remaining_threads.cend(), [](Thread* thread) {
468 return thread == nullptr;
469 }));
470 Thread::DCheckUnregisteredEverywhere(&tefs[0], &tefs[nthreads - 1]);
471
472   if (kIsDebugBuild && allow_lock_checking && !acquire_mutator_lock) {
473 self->AllowPreMonitorMutexes();
474 }
475 return count;
476 }
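
// RunCheckpoint() registers a ThreadExitFlag for every thread it may come back to later, so it can
// tell whether a suspended thread exited before the checkpoint could be run on its behalf. The
// snippet below is a minimal sketch of that notify-on-exit idea only, with illustrative names; it
// is not ART's ThreadExitFlag (which also supports lists of flags and unregistration) and relies
// only on <atomic>, which this file already pulls in for its other atomics.
namespace exit_flag_sketch {
struct ExitFlag {
  std::atomic<bool> exited{false};
};

class Worker {
 public:
  void NotifyOnExit(ExitFlag* flag) { exit_flag_ = flag; }

  // Last thing the worker does before its memory may be reclaimed.
  void OnExit() {
    if (exit_flag_ != nullptr) {
      exit_flag_->exited.store(true, std::memory_order_release);
    }
  }

 private:
  ExitFlag* exit_flag_ = nullptr;
};

// The watcher checks this before dereferencing any cached pointer to the worker.
inline bool HasExited(const ExitFlag& flag) {
  return flag.exited.load(std::memory_order_acquire);
}
}  // namespace exit_flag_sketch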
477
478 void ThreadList::RunEmptyCheckpoint() {
479 Thread* self = Thread::Current();
480 Locks::mutator_lock_->AssertNotExclusiveHeld(self);
481 Locks::thread_list_lock_->AssertNotHeld(self);
482 Locks::thread_suspend_count_lock_->AssertNotHeld(self);
483 std::vector<uint32_t> runnable_thread_ids;
484 size_t count = 0;
485 Barrier* barrier = empty_checkpoint_barrier_.get();
486 barrier->Init(self, 0);
487 {
488 MutexLock mu(self, *Locks::thread_list_lock_);
489 MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
490 for (Thread* thread : list_) {
491 if (thread != self) {
492 while (true) {
493 if (thread->RequestEmptyCheckpoint()) {
494 // This thread will run an empty checkpoint (decrement the empty checkpoint barrier)
495 // some time in the near future.
496 ++count;
497 if (kIsDebugBuild) {
498 runnable_thread_ids.push_back(thread->GetThreadId());
499 }
500 break;
501 }
502 if (thread->GetState() != ThreadState::kRunnable) {
503 // It's seen suspended, we are done because it must not be in the middle of a mutator
504 // heap access.
505 break;
506 }
507 }
508 }
509 }
510 }
511
512 // Wake up the threads blocking for weak ref access so that they will respond to the empty
513 // checkpoint request. Otherwise we will hang as they are blocking in the kRunnable state.
514 Runtime::Current()->GetHeap()->GetReferenceProcessor()->BroadcastForSlowPath(self);
515 Runtime::Current()->BroadcastForNewSystemWeaks(/*broadcast_for_checkpoint=*/true);
516 {
517 ScopedThreadStateChange tsc(self, ThreadState::kWaitingForCheckPointsToRun);
518 uint64_t total_wait_time = 0;
519 bool first_iter = true;
520 while (true) {
521 // Wake up the runnable threads blocked on the mutexes that another thread, which is blocked
522 // on a weak ref access, holds (indirectly blocking for weak ref access through another thread
523 // and a mutex.) This needs to be done periodically because the thread may be preempted
524 // between the CheckEmptyCheckpointFromMutex call and the subsequent futex wait in
525 // Mutex::ExclusiveLock, etc. when the wakeup via WakeupToRespondToEmptyCheckpoint
526 // arrives. This could cause a *very rare* deadlock, if not repeated. Most of the cases are
527 // handled in the first iteration.
528 for (BaseMutex* mutex : Locks::expected_mutexes_on_weak_ref_access_) {
529 mutex->WakeupToRespondToEmptyCheckpoint();
530 }
531 static constexpr uint64_t kEmptyCheckpointPeriodicTimeoutMs = 100; // 100ms
532 static constexpr uint64_t kEmptyCheckpointTotalTimeoutMs = 600 * 1000; // 10 minutes.
533 size_t barrier_count = first_iter ? count : 0;
534 first_iter = false; // Don't add to the barrier count from the second iteration on.
535 bool timed_out = barrier->Increment(self, barrier_count, kEmptyCheckpointPeriodicTimeoutMs);
536 if (!timed_out) {
537 break; // Success
538 }
539 // This is a very rare case.
540 total_wait_time += kEmptyCheckpointPeriodicTimeoutMs;
541 if (kIsDebugBuild && total_wait_time > kEmptyCheckpointTotalTimeoutMs) {
542 std::ostringstream ss;
543 ss << "Empty checkpoint timeout\n";
544 ss << "Barrier count " << barrier->GetCount(self) << "\n";
545 ss << "Runnable thread IDs";
546 for (uint32_t tid : runnable_thread_ids) {
547 ss << " " << tid;
548 }
549 ss << "\n";
550 Locks::mutator_lock_->Dump(ss);
551 ss << "\n";
552 LOG(FATAL_WITHOUT_ABORT) << ss.str();
553 // Some threads in 'runnable_thread_ids' are probably stuck. Try to dump their stacks.
554 // Avoid using ThreadList::Dump() initially because it is likely to get stuck as well.
555 {
556 ScopedObjectAccess soa(self);
557 MutexLock mu1(self, *Locks::thread_list_lock_);
558 for (Thread* thread : GetList()) {
559 uint32_t tid = thread->GetThreadId();
560 bool is_in_runnable_thread_ids =
561 std::find(runnable_thread_ids.begin(), runnable_thread_ids.end(), tid) !=
562 runnable_thread_ids.end();
563 if (is_in_runnable_thread_ids &&
564 thread->ReadFlag(ThreadFlag::kEmptyCheckpointRequest, std::memory_order_relaxed)) {
565 // Found a runnable thread that hasn't responded to the empty checkpoint request.
566 // Assume it's stuck and safe to dump its stack.
567 thread->Dump(LOG_STREAM(FATAL_WITHOUT_ABORT),
568 /*dump_native_stack=*/ true,
569 /*force_dump_stack=*/ true);
570 }
571 }
572 }
573 LOG(FATAL_WITHOUT_ABORT)
574 << "Dumped runnable threads that haven't responded to empty checkpoint.";
575 // Now use ThreadList::Dump() to dump more threads, noting it may get stuck.
576 Dump(LOG_STREAM(FATAL_WITHOUT_ABORT));
577 LOG(FATAL) << "Dumped all threads.";
578 }
579 }
580 }
581 }
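
// The wait loop above blocks on the barrier in short slices so it can periodically re-wake threads
// parked on weak-ref-access mutexes, and only gives up after a large total budget. The helper
// below is a standalone sketch of that shape only (periodic nudge plus total budget); it does not
// use ART's Barrier, the names are illustrative, and it assumes <functional>, <chrono> and
// <thread> are available.
namespace sliced_wait_sketch {
inline bool WaitWithPeriodicNudge(const std::function<bool()>& done,
                                  const std::function<void()>& nudge,
                                  std::chrono::milliseconds slice,
                                  std::chrono::milliseconds total_budget) {
  std::chrono::milliseconds waited{0};
  while (!done()) {
    if (waited >= total_budget) {
      return false;  // The caller decides whether a timeout is fatal.
    }
    nudge();  // E.g. re-wake anything that may have missed the first notification.
    std::this_thread::sleep_for(slice);
    waited += slice;
  }
  return true;
}
}  // namespace sliced_wait_sketch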
582
583 // Separate function to disable just the right amount of thread-safety analysis.
584 ALWAYS_INLINE void AcquireMutatorLockSharedUncontended(Thread* self)
585 ACQUIRE_SHARED(*Locks::mutator_lock_) NO_THREAD_SAFETY_ANALYSIS {
586 bool success = Locks::mutator_lock_->SharedTryLock(self, /*check=*/false);
587 CHECK(success);
588 }
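
// The helper above depends on a caller-provided invariant (no writer can hold or be granted the
// lock at this point), so the try-lock is expected to succeed and the CHECK merely documents that.
// The same shape with std::shared_mutex, as a sketch only (assumes <shared_mutex>):
namespace shared_trylock_sketch {
inline void AcquireSharedMustSucceed(std::shared_mutex& m) {
  bool success = m.try_lock_shared();
  // If this fires, the caller's invariant (no concurrent exclusive holder) was violated.
  CHECK(success);
}
}  // namespace shared_trylock_sketch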
589
590 // A checkpoint/suspend-all hybrid to switch thread roots from
591 // from-space to to-space refs. Used to synchronize threads at a point
592 // to mark the initiation of marking while maintaining the to-space
593 // invariant.
594 void ThreadList::FlipThreadRoots(Closure* thread_flip_visitor,
595 Closure* flip_callback,
596 gc::collector::GarbageCollector* collector,
597 gc::GcPauseListener* pause_listener) {
598 TimingLogger::ScopedTiming split("ThreadListFlip", collector->GetTimings());
599 Thread* self = Thread::Current();
600 Locks::mutator_lock_->AssertNotHeld(self);
601 Locks::thread_list_lock_->AssertNotHeld(self);
602 Locks::thread_suspend_count_lock_->AssertNotHeld(self);
603 CHECK_NE(self->GetState(), ThreadState::kRunnable);
604
605 collector->GetHeap()->ThreadFlipBegin(self); // Sync with JNI critical calls.
606
607 // ThreadFlipBegin happens before we suspend all the threads, so it does not
608 // count towards the pause.
609 const uint64_t suspend_start_time = NanoTime();
610 VLOG(threads) << "Suspending all for thread flip";
611 {
612 ScopedTrace trace("ThreadFlipSuspendAll");
613 SuspendAllInternal(self);
614 }
615
616 std::vector<Thread*> flipping_threads; // All suspended threads. Includes us.
617 int thread_count;
618 // Flipping threads might exit between the time we resume them and try to run the flip function.
619 // Track that in a parallel vector.
620 std::unique_ptr<ThreadExitFlag[]> exit_flags;
621
622 {
623 TimingLogger::ScopedTiming t("FlipThreadSuspension", collector->GetTimings());
624 if (pause_listener != nullptr) {
625 pause_listener->StartPause();
626 }
627
628 // Run the flip callback for the collector.
629 Locks::mutator_lock_->ExclusiveLock(self);
630 suspend_all_histogram_.AdjustAndAddValue(NanoTime() - suspend_start_time);
631 flip_callback->Run(self);
632
633 {
634 MutexLock mu(self, *Locks::thread_list_lock_);
635 MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
636 thread_count = list_.size();
637 exit_flags.reset(new ThreadExitFlag[thread_count]);
638 flipping_threads.resize(thread_count, nullptr);
639 int i = 1;
640 for (Thread* thread : list_) {
641 // Set the flip function for all threads because once we start resuming any threads,
642 // they may need to run the flip function on behalf of other threads, even this one.
643 DCHECK(thread == self || thread->IsSuspended());
644 thread->SetFlipFunction(thread_flip_visitor);
645 // Put ourselves first, so other threads are more likely to have finished before we get
646 // there.
647 int thread_index = thread == self ? 0 : i++;
648 flipping_threads[thread_index] = thread;
649 thread->NotifyOnThreadExit(&exit_flags[thread_index]);
650 }
651 DCHECK(i == thread_count);
652 }
653
654 if (pause_listener != nullptr) {
655 pause_listener->EndPause();
656 }
657 }
658 // Any new threads created after this will be created by threads that already ran their flip
659 // functions. In the normal GC use case in which the flip function converts all local references
660 // to to-space references, these newly created threads will also see only to-space references.
661
662 // Resume threads, making sure that we do not release suspend_count_lock_ until we've reacquired
663 // the mutator_lock_ in shared mode, and decremented suspend_all_count_. This avoids a
664 // concurrent SuspendAll, and ensures that newly started threads see a correct value of
665 // suspend_all_count.
666 {
667 MutexLock mu(self, *Locks::thread_list_lock_);
668 Locks::thread_suspend_count_lock_->Lock(self);
669 ResumeAllInternal(self);
670 }
671 collector->RegisterPause(NanoTime() - suspend_start_time);
672
673 // Since all threads were suspended, they will attempt to run the flip function before
674 // reentering a runnable state. We will also attempt to run the flip functions ourselves. Any
675 // intervening checkpoint request will do the same. Exactly one of those flip function attempts
676 // will succeed, and the target thread will not be able to reenter a runnable state until one of
677 // them does.
678
679 // Try to run the closure on the other threads.
680 TimingLogger::ScopedTiming split3("RunningThreadFlips", collector->GetTimings());
681 // Reacquire the mutator lock while holding suspend_count_lock. This cannot fail, since we
682 // do not acquire the mutator lock unless suspend_all_count was read as 0 while holding
683 // suspend_count_lock. We did not release suspend_count_lock since releasing the mutator
684 // lock.
685 AcquireMutatorLockSharedUncontended(self);
686
687 Locks::thread_suspend_count_lock_->Unlock(self);
688 // Concurrent SuspendAll may now see zero suspend_all_count_, but block on mutator_lock_.
689
690 collector->GetHeap()->ThreadFlipEnd(self);
691
692 for (int i = 0; i < thread_count; ++i) {
693 bool finished;
694 Thread::EnsureFlipFunctionStarted(
695 self, flipping_threads[i], Thread::StateAndFlags(0), &exit_flags[i], &finished);
696 if (finished) {
697 MutexLock mu2(self, *Locks::thread_list_lock_);
698 flipping_threads[i]->UnregisterThreadExitFlag(&exit_flags[i]);
699 flipping_threads[i] = nullptr;
700 }
701 }
702 // Make sure all flips complete before we return.
703 for (int i = 0; i < thread_count; ++i) {
704 if (UNLIKELY(flipping_threads[i] != nullptr)) {
705 flipping_threads[i]->WaitForFlipFunctionTestingExited(self, &exit_flags[i]);
706 MutexLock mu2(self, *Locks::thread_list_lock_);
707 flipping_threads[i]->UnregisterThreadExitFlag(&exit_flags[i]);
708 }
709 }
710
711 Thread::DCheckUnregisteredEverywhere(&exit_flags[0], &exit_flags[thread_count - 1]);
712
713 Locks::mutator_lock_->SharedUnlock(self);
714 }
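
// The flip function must run exactly once per thread: either the thread runs it itself when it
// becomes runnable, or the GC (or an intervening checkpoint) runs it on the thread's behalf, and
// whoever gets there first wins. The sketch below models only that exactly-once property with
// std::call_once; ART's EnsureFlipFunctionStarted() is more involved (for example, it can decline
// to wait for a flip that another thread is already running). Assumes <mutex> and <functional>;
// names are illustrative.
namespace flip_once_sketch {
struct PerThreadFlip {
  std::once_flag flag;
};

inline void RunFlipIfNotAlreadyRun(PerThreadFlip* state, const std::function<void()>& flip) {
  // If another caller is currently inside `flip`, call_once blocks until it finishes, so the flip
  // is guaranteed to be complete when this returns.
  std::call_once(state->flag, flip);
}
}  // namespace flip_once_sketch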
715
716 // True only for debugging suspend timeout code. The resulting timeouts are short enough that
717 // failures are expected.
718 static constexpr bool kShortSuspendTimeouts = false;
719
720 static constexpr unsigned kSuspendBarrierIters = kShortSuspendTimeouts ? 5 : 20;
721
722 #if ART_USE_FUTEXES
723
724 // Returns true if it timed out. Times out after timeout_ns/kSuspendBarrierIters nsecs
725 static bool WaitOnceForSuspendBarrier(AtomicInteger* barrier,
726 int32_t cur_val,
727 uint64_t timeout_ns) {
728 timespec wait_timeout;
729 if (kShortSuspendTimeouts) {
730 timeout_ns = MsToNs(kSuspendBarrierIters);
731 CHECK_GE(NsToMs(timeout_ns / kSuspendBarrierIters), 1ul);
732 } else {
733 DCHECK_GE(NsToMs(timeout_ns / kSuspendBarrierIters), 10ul);
734 }
735 InitTimeSpec(false, CLOCK_MONOTONIC, NsToMs(timeout_ns / kSuspendBarrierIters), 0, &wait_timeout);
736 if (futex(barrier->Address(), FUTEX_WAIT_PRIVATE, cur_val, &wait_timeout, nullptr, 0) != 0) {
737 if (errno == ETIMEDOUT) {
738 return true;
739 } else if (errno != EAGAIN && errno != EINTR) {
740 PLOG(FATAL) << "futex wait for suspend barrier failed";
741 }
742 }
743 return false;
744 }
745
746 #else
747
748 static bool WaitOnceForSuspendBarrier(AtomicInteger* barrier,
749 int32_t cur_val,
750 uint64_t timeout_ns) {
751 // In the normal case, aim for a couple of hundred milliseconds.
752   const unsigned kInnerIters =
753 kShortSuspendTimeouts ? 1'000 : (timeout_ns / 1000) / kSuspendBarrierIters;
754 DCHECK_GE(kInnerIters, 1'000u);
755 for (int i = 0; i < kInnerIters; ++i) {
756 sched_yield();
757 if (barrier->load(std::memory_order_acquire) == 0) {
758 return false;
759 }
760 }
761 return true;
762 }
763
764 #endif // ART_USE_FUTEXES
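
#if ART_USE_FUTEXES
// The futex path above parks the waiter directly on the barrier word with FUTEX_WAIT_PRIVATE and
// relies on whoever drops the barrier to zero issuing the matching wake. ART performs that wake
// through its own futex() wrapper; the raw-syscall equivalent is shown below purely for
// illustration (Linux only, using the <linux/futex.h> and <sys/syscall.h> headers already
// included above).
namespace futex_wake_sketch {
inline void WakeAllWaiters(int32_t* word) {
  // Wake every thread currently parked on this address via FUTEX_WAIT_PRIVATE.
  syscall(SYS_futex, word, FUTEX_WAKE_PRIVATE, INT32_MAX, nullptr, nullptr, 0);
}
}  // namespace futex_wake_sketch
#endif  // ART_USE_FUTEXES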
765
766 std::optional<std::string> ThreadList::WaitForSuspendBarrier(AtomicInteger* barrier,
767 pid_t t,
768 int attempt_of_4) {
769 #if ART_USE_FUTEXES
770 const uint64_t start_time = NanoTime();
771 #endif
772 uint64_t timeout_ns =
773 attempt_of_4 == 0 ? thread_suspend_timeout_ns_ : thread_suspend_timeout_ns_ / 4;
774 static bool is_user_build = (android::base::GetProperty("ro.build.type", "") == "user");
775 // Significantly increase timeouts in user builds, since they result in crashes.
776 // Many of these are likely to turn into ANRs, which are less informative for the developer, but
777 // friendlier to the user. We do not completely suppress timeouts, so that we avoid invisible
778 // problems for cases not covered by ANR detection, e.g. a problem in a clean-up daemon.
779 if (is_user_build) {
780 static constexpr int USER_MULTIPLIER = 2; // Start out small, perhaps increase later if we
781 // still have an issue?
782 timeout_ns *= USER_MULTIPLIER;
783 }
784 uint64_t avg_wait_multiplier = 1;
785 uint64_t wait_multiplier = 1;
786 if (attempt_of_4 != 1) {
787 // TODO: RequestSynchronousCheckpoint routinely passes attempt_of_4 = 0. Can
788 // we avoid the getpriority() call?
789 if (getpriority(PRIO_PROCESS, 0 /* this thread */) > 0) {
790 // We're a low priority thread, and thus have a longer ANR timeout. Increase the suspend
791 // timeout.
792 avg_wait_multiplier = 3;
793 }
794 // To avoid the system calls in the common case, we fail to increase the first of 4 waits, but
795 // then compensate during the last one. This also allows somewhat longer thread monitoring
796 // before we time out.
797 wait_multiplier = attempt_of_4 == 4 ? 2 * avg_wait_multiplier - 1 : avg_wait_multiplier;
798 timeout_ns *= wait_multiplier;
799 }
800 bool collect_state = (t != 0 && (attempt_of_4 == 0 || attempt_of_4 == 4));
801 int32_t cur_val = barrier->load(std::memory_order_acquire);
802 if (cur_val <= 0) {
803 DCHECK_EQ(cur_val, 0);
804 return std::nullopt;
805 }
806 unsigned i = 0;
807 if (WaitOnceForSuspendBarrier(barrier, cur_val, timeout_ns)) {
808 i = 1;
809 }
810 cur_val = barrier->load(std::memory_order_acquire);
811 if (cur_val <= 0) {
812 DCHECK_EQ(cur_val, 0);
813 return std::nullopt;
814 }
815
816 // Extra timeout to compensate for concurrent thread dumps, so that we are less likely to time
817 // out during ANR dumps.
818 uint64_t dump_adjustment_ns = 0;
819 // Total timeout increment if we see a concurrent thread dump. Distributed evenly across
820 // remaining iterations.
821 static constexpr uint64_t kDumpWaitNSecs = 30'000'000'000ull; // 30 seconds
822 // Replacement timeout if thread is stopped for tracing, probably by a debugger.
823 static constexpr uint64_t kTracingWaitNSecs = 7'200'000'000'000ull; // wait a bit < 2 hours;
824
825 // Long wait; gather information in case of timeout.
826 std::string sampled_state = collect_state ? GetOsThreadStatQuick(t) : "";
827 if (collect_state && GetStateFromStatString(sampled_state) == 't') {
828 LOG(WARNING) << "Thread suspension nearly timed out due to Tracing stop (debugger attached?)";
829 timeout_ns = kTracingWaitNSecs;
830 }
831 // Only fail after kSuspendBarrierIters timeouts, to make us robust against app freezing.
832 while (i < kSuspendBarrierIters) {
833 if (WaitOnceForSuspendBarrier(barrier, cur_val, timeout_ns + dump_adjustment_ns)) {
834 ++i;
835 #if ART_USE_FUTEXES
836 if (!kShortSuspendTimeouts) {
837 CHECK_GE(NanoTime() - start_time, i * timeout_ns / kSuspendBarrierIters - 1'000'000);
838 }
839 #endif
840 }
841 cur_val = barrier->load(std::memory_order_acquire);
842 if (cur_val <= 0) {
843 DCHECK_EQ(cur_val, 0);
844 return std::nullopt;
845 }
846 std::optional<uint64_t> last_sigquit_nanotime = Runtime::Current()->SigQuitNanoTime();
847 if (last_sigquit_nanotime.has_value() && i < kSuspendBarrierIters) {
848 // Adjust dump_adjustment_ns to reflect the number of iterations we have left and how long
849 // ago we started dumping threads.
850 uint64_t new_unscaled_adj = kDumpWaitNSecs + last_sigquit_nanotime.value() - NanoTime();
851 // Scale by the fraction of iterations still remaining.
852       dump_adjustment_ns = new_unscaled_adj * (kSuspendBarrierIters - i) / kSuspendBarrierIters;
853 }
854 // Keep the old dump_adjustment_ns if SigQuitNanoTime() was cleared.
855 }
856 uint64_t final_wait_time = NanoTime() - start_time;
857 uint64_t total_wait_time = attempt_of_4 == 0 ?
858 final_wait_time :
859 4 * final_wait_time * avg_wait_multiplier / wait_multiplier;
860 return collect_state ? "Target states: [" + sampled_state + ", " + GetOsThreadStatQuick(t) + "]" +
861 (cur_val == 0 ? "(barrier now passed)" : "") +
862 " Final wait time: " + PrettyDuration(final_wait_time) +
863 "; appr. total wait time: " + PrettyDuration(total_wait_time) :
864 "";
865 }
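
// A compile-time re-derivation of the multiplier scheme described above: attempt 1 is not scaled,
// attempts 2 and 3 use the average multiplier, and attempt 4 uses (2 * avg - 1), so the four waits
// still sum to 4 * avg times the base timeout. The helper exists only to check that arithmetic.
namespace wait_multiplier_check {
constexpr uint64_t Multiplier(int attempt_of_4, uint64_t avg) {
  return attempt_of_4 == 1 ? 1u : (attempt_of_4 == 4 ? 2 * avg - 1 : avg);
}
static_assert(Multiplier(1, 3) + Multiplier(2, 3) + Multiplier(3, 3) + Multiplier(4, 3) == 4 * 3,
              "low-priority case: 1 + 3 + 3 + 5 == 12");
static_assert(Multiplier(1, 1) + Multiplier(2, 1) + Multiplier(3, 1) + Multiplier(4, 1) == 4 * 1,
              "normal-priority case: all four waits unscaled");
}  // namespace wait_multiplier_check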
866
867 void ThreadList::SuspendAll(const char* cause, bool long_suspend) {
868 Thread* self = Thread::Current();
869
870 if (self != nullptr) {
871 VLOG(threads) << *self << " SuspendAll for " << cause << " starting...";
872 } else {
873 VLOG(threads) << "Thread[null] SuspendAll for " << cause << " starting...";
874 }
875 {
876 ScopedTrace trace("Suspending mutator threads");
877 const uint64_t start_time = NanoTime();
878
879 SuspendAllInternal(self);
880 // All threads are known to have suspended (but a thread may still own the mutator lock)
881 // Make sure this thread grabs exclusive access to the mutator lock and its protected data.
882 #if HAVE_TIMED_RWLOCK
883 while (true) {
884 if (Locks::mutator_lock_->ExclusiveLockWithTimeout(self,
885 NsToMs(thread_suspend_timeout_ns_),
886 0)) {
887 break;
888 } else if (!long_suspend_) {
889         // Reading long_suspend without the mutator lock is slightly racy; in some rare cases,
890         // this could result in a thread suspend timeout.
891 // Timeout if we wait more than thread_suspend_timeout_ns_ nanoseconds.
892 UnsafeLogFatalForThreadSuspendAllTimeout();
893 }
894 }
895 #else
896 Locks::mutator_lock_->ExclusiveLock(self);
897 #endif
898
899 long_suspend_ = long_suspend;
900
901 const uint64_t end_time = NanoTime();
902 const uint64_t suspend_time = end_time - start_time;
903 suspend_all_histogram_.AdjustAndAddValue(suspend_time);
904 if (suspend_time > kLongThreadSuspendThreshold) {
905 LOG(WARNING) << "Suspending all threads took: " << PrettyDuration(suspend_time);
906 }
907
908 if (kDebugLocking) {
909 // Debug check that all threads are suspended.
910 AssertOtherThreadsAreSuspended(self);
911 }
912 }
913
914 // SuspendAllInternal blocks if we are in the middle of a flip.
915 DCHECK(!self->ReadFlag(ThreadFlag::kPendingFlipFunction, std::memory_order_relaxed));
916 DCHECK(!self->ReadFlag(ThreadFlag::kRunningFlipFunction, std::memory_order_relaxed));
917
918 ATraceBegin((std::string("Mutator threads suspended for ") + cause).c_str());
919
920 if (self != nullptr) {
921 VLOG(threads) << *self << " SuspendAll complete";
922 } else {
923 VLOG(threads) << "Thread[null] SuspendAll complete";
924 }
925 }
926
927 // Ensures all threads running Java suspend and that those not running Java don't start.
928 void ThreadList::SuspendAllInternal(Thread* self, SuspendReason reason) {
929 // self can be nullptr if this is an unregistered thread.
930 Locks::mutator_lock_->AssertNotExclusiveHeld(self);
931 Locks::thread_list_lock_->AssertNotHeld(self);
932 Locks::thread_suspend_count_lock_->AssertNotHeld(self);
933 if (kDebugLocking && self != nullptr) {
934 CHECK_NE(self->GetState(), ThreadState::kRunnable);
935 }
936
937 // First request that all threads suspend, then wait for them to suspend before
938 // returning. This suspension scheme also relies on other behaviour:
939 // 1. Threads cannot be deleted while they are suspended or have a suspend-
940 // request flag set - (see Unregister() below).
941 // 2. When threads are created, they are created in a suspended state (actually
942 // kNative) and will never begin executing Java code without first checking
943 // the suspend-request flag.
944
945 // The atomic counter for number of threads that need to pass the barrier.
946 AtomicInteger pending_threads;
947
948 for (int iter_count = 1;; ++iter_count) {
949 {
950 MutexLock mu(self, *Locks::thread_list_lock_);
951 MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
952 if (suspend_all_count_ == 0) {
953 // Never run multiple SuspendAlls concurrently.
954 // If we are asked to suspend ourselves, we proceed anyway, but must ignore suspend
955 // request from other threads until we resume them.
956 bool found_myself = false;
957 // Update global suspend all state for attaching threads.
958 ++suspend_all_count_;
959 pending_threads.store(list_.size() - (self == nullptr ? 0 : 1), std::memory_order_relaxed);
960 // Increment everybody else's suspend count.
961 for (const auto& thread : list_) {
962 if (thread == self) {
963 found_myself = true;
964 } else {
965 VLOG(threads) << "requesting thread suspend: " << *thread;
966 DCHECK_EQ(suspend_all_count_, 1);
967 thread->IncrementSuspendCount(self, &pending_threads, nullptr, reason);
968 if (thread->IsSuspended()) {
969 // Effectively pass the barrier on behalf of the already suspended thread.
970 // The thread itself cannot yet have acted on our request since we still hold the
971 // suspend_count_lock_, and it will notice that kActiveSuspendBarrier has already
972 // been cleared if and when it acquires the lock in PassActiveSuspendBarriers().
973 DCHECK_EQ(thread->tlsPtr_.active_suspendall_barrier, &pending_threads);
974 pending_threads.fetch_sub(1, std::memory_order_seq_cst);
975 thread->tlsPtr_.active_suspendall_barrier = nullptr;
976 if (!thread->HasActiveSuspendBarrier()) {
977 thread->AtomicClearFlag(ThreadFlag::kActiveSuspendBarrier);
978 }
979 }
980 // else:
981 // The target thread was not yet suspended, and hence will be forced to execute
982 // TransitionFromRunnableToSuspended shortly. Since we set the kSuspendRequest flag
983 // before checking, and it checks kActiveSuspendBarrier after noticing kSuspendRequest,
984 // it must notice kActiveSuspendBarrier when it does. Thus it is guaranteed to
985 // decrement the suspend barrier. We're relying on store; load ordering here, but
986 // that's not a problem, since state and flags all reside in the same atomic, and
987 // are thus properly ordered, even for relaxed accesses.
988 }
989 }
990 self->AtomicSetFlag(ThreadFlag::kSuspensionImmune, std::memory_order_relaxed);
991 DCHECK(self == nullptr || found_myself);
992 break;
993 }
994 }
995 if (iter_count >= kMaxSuspendRetries) {
996 LOG(FATAL) << "Too many SuspendAll retries: " << iter_count;
997 } else {
998 MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
999 DCHECK_LE(suspend_all_count_, 1);
1000 if (suspend_all_count_ != 0) {
1001 // This may take a while, and we're not runnable, and thus would otherwise not block.
1002 Thread::resume_cond_->WaitHoldingLocks(self);
1003 continue;
1004 }
1005 }
1006 // We're already not runnable, so an attempt to suspend us should succeed.
1007 }
1008
1009 Thread* culprit = nullptr;
1010 pid_t tid = 0;
1011 std::ostringstream oss;
1012 for (int attempt_of_4 = 1; attempt_of_4 <= 4; ++attempt_of_4) {
1013 auto result = WaitForSuspendBarrier(&pending_threads, tid, attempt_of_4);
1014 if (!result.has_value()) {
1015 // Wait succeeded.
1016 break;
1017 }
1018 if (attempt_of_4 == 3) {
1019 // Second to the last attempt; Try to gather more information in case we time out.
1020 MutexLock mu(self, *Locks::thread_list_lock_);
1021 MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
1022 oss << "remaining threads: ";
1023 for (const auto& thread : list_) {
1024 if (thread != self && !thread->IsSuspended()) {
1025 culprit = thread;
1026 oss << *thread << ", ";
1027 }
1028 }
1029 if (culprit != nullptr) {
1030 tid = culprit->GetTid();
1031 }
1032 } else if (attempt_of_4 == 4) {
1033 // Final attempt still timed out.
1034 if (culprit == nullptr) {
1035 LOG(FATAL) << "SuspendAll timeout. Couldn't find holdouts.";
1036 } else {
1037 std::string name;
1038 culprit->GetThreadName(name);
1039 oss << "Info for " << name << ": ";
1040 std::string thr_descr =
1041 StringPrintf("state&flags: 0x%x, Java/native priority: %d/%d, barrier value: %d, ",
1042 culprit->GetStateAndFlags(std::memory_order_relaxed).GetValue(),
1043 culprit->GetNativePriority(),
1044 getpriority(PRIO_PROCESS /* really thread */, culprit->GetTid()),
1045 pending_threads.load());
1046 oss << thr_descr << result.value();
1047 culprit->AbortInThis("SuspendAll timeout; " + oss.str());
1048 }
1049 }
1050 }
1051 }
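
// A minimal model of the barrier accounting in SuspendAllInternal(): the initiator charges one
// unit per target up front, each running target pays its own unit once it parks, and the
// initiator immediately pays the unit for any target it can already see is suspended (as done
// above while still holding thread_suspend_count_lock_). The initiator is done when the counter
// reaches zero. Names are illustrative; synchronization around "already parked" is left to the
// caller, just as the real code relies on thread_suspend_count_lock_. Assumes <atomic>.
namespace suspend_barrier_sketch {
struct SuspendBarrier {
  std::atomic<int32_t> pending{0};

  void ChargeTargets(int32_t n) { pending.store(n, std::memory_order_relaxed); }

  // Called by the initiator for a target it observes to be already suspended.
  void PayOnBehalfOfParkedTarget() { pending.fetch_sub(1, std::memory_order_seq_cst); }

  // Called by a running target once it has parked itself.
  void PayOwnShare() { pending.fetch_sub(1, std::memory_order_seq_cst); }

  bool AllPaid() const { return pending.load(std::memory_order_acquire) <= 0; }
};
}  // namespace suspend_barrier_sketch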
1052
1053 void ThreadList::ResumeAll() {
1054 Thread* self = Thread::Current();
1055 if (kDebugLocking) {
1056 // Debug check that all threads are suspended.
1057 AssertOtherThreadsAreSuspended(self);
1058 }
1059 MutexLock mu(self, *Locks::thread_list_lock_);
1060 MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
1061 ATraceEnd(); // Matching "Mutator threads suspended ..." in SuspendAll.
1062 ResumeAllInternal(self);
1063 }
1064
1065 // Holds thread_list_lock_ and suspend_count_lock_
1066 void ThreadList::ResumeAllInternal(Thread* self) {
1067 DCHECK_NE(self->GetState(), ThreadState::kRunnable);
1068 if (self != nullptr) {
1069 VLOG(threads) << *self << " ResumeAll starting";
1070 } else {
1071 VLOG(threads) << "Thread[null] ResumeAll starting";
1072 }
1073
1074 ScopedTrace trace("Resuming mutator threads");
1075
1076 long_suspend_ = false;
1077
1078 Locks::mutator_lock_->ExclusiveUnlock(self);
1079
1080 // Decrement the suspend counts for all threads.
1081 for (const auto& thread : list_) {
1082 if (thread != self) {
1083 thread->DecrementSuspendCount(self);
1084 }
1085 }
1086
1087 // Update global suspend all state for attaching threads. Unblocks other SuspendAlls once
1088 // suspend_count_lock_ is released.
1089 --suspend_all_count_;
1090 self->AtomicClearFlag(ThreadFlag::kSuspensionImmune, std::memory_order_relaxed);
1091 // Pending suspend requests for us will be handled when we become Runnable again.
1092
1093 // Broadcast a notification to all suspended threads, some or all of
1094 // which may choose to wake up. No need to wait for them.
1095 if (self != nullptr) {
1096 VLOG(threads) << *self << " ResumeAll waking others";
1097 } else {
1098 VLOG(threads) << "Thread[null] ResumeAll waking others";
1099 }
1100 Thread::resume_cond_->Broadcast(self);
1101
1102 if (self != nullptr) {
1103 VLOG(threads) << *self << " ResumeAll complete";
1104 } else {
1105 VLOG(threads) << "Thread[null] ResumeAll complete";
1106 }
1107 }
1108
1109 bool ThreadList::Resume(Thread* thread, SuspendReason reason) {
1110 // This assumes there was an ATraceBegin when we suspended the thread.
1111 ATraceEnd();
1112
1113 Thread* self = Thread::Current();
1114 DCHECK_NE(thread, self);
1115 VLOG(threads) << "Resume(" << reinterpret_cast<void*>(thread) << ") starting..." << reason;
1116
1117 {
1118 // To check Contains.
1119 MutexLock mu(self, *Locks::thread_list_lock_);
1120 // To check IsSuspended.
1121 MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
1122 if (UNLIKELY(!thread->IsSuspended())) {
1123 LOG(reason == SuspendReason::kForUserCode ? ERROR : FATAL)
1124 << "Resume(" << reinterpret_cast<void*>(thread) << ") thread not suspended";
1125 return false;
1126 }
1127 if (!Contains(thread)) {
1128 // We only expect threads within the thread-list to have been suspended otherwise we can't
1129 // stop such threads from delete-ing themselves.
1130 LOG(reason == SuspendReason::kForUserCode ? ERROR : FATAL)
1131 << "Resume(" << reinterpret_cast<void*>(thread) << ") thread not within thread list";
1132 return false;
1133 }
1134 thread->DecrementSuspendCount(self, /*for_user_code=*/(reason == SuspendReason::kForUserCode));
1135 Thread::resume_cond_->Broadcast(self);
1136 }
1137
1138 VLOG(threads) << "Resume(" << reinterpret_cast<void*>(thread) << ") finished waking others";
1139 return true;
1140 }
1141
1142 bool ThreadList::SuspendThread(Thread* self,
1143 Thread* thread,
1144 SuspendReason reason,
1145 ThreadState self_state,
1146 const char* func_name,
1147 int attempt_of_4) {
1148 bool is_suspended = false;
1149   VLOG(threads) << func_name << " starting";
1150 pid_t tid = thread->GetTid();
1151 uint8_t suspended_count;
1152 uint8_t checkpoint_count;
1153 WrappedSuspend1Barrier wrapped_barrier{};
1154 static_assert(sizeof wrapped_barrier.barrier_ == sizeof(uint32_t));
1155 ThreadExitFlag tef;
1156 bool exited = false;
1157 thread->NotifyOnThreadExit(&tef);
1158 int iter_count = 1;
1159 do {
1160 {
1161 Locks::mutator_lock_->AssertSharedHeld(self);
1162 Locks::thread_list_lock_->AssertHeld(self);
1163 // Note: this will transition to runnable and potentially suspend.
1164 DCHECK(Contains(thread));
1165 // This implementation fails if thread == self. Let the clients handle that case
1166 // appropriately.
1167 CHECK_NE(thread, self) << func_name << "(self)";
1168 VLOG(threads) << func_name << " suspending: " << *thread;
1169 {
1170 MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
1171 if (LIKELY(self->GetSuspendCount() == 0)) {
1172 suspended_count = thread->suspended_count_;
1173 checkpoint_count = thread->checkpoint_count_;
1174 thread->IncrementSuspendCount(self, nullptr, &wrapped_barrier, reason);
1175 if (thread->IsSuspended()) {
1176 // See the discussion in mutator_gc_coord.md and SuspendAllInternal for the race here.
1177 thread->RemoveFirstSuspend1Barrier(&wrapped_barrier);
1178 // PassActiveSuspendBarriers couldn't have seen our barrier, since it also acquires
1179 // 'thread_suspend_count_lock_'. `wrapped_barrier` will not be accessed.
1180 if (!thread->HasActiveSuspendBarrier()) {
1181 thread->AtomicClearFlag(ThreadFlag::kActiveSuspendBarrier);
1182 }
1183 is_suspended = true;
1184 }
1185 DCHECK_GT(thread->GetSuspendCount(), 0);
1186 break;
1187 }
1188 // Else we hold the suspend count lock but another thread is trying to suspend us,
1189 // making it unsafe to try to suspend another thread in case we get a cycle.
1190 // Start the loop again, which will allow this thread to be suspended.
1191 }
1192 }
1193 // All locks are released, and we should quickly exit the suspend-unfriendly state. Retry.
1194 if (iter_count >= kMaxSuspendRetries) {
1195 LOG(FATAL) << "Too many suspend retries";
1196 }
1197 Locks::thread_list_lock_->ExclusiveUnlock(self);
1198 {
1199 ScopedThreadSuspension sts(self, ThreadState::kSuspended);
1200 usleep(kThreadSuspendSleepUs);
1201 ++iter_count;
1202 }
1203 Locks::thread_list_lock_->ExclusiveLock(self);
1204 exited = tef.HasExited();
1205 } while (!exited);
1206 thread->UnregisterThreadExitFlag(&tef);
1207 Locks::thread_list_lock_->ExclusiveUnlock(self);
1208 self->TransitionFromRunnableToSuspended(self_state);
1209 if (exited) {
1210 // This is OK: There's a race in inflating a lock and the owner giving up ownership and then
1211 // dying.
1212 LOG(WARNING) << StringPrintf("Thread with tid %d exited before suspending", tid);
1213 return false;
1214 }
1215 // Now wait for target to decrement suspend barrier.
1216 std::optional<std::string> failure_info;
1217 if (!is_suspended) {
1218 failure_info = WaitForSuspendBarrier(&wrapped_barrier.barrier_, tid, attempt_of_4);
1219 if (!failure_info.has_value()) {
1220 is_suspended = true;
1221 }
1222 }
1223 while (!is_suspended) {
1224 if (attempt_of_4 > 0 && attempt_of_4 < 4) {
1225 // Caller will try again. Give up and resume the thread for now. We need to make sure
1226 // that wrapped_barrier is removed from the list before we deallocate it.
1227 MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
1228 if (wrapped_barrier.barrier_.load() == 0) {
1229 // Succeeded in the meantime.
1230 is_suspended = true;
1231 continue;
1232 }
1233 thread->RemoveSuspend1Barrier(&wrapped_barrier);
1234 if (!thread->HasActiveSuspendBarrier()) {
1235 thread->AtomicClearFlag(ThreadFlag::kActiveSuspendBarrier);
1236 }
1237 // Do not call Resume(), since we are probably not fully suspended.
1238 thread->DecrementSuspendCount(self,
1239 /*for_user_code=*/(reason == SuspendReason::kForUserCode));
1240 Thread::resume_cond_->Broadcast(self);
1241 return false;
1242 }
1243 std::string name;
1244 thread->GetThreadName(name);
1245 WrappedSuspend1Barrier* first_barrier;
1246 {
1247 MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
1248 first_barrier = thread->tlsPtr_.active_suspend1_barriers;
1249 }
1250 // 'thread' should still have a suspend request pending, and hence stick around. Try to abort
1251 // there, since its stack trace is much more interesting than ours.
1252 std::string message = StringPrintf(
1253 "%s timed out: %s: state&flags: 0x%x, Java/native priority: %d/%d,"
1254 " barriers: %p, ours: %p, barrier value: %d, nsusps: %d, ncheckpts: %d, thread_info: %s",
1255 func_name,
1256 name.c_str(),
1257 thread->GetStateAndFlags(std::memory_order_relaxed).GetValue(),
1258 thread->GetNativePriority(),
1259 getpriority(PRIO_PROCESS /* really thread */, thread->GetTid()),
1260 first_barrier,
1261 &wrapped_barrier,
1262 wrapped_barrier.barrier_.load(),
1263 thread->suspended_count_ - suspended_count,
1264 thread->checkpoint_count_ - checkpoint_count,
1265 failure_info.value().c_str());
1266 // Check one last time whether thread passed the suspend barrier. Empirically this seems to
1267 // happen maybe between 1 and 5% of the time.
1268 if (wrapped_barrier.barrier_.load() != 0) {
1269 // thread still has a pointer to wrapped_barrier. Returning and continuing would be unsafe
1270 // without additional cleanup.
1271 thread->AbortInThis(message);
1272 UNREACHABLE();
1273 }
1274 is_suspended = true;
1275 }
1276 // wrapped_barrier.barrier_ will no longer be accessed.
1277 VLOG(threads) << func_name << " suspended: " << *thread;
1278 if (ATraceEnabled()) {
1279 std::string name;
1280 thread->GetThreadName(name);
1281 ATraceBegin(
1282 StringPrintf("%s suspended %s for tid=%d", func_name, name.c_str(), thread->GetTid())
1283 .c_str());
1284 }
1285 if (kIsDebugBuild) {
1286 CHECK(thread->IsSuspended());
1287 MutexLock suspend_count_mu(self, *Locks::thread_suspend_count_lock_);
1288 thread->CheckBarrierInactive(&wrapped_barrier);
1289 }
1290 return true;
1291 }
1292
1293 Thread* ThreadList::SuspendThreadByPeer(jobject peer, SuspendReason reason) {
1294 Thread* const self = Thread::Current();
1295 ThreadState old_self_state = self->GetState();
1296 self->TransitionFromSuspendedToRunnable();
1297 Locks::thread_list_lock_->ExclusiveLock(self);
1298 ObjPtr<mirror::Object> thread_ptr = self->DecodeJObject(peer);
1299 Thread* thread = Thread::FromManagedThread(self, thread_ptr);
1300 if (thread == nullptr || !Contains(thread)) {
1301 if (thread == nullptr) {
1302 ObjPtr<mirror::Object> name = WellKnownClasses::java_lang_Thread_name->GetObject(thread_ptr);
1303 std::string thr_name = (name == nullptr ? "<unknown>" : name->AsString()->ToModifiedUtf8());
1304 LOG(WARNING) << "No such thread for suspend"
1305 << ": " << peer << ":" << thr_name;
1306 } else {
1307 LOG(WARNING) << "SuspendThreadByPeer failed for unattached thread: "
1308 << reinterpret_cast<void*>(thread);
1309 }
1310 Locks::thread_list_lock_->ExclusiveUnlock(self);
1311 self->TransitionFromRunnableToSuspended(old_self_state);
1312 return nullptr;
1313 }
1314 VLOG(threads) << "SuspendThreadByPeer found thread: " << *thread;
1315 // Releases thread_list_lock_ and mutator lock.
1316 bool success = SuspendThread(self, thread, reason, old_self_state, __func__, 0);
1317 Locks::thread_list_lock_->AssertNotHeld(self);
1318 return success ? thread : nullptr;
1319 }
1320
1321 Thread* ThreadList::SuspendThreadByThreadId(uint32_t thread_id,
1322 SuspendReason reason,
1323 int attempt_of_4) {
1324 Thread* const self = Thread::Current();
1325 ThreadState old_self_state = self->GetState();
1326 CHECK_NE(thread_id, kInvalidThreadId);
1327 VLOG(threads) << "SuspendThreadByThreadId starting";
1328 self->TransitionFromSuspendedToRunnable();
1329 Locks::thread_list_lock_->ExclusiveLock(self);
1330 Thread* thread = FindThreadByThreadId(thread_id);
1331 if (thread == nullptr) {
1332 // There's a race in inflating a lock and the owner giving up ownership and then dying.
1333 LOG(WARNING) << StringPrintf("No such thread id %d for suspend", thread_id);
1334 Locks::thread_list_lock_->ExclusiveUnlock(self);
1335 self->TransitionFromRunnableToSuspended(old_self_state);
1336 return nullptr;
1337 }
1338 DCHECK(Contains(thread));
1339 VLOG(threads) << "SuspendThreadByThreadId found thread: " << *thread;
1340 // Releases thread_list_lock_ and mutator lock.
1341 bool success = SuspendThread(self, thread, reason, old_self_state, __func__, attempt_of_4);
1342 Locks::thread_list_lock_->AssertNotHeld(self);
1343 return success ? thread : nullptr;
1344 }
1345
1346 Thread* ThreadList::FindThreadByThreadId(uint32_t thread_id) {
1347 for (const auto& thread : list_) {
1348 if (thread->GetThreadId() == thread_id) {
1349 return thread;
1350 }
1351 }
1352 return nullptr;
1353 }
1354
1355 Thread* ThreadList::FindThreadByTid(int tid) {
1356 for (const auto& thread : list_) {
1357 if (thread->GetTid() == tid) {
1358 return thread;
1359 }
1360 }
1361 return nullptr;
1362 }
1363
1364 void ThreadList::WaitForOtherNonDaemonThreadsToExit(bool check_no_birth) {
1365 ScopedTrace trace(__PRETTY_FUNCTION__);
1366 Thread* self = Thread::Current();
1367 Locks::mutator_lock_->AssertNotHeld(self);
1368 while (true) {
1369 Locks::runtime_shutdown_lock_->Lock(self);
1370 if (check_no_birth) {
1371 // No more threads can be born after we start to shutdown.
1372 CHECK(Runtime::Current()->IsShuttingDownLocked());
1373 CHECK_EQ(Runtime::Current()->NumberOfThreadsBeingBorn(), 0U);
1374 } else {
1375 if (Runtime::Current()->NumberOfThreadsBeingBorn() != 0U) {
1376 // Awkward. Shutdown_cond_ is private, but the only live thread may not be registered yet.
1377 // Fortunately, this is used mostly for testing, and not performance-critical.
1378 Locks::runtime_shutdown_lock_->Unlock(self);
1379 usleep(1000);
1380 continue;
1381 }
1382 }
1383 MutexLock mu(self, *Locks::thread_list_lock_);
1384 Locks::runtime_shutdown_lock_->Unlock(self);
1385 // Also wait for any threads that are unregistering to finish. This is required so that no
1386 // threads access the thread list after it is deleted. TODO: This may not work for user daemon
1387 // threads since they could unregister at the wrong time.
1388 bool done = unregistering_count_ == 0;
1389 if (done) {
1390 for (const auto& thread : list_) {
1391 if (thread != self && !thread->IsDaemon()) {
1392 done = false;
1393 break;
1394 }
1395 }
1396 }
1397 if (done) {
1398 break;
1399 }
1400 // Wait for another thread to exit before re-checking.
1401 Locks::thread_exit_cond_->Wait(self);
1402 }
1403 }
1404
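// Rough timeline of the best-effort daemon shutdown below (a summary of the code, not a
// guarantee): request suspension of every remaining daemon and swap in the "sleep forever" JNI
// table, poll for up to ~2s until no daemon is still runnable, sleep ~400ms so in-flight code can
// quiesce, mark every JNIEnv as "runtime deleted", then sleep ~400ms more (longer under ASan)
// before the caller starts tearing down runtime data structures.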
void ThreadList::SuspendAllDaemonThreadsForShutdown() {
  ScopedTrace trace(__PRETTY_FUNCTION__);
  Thread* self = Thread::Current();
  size_t daemons_left = 0;
  {
    // Tell all the daemons it's time to suspend.
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    for (const auto& thread : list_) {
      // This is only run after all non-daemon threads have exited, so the remainder should all be
      // daemons.
      CHECK(thread->IsDaemon()) << *thread;
      if (thread != self) {
        thread->IncrementSuspendCount(self);
        ++daemons_left;
      }
      // We are shutting down the runtime, so set the JNI functions of all the JNIEnvs to be the
      // sleep-forever one.
      thread->GetJniEnv()->SetFunctionsToRuntimeShutdownFunctions();
    }
  }
  if (daemons_left == 0) {
    // No threads left; safe to shut down.
    return;
  }
  // There is not a clean way to shut down if we have daemons left. We have no mechanism for
  // killing them and reclaiming thread stacks. We also have no mechanism for waiting until they
  // have truly finished touching the memory we are about to deallocate. We do the best we can with
  // timeouts.
  //
  // If we have any daemons left, wait until they are (a) suspended and (b) no longer stuck in
  // code that is about to access runtime state without being in a runnable state. We attempt to
  // achieve the latter by simply waiting long enough for things to quiesce. Examples: monitor
  // code, or code waking up from a condition variable.
  //
  // Give the threads a chance to suspend, complaining if they're slow. (a)
  bool have_complained = false;
  static constexpr size_t kTimeoutMicroseconds = 2000 * 1000;
  static constexpr size_t kSleepMicroseconds = 1000;
  bool all_suspended = false;
  for (size_t i = 0; !all_suspended && i < kTimeoutMicroseconds / kSleepMicroseconds; ++i) {
    bool found_running = false;
    {
      MutexLock mu(self, *Locks::thread_list_lock_);
      for (const auto& thread : list_) {
        if (thread != self && thread->GetState() == ThreadState::kRunnable) {
          if (!have_complained) {
            LOG(WARNING) << "daemon thread not yet suspended: " << *thread;
            have_complained = true;
          }
          found_running = true;
        }
      }
    }
    if (found_running) {
      // Sleep briefly before checking again. Max total sleep time is kTimeoutMicroseconds.
      usleep(kSleepMicroseconds);
    } else {
      all_suspended = true;
    }
  }
  if (!all_suspended) {
    // We can get here if a daemon thread executed a fastnative native call, so that it
    // remained in runnable state, and then made a JNI call after we called
    // SetFunctionsToRuntimeShutdownFunctions(), causing it to permanently stay in a harmless
    // but runnable state. See b/147804269.
    LOG(WARNING) << "timed out suspending all daemon threads";
  }
  // Assume all threads are either suspended or somehow wedged.
  // Wait again for all the now "suspended" threads to actually quiesce. (b)
  static constexpr size_t kDaemonSleepTime = 400'000;
  usleep(kDaemonSleepTime);
  std::list<Thread*> list_copy;
  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    // Half-way through the wait, set the "runtime deleted" flag, causing any newly awoken
    // threads to immediately go back to sleep without touching memory. This prevents us from
    // touching deallocated memory, but it also prevents mutexes from getting released. Thus we
    // only do this once we're reasonably sure that no system mutexes are still held.
    for (const auto& thread : list_) {
      DCHECK(thread == self || !all_suspended || thread->GetState() != ThreadState::kRunnable);
      // In the !all_suspended case, the target is probably sleeping.
      thread->GetJniEnv()->SetRuntimeDeleted();
      // Possibly contended Mutex acquisitions are unsafe after this.
      // Releasing thread_list_lock_ is OK, since it can't block.
    }
  }
  // Finally wait for any threads woken before we set the "runtime deleted" flags to finish
  // touching memory.
  usleep(kDaemonSleepTime);
#if defined(__has_feature)
#if __has_feature(address_sanitizer) || __has_feature(hwaddress_sanitizer)
  // Sleep a bit longer with -fsanitize=address, since everything is slower.
  usleep(2 * kDaemonSleepTime);
#endif
#endif
  // At this point no threads should be touching our data structures anymore.
}

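// Registration interacts with SuspendAll: if a suspend-all is in progress when a thread attaches,
// the new thread starts with a pending suspension so the suspend-all invariant still holds.
// Illustrative sketch (not part of this file) of how native code typically reaches Register() and
// Unregister(), via the standard JNI attach/detach calls:
//
//   JavaVM* vm = ...;  // e.g. saved in JNI_OnLoad
//   JNIEnv* env = nullptr;
//   vm->AttachCurrentThread(&env, nullptr);  // attach path eventually calls Register()
//   // ... run code on this native thread ...
//   vm->DetachCurrentThread();               // detach path eventually calls Unregister()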
void ThreadList::Register(Thread* self) {
  DCHECK_EQ(self, Thread::Current());
  CHECK(!shut_down_);

  if (VLOG_IS_ON(threads)) {
    std::ostringstream oss;
    self->ShortDump(oss);  // We don't hold the mutator_lock_ yet and so cannot call Dump.
    LOG(INFO) << "ThreadList::Register() " << *self << "\n" << oss.str();
  }

  // Atomically add self to the thread list and make its thread_suspend_count_ reflect ongoing
  // SuspendAll requests.
  MutexLock mu(self, *Locks::thread_list_lock_);
  MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
  if (suspend_all_count_ == 1) {
    self->IncrementSuspendCount(self);
  } else {
    DCHECK_EQ(suspend_all_count_, 0);
  }
  CHECK(!Contains(self));
  list_.push_back(self);
  if (gUseReadBarrier) {
    gc::collector::ConcurrentCopying* const cc =
        Runtime::Current()->GetHeap()->ConcurrentCopyingCollector();
    // Initialize according to the state of the CC collector.
    self->SetIsGcMarkingAndUpdateEntrypoints(cc->IsMarking());
    if (cc->IsUsingReadBarrierEntrypoints()) {
      self->SetReadBarrierEntrypoints();
    }
    self->SetWeakRefAccessEnabled(cc->IsWeakRefAccessEnabled());
  }
}

void ThreadList::Unregister(Thread* self, bool should_run_callbacks) {
  DCHECK_EQ(self, Thread::Current());
  CHECK_NE(self->GetState(), ThreadState::kRunnable);
  Locks::mutator_lock_->AssertNotHeld(self);
  if (self->tls32_.disable_thread_flip_count != 0) {
    LOG(FATAL) << "Incomplete PrimitiveArrayCritical section at exit: " << *self << " count = "
               << self->tls32_.disable_thread_flip_count;
  }

  VLOG(threads) << "ThreadList::Unregister() " << *self;

  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    ++unregistering_count_;
  }

  // Any time-consuming destruction, plus anything that can call back into managed code or
  // suspend and so on, must happen at this point, and not in ~Thread. The self->Destroy() call is
  // what allows threads waiting to join us to proceed. It is important to do this after
  // incrementing unregistering_count_, since we want the runtime to wait for the daemon threads
  // to exit before deleting the thread list.
  self->Destroy(should_run_callbacks);

  uint32_t thin_lock_id = self->GetThreadId();
  while (true) {
    // Remove and delete the Thread* while holding the thread_list_lock_ and
    // thread_suspend_count_lock_ so that the unregistering thread cannot be suspended.
    // Note: deliberately not using MutexLock that could hold a stale self pointer.
    {
      MutexLock mu(self, *Locks::thread_list_lock_);
      if (!Contains(self)) {
        std::string thread_name;
        self->GetThreadName(thread_name);
        std::ostringstream os;
        DumpNativeStack(os, GetTid(), " native: ", nullptr);
        LOG(FATAL) << "Request to unregister unattached thread " << thread_name << "\n" << os.str();
        UNREACHABLE();
      } else {
        MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
        Thread::StateAndFlags state_and_flags = self->GetStateAndFlags(std::memory_order_acquire);
        if (!state_and_flags.IsFlagSet(ThreadFlag::kRunningFlipFunction) &&
            !state_and_flags.IsFlagSet(ThreadFlag::kSuspendRequest)) {
          list_.remove(self);
          self->SignalExitFlags();
          break;
        }
      }
    }
    // In the case where we are not suspended yet, sleep to leave other threads time to execute.
    // This is important if there are realtime threads. b/111277984
    usleep(1);
    // We failed to remove the thread due to a suspend request or the like; loop and try again.
  }

  // We flush the trace buffer in Thread::Destroy. We have to check again here because once
  // Thread::Destroy finishes we wait for any active suspend requests to finish before deleting
  // the thread. If a new trace was started during that wait, we may allocate the trace buffer
  // again. That buffer would only contain method entry events for the methods on the stack of the
  // exiting thread. It is not required to flush these entries, but we do need to release the
  // buffer. Ideally we would either not generate trace events for an exiting thread, or use a
  // mechanism for reporting the initial events on a trace start that doesn't rely on a per-thread
  // buffer. Neither approach is trivial to implement, so we simply release the buffer here.
  if (UNLIKELY(self->GetMethodTraceBuffer() != nullptr)) {
    Trace::ReleaseThreadBuffer(self);
  }
  CHECK_EQ(self->GetMethodTraceBuffer(), nullptr) << Trace::GetDebugInformation();
  delete self;

  // Release the thread ID after the thread is finished and deleted to avoid cases where we can
  // temporarily have multiple threads with the same thread id. When this occurs, it causes
  // problems in FindThreadByThreadId / SuspendThreadByThreadId.
  ReleaseThreadId(nullptr, thin_lock_id);

  // Clear the TLS data, so that the underlying native thread is recognizably detached.
  // (It may wish to reattach later.)
#ifdef __BIONIC__
  __get_tls()[TLS_SLOT_ART_THREAD_SELF] = nullptr;
#else
  CHECK_PTHREAD_CALL(pthread_setspecific, (Thread::pthread_key_self_, nullptr), "detach self");
  Thread::self_tls_ = nullptr;
#endif

  // Signal that a thread just detached.
  MutexLock mu(nullptr, *Locks::thread_list_lock_);
  --unregistering_count_;
  Locks::thread_exit_cond_->Broadcast(nullptr);
}

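// Usage sketch for ForEach() (illustrative only): the callback runs while iterating list_
// directly, so callers must already hold Locks::thread_list_lock_ or otherwise keep the list
// stable.
//
//   static void CountThreads([[maybe_unused]] Thread* thread, void* arg) {
//     ++*reinterpret_cast<size_t*>(arg);
//   }
//   size_t count = 0;
//   thread_list->ForEach(CountThreads, &count);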
void ThreadList::ForEach(void (*callback)(Thread*, void*), void* context) {
  for (const auto& thread : list_) {
    callback(thread, context);
  }
}

void ThreadList::WaitForUnregisterToComplete(Thread* self) {
  // We hold thread_list_lock_.
  while (unregistering_count_ != 0) {
    LOG(WARNING) << "Waiting for a thread to finish unregistering";
    Locks::thread_exit_cond_->Wait(self);
  }
}

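// The function below visits roots only for threads that are already suspended (or the calling
// thread itself): each thread's suspend count is bumped so it cannot resume mid-visit, threads
// that are still runnable are skipped by immediately dropping the extra count, and all counts are
// restored afterwards. This is a best-effort visit, unlike VisitRoots() further down, which
// covers every registered thread.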
void ThreadList::VisitRootsForSuspendedThreads(RootVisitor* visitor) {
  Thread* const self = Thread::Current();
  std::vector<Thread*> threads_to_visit;

  // Tell threads to suspend and copy them into list.
  {
    MutexLock mu(self, *Locks::thread_list_lock_);
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    for (Thread* thread : list_) {
      thread->IncrementSuspendCount(self);
      if (thread == self || thread->IsSuspended()) {
        threads_to_visit.push_back(thread);
      } else {
        thread->DecrementSuspendCount(self);
      }
    }
  }

  // Visit roots without holding thread_list_lock_ and thread_suspend_count_lock_ to prevent lock
  // order violations.
  for (Thread* thread : threads_to_visit) {
    thread->VisitRoots(visitor, kVisitRootFlagAllRoots);
  }

  // Restore suspend counts.
  {
    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
    for (Thread* thread : threads_to_visit) {
      thread->DecrementSuspendCount(self);
    }
    Thread::resume_cond_->Broadcast(self);
  }
}

void ThreadList::VisitRoots(RootVisitor* visitor, VisitRootFlags flags) const {
  MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
  for (const auto& thread : list_) {
    thread->VisitRoots(visitor, flags);
  }
}

void ThreadList::VisitReflectiveTargets(ReflectiveValueVisitor* visitor) const {
  MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
  for (const auto& thread : list_) {
    thread->VisitReflectiveTargets(visitor);
  }
}

void ThreadList::SweepInterpreterCaches(IsMarkedVisitor* visitor) const {
  MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
  for (const auto& thread : list_) {
    thread->SweepInterpreterCache(visitor);
  }
}

void ThreadList::ClearInterpreterCaches() const {
  Thread* self = Thread::Current();
  Locks::mutator_lock_->AssertExclusiveHeld(self);
  MutexLock mu(self, *Locks::thread_list_lock_);
  for (const auto& thread : list_) {
    thread->GetInterpreterCache()->Clear(thread);
  }
}

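// Thin-lock thread ids are allocated from a fixed-size bitmap and are offset by one so that id 0
// can mean "invalid". For example (illustrative): the first registered thread takes bitmap index
// 0 and is handed id 1; ReleaseThreadId(nullptr, 1) later clears index 0 again.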
uint32_t ThreadList::AllocThreadId(Thread* self) {
  MutexLock mu(self, *Locks::allocated_thread_ids_lock_);
  for (size_t i = 0; i < allocated_ids_.size(); ++i) {
    if (!allocated_ids_[i]) {
      allocated_ids_.set(i);
      return i + 1;  // Zero is reserved to mean "invalid".
    }
  }
  LOG(FATAL) << "Out of internal thread ids";
  UNREACHABLE();
}

void ThreadList::ReleaseThreadId(Thread* self, uint32_t id) {
  MutexLock mu(self, *Locks::allocated_thread_ids_lock_);
  --id;  // Zero is reserved to mean "invalid".
  DCHECK(allocated_ids_[id]) << id;
  allocated_ids_.reset(id);
}

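// Usage sketch for ScopedSuspendAll (illustrative only): it is an RAII guard, so all other
// threads stay suspended exactly for the lifetime of the local object.
//
//   {
//     ScopedSuspendAll ssa(__FUNCTION__);
//     // All other threads are suspended here; safe to walk runtime-wide state.
//   }  // Destructor runs ResumeAll().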
ScopedSuspendAll::ScopedSuspendAll(const char* cause, bool long_suspend) {
  Runtime::Current()->GetThreadList()->SuspendAll(cause, long_suspend);
}

ScopedSuspendAll::~ScopedSuspendAll() {
  Runtime::Current()->GetThreadList()->ResumeAll();
}

}  // namespace art