/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_RUNTIME_THREAD_LIST_H_
#define ART_RUNTIME_THREAD_LIST_H_

#include <bitset>
#include <list>
#include <vector>

#include "barrier.h"
#include "base/histogram.h"
#include "base/macros.h"
#include "base/mutex.h"
#include "base/value_object.h"
#include "jni.h"
#include "reflective_handle_scope.h"
#include "suspend_reason.h"
#include "thread_state.h"

namespace art HIDDEN {
namespace gc {
namespace collector {
class GarbageCollector;
}  // namespace collector
class GcPauseListener;
}  // namespace gc
class Closure;
class IsMarkedVisitor;
class RootVisitor;
class Thread;
class TimingLogger;
enum VisitRootFlags : uint8_t;

class ThreadList {
 public:
  static constexpr uint32_t kMaxThreadId = 0xFFFF;
  static constexpr uint32_t kInvalidThreadId = 0;
  static constexpr uint32_t kMainThreadId = 1;
  static constexpr uint64_t kDefaultThreadSuspendTimeout =
      kIsDebugBuild ? 2'000'000'000ull : 4'000'000'000ull;
  // We fail more aggressively in debug builds to catch potential issues early.
  // The number of times we may retry when we find ourselves in a suspend-unfriendly state.
  static constexpr int kMaxSuspendRetries = kIsDebugBuild ? 500 : 5000;
  static constexpr useconds_t kThreadSuspendSleepUs = 100;

  explicit ThreadList(uint64_t thread_suspend_timeout_ns);
  ~ThreadList();

  void ShutDown();

  // Dump stacks for all threads. This version includes some additional data.
  void DumpForSigQuit(std::ostream& os) REQUIRES(!Locks::thread_list_lock_, !Locks::mutator_lock_);

  // This version is less jank-prone if mutator_lock_ is not held.
  EXPORT void Dump(std::ostream& os, bool dump_native_stack = true)
      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);

  pid_t GetLockOwner();  // For SignalCatcher.

  // Thread suspension support.
  EXPORT void ResumeAll()
      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_)
      UNLOCK_FUNCTION(Locks::mutator_lock_);
  EXPORT bool Resume(Thread* thread, SuspendReason reason = SuspendReason::kInternal)
      REQUIRES(!Locks::thread_suspend_count_lock_) WARN_UNUSED;

  // Suspends all other threads and gets exclusive access to the mutator lock.
  // If long_suspend is true, then other threads that try to suspend will never time out.
  // long_suspend is currently used for hprof, since dumping large heaps takes a long time.
  EXPORT void SuspendAll(const char* cause, bool long_suspend = false)
      EXCLUSIVE_LOCK_FUNCTION(Locks::mutator_lock_)
      REQUIRES(!Locks::thread_list_lock_,
               !Locks::thread_suspend_count_lock_,
               !Locks::mutator_lock_);
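  // A minimal usage sketch (hypothetical caller, for illustration only). SuspendAll() and
  // ResumeAll() must be paired, and the caller must not already hold any of the excluded locks:
  //
  //   ThreadList* thread_list = ...;  // e.g. via Runtime::Current()->GetThreadList()
  //   thread_list->SuspendAll("example: walk all stacks");
  //   // All other threads are suspended here; we hold mutator_lock_ exclusively.
  //   thread_list->ResumeAll();
  //
  // The RAII wrapper ScopedSuspendAll, declared at the end of this file, is usually preferable.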
  // Suspend a thread using a peer, typically used by the debugger. Returns the thread on success,
  // else null. The peer is used to identify the thread, to avoid races with the thread
  // terminating.
  EXPORT Thread* SuspendThreadByPeer(jobject peer, SuspendReason reason)
      REQUIRES(!Locks::mutator_lock_,
               !Locks::thread_list_lock_,
               !Locks::thread_suspend_count_lock_);

  // Suspend a thread using its thread id, typically used by lock/monitor inflation. Returns the
  // thread on success, else null. The thread id is used to identify the thread, to avoid races
  // with the thread terminating. Note that since thread ids are recycled, this may suspend a
  // different thread than expected, while the expected thread is terminating. 'attempt_of_4' is
  // zero if this is the only attempt, or 1..4 to try 4 times with fractional timeouts.
  // TODO: Reconsider the use of thread_id, now that we have ThreadExitFlag.
  Thread* SuspendThreadByThreadId(uint32_t thread_id, SuspendReason reason, int attempt_of_4 = 0)
      REQUIRES(!Locks::mutator_lock_,
               !Locks::thread_list_lock_,
               !Locks::thread_suspend_count_lock_);

  // Find an existing thread (or self) by its thread id (not tid).
  EXPORT Thread* FindThreadByThreadId(uint32_t thread_id) REQUIRES(Locks::thread_list_lock_);

  // Find an existing thread (or self) by its tid (not thread id).
  Thread* FindThreadByTid(int tid) REQUIRES(Locks::thread_list_lock_);

  // Does the thread list still contain the given thread, or one at the same address?
  // Used by Monitor to provide (mostly accurate) debugging information.
  bool Contains(Thread* thread) REQUIRES(Locks::thread_list_lock_);
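  // A minimal usage sketch (hypothetical caller, for illustration only), assuming 'peer' is a
  // valid jobject referring to the target java.lang.Thread:
  //
  //   Thread* target = thread_list->SuspendThreadByPeer(peer, SuspendReason::kInternal);
  //   if (target != nullptr) {
  //     // ... inspect the suspended thread ...
  //     bool resumed = thread_list->Resume(target, SuspendReason::kInternal);
  //     CHECK(resumed);
  //   }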
  // Run a checkpoint on all threads. Return the total number of threads for which the checkpoint
  // function has been or will be called.
  //
  // Running threads are not suspended but run the checkpoint inside of the suspend check. The
  // return value includes already suspended threads, for b/24191051. Runs or requests the
  // callback, if non-null, inside the thread_list_lock critical section after capturing the list
  // of threads needing to run the checkpoint.
  //
  // Does not wait for completion of the checkpoint function in running threads.
  //
  // If the caller holds the mutator lock, or acquire_mutator_lock is true, then all instances of
  // the checkpoint function are run with the mutator lock. Otherwise, since the checkpoint code
  // may not acquire or release the mutator lock, the checkpoint will have no way to access Java
  // data.
  //
  // If acquire_mutator_lock is true, it may be acquired repeatedly to avoid holding it for an
  // extended period without checking for suspension requests.
  //
  // We capture a set of threads that simultaneously existed at one point in time, and ensure that
  // they all run the checkpoint function. We make no guarantees about threads created after this
  // set of threads was captured. If newly created threads require the effect of the checkpoint,
  // the caller may update global state indicating that this is necessary, and newly created
  // threads must act on that. It is possible that on return there will be threads which have not,
  // and will not, run the checkpoint function, and neither have/will any of their ancestors.
  //
  // We guarantee that if a thread calls RunCheckpoint() then, if at point X RunCheckpoint() has
  // returned, and all checkpoints have been properly observed to have completed (usually via a
  // barrier), then every thread has executed a code sequence S during which it remained in a
  // suspended state, such that the call to `RunCheckpoint` happens-before the end of S, and the
  // beginning of S happened-before X. Thus after a RunCheckpoint() call, no preexisting
  // thread can still be relying on global information it cached between suspend points.
  //
  // TODO: Is it possible to simplify mutator_lock handling here? Should this wait for completion?
  EXPORT size_t RunCheckpoint(Closure* checkpoint_function,
                              Closure* callback = nullptr,
                              bool allow_lock_checking = true,
                              bool acquire_mutator_lock = false)
      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);

  // Convenience version of the above that disables lock checking inside the Run function.
  // Hopefully this and the third parameter above will eventually disappear.
  size_t RunCheckpointUnchecked(Closure* checkpoint_function, Closure* callback = nullptr)
      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_) {
    return RunCheckpoint(checkpoint_function, callback, false);
  }
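  // A minimal sketch of the barrier pattern mentioned above (hypothetical closure, for
  // illustration only; assumes the usual Closure::Run(Thread*) interface and the Barrier API
  // from barrier.h):
  //
  //   class ExampleCheckpoint : public Closure {
  //    public:
  //     explicit ExampleCheckpoint(Barrier* barrier) : barrier_(barrier) {}
  //     void Run(Thread* thread) override {
  //       // Per-thread work; 'thread' is suspended or at a suspend point here.
  //       barrier_->Pass(Thread::Current());
  //     }
  //    private:
  //     Barrier* const barrier_;
  //   };
  //
  //   Barrier barrier(0);
  //   ExampleCheckpoint checkpoint(&barrier);
  //   size_t count = thread_list->RunCheckpoint(&checkpoint);
  //   barrier.Increment(Thread::Current(), count);  // Block until 'count' threads have passed.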
  // Run an empty checkpoint on threads. Wait until threads pass the next suspend point or are
  // suspended. This is used to ensure that the threads finish or aren't in the middle of an
  // in-flight mutator heap access (e.g. a read barrier). Runnable threads will respond by
  // decrementing the empty checkpoint barrier count. This works even when weak ref access is
  // disabled. Only one concurrent use is currently supported.
  // TODO(b/382722942): This is intended to guarantee the analogous memory ordering property to
  // RunCheckpoint(). It over-optimizes by always avoiding thread suspension and hence does not in
  // fact guarantee this. (See the discussion in `mutator_gc_coord.md`.) Fix this by implementing
  // this with RunCheckpoint() instead.
  void RunEmptyCheckpoint()
      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);

  // Used to flip thread roots from from-space refs to to-space refs. Used only by the concurrent
  // moving collectors during a GC, and hence cannot be called from multiple threads concurrently.
  //
  // Briefly suspends all threads to atomically install a checkpoint-like thread_flip_visitor
  // function to be run on each thread. Runs flip_callback while threads are suspended.
  // The thread_flip_visitor is run by each thread before it becomes runnable, or by us. We do
  // not return until all thread_flip_visitors have been run.
  void FlipThreadRoots(Closure* thread_flip_visitor,
                       Closure* flip_callback,
                       gc::collector::GarbageCollector* collector,
                       gc::GcPauseListener* pause_listener)
      REQUIRES(!Locks::mutator_lock_,
               !Locks::thread_list_lock_,
               !Locks::thread_suspend_count_lock_);

  // Iterates over all the threads.
  EXPORT void ForEach(void (*callback)(Thread*, void*), void* context)
      REQUIRES(Locks::thread_list_lock_);

  template<typename CallBack>
  void ForEach(CallBack cb) REQUIRES(Locks::thread_list_lock_) {
    ForEach([](Thread* t, void* ctx) REQUIRES(Locks::thread_list_lock_) {
      (*reinterpret_cast<CallBack*>(ctx))(t);
    }, &cb);
  }
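  // A minimal usage sketch for the template overload (hypothetical caller, for illustration
  // only); thread_list_lock_ must be held across the iteration:
  //
  //   {
  //     MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
  //     thread_list->ForEach([](Thread* t) {
  //       // Examine 't'; do not block or suspend while holding thread_list_lock_.
  //     });
  //   }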
  // Add/remove current thread from list.
  void Register(Thread* self)
      REQUIRES(Locks::runtime_shutdown_lock_)
      REQUIRES(!Locks::mutator_lock_,
               !Locks::thread_list_lock_,
               !Locks::thread_suspend_count_lock_);
  void Unregister(Thread* self, bool should_run_callbacks)
      REQUIRES(!Locks::mutator_lock_,
               !Locks::thread_list_lock_,
               !Locks::thread_suspend_count_lock_);

  // Wait until there are no Unregister() requests in flight. Only makes sense when we know that
  // no new calls can be made, e.g. because we're the last thread.
  void WaitForUnregisterToComplete(Thread* self) REQUIRES(Locks::thread_list_lock_);

  void VisitRoots(RootVisitor* visitor, VisitRootFlags flags) const
      REQUIRES_SHARED(Locks::mutator_lock_);

  void VisitRootsForSuspendedThreads(RootVisitor* visitor)
      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_)
      REQUIRES_SHARED(Locks::mutator_lock_);

  void VisitReflectiveTargets(ReflectiveValueVisitor* visitor) const
      REQUIRES(Locks::mutator_lock_);

  EXPORT void SweepInterpreterCaches(IsMarkedVisitor* visitor) const
      REQUIRES(Locks::mutator_lock_, !Locks::thread_list_lock_);

  void ClearInterpreterCaches() const REQUIRES(Locks::mutator_lock_, !Locks::thread_list_lock_);

  // Return a copy of the thread list.
  std::list<Thread*> GetList() REQUIRES(Locks::thread_list_lock_) {
    return list_;
  }

  size_t Size() REQUIRES(Locks::thread_list_lock_) { return list_.size(); }

  void CheckOnly1Thread(Thread* self) REQUIRES(!Locks::thread_list_lock_) {
    MutexLock mu(self, *Locks::thread_list_lock_);
    CHECK_EQ(Size(), 1u);
  }

  void DumpNativeStacks(std::ostream& os)
      REQUIRES(!Locks::thread_list_lock_);

  Barrier* EmptyCheckpointBarrier() {
    return empty_checkpoint_barrier_.get();
  }

  void WaitForOtherNonDaemonThreadsToExit(bool check_no_birth = true)
      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_,
               !Locks::mutator_lock_);

  // Wait for the suspend barrier to reach zero. Return a string possibly containing diagnostic
  // information on timeout, nothing on success. The argument t specifies a thread to monitor for
  // the diagnostic information. If 0 is passed, we return an empty string on timeout. Normally
  // the caller does not hold the mutator lock. See the comment at the call in
  // RequestSynchronousCheckpoint for the only exception.
  std::optional<std::string> WaitForSuspendBarrier(AtomicInteger* barrier,
                                                   pid_t t = 0,
                                                   int attempt_of_4 = 0)
      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);

 private:
  uint32_t AllocThreadId(Thread* self);
  void ReleaseThreadId(Thread* self, uint32_t id) REQUIRES(!Locks::allocated_thread_ids_lock_);

  void DumpUnattachedThreads(std::ostream& os, bool dump_native_stack)
      REQUIRES(!Locks::thread_list_lock_);

  void SuspendAllDaemonThreadsForShutdown()
      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);

  void ResumeAllInternal(Thread* self)
      REQUIRES(Locks::thread_list_lock_, Locks::thread_suspend_count_lock_)
      UNLOCK_FUNCTION(Locks::mutator_lock_);

  // Helper to actually suspend a single thread. This is called with thread_list_lock_ held and
  // the caller guarantees that *thread is valid until that is released. We "release the mutator
  // lock" by switching to self_state. 'attempt_of_4' is 0 if we only attempt once, and 1..4 if
  // we are going to try 4 times with a quarter of the full timeout. 'func_name' is used only to
  // identify ourselves for logging.
  bool SuspendThread(Thread* self,
                     Thread* thread,
                     SuspendReason reason,
                     ThreadState self_state,
                     const char* func_name,
                     int attempt_of_4) RELEASE(Locks::thread_list_lock_)
      RELEASE_SHARED(Locks::mutator_lock_);

  void SuspendAllInternal(Thread* self, SuspendReason reason = SuspendReason::kInternal)
      REQUIRES(!Locks::thread_list_lock_,
               !Locks::thread_suspend_count_lock_,
               !Locks::mutator_lock_);

  void AssertOtherThreadsAreSuspended(Thread* self)
      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);

  std::bitset<kMaxThreadId> allocated_ids_ GUARDED_BY(Locks::allocated_thread_ids_lock_);

  // The actual list of all threads.
  std::list<Thread*> list_ GUARDED_BY(Locks::thread_list_lock_);

  // Ongoing suspend-all requests, used to ensure threads added to list_ respect SuspendAll, and
  // to ensure that only one SuspendAll or FlipThreadRoots call is active at a time. The value is
  // always either 0 or 1. thread_suspend_count_lock_ must be held continuously while these two
  // functions modify suspend counts of all other threads and modify suspend_all_count_.
  int suspend_all_count_ GUARDED_BY(Locks::thread_suspend_count_lock_);

  // Number of threads unregistering; ~ThreadList blocks until this hits 0.
  int unregistering_count_ GUARDED_BY(Locks::thread_list_lock_);

  // Thread suspend time histogram. Only modified when all the threads are suspended, so guarding
  // by the mutator lock ensures no thread can read it while another thread is modifying it.
  Histogram<uint64_t> suspend_all_histogram_ GUARDED_BY(Locks::mutator_lock_);

  // Whether or not the current thread suspension is long.
  bool long_suspend_;

  // Whether the shutdown function has been called. This is checked in the destructor. It is an
  // error to destroy a ThreadList instance without first calling ShutDown().
  bool shut_down_;

  // Thread suspension timeout in nanoseconds.
  const uint64_t thread_suspend_timeout_ns_;

  std::unique_ptr<Barrier> empty_checkpoint_barrier_;

  friend class Thread;

  friend class Mutex;
  friend class BaseMutex;

  DISALLOW_COPY_AND_ASSIGN(ThreadList);
};

// Helper for suspending all threads and getting exclusive access to the mutator lock.
class ScopedSuspendAll : public ValueObject {
 public:
  EXPORT explicit ScopedSuspendAll(const char* cause, bool long_suspend = false)
      EXCLUSIVE_LOCK_FUNCTION(Locks::mutator_lock_)
      REQUIRES(!Locks::thread_list_lock_,
               !Locks::thread_suspend_count_lock_,
               !Locks::mutator_lock_);
  // No REQUIRES(mutator_lock_) since the unlock function already asserts this.
  EXPORT ~ScopedSuspendAll()
      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_)
      UNLOCK_FUNCTION(Locks::mutator_lock_);
};

}  // namespace art

#endif  // ART_RUNTIME_THREAD_LIST_H_