/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ART_RUNTIME_THREAD_LIST_H_
#define ART_RUNTIME_THREAD_LIST_H_

#include <bitset>
#include <list>
#include <memory>
#include <optional>
#include <string>
#include <vector>

#include "barrier.h"
#include "base/histogram.h"
#include "base/macros.h"
#include "base/mutex.h"
#include "base/value_object.h"
#include "jni.h"
#include "reflective_handle_scope.h"
#include "suspend_reason.h"
#include "thread_state.h"

namespace art HIDDEN {
namespace gc {
namespace collector {
class GarbageCollector;
}  // namespace collector
class GcPauseListener;
}  // namespace gc
class Closure;
class IsMarkedVisitor;
class RootVisitor;
class Thread;
class TimingLogger;
enum VisitRootFlags : uint8_t;

class ThreadList {
 public:
  static constexpr uint32_t kMaxThreadId = 0xFFFF;
  static constexpr uint32_t kInvalidThreadId = 0;
  static constexpr uint32_t kMainThreadId = 1;
  static constexpr uint64_t kDefaultThreadSuspendTimeout =
      kIsDebugBuild ? 2'000'000'000ull : 4'000'000'000ull;
  // We fail more aggressively in debug builds to catch potential issues early.
  // The number of times we may retry when we find ourselves in a suspend-unfriendly state.
  static constexpr int kMaxSuspendRetries = kIsDebugBuild ? 500 : 5000;
  static constexpr useconds_t kThreadSuspendSleepUs = 100;

  explicit ThreadList(uint64_t thread_suspend_timeout_ns);
  ~ThreadList();

  void ShutDown();

  // Dump stacks for all threads.
  // This version includes some additional data.
  void DumpForSigQuit(std::ostream& os) REQUIRES(!Locks::thread_list_lock_, !Locks::mutator_lock_);

  // This version is less jank-prone if mutator_lock_ is not held.
  EXPORT void Dump(std::ostream& os, bool dump_native_stack = true)
      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);

  pid_t GetLockOwner();  // For SignalCatcher.

  // Thread suspension support.
  EXPORT void ResumeAll()
      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_)
      UNLOCK_FUNCTION(Locks::mutator_lock_);
  EXPORT bool Resume(Thread* thread, SuspendReason reason = SuspendReason::kInternal)
      REQUIRES(!Locks::thread_suspend_count_lock_) WARN_UNUSED;

  // Suspends all other threads and gets exclusive access to the mutator lock.
  // If long_suspend is true, then other threads that try to suspend will never time out.
  // long_suspend is currently used for hprof, since dumping large heaps takes a long time.
  EXPORT void SuspendAll(const char* cause, bool long_suspend = false)
      EXCLUSIVE_LOCK_FUNCTION(Locks::mutator_lock_)
      REQUIRES(!Locks::thread_list_lock_,
               !Locks::thread_suspend_count_lock_,
               !Locks::mutator_lock_);
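
  // A hedged caller-side sketch (the RAII helper ScopedSuspendAll below is normally preferred
  // over calling these directly):
  //
  //   ThreadList* thread_list = Runtime::Current()->GetThreadList();
  //   thread_list->SuspendAll("Heap dump");  // Blocks until all other threads are suspended.
  //   // ... exclusive mutator access here; no other thread runs Java code ...
  //   thread_list->ResumeAll();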

  // Suspend a thread using a peer, typically used by the debugger. Returns the thread on success,
  // else null. The peer is used to identify the thread to avoid races with the thread terminating.
  EXPORT Thread* SuspendThreadByPeer(jobject peer, SuspendReason reason)
      REQUIRES(!Locks::mutator_lock_,
               !Locks::thread_list_lock_,
               !Locks::thread_suspend_count_lock_);
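
  // Expected call pattern, as a sketch only (`peer` is assumed to be a valid JNI reference to a
  // java.lang.Thread; the Resume reason must match the suspend reason):
  //
  //   Thread* target = thread_list->SuspendThreadByPeer(peer, SuspendReason::kForUserCode);
  //   if (target != nullptr) {
  //     // ... inspect the suspended thread ...
  //     bool resumed = thread_list->Resume(target, SuspendReason::kForUserCode);
  //     CHECK(resumed);
  //   }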

  // Suspend a thread using its thread id, typically used by lock/monitor inflation. Returns the
  // thread on success, else null. The thread id is used to identify the thread to avoid races with
  // the thread terminating. Note that as thread ids are recycled, this may not suspend the
  // expected thread, which may be terminating. 'attempt_of_4' is zero if this is the only
  // attempt, or 1..4 to try 4 times with fractional timeouts.
  // TODO: Reconsider the use of thread_id, now that we have ThreadExitFlag.
  Thread* SuspendThreadByThreadId(uint32_t thread_id, SuspendReason reason, int attempt_of_4 = 0)
      REQUIRES(!Locks::mutator_lock_,
               !Locks::thread_list_lock_,
               !Locks::thread_suspend_count_lock_);

  // Find an existing thread (or self) by its thread id (not tid).
  EXPORT Thread* FindThreadByThreadId(uint32_t thread_id) REQUIRES(Locks::thread_list_lock_);

  // Find an existing thread (or self) by its tid (not thread id).
  Thread* FindThreadByTid(int tid) REQUIRES(Locks::thread_list_lock_);

  // Does the thread list still contain the given thread, or one at the same address?
  // Used by Monitor to provide (mostly accurate) debugging information.
  bool Contains(Thread* thread) REQUIRES(Locks::thread_list_lock_);

  // Run a checkpoint on all threads. Return the total number of threads for which the checkpoint
  // function has been or will be called.
  //
  // Running threads are not suspended but run the checkpoint inside of the suspend check. The
  // return value includes already suspended threads for b/24191051. Runs or requests the
  // callback, if non-null, inside the thread_list_lock critical section after capturing the list
  // of threads needing to run the checkpoint.
  //
  // Does not wait for completion of the checkpoint function in running threads.
  //
  // If the caller holds the mutator lock, or acquire_mutator_lock is true, then all instances of
  // the checkpoint function are run with the mutator lock. Otherwise, since the checkpoint code
  // may not acquire or release the mutator lock, the checkpoint will have no way to access Java
  // data.
  //
  // If acquire_mutator_lock is true, it may be acquired repeatedly to avoid holding it for an
  // extended period without checking for suspension requests.
  //
  // We capture a set of threads that simultaneously existed at one point in time, and ensure that
  // they all run the checkpoint function. We make no guarantees about threads created after this
  // set of threads was captured. If newly created threads require the effect of the checkpoint,
  // the caller may update global state indicating that this is necessary, and newly created
  // threads must act on that. It is possible that on return there will be threads which have not,
  // and will not, run the checkpoint_function, and neither have, nor will, any of their ancestors.
  //
  // We guarantee that if a thread calls RunCheckpoint() then, if at point X RunCheckpoint() has
  // returned, and all checkpoints have been properly observed to have completed (usually via a
  // barrier), then every thread has executed a code sequence S during which it remained in a
  // suspended state, such that the call to `RunCheckpoint` happens-before the end of S, and the
  // beginning of S happened-before X. Thus after a RunCheckpoint() call, no preexisting
  // thread can still be relying on global information it caches between suspend points.
  //
  // TODO: Is it possible to simplify mutator_lock handling here? Should this wait for completion?
  EXPORT size_t RunCheckpoint(Closure* checkpoint_function,
                              Closure* callback = nullptr,
                              bool allow_lock_checking = true,
                              bool acquire_mutator_lock = false)
      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
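
  // Illustrative completion-waiting sketch (the closure subclass here is hypothetical, but it
  // mirrors the barrier-based pattern the guarantee above alludes to):
  //
  //   class BarrierClosure : public Closure {
  //    public:
  //     explicit BarrierClosure(Barrier* barrier) : barrier_(barrier) {}
  //     void Run(Thread* self) override { barrier_->Pass(self); }
  //    private:
  //     Barrier* const barrier_;
  //   };
  //
  //   Barrier barrier(0);
  //   BarrierClosure closure(&barrier);
  //   size_t count = thread_list->RunCheckpoint(&closure);
  //   // Block until all `count` threads have executed the closure.
  //   barrier.Increment(Thread::Current(), static_cast<int>(count));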

  // Convenience version of the above to disable lock checking inside the Run function. Hopefully
  // this and the third parameter above will eventually disappear.
  size_t RunCheckpointUnchecked(Closure* checkpoint_function, Closure* callback = nullptr)
      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_) {
    return RunCheckpoint(checkpoint_function, callback, false);
  }

  // Run an empty checkpoint on threads. Wait until threads pass the next suspend point or are
  // suspended. This is used to ensure that the threads finish or aren't in the middle of an
  // in-flight mutator heap access (e.g. a read barrier). Runnable threads will respond by
  // decrementing the empty checkpoint barrier count. This works even when weak ref access is
  // disabled. Only one concurrent use is currently supported.
  // TODO(b/382722942): This is intended to guarantee the analogous memory ordering property to
  // RunCheckpoint(). It over-optimizes by always avoiding thread suspension and hence does not in
  // fact guarantee this. (See the discussion in `mutator_gc_coord.md`.) Fix this by implementing
  // this with RunCheckpoint() instead.
  void RunEmptyCheckpoint()
      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);

  // Used to flip thread roots from from-space refs to to-space refs. Used only by the concurrent
  // moving collectors during a GC, and hence cannot be called from multiple threads concurrently.
  //
  // Briefly suspends all threads to atomically install a checkpoint-like thread_flip_visitor
  // function to be run on each thread. Runs flip_callback while threads are suspended.
  // Thread_flip_visitors are run by each thread before it becomes runnable, or by us. We do not
  // return until all thread_flip_visitors have been run.
  void FlipThreadRoots(Closure* thread_flip_visitor,
                       Closure* flip_callback,
                       gc::collector::GarbageCollector* collector,
                       gc::GcPauseListener* pause_listener)
      REQUIRES(!Locks::mutator_lock_,
               !Locks::thread_list_lock_,
               !Locks::thread_suspend_count_lock_);

  // Iterates over all the threads.
  EXPORT void ForEach(void (*callback)(Thread*, void*), void* context)
      REQUIRES(Locks::thread_list_lock_);

  template<typename CallBack>
  void ForEach(CallBack cb) REQUIRES(Locks::thread_list_lock_) {
    ForEach([](Thread* t, void* ctx) REQUIRES(Locks::thread_list_lock_) {
      (*reinterpret_cast<CallBack*>(ctx))(t);
    }, &cb);
  }
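
  // e.g., counting runnable threads (caller must hold thread_list_lock_); sketch only:
  //
  //   size_t running = 0;
  //   thread_list->ForEach([&](Thread* t) {
  //     if (t->GetState() == ThreadState::kRunnable) { ++running; }
  //   });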

  // Add/remove current thread from list.
  void Register(Thread* self)
      REQUIRES(Locks::runtime_shutdown_lock_)
      REQUIRES(!Locks::mutator_lock_,
               !Locks::thread_list_lock_,
               !Locks::thread_suspend_count_lock_);
  void Unregister(Thread* self, bool should_run_callbacks)
      REQUIRES(!Locks::mutator_lock_,
               !Locks::thread_list_lock_,
               !Locks::thread_suspend_count_lock_);

  // Wait until there are no Unregister() requests in flight. Only makes sense when we know that
  // no new calls can be made, e.g. because we're the last thread.
  void WaitForUnregisterToComplete(Thread* self) REQUIRES(Locks::thread_list_lock_);

  void VisitRoots(RootVisitor* visitor, VisitRootFlags flags) const
      REQUIRES_SHARED(Locks::mutator_lock_);

  void VisitRootsForSuspendedThreads(RootVisitor* visitor)
      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_)
      REQUIRES_SHARED(Locks::mutator_lock_);

  void VisitReflectiveTargets(ReflectiveValueVisitor* visitor) const REQUIRES(Locks::mutator_lock_);

  EXPORT void SweepInterpreterCaches(IsMarkedVisitor* visitor) const
      REQUIRES(Locks::mutator_lock_, !Locks::thread_list_lock_);

  void ClearInterpreterCaches() const REQUIRES(Locks::mutator_lock_, !Locks::thread_list_lock_);
  // Return a copy of the thread list.
  std::list<Thread*> GetList() REQUIRES(Locks::thread_list_lock_) {
    return list_;
  }

  size_t Size() REQUIRES(Locks::thread_list_lock_) { return list_.size(); }

  void CheckOnly1Thread(Thread* self) REQUIRES(!Locks::thread_list_lock_) {
    MutexLock mu(self, *Locks::thread_list_lock_);
    CHECK_EQ(Size(), 1u);
  }

  void DumpNativeStacks(std::ostream& os)
      REQUIRES(!Locks::thread_list_lock_);

  Barrier* EmptyCheckpointBarrier() {
    return empty_checkpoint_barrier_.get();
  }

  void WaitForOtherNonDaemonThreadsToExit(bool check_no_birth = true)
      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_,
               !Locks::mutator_lock_);

  // Wait for suspend barrier to reach zero. Return a string possibly containing diagnostic
  // information on timeout, nothing on success.  The argument t specifies a thread to monitor for
  // the diagnostic information. If 0 is passed, we return an empty string on timeout.  Normally
  // the caller does not hold the mutator lock. See the comment at the call in
  // RequestSynchronousCheckpoint for the only exception.
  std::optional<std::string> WaitForSuspendBarrier(AtomicInteger* barrier,
                                                   pid_t t = 0,
                                                   int attempt_of_4 = 0)
      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);

 private:
  uint32_t AllocThreadId(Thread* self);
  void ReleaseThreadId(Thread* self, uint32_t id) REQUIRES(!Locks::allocated_thread_ids_lock_);

  void DumpUnattachedThreads(std::ostream& os, bool dump_native_stack)
      REQUIRES(!Locks::thread_list_lock_);

  void SuspendAllDaemonThreadsForShutdown()
      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);

  void ResumeAllInternal(Thread* self)
      REQUIRES(Locks::thread_list_lock_, Locks::thread_suspend_count_lock_)
          UNLOCK_FUNCTION(Locks::mutator_lock_);

  // Helper to actually suspend a single thread. This is called with thread_list_lock_ held and
  // the caller guarantees that *thread is valid until that is released.  We "release the mutator
  // lock" by switching to self_state.  'attempt_of_4' is 0 if we only attempt once, and 1..4 if
  // we are going to try 4 times with a quarter of the full timeout. 'func_name' is used only to
  // identify ourselves for logging.
  bool SuspendThread(Thread* self,
                     Thread* thread,
                     SuspendReason reason,
                     ThreadState self_state,
                     const char* func_name,
                     int attempt_of_4) RELEASE(Locks::thread_list_lock_)
      RELEASE_SHARED(Locks::mutator_lock_);

  void SuspendAllInternal(Thread* self, SuspendReason reason = SuspendReason::kInternal)
      REQUIRES(!Locks::thread_list_lock_,
               !Locks::thread_suspend_count_lock_,
               !Locks::mutator_lock_);

  void AssertOtherThreadsAreSuspended(Thread* self)
      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);

  std::bitset<kMaxThreadId> allocated_ids_ GUARDED_BY(Locks::allocated_thread_ids_lock_);

  // The actual list of all threads.
  std::list<Thread*> list_ GUARDED_BY(Locks::thread_list_lock_);

  // Ongoing suspend-all requests, used to ensure threads added to list_ respect SuspendAll, and
  // to ensure that only one SuspendAll or FlipThreadRoots call is active at a time. The value is
  // always either 0 or 1. Locks::thread_suspend_count_lock_ must be held continuously while these
  // two functions modify suspend counts of all other threads and modify suspend_all_count_.
  int suspend_all_count_ GUARDED_BY(Locks::thread_suspend_count_lock_);

  // Number of threads unregistering; ~ThreadList blocks until this hits 0.
  int unregistering_count_ GUARDED_BY(Locks::thread_list_lock_);

  // Thread suspend time histogram. Only modified when all the threads are suspended, so guarding
  // by mutator lock ensures no thread can read when another thread is modifying it.
  Histogram<uint64_t> suspend_all_histogram_ GUARDED_BY(Locks::mutator_lock_);

  // Whether or not the current thread suspension is long.
  bool long_suspend_;

  // Whether the shutdown function has been called. This is checked in the destructor. It is an
  // error to destroy a ThreadList instance without first calling ShutDown().
  bool shut_down_;

  // Thread suspension timeout in nanoseconds.
  const uint64_t thread_suspend_timeout_ns_;

  std::unique_ptr<Barrier> empty_checkpoint_barrier_;

  friend class Thread;

  friend class Mutex;
  friend class BaseMutex;

  DISALLOW_COPY_AND_ASSIGN(ThreadList);
};

// Helper for suspending all threads and getting exclusive access to the mutator lock.
class ScopedSuspendAll : public ValueObject {
 public:
  EXPORT explicit ScopedSuspendAll(const char* cause, bool long_suspend = false)
     EXCLUSIVE_LOCK_FUNCTION(Locks::mutator_lock_)
     REQUIRES(!Locks::thread_list_lock_,
              !Locks::thread_suspend_count_lock_,
              !Locks::mutator_lock_);
  // No REQUIRES(mutator_lock_) since the unlock function already asserts this.
  EXPORT ~ScopedSuspendAll()
      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_)
      UNLOCK_FUNCTION(Locks::mutator_lock_);
};
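
// Typical usage, as a sketch:
//
//   {
//     ScopedSuspendAll ssa(__FUNCTION__);
//     // All other threads are suspended and the mutator lock is held exclusively here.
//   }  // The destructor resumes all threads and releases the mutator lock.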

}  // namespace art

#endif  // ART_RUNTIME_THREAD_LIST_H_