//===-- llvm/Support/Threading.h - Control multithreading mode --*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file declares helper functions for running LLVM in a multi-threaded
// environment.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_SUPPORT_THREADING_H
#define LLVM_SUPPORT_THREADING_H

#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/FunctionExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Config/llvm-config.h" // for LLVM_ON_UNIX
#include "llvm/Support/Compiler.h"
#include <ciso646> // So we can check the C++ standard lib macros.
#include <functional>

#if defined(_MSC_VER)
// MSVC's call_once implementation has worked since VS 2015, which is the
// minimum supported version as of this writing.
#define LLVM_THREADING_USE_STD_CALL_ONCE 1
#elif defined(LLVM_ON_UNIX) &&                                                 \
    (defined(_LIBCPP_VERSION) ||                                               \
     !(defined(__NetBSD__) || defined(__OpenBSD__) ||                          \
       (defined(__ppc__) || defined(__PPC__))))
// std::call_once from libc++ is used on all Unix platforms. Other
// implementations like libstdc++ are known to have problems on NetBSD,
// OpenBSD and PowerPC.
#define LLVM_THREADING_USE_STD_CALL_ONCE 1
#elif defined(LLVM_ON_UNIX) &&                                                 \
    ((defined(__ppc__) || defined(__PPC__)) && defined(__LITTLE_ENDIAN__))
#define LLVM_THREADING_USE_STD_CALL_ONCE 1
#else
#define LLVM_THREADING_USE_STD_CALL_ONCE 0
#endif

#if LLVM_THREADING_USE_STD_CALL_ONCE
#include <mutex>
#else
#include "llvm/Support/Atomic.h"
#endif

namespace llvm {
class Twine;

/// Returns true if LLVM is compiled with support for multi-threading, and
/// false otherwise.
bool llvm_is_multithreaded();

/// Execute the given \p UserFn on a separate thread, passing it the provided
/// \p UserData, and wait for the thread to complete.
///
/// This function does not guarantee that the code will actually be executed
/// on a separate thread or that the requested stack size will be honored, but
/// it tries to do so where system support is available.
///
/// \param UserFn - The callback to execute.
/// \param UserData - An argument to pass to the callback function.
/// \param StackSizeInBytes - A requested size (in bytes) for the thread stack
/// (or None for default).
void llvm_execute_on_thread(
    void (*UserFn)(void *), void *UserData,
    llvm::Optional<unsigned> StackSizeInBytes = llvm::None);
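
// Usage sketch (illustrative only; `doWork` and `Result` are hypothetical
// names, not part of this interface): run a callback on a helper thread with
// a requested 8 MiB stack and block until it finishes.
//
//   static void doWork(void *Arg) {
//     *static_cast<int *>(Arg) = 42;
//   }
//   ...
//   int Result = 0;
//   llvm_execute_on_thread(doWork, &Result, 8 * 1024 * 1024);
//   // The call has returned, so Result has been written by the callback.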

/// Schedule the given \p Func for execution on a separate thread, then return
/// to the caller immediately. Roughly equivalent to
/// `std::thread(Func).detach()`, except it allows requesting a specific stack
/// size, if supported for the platform.
///
/// This function reports a fatal error if it can't execute the code
/// on a separate thread.
///
/// \param Func - The callback to execute.
/// \param StackSizeInBytes - A requested size (in bytes) for the thread stack
/// (or None for default).
void llvm_execute_on_thread_async(
    llvm::unique_function<void()> Func,
    llvm::Optional<unsigned> StackSizeInBytes = llvm::None);

#if LLVM_THREADING_USE_STD_CALL_ONCE

typedef std::once_flag once_flag;

#else

enum InitStatus { Uninitialized = 0, Wait = 1, Done = 2 };

/// The llvm::once_flag structure
///
/// This type is modeled after std::once_flag for use with llvm::call_once.
/// This structure must be used as an opaque object. It is a struct to force
/// auto-initialization and to behave like std::once_flag.
struct once_flag {
  volatile sys::cas_flag status = Uninitialized;
};

#endif

/// Execute the function specified as a parameter once.
///
/// Typical usage:
/// \code
///   void foo() {...};
///   ...
///   static once_flag flag;
///   call_once(flag, foo);
/// \endcode
///
/// \param flag Flag used for tracking whether or not this has run.
/// \param F Function to call once.
template <typename Function, typename... Args>
void call_once(once_flag &flag, Function &&F, Args &&... ArgList) {
#if LLVM_THREADING_USE_STD_CALL_ONCE
  std::call_once(flag, std::forward<Function>(F),
                 std::forward<Args>(ArgList)...);
#else
  // For other platforms we use a generic (if brittle) version based on our
  // atomics.
  sys::cas_flag old_val = sys::CompareAndSwap(&flag.status, Wait, Uninitialized);
  if (old_val == Uninitialized) {
    std::forward<Function>(F)(std::forward<Args>(ArgList)...);
    sys::MemoryFence();
    TsanIgnoreWritesBegin();
    TsanHappensBefore(&flag.status);
    flag.status = Done;
    TsanIgnoreWritesEnd();
  } else {
    // Wait until any thread doing the call has finished.
    sys::cas_flag tmp = flag.status;
    sys::MemoryFence();
    while (tmp != Done) {
      tmp = flag.status;
      sys::MemoryFence();
    }
  }
  TsanHappensAfter(&flag.status);
#endif
}

/// This tells how a thread pool will be used.
class ThreadPoolStrategy {
public:
  // The default value (0) means all available threads should be used,
  // taking the affinity mask into account. If set, this value only represents
  // a suggested high bound; the runtime might choose a lower value (not
  // higher).
  unsigned ThreadsRequested = 0;

  // If true and SMT is active, use hyper threads. If false, there will be
  // only one std::thread per core.
  bool UseHyperThreads = true;

  // If set, will constrain 'ThreadsRequested' to the number of hardware
  // threads, or hardware cores.
  bool Limit = false;

  /// Retrieves the max available threads for the current strategy. This
  /// accounts for affinity masks and takes advantage of all CPU sockets.
  unsigned compute_thread_count() const;

  /// Assign the current thread to an ideal hardware CPU or NUMA node. In a
  /// multi-socket system, this ensures threads are assigned to all CPU
  /// sockets. \p ThreadPoolNum represents a number bounded by [0,
  /// compute_thread_count()).
  void apply_thread_strategy(unsigned ThreadPoolNum) const;

  /// Finds the CPU socket where a thread should go. Returns 'None' if the
  /// thread shall remain on the current CPU socket.
  Optional<unsigned> compute_cpu_socket(unsigned ThreadPoolNum) const;
};
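
// Usage sketch (illustrative only; the worker loop is hypothetical and
// elided): configure a strategy by hand, query how many worker threads it
// yields, and let each worker pin itself.
//
//   ThreadPoolStrategy S;
//   S.UseHyperThreads = false; // one std::thread per physical core
//   S.ThreadsRequested = 8;    // suggested upper bound; may be reduced
//   unsigned ThreadCount = S.compute_thread_count();
//   for (unsigned I = 0; I < ThreadCount; ++I) {
//     // Spawn worker I; inside the worker, bind it to a socket/NUMA node:
//     //   S.apply_thread_strategy(I);
//   }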

/// Build a strategy from a number of threads as a string provided in \p Num.
/// When Num is above the max number of threads specified by the \p Default
/// strategy, we attempt to equally allocate the threads on all CPU sockets.
/// "0" or an empty string will return the \p Default strategy.
/// "all" uses all hardware threads.
Optional<ThreadPoolStrategy>
get_threadpool_strategy(StringRef Num, ThreadPoolStrategy Default = {});

/// Returns a thread strategy for tasks requiring significant memory or other
/// resources. To be used for workloads where hardware_concurrency() proves to
/// be less efficient. Avoid this strategy if doing lots of I/O. Currently
/// based on physical cores, if available for the host system; otherwise falls
/// back to hardware_concurrency(). Returns 1 when LLVM is configured with
/// LLVM_ENABLE_THREADS = OFF.
inline ThreadPoolStrategy
heavyweight_hardware_concurrency(unsigned ThreadCount = 0) {
  ThreadPoolStrategy S;
  S.UseHyperThreads = false;
  S.ThreadsRequested = ThreadCount;
  return S;
}

/// Like heavyweight_hardware_concurrency() above, but builds a strategy
/// based on the rules described for get_threadpool_strategy().
/// If \p Num is invalid, returns a default strategy where one thread per
/// hardware core is used.
inline ThreadPoolStrategy heavyweight_hardware_concurrency(StringRef Num) {
  Optional<ThreadPoolStrategy> S =
      get_threadpool_strategy(Num, heavyweight_hardware_concurrency());
  if (S)
    return *S;
  return heavyweight_hardware_concurrency();
}

/// Returns a default thread strategy where all available hardware resources
/// are to be used, except for those initially excluded by an affinity mask.
/// This function takes affinity into consideration. Returns 1 when LLVM is
/// configured with LLVM_ENABLE_THREADS=OFF.
inline ThreadPoolStrategy hardware_concurrency(unsigned ThreadCount = 0) {
  ThreadPoolStrategy S;
  S.ThreadsRequested = ThreadCount;
  return S;
}

/// Returns an optimal thread strategy to execute a specified number of tasks.
/// This strategy should prevent us from creating too many threads if we
/// occasionally have an unexpectedly small number of tasks.
inline ThreadPoolStrategy optimal_concurrency(unsigned TaskCount = 0) {
  ThreadPoolStrategy S;
  S.Limit = true;
  S.ThreadsRequested = TaskCount;
  return S;
}

/// Return the current thread id, as used in various OS system calls.
/// Note that not all platforms guarantee that the value returned will be
/// unique across the entire system, so portable code should not assume
/// this.
uint64_t get_threadid();

/// Get the maximum length of a thread name on this platform.
/// A value of 0 means there is no limit.
uint32_t get_max_thread_name_length();

/// Set the name of the current thread. Setting a thread's name can
/// be helpful for enabling useful diagnostics under a debugger or when
/// logging. The level of support for setting a thread's name varies
/// wildly across operating systems, and we only make a best effort to
/// perform the operation on supported platforms. No indication of success
/// or failure is returned.
void set_thread_name(const Twine &Name);

/// Get the name of the current thread. The level of support for
/// getting a thread's name varies wildly across operating systems, and it
/// is not even guaranteed that if you can successfully set a thread's name
/// you can later get it back. This function is intended for diagnostic
/// purposes, and as with setting a thread's name, no indication of whether
/// the operation succeeded or failed is returned.
void get_thread_name(SmallVectorImpl<char> &Name);

/// Returns a mask that represents the hardware threads, cores, CPUs, or NUMA
/// groups on which the calling thread can be executed. On Windows, threads
/// cannot cross CPU socket boundaries.
llvm::BitVector get_thread_affinity_mask();

/// Returns how many physical CPUs or NUMA groups the system has.
unsigned get_cpus();

enum class ThreadPriority {
  Background = 0,
  Default = 1,
};

enum class SetThreadPriorityResult { FAILURE, SUCCESS };

/// If \p Priority is Background, tries to lower the current thread's priority
/// so that it does not affect foreground tasks significantly. Can be used for
/// long-running, latency-insensitive tasks to make sure the CPU is not hogged
/// by this task.
/// If \p Priority is Default, tries to restore the current thread's priority
/// to the default scheduling priority.
SetThreadPriorityResult set_thread_priority(ThreadPriority Priority);
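
// Usage sketch (illustrative only; the worker function and thread name are
// hypothetical): a long-running background task names itself for debuggers
// and logs, and requests a lower scheduling priority.
//
//   void backgroundWorkLoop() {
//     set_thread_name("my-background-worker");
//     if (set_thread_priority(ThreadPriority::Background) ==
//         SetThreadPriorityResult::FAILURE) {
//       // Best effort only; continue at the default priority.
//     }
//     // ... latency-insensitive work ...
//     set_thread_priority(ThreadPriority::Default);
//   }
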
}

#endif