1// -*- C++ -*- 2//===----------------------------------------------------------------------===// 3// 4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5// See https://llvm.org/LICENSE.txt for license information. 6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7// 8//===----------------------------------------------------------------------===// 9 10#ifndef _LIBCPP_BARRIER 11#define _LIBCPP_BARRIER 12 13/* 14 barrier synopsis 15 16namespace std 17{ 18 19 template<class CompletionFunction = see below> 20 class barrier 21 { 22 public: 23 using arrival_token = see below; 24 25 static constexpr ptrdiff_t max() noexcept; 26 27 constexpr explicit barrier(ptrdiff_t phase_count, 28 CompletionFunction f = CompletionFunction()); 29 ~barrier(); 30 31 barrier(const barrier&) = delete; 32 barrier& operator=(const barrier&) = delete; 33 34 [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1); 35 void wait(arrival_token&& arrival) const; 36 37 void arrive_and_wait(); 38 void arrive_and_drop(); 39 40 private: 41 CompletionFunction completion; // exposition only 42 }; 43 44} 45 46*/ 47 48#include <__assert> // all public C++ headers provide the assertion handler 49#include <__atomic/atomic_base.h> 50#include <__atomic/memory_order.h> 51#include <__availability> 52#include <__config> 53#include <__memory/unique_ptr.h> 54#include <__thread/poll_with_backoff.h> 55#include <__thread/timed_backoff_policy.h> 56#include <__utility/move.h> 57#include <cstddef> 58#include <cstdint> 59#include <limits> 60#include <version> 61 62#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 63# pragma GCC system_header 64#endif 65 66#ifdef _LIBCPP_HAS_NO_THREADS 67# error "<barrier> is not supported since libc++ has been configured without support for threads." 68#endif 69 70_LIBCPP_PUSH_MACROS 71#include <__undef_macros> 72 73#if _LIBCPP_STD_VER >= 14 74 75_LIBCPP_BEGIN_NAMESPACE_STD 76 77struct __empty_completion 78{ 79 inline _LIBCPP_INLINE_VISIBILITY 80 void operator()() noexcept 81 { 82 } 83}; 84 85#ifndef _LIBCPP_HAS_NO_TREE_BARRIER 86 87/* 88 89The default implementation of __barrier_base is a classic tree barrier. 90 91It looks different from literature pseudocode for two main reasons: 92 1. Threads that call into std::barrier functions do not provide indices, 93 so a numbering step is added before the actual barrier algorithm, 94 appearing as an N+1 round to the N rounds of the tree barrier. 95 2. A great deal of attention has been paid to avoid cache line thrashing 96 by flattening the tree structure into cache-line sized arrays, that 97 are indexed in an efficient way. 98 99*/ 100 101using __barrier_phase_t = uint8_t; 102 103class __barrier_algorithm_base; 104 105_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI 106__barrier_algorithm_base* __construct_barrier_algorithm_base(ptrdiff_t& __expected); 107 108_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI 109bool __arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier, 110 __barrier_phase_t __old_phase); 111 112_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI 113void __destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier); 114 115template<class _CompletionF> 116class __barrier_base { 117 ptrdiff_t __expected_; 118 unique_ptr<__barrier_algorithm_base, 119 void (*)(__barrier_algorithm_base*)> __base_; 120 __atomic_base<ptrdiff_t> __expected_adjustment_; 121 _CompletionF __completion_; 122 __atomic_base<__barrier_phase_t> __phase_; 123 124public: 125 using arrival_token = __barrier_phase_t; 126 127 static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { 128 return numeric_limits<ptrdiff_t>::max(); 129 } 130 131 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 132 __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF()) 133 : __expected_(__expected), __base_(std::__construct_barrier_algorithm_base(this->__expected_), 134 &__destroy_barrier_algorithm_base), 135 __expected_adjustment_(0), __completion_(std::move(__completion)), __phase_(0) 136 { 137 } 138 [[__nodiscard__]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 139 arrival_token arrive(ptrdiff_t __update) 140 { 141 auto const __old_phase = __phase_.load(memory_order_relaxed); 142 for(; __update; --__update) 143 if(__arrive_barrier_algorithm_base(__base_.get(), __old_phase)) { 144 __completion_(); 145 __expected_ += __expected_adjustment_.load(memory_order_relaxed); 146 __expected_adjustment_.store(0, memory_order_relaxed); 147 __phase_.store(__old_phase + 2, memory_order_release); 148 __phase_.notify_all(); 149 } 150 return __old_phase; 151 } 152 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 153 void wait(arrival_token&& __old_phase) const 154 { 155 auto const __test_fn = [this, __old_phase]() -> bool { 156 return __phase_.load(memory_order_acquire) != __old_phase; 157 }; 158 std::__libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy()); 159 } 160 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 161 void arrive_and_drop() 162 { 163 __expected_adjustment_.fetch_sub(1, memory_order_relaxed); 164 (void)arrive(1); 165 } 166}; 167 168#else 169 170/* 171 172The alternative implementation of __barrier_base is a central barrier. 173 174Two versions of this algorithm are provided: 175 1. A fairly straightforward implementation of the litterature for the 176 general case where the completion function is not empty. 177 2. An optimized implementation that exploits 2's complement arithmetic 178 and well-defined overflow in atomic arithmetic, to handle the phase 179 roll-over for free. 180 181*/ 182 183template<class _CompletionF> 184class __barrier_base { 185 186 __atomic_base<ptrdiff_t> __expected; 187 __atomic_base<ptrdiff_t> __arrived; 188 _CompletionF __completion; 189 __atomic_base<bool> __phase; 190public: 191 using arrival_token = bool; 192 193 static constexpr ptrdiff_t max() noexcept { 194 return numeric_limits<ptrdiff_t>::max(); 195 } 196 197 _LIBCPP_INLINE_VISIBILITY 198 __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF()) 199 : __expected(__expected), __arrived(__expected), __completion(std::move(__completion)), __phase(false) 200 { 201 } 202 [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 203 arrival_token arrive(ptrdiff_t update) 204 { 205 auto const __old_phase = __phase.load(memory_order_relaxed); 206 auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update; 207 auto const new_expected = __expected.load(memory_order_relaxed); 208 if(0 == __result) { 209 __completion(); 210 __arrived.store(new_expected, memory_order_relaxed); 211 __phase.store(!__old_phase, memory_order_release); 212 __phase.notify_all(); 213 } 214 return __old_phase; 215 } 216 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 217 void wait(arrival_token&& __old_phase) const 218 { 219 __phase.wait(__old_phase, memory_order_acquire); 220 } 221 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 222 void arrive_and_drop() 223 { 224 __expected.fetch_sub(1, memory_order_relaxed); 225 (void)arrive(1); 226 } 227}; 228 229template<> 230class __barrier_base<__empty_completion> { 231 232 static constexpr uint64_t __expected_unit = 1ull; 233 static constexpr uint64_t __arrived_unit = 1ull << 32; 234 static constexpr uint64_t __expected_mask = __arrived_unit - 1; 235 static constexpr uint64_t __phase_bit = 1ull << 63; 236 static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask; 237 238 __atomic_base<uint64_t> __phase_arrived_expected; 239 240 static _LIBCPP_INLINE_VISIBILITY 241 constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT 242 { 243 return ((uint64_t(1u << 31) - __count) << 32) 244 | (uint64_t(1u << 31) - __count); 245 } 246 247public: 248 using arrival_token = uint64_t; 249 250 static constexpr ptrdiff_t max() noexcept { 251 return ptrdiff_t(1u << 31) - 1; 252 } 253 254 _LIBCPP_INLINE_VISIBILITY 255 explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion()) 256 : __phase_arrived_expected(__init(__count)) 257 { 258 } 259 [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 260 arrival_token arrive(ptrdiff_t update) 261 { 262 auto const __inc = __arrived_unit * update; 263 auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel); 264 if((__old ^ (__old + __inc)) & __phase_bit) { 265 __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed); 266 __phase_arrived_expected.notify_all(); 267 } 268 return __old & __phase_bit; 269 } 270 inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 271 void wait(arrival_token&& __phase) const 272 { 273 auto const __test_fn = [=]() -> bool { 274 uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire); 275 return ((__current & __phase_bit) != __phase); 276 }; 277 __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy()); 278 } 279 inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 280 void arrive_and_drop() 281 { 282 __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed); 283 (void)arrive(1); 284 } 285}; 286 287#endif // !_LIBCPP_HAS_NO_TREE_BARRIER 288 289template<class _CompletionF = __empty_completion> 290class barrier { 291 292 __barrier_base<_CompletionF> __b_; 293public: 294 using arrival_token = typename __barrier_base<_CompletionF>::arrival_token; 295 296 static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept { 297 return __barrier_base<_CompletionF>::max(); 298 } 299 300 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 301 barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF()) 302 : __b_(__count, _VSTD::move(__completion)) { 303 } 304 305 barrier(barrier const&) = delete; 306 barrier& operator=(barrier const&) = delete; 307 308 [[__nodiscard__]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 309 arrival_token arrive(ptrdiff_t __update = 1) 310 { 311 return __b_.arrive(__update); 312 } 313 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 314 void wait(arrival_token&& __phase) const 315 { 316 __b_.wait(_VSTD::move(__phase)); 317 } 318 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 319 void arrive_and_wait() 320 { 321 wait(arrive()); 322 } 323 _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY 324 void arrive_and_drop() 325 { 326 __b_.arrive_and_drop(); 327 } 328}; 329 330_LIBCPP_END_NAMESPACE_STD 331 332#endif // _LIBCPP_STD_VER >= 14 333 334_LIBCPP_POP_MACROS 335 336#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 337# include <atomic> 338# include <concepts> 339# include <iterator> 340# include <memory> 341# include <stdexcept> 342# include <variant> 343#endif 344 345#endif //_LIBCPP_BARRIER 346