• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1// -*- C++ -*-
2//===----------------------------------------------------------------------===//
3//
4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5// See https://llvm.org/LICENSE.txt for license information.
6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//
8//===----------------------------------------------------------------------===//
9
10#ifndef _LIBCPP_BARRIER
11#define _LIBCPP_BARRIER
12
13/*
14    barrier synopsis
15
16namespace std
17{
18
19  template<class CompletionFunction = see below>
20  class barrier
21  {
22  public:
23    using arrival_token = see below;
24
25    static constexpr ptrdiff_t max() noexcept;
26
27    constexpr explicit barrier(ptrdiff_t phase_count,
28                               CompletionFunction f = CompletionFunction());
29    ~barrier();
30
31    barrier(const barrier&) = delete;
32    barrier& operator=(const barrier&) = delete;
33
34    [[nodiscard]] arrival_token arrive(ptrdiff_t update = 1);
35    void wait(arrival_token&& arrival) const;
36
37    void arrive_and_wait();
38    void arrive_and_drop();
39
40  private:
41    CompletionFunction completion; // exposition only
42  };
43
44}
45
46*/
47
48#include <__assert> // all public C++ headers provide the assertion handler
49#include <__atomic/atomic_base.h>
50#include <__atomic/memory_order.h>
51#include <__availability>
52#include <__config>
53#include <__memory/unique_ptr.h>
54#include <__thread/poll_with_backoff.h>
55#include <__thread/timed_backoff_policy.h>
56#include <__utility/move.h>
57#include <cstddef>
58#include <cstdint>
59#include <limits>
60#include <version>
61
62#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
63#  pragma GCC system_header
64#endif
65
66#ifdef _LIBCPP_HAS_NO_THREADS
67# error "<barrier> is not supported since libc++ has been configured without support for threads."
68#endif
69
70_LIBCPP_PUSH_MACROS
71#include <__undef_macros>
72
73#if _LIBCPP_STD_VER >= 14
74
75_LIBCPP_BEGIN_NAMESPACE_STD
76
77struct __empty_completion
78{
79    inline _LIBCPP_INLINE_VISIBILITY
80    void operator()() noexcept
81    {
82    }
83};
84
85#ifndef _LIBCPP_HAS_NO_TREE_BARRIER
86
87/*
88
89The default implementation of __barrier_base is a classic tree barrier.
90
91It looks different from literature pseudocode for two main reasons:
92 1. Threads that call into std::barrier functions do not provide indices,
93    so a numbering step is added before the actual barrier algorithm,
94    appearing as an N+1 round to the N rounds of the tree barrier.
95 2. A great deal of attention has been paid to avoid cache line thrashing
96    by flattening the tree structure into cache-line sized arrays, that
97    are indexed in an efficient way.
98
99*/
100
101using __barrier_phase_t = uint8_t;
102
103class __barrier_algorithm_base;
104
105_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
106__barrier_algorithm_base* __construct_barrier_algorithm_base(ptrdiff_t& __expected);
107
108_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
109bool __arrive_barrier_algorithm_base(__barrier_algorithm_base* __barrier,
110                                     __barrier_phase_t __old_phase);
111
112_LIBCPP_AVAILABILITY_SYNC _LIBCPP_EXPORTED_FROM_ABI
113void __destroy_barrier_algorithm_base(__barrier_algorithm_base* __barrier);
114
115template<class _CompletionF>
116class __barrier_base {
117    ptrdiff_t                                               __expected_;
118    unique_ptr<__barrier_algorithm_base,
119               void (*)(__barrier_algorithm_base*)>         __base_;
120    __atomic_base<ptrdiff_t>                                __expected_adjustment_;
121    _CompletionF                                            __completion_;
122    __atomic_base<__barrier_phase_t>                        __phase_;
123
124public:
125    using arrival_token = __barrier_phase_t;
126
127    static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept {
128        return numeric_limits<ptrdiff_t>::max();
129    }
130
131    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
132    __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
133            : __expected_(__expected), __base_(std::__construct_barrier_algorithm_base(this->__expected_),
134                                               &__destroy_barrier_algorithm_base),
135              __expected_adjustment_(0), __completion_(std::move(__completion)), __phase_(0)
136    {
137    }
138    [[__nodiscard__]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
139    arrival_token arrive(ptrdiff_t __update)
140    {
141        auto const __old_phase = __phase_.load(memory_order_relaxed);
142        for(; __update; --__update)
143            if(__arrive_barrier_algorithm_base(__base_.get(), __old_phase)) {
144                __completion_();
145                __expected_ += __expected_adjustment_.load(memory_order_relaxed);
146                __expected_adjustment_.store(0, memory_order_relaxed);
147                __phase_.store(__old_phase + 2, memory_order_release);
148                __phase_.notify_all();
149            }
150        return __old_phase;
151    }
152    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
153    void wait(arrival_token&& __old_phase) const
154    {
155        auto const __test_fn = [this, __old_phase]() -> bool {
156            return __phase_.load(memory_order_acquire) != __old_phase;
157        };
158        std::__libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
159    }
160    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
161    void arrive_and_drop()
162    {
163        __expected_adjustment_.fetch_sub(1, memory_order_relaxed);
164        (void)arrive(1);
165    }
166};
167
168#else
169
170/*
171
172The alternative implementation of __barrier_base is a central barrier.
173
174Two versions of this algorithm are provided:
175 1. A fairly straightforward implementation of the litterature for the
176    general case where the completion function is not empty.
177 2. An optimized implementation that exploits 2's complement arithmetic
178    and well-defined overflow in atomic arithmetic, to handle the phase
179    roll-over for free.
180
181*/
182
183template<class _CompletionF>
184class __barrier_base {
185
186    __atomic_base<ptrdiff_t> __expected;
187    __atomic_base<ptrdiff_t> __arrived;
188    _CompletionF             __completion;
189    __atomic_base<bool>      __phase;
190public:
191    using arrival_token = bool;
192
193    static constexpr ptrdiff_t max() noexcept {
194        return numeric_limits<ptrdiff_t>::max();
195    }
196
197    _LIBCPP_INLINE_VISIBILITY
198    __barrier_base(ptrdiff_t __expected, _CompletionF __completion = _CompletionF())
199        : __expected(__expected), __arrived(__expected), __completion(std::move(__completion)), __phase(false)
200    {
201    }
202    [[nodiscard]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
203    arrival_token arrive(ptrdiff_t update)
204    {
205        auto const __old_phase = __phase.load(memory_order_relaxed);
206        auto const __result = __arrived.fetch_sub(update, memory_order_acq_rel) - update;
207        auto const new_expected = __expected.load(memory_order_relaxed);
208        if(0 == __result) {
209            __completion();
210            __arrived.store(new_expected, memory_order_relaxed);
211            __phase.store(!__old_phase, memory_order_release);
212            __phase.notify_all();
213        }
214        return __old_phase;
215    }
216    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
217    void wait(arrival_token&& __old_phase) const
218    {
219        __phase.wait(__old_phase, memory_order_acquire);
220    }
221    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
222    void arrive_and_drop()
223    {
224        __expected.fetch_sub(1, memory_order_relaxed);
225        (void)arrive(1);
226    }
227};
228
229template<>
230class __barrier_base<__empty_completion> {
231
232    static constexpr uint64_t __expected_unit = 1ull;
233    static constexpr uint64_t __arrived_unit = 1ull << 32;
234    static constexpr uint64_t __expected_mask = __arrived_unit - 1;
235    static constexpr uint64_t __phase_bit = 1ull << 63;
236    static constexpr uint64_t __arrived_mask = (__phase_bit - 1) & ~__expected_mask;
237
238    __atomic_base<uint64_t>   __phase_arrived_expected;
239
240    static _LIBCPP_INLINE_VISIBILITY
241    constexpr uint64_t __init(ptrdiff_t __count) _NOEXCEPT
242    {
243        return ((uint64_t(1u << 31) - __count) << 32)
244              | (uint64_t(1u << 31) - __count);
245    }
246
247public:
248    using arrival_token = uint64_t;
249
250    static constexpr ptrdiff_t max() noexcept {
251        return ptrdiff_t(1u << 31) - 1;
252    }
253
254    _LIBCPP_INLINE_VISIBILITY
255    explicit inline __barrier_base(ptrdiff_t __count, __empty_completion = __empty_completion())
256        : __phase_arrived_expected(__init(__count))
257    {
258    }
259    [[nodiscard]] inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
260    arrival_token arrive(ptrdiff_t update)
261    {
262        auto const __inc = __arrived_unit * update;
263        auto const __old = __phase_arrived_expected.fetch_add(__inc, memory_order_acq_rel);
264        if((__old ^ (__old + __inc)) & __phase_bit) {
265            __phase_arrived_expected.fetch_add((__old & __expected_mask) << 32, memory_order_relaxed);
266            __phase_arrived_expected.notify_all();
267        }
268        return __old & __phase_bit;
269    }
270    inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
271    void wait(arrival_token&& __phase) const
272    {
273        auto const __test_fn = [=]() -> bool {
274            uint64_t const __current = __phase_arrived_expected.load(memory_order_acquire);
275            return ((__current & __phase_bit) != __phase);
276        };
277        __libcpp_thread_poll_with_backoff(__test_fn, __libcpp_timed_backoff_policy());
278    }
279    inline _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
280    void arrive_and_drop()
281    {
282        __phase_arrived_expected.fetch_add(__expected_unit, memory_order_relaxed);
283        (void)arrive(1);
284    }
285};
286
287#endif // !_LIBCPP_HAS_NO_TREE_BARRIER
288
289template<class _CompletionF = __empty_completion>
290class barrier {
291
292    __barrier_base<_CompletionF> __b_;
293public:
294    using arrival_token = typename __barrier_base<_CompletionF>::arrival_token;
295
296    static _LIBCPP_HIDE_FROM_ABI constexpr ptrdiff_t max() noexcept {
297        return __barrier_base<_CompletionF>::max();
298    }
299
300    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
301    barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
302        : __b_(__count, _VSTD::move(__completion)) {
303    }
304
305    barrier(barrier const&) = delete;
306    barrier& operator=(barrier const&) = delete;
307
308    [[__nodiscard__]] _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
309    arrival_token arrive(ptrdiff_t __update = 1)
310    {
311        return __b_.arrive(__update);
312    }
313    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
314    void wait(arrival_token&& __phase) const
315    {
316        __b_.wait(_VSTD::move(__phase));
317    }
318    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
319    void arrive_and_wait()
320    {
321        wait(arrive());
322    }
323    _LIBCPP_AVAILABILITY_SYNC _LIBCPP_INLINE_VISIBILITY
324    void arrive_and_drop()
325    {
326        __b_.arrive_and_drop();
327    }
328};
329
330_LIBCPP_END_NAMESPACE_STD
331
332#endif // _LIBCPP_STD_VER >= 14
333
334_LIBCPP_POP_MACROS
335
336#if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20
337#  include <atomic>
338#  include <concepts>
339#  include <iterator>
340#  include <memory>
341#  include <stdexcept>
342#  include <variant>
343#endif
344
345#endif //_LIBCPP_BARRIER
346