#ifndef Py_ATOMIC_H
#define Py_ATOMIC_H
#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
#  error "this header requires Py_BUILD_CORE define"
#endif

#include "dynamic_annotations.h"   /* _Py_ANNOTATE_MEMORY_ORDER */
#include "pyconfig.h"

#ifdef HAVE_STD_ATOMIC
#  include <stdatomic.h>
#endif


#if defined(_MSC_VER)
#include <intrin.h>
#if defined(_M_IX86) || defined(_M_X64)
#  include <immintrin.h>
#endif
#endif

/* This is modeled after the atomics interface from C1x, according to
 * the draft at
 * http://www.open-std.org/JTC1/SC22/wg14/www/docs/n1425.pdf.
 * Operations and types are named the same except with a _Py_ prefix
 * and have the same semantics.
 *
 * Beware, the implementations here are deep magic.
 */
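
/* A minimal usage sketch of the explicit interface (illustrative only;
 * `flag` is a hypothetical variable, not defined in this header):
 *
 *     static _Py_atomic_int flag;
 *
 *     // Publish a value with release semantics ...
 *     _Py_atomic_store_explicit(&flag, 1, _Py_memory_order_release);
 *
 *     // ... and observe it with acquire semantics.
 *     int seen = _Py_atomic_load_explicit(&flag, _Py_memory_order_acquire);
 */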

#if defined(HAVE_STD_ATOMIC)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = memory_order_relaxed,
    _Py_memory_order_acquire = memory_order_acquire,
    _Py_memory_order_release = memory_order_release,
    _Py_memory_order_acq_rel = memory_order_acq_rel,
    _Py_memory_order_seq_cst = memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    atomic_uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    atomic_int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    atomic_store_explicit(&((ATOMIC_VAL)->_value), NEW_VAL, ORDER)

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    atomic_load_explicit(&((ATOMIC_VAL)->_value), ORDER)

// Use builtin atomic operations in GCC >= 4.7 and clang
#elif defined(HAVE_BUILTIN_ATOMIC)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = __ATOMIC_RELAXED,
    _Py_memory_order_acquire = __ATOMIC_ACQUIRE,
    _Py_memory_order_release = __ATOMIC_RELEASE,
    _Py_memory_order_acq_rel = __ATOMIC_ACQ_REL,
    _Py_memory_order_seq_cst = __ATOMIC_SEQ_CST
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    __atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    __atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    (assert((ORDER) == __ATOMIC_RELAXED                       \
            || (ORDER) == __ATOMIC_SEQ_CST                    \
            || (ORDER) == __ATOMIC_RELEASE),                  \
     __atomic_store_n(&((ATOMIC_VAL)->_value), NEW_VAL, ORDER))

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER)           \
    (assert((ORDER) == __ATOMIC_RELAXED                       \
            || (ORDER) == __ATOMIC_SEQ_CST                    \
            || (ORDER) == __ATOMIC_ACQUIRE                    \
            || (ORDER) == __ATOMIC_CONSUME),                  \
     __atomic_load_n(&((ATOMIC_VAL)->_value), ORDER))

/* Only support GCC (for expression statements) and x86 (for simple
 * atomic semantics) and MSVC x86/x64/ARM */
#elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64))
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;


static __inline__ void
_Py_atomic_signal_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("":::"memory");
}

static __inline__ void
_Py_atomic_thread_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("mfence":::"memory");
}

/* Tell the race checker about this operation's effects. */
static __inline__ void
_Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
{
    (void)address;              /* shut up -Wunused-parameter */
    switch(order) {
    case _Py_memory_order_release:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_BEFORE(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_acquire:
        break;
    }
    switch(order) {
    case _Py_memory_order_acquire:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_AFTER(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_release:
        break;
    }
}

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    __extension__ ({ \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) new_val = NEW_VAL;\
        volatile __typeof__(new_val) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_WRITES_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
            _Py_atomic_signal_fence(_Py_memory_order_release); \
            /* fallthrough */ \
        case _Py_memory_order_relaxed: \
            *volatile_data = new_val; \
            break; \
        \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            __asm__ volatile("xchg %0, %1" \
                         : "+r"(new_val) \
                         : "m"(atomic_val->_value) \
                         : "memory"); \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_WRITES_END(); \
    })

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    __extension__ ({  \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) result; \
        volatile __typeof__(result) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_READS_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are not releases by default, so need a */ \
            /* thread fence. */ \
            _Py_atomic_thread_fence(_Py_memory_order_release); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        result = *volatile_data; \
        switch(order) { \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are automatically acquire operations so */ \
            /* can get by with just a compiler fence. */ \
            _Py_atomic_signal_fence(_Py_memory_order_acquire); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_READS_END(); \
        result; \
    })

#elif defined(_MSC_VER)
/*  _Interlocked* functions provide a full memory barrier and are therefore
    enough for acq_rel and seq_cst. If the HLE variants aren't available
    in hardware they will fall back to a full memory barrier as well.

    This might affect performance, but likely only in some very specific and
    hard-to-measure scenario.
*/
#if defined(_M_IX86) || defined(_M_X64)
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;


#if defined(_M_X64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
      _InterlockedExchange64_HLEAcquire((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
      break; \
    case _Py_memory_order_release: \
      _InterlockedExchange64_HLERelease((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
      break; \
    default: \
      _InterlockedExchange64((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
      break; \
  }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange_HLEAcquire((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange_HLERelease((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
    break; \
  default: \
    _InterlockedExchange((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
    break; \
  }

#if defined(_M_X64)
/*  This has to be an intptr_t for now.
    gil_created() uses -1 as a sentinel value; if this returned
    a uintptr_t it would do an unsigned compare and crash.
*/
inline intptr_t _Py_atomic_load_64bit_impl(volatile uintptr_t* value, int order) {
    __int64 old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_HLEAcquire((volatile __int64*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_HLERelease((volatile __int64*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64((volatile __int64*)value, old, old) != old);
      break;
    }
    }
    return old;
}
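
/* A minimal sketch of the pitfall mentioned above (the variable name is
 * hypothetical, not defined in this header): with an unsigned return type a
 * sentinel check such as
 *
 *     if (_Py_atomic_load_relaxed(&gil_locked) < 0) { ... }
 *
 * could never be true, because an unsigned value is never negative;
 * returning intptr_t keeps the comparison signed.
 */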

#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_64bit_impl((volatile uintptr_t*)&((ATOMIC_VAL)->_value), (ORDER))

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) ((ATOMIC_VAL)->_value)
#endif

inline int _Py_atomic_load_32bit_impl(volatile int* value, int order) {
    long old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_HLEAcquire((volatile long*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_HLERelease((volatile long*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange((volatile long*)value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_32bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_32bit_impl((volatile int*)&((ATOMIC_VAL)->_value), (ORDER))

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  if (sizeof((ATOMIC_VAL)->_value) == 8) { \
    _Py_atomic_store_64bit((ATOMIC_VAL), NEW_VAL, ORDER) } else { \
    _Py_atomic_store_32bit((ATOMIC_VAL), NEW_VAL, ORDER) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof((ATOMIC_VAL)->_value) == 8 ? \
    _Py_atomic_load_64bit((ATOMIC_VAL), ORDER) : \
    _Py_atomic_load_32bit((ATOMIC_VAL), ORDER) \
  )
#elif defined(_M_ARM) || defined(_M_ARM64)
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;


#if defined(_M_ARM64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
      _InterlockedExchange64_acq((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
      break; \
    case _Py_memory_order_release: \
      _InterlockedExchange64_rel((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
      break; \
    default: \
      _InterlockedExchange64((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
      break; \
  }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange_acq((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange_rel((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
    break; \
  default: \
    _InterlockedExchange((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
    break; \
  }

#if defined(_M_ARM64)
/*  This has to be an intptr_t for now.
    gil_created() uses -1 as a sentinel value; if this returned
    a uintptr_t it would do an unsigned compare and crash.
*/
inline intptr_t _Py_atomic_load_64bit_impl(volatile uintptr_t* value, int order) {
    uintptr_t old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_acq(value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_rel(value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64(value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_64bit_impl((volatile uintptr_t*)&((ATOMIC_VAL)->_value), (ORDER))

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) ((ATOMIC_VAL)->_value)
#endif

inline int _Py_atomic_load_32bit_impl(volatile int* value, int order) {
    int old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_acq(value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_rel(value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange(value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_32bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_32bit_impl((volatile int*)&((ATOMIC_VAL)->_value), (ORDER))

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  if (sizeof((ATOMIC_VAL)->_value) == 8) { \
    _Py_atomic_store_64bit((ATOMIC_VAL), (NEW_VAL), (ORDER)) } else { \
    _Py_atomic_store_32bit((ATOMIC_VAL), (NEW_VAL), (ORDER)) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof((ATOMIC_VAL)->_value) == 8 ? \
    _Py_atomic_load_64bit((ATOMIC_VAL), (ORDER)) : \
    _Py_atomic_load_32bit((ATOMIC_VAL), (ORDER)) \
  )
#endif
#else  /* !gcc x86  !_msc_ver */
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;
/* Fall back to other compilers and processors by assuming that simple
   volatile accesses are atomic.  This is false, so people should port
   this. */
#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    ((ATOMIC_VAL)->_value = NEW_VAL)
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    ((ATOMIC_VAL)->_value)
#endif

/* Standardized shortcuts. */
#define _Py_atomic_store(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit((ATOMIC_VAL), (NEW_VAL), _Py_memory_order_seq_cst)
#define _Py_atomic_load(ATOMIC_VAL) \
    _Py_atomic_load_explicit((ATOMIC_VAL), _Py_memory_order_seq_cst)

/* Python-local extensions */

#define _Py_atomic_store_relaxed(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit((ATOMIC_VAL), (NEW_VAL), _Py_memory_order_relaxed)
#define _Py_atomic_load_relaxed(ATOMIC_VAL) \
    _Py_atomic_load_explicit((ATOMIC_VAL), _Py_memory_order_relaxed)
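
/* A minimal usage sketch of the shortcuts (illustrative only; `request` is a
 * hypothetical flag, not defined in this header):
 *
 *     static _Py_atomic_int request;
 *
 *     _Py_atomic_store(&request, 1);               // seq_cst store
 *     int seen = _Py_atomic_load(&request);        // seq_cst load
 *
 *     _Py_atomic_store_relaxed(&request, 0);       // no ordering guarantees
 *     int raw = _Py_atomic_load_relaxed(&request);
 */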

#ifdef __cplusplus
}
#endif
#endif  /* Py_ATOMIC_H */