#ifndef Py_ATOMIC_H
#define Py_ATOMIC_H
#ifdef Py_BUILD_CORE

#include "dynamic_annotations.h"

#include "pyconfig.h"

#if defined(HAVE_STD_ATOMIC)
#include <stdatomic.h>
#endif


#if defined(_MSC_VER)
#include <intrin.h>
#include <immintrin.h>
#endif

/* This is modeled after the atomics interface from C1x (the draft that
 * became C11), according to the draft at
 * http://www.open-std.org/JTC1/SC22/wg14/www/docs/n1425.pdf.
 * Operations and types are named the same except with a _Py_ prefix
 * and have the same semantics.
 *
 * Beware, the implementations here are deep magic.
 */
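
/* Illustrative usage sketch; this is not part of the header's API, and the
 * variable name _example_flag is hypothetical.  The macros below take a
 * pointer to one of the _Py_atomic_* structs, typically spelled literally as
 * `&var` at the call site:
 *
 *     static _Py_atomic_int _example_flag;
 *
 *     _Py_atomic_store_explicit(&_example_flag, 1, _Py_memory_order_release);
 *     if (_Py_atomic_load_explicit(&_example_flag, _Py_memory_order_acquire)) {
 *         ... the release store above is visible here ...
 *     }
 */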

#if defined(HAVE_STD_ATOMIC)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = memory_order_relaxed,
    _Py_memory_order_acquire = memory_order_acquire,
    _Py_memory_order_release = memory_order_release,
    _Py_memory_order_acq_rel = memory_order_acq_rel,
    _Py_memory_order_seq_cst = memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    atomic_uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    atomic_int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    atomic_store_explicit(&(ATOMIC_VAL)->_value, NEW_VAL, ORDER)

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    atomic_load_explicit(&(ATOMIC_VAL)->_value, ORDER)

/* Use builtin atomic operations in GCC >= 4.7 */
#elif defined(HAVE_BUILTIN_ATOMIC)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = __ATOMIC_RELAXED,
    _Py_memory_order_acquire = __ATOMIC_ACQUIRE,
    _Py_memory_order_release = __ATOMIC_RELEASE,
    _Py_memory_order_acq_rel = __ATOMIC_ACQ_REL,
    _Py_memory_order_seq_cst = __ATOMIC_SEQ_CST
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    __atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    __atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    (assert((ORDER) == __ATOMIC_RELAXED                       \
            || (ORDER) == __ATOMIC_SEQ_CST                    \
            || (ORDER) == __ATOMIC_RELEASE),                  \
     __atomic_store_n(&(ATOMIC_VAL)->_value, NEW_VAL, ORDER))

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER)           \
    (assert((ORDER) == __ATOMIC_RELAXED                       \
            || (ORDER) == __ATOMIC_SEQ_CST                    \
            || (ORDER) == __ATOMIC_ACQUIRE                    \
            || (ORDER) == __ATOMIC_CONSUME),                  \
     __atomic_load_n(&(ATOMIC_VAL)->_value, ORDER))

/* Only support GCC (for expression statements) and x86 (for simple
 * atomic semantics) and MSVC x86/x64/ARM */
#elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64))
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;


static __inline__ void
_Py_atomic_signal_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("":::"memory");
}

static __inline__ void
_Py_atomic_thread_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("mfence":::"memory");
}

/* Tell the race checker about this operation's effects. */
static __inline__ void
_Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
{
    (void)address;              /* shut up -Wunused-parameter */
    switch(order) {
    case _Py_memory_order_release:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_BEFORE(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_acquire:
        break;
    }
    switch(order) {
    case _Py_memory_order_acquire:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_AFTER(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_release:
        break;
    }
}

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    __extension__ ({ \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) new_val = NEW_VAL; \
        volatile __typeof__(new_val) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_WRITES_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
            _Py_atomic_signal_fence(_Py_memory_order_release); \
            /* fallthrough */ \
        case _Py_memory_order_relaxed: \
            *volatile_data = new_val; \
            break; \
        \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            __asm__ volatile("xchg %0, %1" \
                             : "+r"(new_val) \
                             : "m"(atomic_val->_value) \
                             : "memory"); \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_WRITES_END(); \
    })

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    __extension__ ({ \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) result; \
        volatile __typeof__(result) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_READS_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are not releases by default, so need a */ \
            /* thread fence. */ \
            _Py_atomic_thread_fence(_Py_memory_order_release); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        result = *volatile_data; \
        switch(order) { \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are automatically acquire operations so */ \
            /* can get by with just a compiler fence. */ \
            _Py_atomic_signal_fence(_Py_memory_order_acquire); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_READS_END(); \
        result; \
    })

#elif defined(_MSC_VER)
/*  _Interlocked* functions provide a full memory barrier and are therefore
    enough for acq_rel and seq_cst. If the HLE variants aren't available
    in hardware they will fall back to a full memory barrier as well.

    This might affect performance, but likely only in some very specific and
    hard to measure scenarios.
*/
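/* Rough sketch of what the macros below boil down to on x64 (illustrative
 * only; `addr` is a hypothetical variable and the expansion is simplified):
 *
 *     _Py_atomic_address addr;
 *
 *     _Py_atomic_store_explicit(&addr, 0, _Py_memory_order_seq_cst);
 *         ~> _InterlockedExchange64((__int64 volatile*)&addr._value, 0);
 *
 *     _Py_atomic_load_explicit(&addr, _Py_memory_order_relaxed);
 *         ~> plain volatile read of addr._value
 */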
#if defined(_M_IX86) || defined(_M_X64)
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;


#if defined(_M_X64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange64_HLEAcquire((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange64_HLERelease((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
    break; \
  default: \
    _InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
    break; \
  }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange_HLEAcquire((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange_HLERelease((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
    break; \
  default: \
    _InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
    break; \
  }

#if defined(_M_X64)
/* This has to be an intptr_t for now.
   gil_created() uses -1 as a sentinel value; if this returned a uintptr_t
   the comparison would be done unsigned and crash.
*/
inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) {
    __int64 old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_HLEAcquire((volatile __int64*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_HLERelease((volatile __int64*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64((volatile __int64*)value, old, old) != old);
      break;
    }
    }
    return old;
}

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL
#endif

inline int _Py_atomic_load_32bit(volatile int* value, int order) {
    long old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_HLEAcquire((volatile long*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_HLERelease((volatile long*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange((volatile long*)value, old, old) != old);
      break;
    }
    }
    return old;
}

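/* Note: the two macros below appear to rely on the ATOMIC_VAL argument being
   written literally as `&var` at the call site, so that `ATOMIC_VAL._value`
   expands textually to `&var._value`, i.e. a pointer to the member (the `.`
   binds tighter than the `&`). */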
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  if (sizeof(*ATOMIC_VAL._value) == 8) { \
    _Py_atomic_store_64bit((volatile long long*)ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \
    _Py_atomic_store_32bit((volatile long*)ATOMIC_VAL._value, NEW_VAL, ORDER) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof(*(ATOMIC_VAL._value)) == 8 ? \
    _Py_atomic_load_64bit((volatile long long*)ATOMIC_VAL._value, ORDER) : \
    _Py_atomic_load_32bit((volatile long*)ATOMIC_VAL._value, ORDER) \
  )
#elif defined(_M_ARM) || defined(_M_ARM64)
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;


#if defined(_M_ARM64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange64_acq((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange64_rel((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
    break; \
  default: \
    _InterlockedExchange64((__int64 volatile*)ATOMIC_VAL, (__int64)NEW_VAL); \
    break; \
  }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange_acq((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange_rel((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
    break; \
  default: \
    _InterlockedExchange((volatile long*)ATOMIC_VAL, (int)NEW_VAL); \
    break; \
  }

#if defined(_M_ARM64)
/* This has to be an intptr_t for now.
   gil_created() uses -1 as a sentinel value; if this returned a uintptr_t
   the comparison would be done unsigned and crash.
*/
inline intptr_t _Py_atomic_load_64bit(volatile uintptr_t* value, int order) {
    uintptr_t old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_acq(value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_rel(value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64(value, old, old) != old);
      break;
    }
    }
    return old;
}

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) *ATOMIC_VAL
#endif

inline int _Py_atomic_load_32bit(volatile int* value, int order) {
    int old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_acq(value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_rel(value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange(value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  if (sizeof(*ATOMIC_VAL._value) == 8) { \
    _Py_atomic_store_64bit(ATOMIC_VAL._value, NEW_VAL, ORDER) } else { \
    _Py_atomic_store_32bit(ATOMIC_VAL._value, NEW_VAL, ORDER) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof(*(ATOMIC_VAL._value)) == 8 ? \
    _Py_atomic_load_64bit(ATOMIC_VAL._value, ORDER) : \
    _Py_atomic_load_32bit(ATOMIC_VAL._value, ORDER) \
  )
#endif
#else  /* !gcc x86  !_msc_ver */
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;
/* Fall back to other compilers and processors by assuming that simple
   volatile accesses are atomic. This is false, so people should port
   this. */
#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    ((ATOMIC_VAL)->_value = NEW_VAL)
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    ((ATOMIC_VAL)->_value)
#endif

/* Standardized shortcuts. */
#define _Py_atomic_store(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, _Py_memory_order_seq_cst)
#define _Py_atomic_load(ATOMIC_VAL) \
    _Py_atomic_load_explicit(ATOMIC_VAL, _Py_memory_order_seq_cst)

/* Python-local extensions */

#define _Py_atomic_store_relaxed(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, _Py_memory_order_relaxed)
#define _Py_atomic_load_relaxed(ATOMIC_VAL) \
    _Py_atomic_load_explicit(ATOMIC_VAL, _Py_memory_order_relaxed)
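
/* Illustrative example of the shortcuts above (sketch only; _example_request
 * is a hypothetical flag, not something defined by this header):
 *
 *     static _Py_atomic_int _example_request;
 *
 *     _Py_atomic_store(&_example_request, 1);            (seq_cst store)
 *     if (_Py_atomic_load_relaxed(&_example_request)) {  (cheap polling load)
 *         ...
 *     }
 */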
#endif  /* Py_BUILD_CORE */
#endif  /* Py_ATOMIC_H */