#ifndef Py_ATOMIC_H
#define Py_ATOMIC_H
#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
#  error "this header requires Py_BUILD_CORE define"
#endif

#include "dynamic_annotations.h"   /* _Py_ANNOTATE_MEMORY_ORDER */
#include "pyconfig.h"

#ifdef HAVE_STD_ATOMIC
#  include <stdatomic.h>
#endif


#if defined(_MSC_VER)
#include <intrin.h>
#if defined(_M_IX86) || defined(_M_X64)
#  include <immintrin.h>
#endif
#endif

/* This is modeled after the atomics interface from C1x, according to
 * the draft at
 * http://www.open-std.org/JTC1/SC22/wg14/www/docs/n1425.pdf.
 * Operations and types are named the same except with a _Py_ prefix
 * and have the same semantics.
 *
 * Beware, the implementations here are deep magic.
 */
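/* Illustrative usage sketch (not part of this header): the explicit
 * load/store macros defined below take one of the _Py_memory_order values.
 * The flag and function names here are hypothetical.
 *
 *     static _Py_atomic_int pending_flag;
 *
 *     static void set_pending(void)
 *     {
 *         _Py_atomic_store_explicit(&pending_flag, 1,
 *                                   _Py_memory_order_release);
 *     }
 *
 *     static int is_pending(void)
 *     {
 *         return _Py_atomic_load_explicit(&pending_flag,
 *                                         _Py_memory_order_acquire);
 *     }
 */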
#if defined(HAVE_STD_ATOMIC)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = memory_order_relaxed,
    _Py_memory_order_acquire = memory_order_acquire,
    _Py_memory_order_release = memory_order_release,
    _Py_memory_order_acq_rel = memory_order_acq_rel,
    _Py_memory_order_seq_cst = memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    atomic_uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    atomic_int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    atomic_store_explicit(&((ATOMIC_VAL)->_value), NEW_VAL, ORDER)

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    atomic_load_explicit(&((ATOMIC_VAL)->_value), ORDER)

// Use builtin atomic operations in GCC >= 4.7 and clang
#elif defined(HAVE_BUILTIN_ATOMIC)

typedef enum _Py_memory_order {
    _Py_memory_order_relaxed = __ATOMIC_RELAXED,
    _Py_memory_order_acquire = __ATOMIC_ACQUIRE,
    _Py_memory_order_release = __ATOMIC_RELEASE,
    _Py_memory_order_acq_rel = __ATOMIC_ACQ_REL,
    _Py_memory_order_seq_cst = __ATOMIC_SEQ_CST
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;

#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) \
    __atomic_signal_fence(ORDER)

#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) \
    __atomic_thread_fence(ORDER)

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER)  \
    (assert((ORDER) == __ATOMIC_RELAXED                         \
            || (ORDER) == __ATOMIC_SEQ_CST                      \
            || (ORDER) == __ATOMIC_RELEASE),                    \
     __atomic_store_n(&((ATOMIC_VAL)->_value), NEW_VAL, ORDER))

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER)             \
    (assert((ORDER) == __ATOMIC_RELAXED                         \
            || (ORDER) == __ATOMIC_SEQ_CST                      \
            || (ORDER) == __ATOMIC_ACQUIRE                      \
            || (ORDER) == __ATOMIC_CONSUME),                    \
     __atomic_load_n(&((ATOMIC_VAL)->_value), ORDER))

/* Only support GCC (for expression statements) and x86 (for simple
 * atomic semantics) and MSVC x86/x64/ARM */
#elif defined(__GNUC__) && (defined(__i386__) || defined(__amd64))
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;


static __inline__ void
_Py_atomic_signal_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("":::"memory");
}

static __inline__ void
_Py_atomic_thread_fence(_Py_memory_order order)
{
    if (order != _Py_memory_order_relaxed)
        __asm__ volatile("mfence":::"memory");
}

/* Tell the race checker about this operation's effects. */
static __inline__ void
_Py_ANNOTATE_MEMORY_ORDER(const volatile void *address, _Py_memory_order order)
{
    (void)address;              /* shut up -Wunused-parameter */
    switch(order) {
    case _Py_memory_order_release:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_BEFORE(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_acquire:
        break;
    }
    switch(order) {
    case _Py_memory_order_acquire:
    case _Py_memory_order_acq_rel:
    case _Py_memory_order_seq_cst:
        _Py_ANNOTATE_HAPPENS_AFTER(address);
        break;
    case _Py_memory_order_relaxed:
    case _Py_memory_order_release:
        break;
    }
}

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    __extension__ ({ \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) new_val = NEW_VAL;\
        volatile __typeof__(new_val) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_WRITES_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
            _Py_atomic_signal_fence(_Py_memory_order_release); \
            /* fallthrough */ \
        case _Py_memory_order_relaxed: \
            *volatile_data = new_val; \
            break; \
        \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            __asm__ volatile("xchg %0, %1" \
                             : "+r"(new_val) \
                             : "m"(atomic_val->_value) \
                             : "memory"); \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_WRITES_END(); \
    })

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    __extension__ ({ \
        __typeof__(ATOMIC_VAL) atomic_val = ATOMIC_VAL; \
        __typeof__(atomic_val->_value) result; \
        volatile __typeof__(result) *volatile_data = &atomic_val->_value; \
        _Py_memory_order order = ORDER; \
        _Py_ANNOTATE_MEMORY_ORDER(atomic_val, order); \
        \
        /* Perform the operation. */ \
        _Py_ANNOTATE_IGNORE_READS_BEGIN(); \
        switch(order) { \
        case _Py_memory_order_release: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are not releases by default, so need a */ \
            /* thread fence. */ \
            _Py_atomic_thread_fence(_Py_memory_order_release); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        result = *volatile_data; \
        switch(order) { \
        case _Py_memory_order_acquire: \
        case _Py_memory_order_acq_rel: \
        case _Py_memory_order_seq_cst: \
            /* Loads on x86 are automatically acquire operations so */ \
            /* can get by with just a compiler fence. */ \
            _Py_atomic_signal_fence(_Py_memory_order_acquire); \
            break; \
        default: \
            /* No fence */ \
            break; \
        } \
        _Py_ANNOTATE_IGNORE_READS_END(); \
        result; \
    })
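/* For comparison only, a rough <stdatomic.h> equivalent of the
 * _Py_atomic_store_explicit paths in the GCC/x86 branch above (a sketch;
 * this branch is used precisely when HAVE_STD_ATOMIC is not defined, so it
 * is shown for illustration, not for use here):
 *
 *     // seq_cst store: the xchg above corresponds to an atomic exchange,
 *     // which doubles as a full barrier on x86.
 *     atomic_exchange_explicit(&obj, new_val, memory_order_seq_cst);
 *
 *     // release or relaxed store: a plain MOV is already sufficient on
 *     // x86, hence the simple volatile assignment in those cases.
 *     atomic_store_explicit(&obj, new_val, memory_order_release);
 */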
#elif defined(_MSC_VER)
/*  _Interlocked* functions provide a full memory barrier and are therefore
    enough for acq_rel and seq_cst. If the HLE variants aren't available
    in hardware they will fall back to a full memory barrier as well.

    This might affect performance, but likely only in some very specific and
    hard-to-measure scenario.
*/
#if defined(_M_IX86) || defined(_M_X64)
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;


#if defined(_M_X64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
    switch (ORDER) { \
    case _Py_memory_order_acquire: \
      _InterlockedExchange64_HLEAcquire((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
      break; \
    case _Py_memory_order_release: \
      _InterlockedExchange64_HLERelease((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
      break; \
    default: \
      _InterlockedExchange64((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)(NEW_VAL)); \
      break; \
  }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange_HLEAcquire((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange_HLERelease((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
    break; \
  default: \
    _InterlockedExchange((volatile long*)&((ATOMIC_VAL)->_value), (int)(NEW_VAL)); \
    break; \
  }
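/* Note (illustrative, not part of the original comments): on this path even
 * a relaxed store falls through to the default case and is issued as an
 * _InterlockedExchange, i.e. a full-barrier read-modify-write.  For example,
 * a hypothetical call
 *
 *     _Py_atomic_store_32bit(&some_atomic_int, 1, _Py_memory_order_relaxed);
 *
 * still expands to _InterlockedExchange(...), unlike the GCC x86 branch
 * above, where a relaxed store is a plain volatile assignment.
 */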
#if defined(_M_X64)
/*  This has to be an intptr_t for now.
    gil_created() uses -1 as a sentinel value; if this returned a uintptr_t
    it would do an unsigned compare and crash.
*/
inline intptr_t _Py_atomic_load_64bit_impl(volatile uintptr_t* value, int order) {
    __int64 old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_HLEAcquire((volatile __int64*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_HLERelease((volatile __int64*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64((volatile __int64*)value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_64bit_impl((volatile uintptr_t*)&((ATOMIC_VAL)->_value), (ORDER))

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) ((ATOMIC_VAL)->_value)
#endif

inline int _Py_atomic_load_32bit_impl(volatile int* value, int order) {
    long old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_HLEAcquire((volatile long*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_HLERelease((volatile long*)value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange((volatile long*)value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_32bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_32bit_impl((volatile int*)&((ATOMIC_VAL)->_value), (ORDER))

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  if (sizeof((ATOMIC_VAL)->_value) == 8) { \
    _Py_atomic_store_64bit((ATOMIC_VAL), NEW_VAL, ORDER) } else { \
    _Py_atomic_store_32bit((ATOMIC_VAL), NEW_VAL, ORDER) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof((ATOMIC_VAL)->_value) == 8 ? \
    _Py_atomic_load_64bit((ATOMIC_VAL), ORDER) : \
    _Py_atomic_load_32bit((ATOMIC_VAL), ORDER) \
  )
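/* Illustrative only (these variables are hypothetical): on x64 the sizeof
 * dispatch above routes _Py_atomic_address (an 8-byte uintptr_t) through the
 * 64-bit helpers and _Py_atomic_int (a 4-byte int) through the 32-bit ones:
 *
 *     _Py_atomic_address addr_slot;
 *     _Py_atomic_int     flag;
 *
 *     uintptr_t p = _Py_atomic_load_explicit(&addr_slot,
 *                                            _Py_memory_order_acquire);
 *     int       f = _Py_atomic_load_explicit(&flag,
 *                                            _Py_memory_order_relaxed);
 */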
#elif defined(_M_ARM) || defined(_M_ARM64)
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    volatile uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    volatile int _value;
} _Py_atomic_int;


#if defined(_M_ARM64)
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange64_acq((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange64_rel((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
    break; \
  default: \
    _InterlockedExchange64((__int64 volatile*)&((ATOMIC_VAL)->_value), (__int64)NEW_VAL); \
    break; \
  }
#else
#define _Py_atomic_store_64bit(ATOMIC_VAL, NEW_VAL, ORDER) ((void)0);
#endif

#define _Py_atomic_store_32bit(ATOMIC_VAL, NEW_VAL, ORDER) \
  switch (ORDER) { \
  case _Py_memory_order_acquire: \
    _InterlockedExchange_acq((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
    break; \
  case _Py_memory_order_release: \
    _InterlockedExchange_rel((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
    break; \
  default: \
    _InterlockedExchange((volatile long*)&((ATOMIC_VAL)->_value), (int)NEW_VAL); \
    break; \
  }

#if defined(_M_ARM64)
/*  This has to be an intptr_t for now.
    gil_created() uses -1 as a sentinel value; if this returned a uintptr_t
    it would do an unsigned compare and crash.
*/
inline intptr_t _Py_atomic_load_64bit_impl(volatile uintptr_t* value, int order) {
    uintptr_t old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_acq(value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64_rel(value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange64(value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_64bit_impl((volatile uintptr_t*)&((ATOMIC_VAL)->_value), (ORDER))

#else
#define _Py_atomic_load_64bit(ATOMIC_VAL, ORDER) ((ATOMIC_VAL)->_value)
#endif

inline int _Py_atomic_load_32bit_impl(volatile int* value, int order) {
    int old;
    switch (order) {
    case _Py_memory_order_acquire:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_acq(value, old, old) != old);
      break;
    }
    case _Py_memory_order_release:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange_rel(value, old, old) != old);
      break;
    }
    case _Py_memory_order_relaxed:
      old = *value;
      break;
    default:
    {
      do {
        old = *value;
      } while(_InterlockedCompareExchange(value, old, old) != old);
      break;
    }
    }
    return old;
}

#define _Py_atomic_load_32bit(ATOMIC_VAL, ORDER) \
    _Py_atomic_load_32bit_impl((volatile int*)&((ATOMIC_VAL)->_value), (ORDER))

#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
  if (sizeof((ATOMIC_VAL)->_value) == 8) { \
    _Py_atomic_store_64bit((ATOMIC_VAL), (NEW_VAL), (ORDER)) } else { \
    _Py_atomic_store_32bit((ATOMIC_VAL), (NEW_VAL), (ORDER)) }

#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
  ( \
    sizeof((ATOMIC_VAL)->_value) == 8 ? \
    _Py_atomic_load_64bit((ATOMIC_VAL), (ORDER)) : \
    _Py_atomic_load_32bit((ATOMIC_VAL), (ORDER)) \
  )
#endif
#else  /* !gcc x86  !_msc_ver */
typedef enum _Py_memory_order {
    _Py_memory_order_relaxed,
    _Py_memory_order_acquire,
    _Py_memory_order_release,
    _Py_memory_order_acq_rel,
    _Py_memory_order_seq_cst
} _Py_memory_order;

typedef struct _Py_atomic_address {
    uintptr_t _value;
} _Py_atomic_address;

typedef struct _Py_atomic_int {
    int _value;
} _Py_atomic_int;
/* Fall back to other compilers and processors by assuming that simple
   volatile accesses are atomic.  This is false, so people should port
   this. */
#define _Py_atomic_signal_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_thread_fence(/*memory_order*/ ORDER) ((void)0)
#define _Py_atomic_store_explicit(ATOMIC_VAL, NEW_VAL, ORDER) \
    ((ATOMIC_VAL)->_value = NEW_VAL)
#define _Py_atomic_load_explicit(ATOMIC_VAL, ORDER) \
    ((ATOMIC_VAL)->_value)
#endif

/* Standardized shortcuts. */
#define _Py_atomic_store(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit((ATOMIC_VAL), (NEW_VAL), _Py_memory_order_seq_cst)
#define _Py_atomic_load(ATOMIC_VAL) \
    _Py_atomic_load_explicit((ATOMIC_VAL), _Py_memory_order_seq_cst)

/* Python-local extensions */

#define _Py_atomic_store_relaxed(ATOMIC_VAL, NEW_VAL) \
    _Py_atomic_store_explicit((ATOMIC_VAL), (NEW_VAL), _Py_memory_order_relaxed)
#define _Py_atomic_load_relaxed(ATOMIC_VAL) \
    _Py_atomic_load_explicit((ATOMIC_VAL), _Py_memory_order_relaxed)
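/* Usage sketch for the shortcuts above (illustrative only; the variable and
 * pattern are hypothetical).  A common arrangement is a relaxed load on the
 * hot path and a sequentially consistent store from whichever thread sets
 * the value:
 *
 *     static _Py_atomic_int request_flag;
 *
 *     // hot path: cheap check
 *     if (_Py_atomic_load_relaxed(&request_flag)) {
 *         ... handle the request ...
 *     }
 *
 *     // another thread: publish the request
 *     _Py_atomic_store(&request_flag, 1);
 */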
#ifdef __cplusplus
}
#endif
#endif  /* Py_ATOMIC_H */