#pragma once

/*
 * Portable atomic primitives for pthreadpool.
 *
 * Provides relaxed/acquire loads, relaxed/release stores, decrement-and-fetch,
 * a non-blocking try-decrement (decrements only if the value is non-zero),
 * acquire/release fences, and a spin-wait yield hint. One implementation is
 * selected per toolchain/architecture:
 *   1. Clang targeting WebAssembly (lacks <stdatomic.h>; uses __c11_* builtins)
 *   2. C11 compilers with <stdatomic.h>
 *   3. Other GCC-compatible compilers (__sync_* builtins; x86-oriented:
 *      plain volatile accesses rely on x86 load-acquire/store-release ordering)
 *   4. MSVC on x86-64, x86, ARM64, and ARM (Interlocked* and barrier intrinsics)
 */

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* SSE-specific headers */
#if defined(__i386__) || defined(__i686__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
	#include <xmmintrin.h>
#endif

/* ARM-specific headers */
#if defined(__ARM_ACLE)
	#include <arm_acle.h>
#endif

/* MSVC-specific headers */
#ifdef _MSC_VER
	#include <intrin.h>
#endif


#if defined(__wasm__) && defined(__clang__)
	/*
	 * Clang for WebAssembly target lacks stdatomic.h header,
	 * even though it supports the necessary low-level intrinsics.
	 * Thus, we implement pthreadpool atomic functions on top of
	 * low-level Clang-specific interfaces for this target.
	 */

	typedef _Atomic(uint32_t) pthreadpool_atomic_uint32_t;
	typedef _Atomic(size_t) pthreadpool_atomic_size_t;
	typedef _Atomic(void*) pthreadpool_atomic_void_p;

	static inline uint32_t pthreadpool_load_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return __c11_atomic_load(address, __ATOMIC_RELAXED);
	}

	static inline size_t pthreadpool_load_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return __c11_atomic_load(address, __ATOMIC_RELAXED);
	}

	static inline void* pthreadpool_load_relaxed_void_p(
		pthreadpool_atomic_void_p* address)
	{
		return __c11_atomic_load(address, __ATOMIC_RELAXED);
	}

	static inline uint32_t pthreadpool_load_acquire_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return __c11_atomic_load(address, __ATOMIC_ACQUIRE);
	}

	static inline size_t pthreadpool_load_acquire_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return __c11_atomic_load(address, __ATOMIC_ACQUIRE);
	}

	static inline void pthreadpool_store_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		__c11_atomic_store(address, value, __ATOMIC_RELAXED);
	}

	static inline void pthreadpool_store_relaxed_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		__c11_atomic_store(address, value, __ATOMIC_RELAXED);
	}

	static inline void pthreadpool_store_relaxed_void_p(
		pthreadpool_atomic_void_p* address,
		void* value)
	{
		__c11_atomic_store(address, value, __ATOMIC_RELAXED);
	}

	static inline void pthreadpool_store_release_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		__c11_atomic_store(address, value, __ATOMIC_RELEASE);
	}

	static inline void pthreadpool_store_release_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		__c11_atomic_store(address, value, __ATOMIC_RELEASE);
	}

	/* Returns the post-decrement value (fetch_sub returns the old value). */
	static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return __c11_atomic_fetch_sub(address, 1, __ATOMIC_RELAXED) - 1;
	}

	static inline size_t pthreadpool_decrement_fetch_release_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return __c11_atomic_fetch_sub(address, 1, __ATOMIC_RELEASE) - 1;
	}

	/* Decrements *value if and only if it is non-zero; returns true on success. */
	static inline bool pthreadpool_try_decrement_relaxed_size_t(
		pthreadpool_atomic_size_t* value)
	{
		size_t actual_value = __c11_atomic_load(value, __ATOMIC_RELAXED);
		while (actual_value != 0) {
			/* On CAS failure, actual_value is reloaded with the current value. */
			if (__c11_atomic_compare_exchange_weak(
				value, &actual_value, actual_value - 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
			{
				return true;
			}
		}
		return false;
	}

	static inline void pthreadpool_fence_acquire(void) {
		__c11_atomic_thread_fence(__ATOMIC_ACQUIRE);
	}

	static inline void pthreadpool_fence_release(void) {
		__c11_atomic_thread_fence(__ATOMIC_RELEASE);
	}
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__)
	/* Standard C11 atomics. */
	#include <stdatomic.h>

	typedef _Atomic(uint32_t) pthreadpool_atomic_uint32_t;
	typedef _Atomic(size_t) pthreadpool_atomic_size_t;
	typedef _Atomic(void*) pthreadpool_atomic_void_p;

	static inline uint32_t pthreadpool_load_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return atomic_load_explicit(address, memory_order_relaxed);
	}

	static inline size_t pthreadpool_load_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return atomic_load_explicit(address, memory_order_relaxed);
	}

	static inline void* pthreadpool_load_relaxed_void_p(
		pthreadpool_atomic_void_p* address)
	{
		return atomic_load_explicit(address, memory_order_relaxed);
	}

	static inline uint32_t pthreadpool_load_acquire_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return atomic_load_explicit(address, memory_order_acquire);
	}

	static inline size_t pthreadpool_load_acquire_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return atomic_load_explicit(address, memory_order_acquire);
	}

	static inline void pthreadpool_store_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		atomic_store_explicit(address, value, memory_order_relaxed);
	}

	static inline void pthreadpool_store_relaxed_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		atomic_store_explicit(address, value, memory_order_relaxed);
	}

	static inline void pthreadpool_store_relaxed_void_p(
		pthreadpool_atomic_void_p* address,
		void* value)
	{
		atomic_store_explicit(address, value, memory_order_relaxed);
	}

	static inline void pthreadpool_store_release_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		atomic_store_explicit(address, value, memory_order_release);
	}

	static inline void pthreadpool_store_release_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		atomic_store_explicit(address, value, memory_order_release);
	}

	/* Returns the post-decrement value (fetch_sub returns the old value). */
	static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return atomic_fetch_sub_explicit(address, 1, memory_order_relaxed) - 1;
	}

	static inline size_t pthreadpool_decrement_fetch_release_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return atomic_fetch_sub_explicit(address, 1, memory_order_release) - 1;
	}

	/* Decrements *value if and only if it is non-zero; returns true on success. */
	static inline bool pthreadpool_try_decrement_relaxed_size_t(
		pthreadpool_atomic_size_t* value)
	{
		#if defined(__clang__) && (defined(__arm__) || defined(__aarch64__))
			/*
			 * On ARM, use exclusive load/store directly: a failed LDREX/STREX
			 * pair avoids the spurious store a CAS loop could perform.
			 */
			size_t actual_value;
			do {
				actual_value = __builtin_arm_ldrex((const volatile size_t*) value);
				if (actual_value == 0) {
					__builtin_arm_clrex();
					return false;
				}
			} while (__builtin_arm_strex(actual_value - 1, (volatile size_t*) value) != 0);
			return true;
		#else
			size_t actual_value = pthreadpool_load_relaxed_size_t(value);
			while (actual_value != 0) {
				/* On CAS failure, actual_value is reloaded with the current value. */
				if (atomic_compare_exchange_weak_explicit(
					value, &actual_value, actual_value - 1, memory_order_relaxed, memory_order_relaxed))
				{
					return true;
				}
			}
			return false;
		#endif
	}

	static inline void pthreadpool_fence_acquire(void) {
		atomic_thread_fence(memory_order_acquire);
	}

	static inline void pthreadpool_fence_release(void) {
		atomic_thread_fence(memory_order_release);
	}
#elif defined(__GNUC__)
	/*
	 * Pre-C11 GCC-compatible compilers: __sync builtins plus plain volatile
	 * accesses. NOTE(review): the bare volatile loads/stores provide
	 * acquire/release semantics only on strongly-ordered targets (x86) —
	 * this branch presumably predates GCC's atomics support there.
	 */
	typedef uint32_t volatile pthreadpool_atomic_uint32_t;
	typedef size_t volatile pthreadpool_atomic_size_t;
	typedef void* volatile pthreadpool_atomic_void_p;

	static inline uint32_t pthreadpool_load_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return *address;
	}

	static inline size_t pthreadpool_load_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return *address;
	}

	static inline void* pthreadpool_load_relaxed_void_p(
		pthreadpool_atomic_void_p* address)
	{
		return *address;
	}

	static inline uint32_t pthreadpool_load_acquire_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return *address;
	}

	static inline size_t pthreadpool_load_acquire_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return *address;
	}

	static inline void pthreadpool_store_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		*address = value;
	}

	static inline void pthreadpool_store_relaxed_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		*address = value;
	}

	static inline void pthreadpool_store_relaxed_void_p(
		pthreadpool_atomic_void_p* address,
		void* value)
	{
		*address = value;
	}

	static inline void pthreadpool_store_release_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		*address = value;
	}

	static inline void pthreadpool_store_release_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		*address = value;
	}

	/* __sync_sub_and_fetch returns the post-decrement value directly. */
	static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return __sync_sub_and_fetch(address, 1);
	}

	static inline size_t pthreadpool_decrement_fetch_release_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return __sync_sub_and_fetch(address, 1);
	}

	/* Decrements *value if and only if it is non-zero; returns true on success. */
	static inline bool pthreadpool_try_decrement_relaxed_size_t(
		pthreadpool_atomic_size_t* value)
	{
		size_t actual_value = *value;
		while (actual_value != 0) {
			const size_t new_value = actual_value - 1;
			const size_t expected_value = actual_value;
			/* Returns the pre-swap value: equal to expected_value iff the swap happened. */
			actual_value = __sync_val_compare_and_swap(value, expected_value, new_value);
			if (actual_value == expected_value) {
				return true;
			}
		}
		return false;
	}

	static inline void pthreadpool_fence_acquire(void) {
		__sync_synchronize();
	}

	static inline void pthreadpool_fence_release(void) {
		__sync_synchronize();
	}
#elif defined(_MSC_VER) && defined(_M_X64)
	typedef volatile uint32_t pthreadpool_atomic_uint32_t;
	typedef volatile size_t pthreadpool_atomic_size_t;
	typedef void *volatile pthreadpool_atomic_void_p;

	static inline uint32_t pthreadpool_load_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return *address;
	}

	static inline size_t pthreadpool_load_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return *address;
	}

	static inline void* pthreadpool_load_relaxed_void_p(
		pthreadpool_atomic_void_p* address)
	{
		return *address;
	}

	static inline uint32_t pthreadpool_load_acquire_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		/* x86-64 loads always have acquire semantics; use only a compiler barrier */
		const uint32_t value = *address;
		_ReadBarrier();
		return value;
	}

	static inline size_t pthreadpool_load_acquire_size_t(
		pthreadpool_atomic_size_t* address)
	{
		/* x86-64 loads always have acquire semantics; use only a compiler barrier */
		const size_t value = *address;
		_ReadBarrier();
		return value;
	}

	static inline void pthreadpool_store_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		*address = value;
	}

	static inline void pthreadpool_store_relaxed_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		*address = value;
	}

	static inline void pthreadpool_store_relaxed_void_p(
		pthreadpool_atomic_void_p* address,
		void* value)
	{
		*address = value;
	}

	static inline void pthreadpool_store_release_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		/* x86-64 stores always have release semantics; use only a compiler barrier */
		_WriteBarrier();
		*address = value;
	}

	static inline void pthreadpool_store_release_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		/* x86-64 stores always have release semantics; use only a compiler barrier */
		_WriteBarrier();
		*address = value;
	}

	/* _InterlockedDecrement64 returns the post-decrement value (full barrier). */
	static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) _InterlockedDecrement64((volatile __int64*) address);
	}

	static inline size_t pthreadpool_decrement_fetch_release_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) _InterlockedDecrement64((volatile __int64*) address);
	}

	/* Decrements *value if and only if it is non-zero; returns true on success. */
	static inline bool pthreadpool_try_decrement_relaxed_size_t(
		pthreadpool_atomic_size_t* value)
	{
		size_t actual_value = *value;
		while (actual_value != 0) {
			const size_t new_value = actual_value - 1;
			const size_t expected_value = actual_value;
			/* Returns the pre-swap value: equal to expected_value iff the swap happened. */
			actual_value = _InterlockedCompareExchange64(
				(volatile __int64*) value, (__int64) new_value, (__int64) expected_value);
			if (actual_value == expected_value) {
				return true;
			}
		}
		return false;
	}

	static inline void pthreadpool_fence_acquire(void) {
		_mm_lfence();
		_ReadBarrier();
	}

	static inline void pthreadpool_fence_release(void) {
		_WriteBarrier();
		_mm_sfence();
	}
#elif defined(_MSC_VER) && defined(_M_IX86)
	typedef volatile uint32_t pthreadpool_atomic_uint32_t;
	typedef volatile size_t pthreadpool_atomic_size_t;
	typedef void *volatile pthreadpool_atomic_void_p;

	static inline uint32_t pthreadpool_load_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return *address;
	}

	static inline size_t pthreadpool_load_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return *address;
	}

	static inline void* pthreadpool_load_relaxed_void_p(
		pthreadpool_atomic_void_p* address)
	{
		return *address;
	}

	static inline uint32_t pthreadpool_load_acquire_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		/* x86 loads always have acquire semantics; use only a compiler barrier */
		const uint32_t value = *address;
		_ReadBarrier();
		return value;
	}

	static inline size_t pthreadpool_load_acquire_size_t(
		pthreadpool_atomic_size_t* address)
	{
		/* x86 loads always have acquire semantics; use only a compiler barrier */
		const size_t value = *address;
		_ReadBarrier();
		return value;
	}

	static inline void pthreadpool_store_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		*address = value;
	}

	static inline void pthreadpool_store_relaxed_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		*address = value;
	}

	static inline void pthreadpool_store_relaxed_void_p(
		pthreadpool_atomic_void_p* address,
		void* value)
	{
		*address = value;
	}

	static inline void pthreadpool_store_release_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		/* x86 stores always have release semantics; use only a compiler barrier */
		_WriteBarrier();
		*address = value;
	}

	static inline void pthreadpool_store_release_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		/* x86 stores always have release semantics; use only a compiler barrier */
		_WriteBarrier();
		*address = value;
	}

	/* _InterlockedDecrement returns the post-decrement value (full barrier). */
	static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) _InterlockedDecrement((volatile long*) address);
	}

	static inline size_t pthreadpool_decrement_fetch_release_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) _InterlockedDecrement((volatile long*) address);
	}

	/* Decrements *value if and only if it is non-zero; returns true on success. */
	static inline bool pthreadpool_try_decrement_relaxed_size_t(
		pthreadpool_atomic_size_t* value)
	{
		size_t actual_value = *value;
		while (actual_value != 0) {
			const size_t new_value = actual_value - 1;
			const size_t expected_value = actual_value;
			/* Returns the pre-swap value: equal to expected_value iff the swap happened. */
			actual_value = _InterlockedCompareExchange(
				(volatile long*) value, (long) new_value, (long) expected_value);
			if (actual_value == expected_value) {
				return true;
			}
		}
		return false;
	}

	static inline void pthreadpool_fence_acquire(void) {
		_mm_lfence();
	}

	static inline void pthreadpool_fence_release(void) {
		_mm_sfence();
	}
#elif defined(_MSC_VER) && defined(_M_ARM64)
	typedef volatile uint32_t pthreadpool_atomic_uint32_t;
	typedef volatile size_t pthreadpool_atomic_size_t;
	typedef void *volatile pthreadpool_atomic_void_p;

	static inline uint32_t pthreadpool_load_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return (uint32_t) __iso_volatile_load32((const volatile __int32*) address);
	}

	static inline size_t pthreadpool_load_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) __iso_volatile_load64((const volatile __int64*) address);
	}

	static inline void* pthreadpool_load_relaxed_void_p(
		pthreadpool_atomic_void_p* address)
	{
		return (void*) __iso_volatile_load64((const volatile __int64*) address);
	}

	/* LDAR gives a hardware load-acquire on ARM64. */
	static inline uint32_t pthreadpool_load_acquire_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return (uint32_t) __ldar32((volatile unsigned __int32*) address);
	}

	static inline size_t pthreadpool_load_acquire_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) __ldar64((volatile unsigned __int64*) address);
	}

	static inline void pthreadpool_store_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		__iso_volatile_store32((volatile __int32*) address, (__int32) value);
	}

	static inline void pthreadpool_store_relaxed_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		__iso_volatile_store64((volatile __int64*) address, (__int64) value);
	}

	static inline void pthreadpool_store_relaxed_void_p(
		pthreadpool_atomic_void_p* address,
		void* value)
	{
		__iso_volatile_store64((volatile __int64*) address, (__int64) value);
	}

	/* STLR gives a hardware store-release; _WriteBarrier stops compiler reordering. */
	static inline void pthreadpool_store_release_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		_WriteBarrier();
		__stlr32((unsigned __int32 volatile*) address, (unsigned __int32) value);
	}

	static inline void pthreadpool_store_release_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		_WriteBarrier();
		__stlr64((unsigned __int64 volatile*) address, (unsigned __int64) value);
	}

	/* _nf = no fence (relaxed); _rel = release. Both return the post-decrement value. */
	static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) _InterlockedDecrement64_nf((volatile __int64*) address);
	}

	static inline size_t pthreadpool_decrement_fetch_release_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) _InterlockedDecrement64_rel((volatile __int64*) address);
	}

	/* Decrements *value if and only if it is non-zero; returns true on success. */
	static inline bool pthreadpool_try_decrement_relaxed_size_t(
		pthreadpool_atomic_size_t* value)
	{
		size_t actual_value = (size_t) __iso_volatile_load64((const volatile __int64*) value);
		while (actual_value != 0) {
			const size_t new_value = actual_value - 1;
			const size_t expected_value = actual_value;
			/* Returns the pre-swap value: equal to expected_value iff the swap happened. */
			actual_value = _InterlockedCompareExchange64_nf(
				(volatile __int64*) value, (__int64) new_value, (__int64) expected_value);
			if (actual_value == expected_value) {
				return true;
			}
		}
		return false;
	}

	static inline void pthreadpool_fence_acquire(void) {
		__dmb(_ARM64_BARRIER_ISHLD);
		_ReadBarrier();
	}

	static inline void pthreadpool_fence_release(void) {
		_WriteBarrier();
		__dmb(_ARM64_BARRIER_ISH);
	}
#elif defined(_MSC_VER) && defined(_M_ARM)
	typedef volatile uint32_t pthreadpool_atomic_uint32_t;
	typedef volatile size_t pthreadpool_atomic_size_t;
	typedef void *volatile pthreadpool_atomic_void_p;

	static inline uint32_t pthreadpool_load_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return (uint32_t) __iso_volatile_load32((const volatile __int32*) address);
	}

	static inline size_t pthreadpool_load_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) __iso_volatile_load32((const volatile __int32*) address);
	}

	static inline void* pthreadpool_load_relaxed_void_p(
		pthreadpool_atomic_void_p* address)
	{
		return (void*) __iso_volatile_load32((const volatile __int32*) address);
	}

	/* 32-bit ARM has no load-acquire instruction: load then DMB. */
	static inline uint32_t pthreadpool_load_acquire_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		const uint32_t value = (uint32_t) __iso_volatile_load32((const volatile __int32*) address);
		__dmb(_ARM_BARRIER_ISH);
		_ReadBarrier();
		return value;
	}

	static inline size_t pthreadpool_load_acquire_size_t(
		pthreadpool_atomic_size_t* address)
	{
		const size_t value = (size_t) __iso_volatile_load32((const volatile __int32*) address);
		__dmb(_ARM_BARRIER_ISH);
		_ReadBarrier();
		return value;
	}

	static inline void pthreadpool_store_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		__iso_volatile_store32((volatile __int32*) address, (__int32) value);
	}

	static inline void pthreadpool_store_relaxed_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		__iso_volatile_store32((volatile __int32*) address, (__int32) value);
	}

	static inline void pthreadpool_store_relaxed_void_p(
		pthreadpool_atomic_void_p* address,
		void* value)
	{
		__iso_volatile_store32((volatile __int32*) address, (__int32) value);
	}

	/* 32-bit ARM has no store-release instruction: DMB then store. */
	static inline void pthreadpool_store_release_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		_WriteBarrier();
		__dmb(_ARM_BARRIER_ISH);
		__iso_volatile_store32((volatile __int32*) address, (__int32) value);
	}

	static inline void pthreadpool_store_release_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		_WriteBarrier();
		__dmb(_ARM_BARRIER_ISH);
		__iso_volatile_store32((volatile __int32*) address, (__int32) value);
	}

	/* _nf = no fence (relaxed); _rel = release. Both return the post-decrement value. */
	static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) _InterlockedDecrement_nf((volatile long*) address);
	}

	static inline size_t pthreadpool_decrement_fetch_release_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) _InterlockedDecrement_rel((volatile long*) address);
	}

	/* Decrements *value if and only if it is non-zero; returns true on success. */
	static inline bool pthreadpool_try_decrement_relaxed_size_t(
		pthreadpool_atomic_size_t* value)
	{
		size_t actual_value = (size_t) __iso_volatile_load32((const volatile __int32*) value);
		while (actual_value != 0) {
			const size_t new_value = actual_value - 1;
			const size_t expected_value = actual_value;
			/* Returns the pre-swap value: equal to expected_value iff the swap happened. */
			actual_value = _InterlockedCompareExchange_nf(
				(volatile long*) value, (long) new_value, (long) expected_value);
			if (actual_value == expected_value) {
				return true;
			}
		}
		return false;
	}

	static inline void pthreadpool_fence_acquire(void) {
		__dmb(_ARM_BARRIER_ISH);
		_ReadBarrier();
	}

	static inline void pthreadpool_fence_release(void) {
		_WriteBarrier();
		__dmb(_ARM_BARRIER_ISH);
	}
#else
	#error "Platform-specific implementation of threadpool-atomics.h required"
#endif

/* Spin-wait hint for busy-wait loops. */
#if defined(__i386__) || defined(__i686__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
	static inline void pthreadpool_yield(void) {
		_mm_pause();
	}
#elif defined(__ARM_ACLE) || defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64))
	static inline void pthreadpool_yield(void) {
		__yield();
	}
#elif defined(__GNUC__) && (defined(__ARM_ARCH) && (__ARM_ARCH >= 7) || (defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6KZ__)) && !defined(__thumb__))
	static inline void pthreadpool_yield(void) {
		__asm__ __volatile__("yield");
	}
#else
	/* Fallback: an acquire fence at least throttles the spinning core. */
	static inline void pthreadpool_yield(void) {
		pthreadpool_fence_acquire();
	}
#endif