#pragma once

/*
 * Portable atomic operations for pthreadpool.
 *
 * Exposes a small uniform API over several back-ends, selected at
 * preprocessing time:
 *   1. Clang C11 atomic builtins (WebAssembly, which lacks <stdatomic.h>)
 *   2. C11 <stdatomic.h>
 *   3. GCC legacy __sync builtins
 *   4. MSVC intrinsics for ARM, ARM64, x86, and x86-64
 *
 * The API provides: relaxed/acquire loads, relaxed/release stores,
 * decrement-and-fetch with relaxed/release/acq_rel ordering, a lock-free
 * "decrement if non-zero" helper, acquire/release fences, and a spin-wait
 * yield hint.
 */

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* SSE-specific headers (for _mm_pause / _mm_lfence / _mm_sfence).
 * Note: ARM64EC defines _M_X64 but must not include x86 intrinsics. */
#if defined(__i386__) || defined(__i686__) || defined(__x86_64__) || \
    defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC))
#include <xmmintrin.h>
#endif

/* ARM-specific headers (for __yield) */
#if defined(__ARM_ACLE)
#include <arm_acle.h>
#endif

/* MSVC-specific headers (for Interlocked* and barrier intrinsics) */
#ifdef _MSC_VER
#include <intrin.h>
#endif


#if defined(__wasm__) && defined(__clang__)
	/*
	 * Clang for WebAssembly target lacks stdatomic.h header,
	 * even though it supports the necessary low-level intrinsics.
	 * Thus, we implement pthreadpool atomic functions on top of
	 * low-level Clang-specific interfaces for this target.
	 */

	typedef _Atomic(uint32_t) pthreadpool_atomic_uint32_t;
	typedef _Atomic(size_t)   pthreadpool_atomic_size_t;
	typedef _Atomic(void*)    pthreadpool_atomic_void_p;

	static inline uint32_t pthreadpool_load_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return __c11_atomic_load(address, __ATOMIC_RELAXED);
	}

	static inline size_t pthreadpool_load_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return __c11_atomic_load(address, __ATOMIC_RELAXED);
	}

	static inline void* pthreadpool_load_relaxed_void_p(
		pthreadpool_atomic_void_p* address)
	{
		return __c11_atomic_load(address, __ATOMIC_RELAXED);
	}

	static inline uint32_t pthreadpool_load_acquire_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return __c11_atomic_load(address, __ATOMIC_ACQUIRE);
	}

	static inline size_t pthreadpool_load_acquire_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return __c11_atomic_load(address, __ATOMIC_ACQUIRE);
	}

	static inline void pthreadpool_store_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		__c11_atomic_store(address, value, __ATOMIC_RELAXED);
	}

	static inline void pthreadpool_store_relaxed_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		__c11_atomic_store(address, value, __ATOMIC_RELAXED);
	}

	static inline void pthreadpool_store_relaxed_void_p(
		pthreadpool_atomic_void_p* address,
		void* value)
	{
		__c11_atomic_store(address, value, __ATOMIC_RELAXED);
	}

	static inline void pthreadpool_store_release_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		__c11_atomic_store(address, value, __ATOMIC_RELEASE);
	}

	static inline void pthreadpool_store_release_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		__c11_atomic_store(address, value, __ATOMIC_RELEASE);
	}

	/* fetch_sub returns the PRE-decrement value; subtract 1 to return the
	 * post-decrement value, matching the "decrement_fetch" contract. */
	static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return __c11_atomic_fetch_sub(address, 1, __ATOMIC_RELAXED) - 1;
	}

	static inline size_t pthreadpool_decrement_fetch_release_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return __c11_atomic_fetch_sub(address, 1, __ATOMIC_RELEASE) - 1;
	}

	static inline size_t pthreadpool_decrement_fetch_acquire_release_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return __c11_atomic_fetch_sub(address, 1, __ATOMIC_ACQ_REL) - 1;
	}

	/* Atomically decrement *value if it is non-zero.
	 * Returns true if the decrement happened, false if *value was 0. */
	static inline bool pthreadpool_try_decrement_relaxed_size_t(
		pthreadpool_atomic_size_t* value)
	{
		size_t actual_value = __c11_atomic_load(value, __ATOMIC_RELAXED);
		while (actual_value != 0) {
			/* On failure the weak CAS reloads actual_value for the retry. */
			if (__c11_atomic_compare_exchange_weak(
					value, &actual_value, actual_value - 1,
					__ATOMIC_RELAXED, __ATOMIC_RELAXED))
			{
				return true;
			}
		}
		return false;
	}

	static inline void pthreadpool_fence_acquire(void) {
		__c11_atomic_thread_fence(__ATOMIC_ACQUIRE);
	}

	static inline void pthreadpool_fence_release(void) {
		__c11_atomic_thread_fence(__ATOMIC_RELEASE);
	}
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__)
	/* Standard C11 atomics back-end. */
	#include <stdatomic.h>

	typedef _Atomic(uint32_t) pthreadpool_atomic_uint32_t;
	typedef _Atomic(size_t)   pthreadpool_atomic_size_t;
	typedef _Atomic(void*)    pthreadpool_atomic_void_p;

	static inline uint32_t pthreadpool_load_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return atomic_load_explicit(address, memory_order_relaxed);
	}

	static inline size_t pthreadpool_load_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return atomic_load_explicit(address, memory_order_relaxed);
	}

	static inline void* pthreadpool_load_relaxed_void_p(
		pthreadpool_atomic_void_p* address)
	{
		return atomic_load_explicit(address, memory_order_relaxed);
	}

	static inline uint32_t pthreadpool_load_acquire_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return atomic_load_explicit(address, memory_order_acquire);
	}

	static inline size_t pthreadpool_load_acquire_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return atomic_load_explicit(address, memory_order_acquire);
	}

	static inline void pthreadpool_store_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		atomic_store_explicit(address, value, memory_order_relaxed);
	}

	static inline void pthreadpool_store_relaxed_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		atomic_store_explicit(address, value, memory_order_relaxed);
	}

	static inline void pthreadpool_store_relaxed_void_p(
		pthreadpool_atomic_void_p* address,
		void* value)
	{
		atomic_store_explicit(address, value, memory_order_relaxed);
	}

	static inline void pthreadpool_store_release_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		atomic_store_explicit(address, value, memory_order_release);
	}

	static inline void pthreadpool_store_release_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		atomic_store_explicit(address, value, memory_order_release);
	}

	/* fetch_sub returns the PRE-decrement value; subtract 1 to return the
	 * post-decrement value, matching the "decrement_fetch" contract. */
	static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return atomic_fetch_sub_explicit(address, 1, memory_order_relaxed) - 1;
	}

	static inline size_t pthreadpool_decrement_fetch_release_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return atomic_fetch_sub_explicit(address, 1, memory_order_release) - 1;
	}

	static inline size_t pthreadpool_decrement_fetch_acquire_release_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return atomic_fetch_sub_explicit(address, 1, memory_order_acq_rel) - 1;
	}

	/* Atomically decrement *value if it is non-zero.
	 * Returns true if the decrement happened, false if *value was 0. */
	static inline bool pthreadpool_try_decrement_relaxed_size_t(
		pthreadpool_atomic_size_t* value)
	{
	#if defined(__clang__) && (defined(__arm__) || defined(__aarch64__))
		/* Hand-rolled LDREX/STREX loop: avoids the extra CLREX/branch that a
		 * compiled CAS loop would emit on ARM with Clang. */
		size_t actual_value;
		do {
			actual_value = __builtin_arm_ldrex((const volatile size_t*) value);
			if (actual_value == 0) {
				__builtin_arm_clrex();
				return false;
			}
		} while (__builtin_arm_strex(actual_value - 1, (volatile size_t*) value) != 0);
		return true;
	#else
		size_t actual_value = pthreadpool_load_relaxed_size_t(value);
		while (actual_value != 0) {
			/* On failure the weak CAS reloads actual_value for the retry. */
			if (atomic_compare_exchange_weak_explicit(
					value, &actual_value, actual_value - 1,
					memory_order_relaxed, memory_order_relaxed))
			{
				return true;
			}
		}
		return false;
	#endif
	}

	static inline void pthreadpool_fence_acquire(void) {
		atomic_thread_fence(memory_order_acquire);
	}

	static inline void pthreadpool_fence_release(void) {
		atomic_thread_fence(memory_order_release);
	}
#elif defined(__GNUC__)
	/* Legacy GCC back-end built on volatile accesses plus __sync builtins.
	 * NOTE(review): the acquire loads / release stores here rely on volatile
	 * plus the strongly-ordered targets this fallback is used on; they carry
	 * no explicit hardware barrier. */
	typedef uint32_t volatile pthreadpool_atomic_uint32_t;
	typedef size_t volatile   pthreadpool_atomic_size_t;
	typedef void* volatile    pthreadpool_atomic_void_p;

	static inline uint32_t pthreadpool_load_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return *address;
	}

	static inline size_t pthreadpool_load_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return *address;
	}

	static inline void* pthreadpool_load_relaxed_void_p(
		pthreadpool_atomic_void_p* address)
	{
		return *address;
	}

	static inline uint32_t pthreadpool_load_acquire_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return *address;
	}

	static inline size_t pthreadpool_load_acquire_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return *address;
	}

	static inline void pthreadpool_store_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		*address = value;
	}

	static inline void pthreadpool_store_relaxed_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		*address = value;
	}

	static inline void pthreadpool_store_relaxed_void_p(
		pthreadpool_atomic_void_p* address,
		void* value)
	{
		*address = value;
	}

	static inline void pthreadpool_store_release_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		*address = value;
	}

	static inline void pthreadpool_store_release_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		*address = value;
	}

	/* __sync_sub_and_fetch already returns the post-decrement value and
	 * implies a full barrier, satisfying every ordering variant below. */
	static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return __sync_sub_and_fetch(address, 1);
	}

	static inline size_t pthreadpool_decrement_fetch_release_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return __sync_sub_and_fetch(address, 1);
	}

	static inline size_t pthreadpool_decrement_fetch_acquire_release_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return __sync_sub_and_fetch(address, 1);
	}

	/* Atomically decrement *value if it is non-zero.
	 * Returns true if the decrement happened, false if *value was 0. */
	static inline bool pthreadpool_try_decrement_relaxed_size_t(
		pthreadpool_atomic_size_t* value)
	{
		size_t actual_value = *value;
		while (actual_value != 0) {
			const size_t new_value = actual_value - 1;
			const size_t expected_value = actual_value;
			/* CAS returns the observed value; equality means our swap won. */
			actual_value = __sync_val_compare_and_swap(value, expected_value, new_value);
			if (actual_value == expected_value) {
				return true;
			}
		}
		return false;
	}

	static inline void pthreadpool_fence_acquire(void) {
		__sync_synchronize();
	}

	static inline void pthreadpool_fence_release(void) {
		__sync_synchronize();
	}
#elif defined(_MSC_VER) && defined(_M_ARM)
	/* MSVC 32-bit ARM back-end: __iso_volatile_* accesses combined with
	 * DMB ISH hardware barriers and _Read/_WriteBarrier compiler barriers. */
	typedef volatile uint32_t pthreadpool_atomic_uint32_t;
	typedef volatile size_t   pthreadpool_atomic_size_t;
	typedef void* volatile    pthreadpool_atomic_void_p;

	static inline uint32_t pthreadpool_load_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return (uint32_t) __iso_volatile_load32((const volatile __int32*) address);
	}

	static inline size_t pthreadpool_load_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) __iso_volatile_load32((const volatile __int32*) address);
	}

	static inline void* pthreadpool_load_relaxed_void_p(
		pthreadpool_atomic_void_p* address)
	{
		return (void*) __iso_volatile_load32((const volatile __int32*) address);
	}

	static inline uint32_t pthreadpool_load_acquire_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		/* Load, then DMB: no later access may be reordered before the load. */
		const uint32_t value = (uint32_t) __iso_volatile_load32((const volatile __int32*) address);
		__dmb(_ARM_BARRIER_ISH);
		_ReadBarrier();
		return value;
	}

	static inline size_t pthreadpool_load_acquire_size_t(
		pthreadpool_atomic_size_t* address)
	{
		const size_t value = (size_t) __iso_volatile_load32((const volatile __int32*) address);
		__dmb(_ARM_BARRIER_ISH);
		_ReadBarrier();
		return value;
	}

	static inline void pthreadpool_store_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		__iso_volatile_store32((volatile __int32*) address, (__int32) value);
	}

	static inline void pthreadpool_store_relaxed_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		__iso_volatile_store32((volatile __int32*) address, (__int32) value);
	}

	static inline void pthreadpool_store_relaxed_void_p(
		pthreadpool_atomic_void_p* address,
		void* value)
	{
		__iso_volatile_store32((volatile __int32*) address, (__int32) value);
	}

	static inline void pthreadpool_store_release_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		/* DMB before the store: no earlier access may be reordered after it. */
		_WriteBarrier();
		__dmb(_ARM_BARRIER_ISH);
		__iso_volatile_store32((volatile __int32*) address, (__int32) value);
	}

	static inline void pthreadpool_store_release_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		_WriteBarrier();
		__dmb(_ARM_BARRIER_ISH);
		__iso_volatile_store32((volatile __int32*) address, (__int32) value);
	}

	/* _InterlockedDecrement* return the post-decrement value.
	 * Suffixes: _nf = no fence (relaxed), _rel = release, none = full. */
	static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) _InterlockedDecrement_nf((volatile long*) address);
	}

	static inline size_t pthreadpool_decrement_fetch_release_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) _InterlockedDecrement_rel((volatile long*) address);
	}

	static inline size_t pthreadpool_decrement_fetch_acquire_release_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) _InterlockedDecrement((volatile long*) address);
	}

	/* Atomically decrement *value if it is non-zero.
	 * Returns true if the decrement happened, false if *value was 0. */
	static inline bool pthreadpool_try_decrement_relaxed_size_t(
		pthreadpool_atomic_size_t* value)
	{
		size_t actual_value = (size_t) __iso_volatile_load32((const volatile __int32*) value);
		while (actual_value != 0) {
			const size_t new_value = actual_value - 1;
			const size_t expected_value = actual_value;
			actual_value = _InterlockedCompareExchange_nf(
				(volatile long*) value, (long) new_value, (long) expected_value);
			if (actual_value == expected_value) {
				return true;
			}
		}
		return false;
	}

	static inline void pthreadpool_fence_acquire(void) {
		__dmb(_ARM_BARRIER_ISH);
		_ReadBarrier();
	}

	static inline void pthreadpool_fence_release(void) {
		_WriteBarrier();
		__dmb(_ARM_BARRIER_ISH);
	}
#elif defined(_MSC_VER) && defined(_M_ARM64)
	/* MSVC ARM64 back-end: dedicated LDAR/STLR acquire/release instructions,
	 * 64-bit size_t/pointer accesses, and 64-bit Interlocked intrinsics. */
	typedef volatile uint32_t pthreadpool_atomic_uint32_t;
	typedef volatile size_t   pthreadpool_atomic_size_t;
	typedef void* volatile    pthreadpool_atomic_void_p;

	static inline uint32_t pthreadpool_load_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return (uint32_t) __iso_volatile_load32((const volatile __int32*) address);
	}

	static inline size_t pthreadpool_load_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) __iso_volatile_load64((const volatile __int64*) address);
	}

	static inline void* pthreadpool_load_relaxed_void_p(
		pthreadpool_atomic_void_p* address)
	{
		return (void*) __iso_volatile_load64((const volatile __int64*) address);
	}

	static inline uint32_t pthreadpool_load_acquire_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return (uint32_t) __ldar32((volatile unsigned __int32*) address);
	}

	static inline size_t pthreadpool_load_acquire_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) __ldar64((volatile unsigned __int64*) address);
	}

	static inline void pthreadpool_store_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		__iso_volatile_store32((volatile __int32*) address, (__int32) value);
	}

	static inline void pthreadpool_store_relaxed_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		__iso_volatile_store64((volatile __int64*) address, (__int64) value);
	}

	static inline void pthreadpool_store_relaxed_void_p(
		pthreadpool_atomic_void_p* address,
		void* value)
	{
		__iso_volatile_store64((volatile __int64*) address, (__int64) value);
	}

	static inline void pthreadpool_store_release_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		/* Compiler barrier, then STLR provides the hardware release. */
		_WriteBarrier();
		__stlr32((unsigned __int32 volatile*) address, (unsigned __int32) value);
	}

	static inline void pthreadpool_store_release_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		_WriteBarrier();
		__stlr64((unsigned __int64 volatile*) address, (unsigned __int64) value);
	}

	/* _InterlockedDecrement64* return the post-decrement value.
	 * Suffixes: _nf = no fence (relaxed), _rel = release, none = full. */
	static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) _InterlockedDecrement64_nf((volatile __int64*) address);
	}

	static inline size_t pthreadpool_decrement_fetch_release_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) _InterlockedDecrement64_rel((volatile __int64*) address);
	}

	static inline size_t pthreadpool_decrement_fetch_acquire_release_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) _InterlockedDecrement64((volatile __int64*) address);
	}

	/* Atomically decrement *value if it is non-zero.
	 * Returns true if the decrement happened, false if *value was 0. */
	static inline bool pthreadpool_try_decrement_relaxed_size_t(
		pthreadpool_atomic_size_t* value)
	{
		size_t actual_value = (size_t) __iso_volatile_load64((const volatile __int64*) value);
		while (actual_value != 0) {
			const size_t new_value = actual_value - 1;
			const size_t expected_value = actual_value;
			actual_value = _InterlockedCompareExchange64_nf(
				(volatile __int64*) value, (__int64) new_value, (__int64) expected_value);
			if (actual_value == expected_value) {
				return true;
			}
		}
		return false;
	}

	static inline void pthreadpool_fence_acquire(void) {
		__dmb(_ARM64_BARRIER_ISHLD);
		_ReadBarrier();
	}

	static inline void pthreadpool_fence_release(void) {
		_WriteBarrier();
		__dmb(_ARM64_BARRIER_ISH);
	}
#elif defined(_MSC_VER) && defined(_M_IX86)
	/* MSVC 32-bit x86 back-end: relies on x86 TSO (all plain loads have
	 * acquire semantics, all plain stores have release semantics), so only
	 * compiler barriers are needed outside of read-modify-write ops. */
	typedef volatile uint32_t pthreadpool_atomic_uint32_t;
	typedef volatile size_t   pthreadpool_atomic_size_t;
	typedef void* volatile    pthreadpool_atomic_void_p;

	static inline uint32_t pthreadpool_load_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return *address;
	}

	static inline size_t pthreadpool_load_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return *address;
	}

	static inline void* pthreadpool_load_relaxed_void_p(
		pthreadpool_atomic_void_p* address)
	{
		return *address;
	}

	static inline uint32_t pthreadpool_load_acquire_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		/* x86 loads always have acquire semantics; use only a compiler barrier */
		const uint32_t value = *address;
		_ReadBarrier();
		return value;
	}

	static inline size_t pthreadpool_load_acquire_size_t(
		pthreadpool_atomic_size_t* address)
	{
		/* x86 loads always have acquire semantics; use only a compiler barrier */
		const size_t value = *address;
		_ReadBarrier();
		return value;
	}

	static inline void pthreadpool_store_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		*address = value;
	}

	static inline void pthreadpool_store_relaxed_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		*address = value;
	}

	static inline void pthreadpool_store_relaxed_void_p(
		pthreadpool_atomic_void_p* address,
		void* value)
	{
		*address = value;
	}

	static inline void pthreadpool_store_release_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		/* x86 stores always have release semantics; use only a compiler barrier */
		_WriteBarrier();
		*address = value;
	}

	static inline void pthreadpool_store_release_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		/* x86 stores always have release semantics; use only a compiler barrier */
		_WriteBarrier();
		*address = value;
	}

	/* LOCK-prefixed _InterlockedDecrement is a full barrier on x86, which
	 * satisfies every ordering variant; it returns the post-decrement value. */
	static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) _InterlockedDecrement((volatile long*) address);
	}

	static inline size_t pthreadpool_decrement_fetch_release_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) _InterlockedDecrement((volatile long*) address);
	}

	static inline size_t pthreadpool_decrement_fetch_acquire_release_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) _InterlockedDecrement((volatile long*) address);
	}

	/* Atomically decrement *value if it is non-zero.
	 * Returns true if the decrement happened, false if *value was 0. */
	static inline bool pthreadpool_try_decrement_relaxed_size_t(
		pthreadpool_atomic_size_t* value)
	{
		size_t actual_value = *value;
		while (actual_value != 0) {
			const size_t new_value = actual_value - 1;
			const size_t expected_value = actual_value;
			actual_value = _InterlockedCompareExchange(
				(volatile long*) value, (long) new_value, (long) expected_value);
			if (actual_value == expected_value) {
				return true;
			}
		}
		return false;
	}

	static inline void pthreadpool_fence_acquire(void) {
		_mm_lfence();
		_ReadBarrier();
	}

	static inline void pthreadpool_fence_release(void) {
		_WriteBarrier();
		_mm_sfence();
	}
#elif defined(_MSC_VER) && defined(_M_X64)
	/* MSVC x86-64 back-end: same TSO reasoning as the x86 branch, with
	 * 64-bit Interlocked intrinsics for size_t. */
	typedef volatile uint32_t pthreadpool_atomic_uint32_t;
	typedef volatile size_t   pthreadpool_atomic_size_t;
	typedef void* volatile    pthreadpool_atomic_void_p;

	static inline uint32_t pthreadpool_load_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		return *address;
	}

	static inline size_t pthreadpool_load_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return *address;
	}

	static inline void* pthreadpool_load_relaxed_void_p(
		pthreadpool_atomic_void_p* address)
	{
		return *address;
	}

	static inline uint32_t pthreadpool_load_acquire_uint32_t(
		pthreadpool_atomic_uint32_t* address)
	{
		/* x86-64 loads always have acquire semantics; use only a compiler barrier */
		const uint32_t value = *address;
		_ReadBarrier();
		return value;
	}

	static inline size_t pthreadpool_load_acquire_size_t(
		pthreadpool_atomic_size_t* address)
	{
		/* x86-64 loads always have acquire semantics; use only a compiler barrier */
		const size_t value = *address;
		_ReadBarrier();
		return value;
	}

	static inline void pthreadpool_store_relaxed_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		*address = value;
	}

	static inline void pthreadpool_store_relaxed_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		*address = value;
	}

	static inline void pthreadpool_store_relaxed_void_p(
		pthreadpool_atomic_void_p* address,
		void* value)
	{
		*address = value;
	}

	static inline void pthreadpool_store_release_uint32_t(
		pthreadpool_atomic_uint32_t* address,
		uint32_t value)
	{
		/* x86-64 stores always have release semantics; use only a compiler barrier */
		_WriteBarrier();
		*address = value;
	}

	static inline void pthreadpool_store_release_size_t(
		pthreadpool_atomic_size_t* address,
		size_t value)
	{
		/* x86-64 stores always have release semantics; use only a compiler barrier */
		_WriteBarrier();
		*address = value;
	}

	/* LOCK-prefixed _InterlockedDecrement64 is a full barrier on x86-64,
	 * which satisfies every ordering variant; returns the post-decrement value. */
	static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) _InterlockedDecrement64((volatile __int64*) address);
	}

	static inline size_t pthreadpool_decrement_fetch_release_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) _InterlockedDecrement64((volatile __int64*) address);
	}

	static inline size_t pthreadpool_decrement_fetch_acquire_release_size_t(
		pthreadpool_atomic_size_t* address)
	{
		return (size_t) _InterlockedDecrement64((volatile __int64*) address);
	}

	/* Atomically decrement *value if it is non-zero.
	 * Returns true if the decrement happened, false if *value was 0. */
	static inline bool pthreadpool_try_decrement_relaxed_size_t(
		pthreadpool_atomic_size_t* value)
	{
		size_t actual_value = *value;
		while (actual_value != 0) {
			const size_t new_value = actual_value - 1;
			const size_t expected_value = actual_value;
			actual_value = _InterlockedCompareExchange64(
				(volatile __int64*) value, (__int64) new_value, (__int64) expected_value);
			if (actual_value == expected_value) {
				return true;
			}
		}
		return false;
	}

	static inline void pthreadpool_fence_acquire(void) {
		_mm_lfence();
		_ReadBarrier();
	}

	static inline void pthreadpool_fence_release(void) {
		_WriteBarrier();
		_mm_sfence();
	}
#else
	#error "Platform-specific implementation of threadpool-atomics.h required"
#endif

/* Spin-wait hint: tells the CPU (or SMT sibling) that this thread is in a
 * busy-wait loop, reducing power and contention. Falls back to an acquire
 * fence where no dedicated instruction exists. */
#if defined(__ARM_ACLE) || (defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC)))
	static inline void pthreadpool_yield(void) {
		__yield();
	}
#elif defined(__GNUC__) && ((defined(__ARM_ARCH) && (__ARM_ARCH >= 7)) || \
	((defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6KZ__)) && !defined(__thumb__)))
	static inline void pthreadpool_yield(void) {
		__asm__ __volatile__("yield");
	}
#elif defined(__i386__) || defined(__i686__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
	static inline void pthreadpool_yield(void) {
		_mm_pause();
	}
#else
	static inline void pthreadpool_yield(void) {
		pthreadpool_fence_acquire();
	}
#endif