/*
 * threadpool-atomics.h
 * Platform-specific atomic loads, stores, decrements, fences, and yield
 * primitives used by pthreadpool.
 */
#pragma once

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/* SSE-specific headers */
#if defined(__i386__) || defined(__i686__) || defined(__x86_64__) || defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC))
	#include <xmmintrin.h>
#endif

/* ARM-specific headers */
#if defined(__ARM_ACLE)
	#include <arm_acle.h>
#endif

/* MSVC-specific headers */
#ifdef _MSC_VER
	#include <intrin.h>
#endif

23 #if defined(__wasm__) && defined(__clang__)
24 	/*
25 	 * Clang for WebAssembly target lacks stdatomic.h header,
26 	 * even though it supports the necessary low-level intrinsics.
27 	 * Thus, we implement pthreadpool atomic functions on top of
28 	 * low-level Clang-specific interfaces for this target.
29 	 */
30 
31 	typedef _Atomic(uint32_t) pthreadpool_atomic_uint32_t;
32 	typedef _Atomic(size_t)   pthreadpool_atomic_size_t;
33 	typedef _Atomic(void*)    pthreadpool_atomic_void_p;
34 
pthreadpool_load_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address)35 	static inline uint32_t pthreadpool_load_relaxed_uint32_t(
36 		pthreadpool_atomic_uint32_t* address)
37 	{
38 		return __c11_atomic_load(address, __ATOMIC_RELAXED);
39 	}
40 
pthreadpool_load_relaxed_size_t(pthreadpool_atomic_size_t * address)41 	static inline size_t pthreadpool_load_relaxed_size_t(
42 		pthreadpool_atomic_size_t* address)
43 	{
44 		return __c11_atomic_load(address, __ATOMIC_RELAXED);
45 	}
46 
pthreadpool_load_relaxed_void_p(pthreadpool_atomic_void_p * address)47 	static inline void* pthreadpool_load_relaxed_void_p(
48 		pthreadpool_atomic_void_p* address)
49 	{
50 		return __c11_atomic_load(address, __ATOMIC_RELAXED);
51 	}
52 
pthreadpool_load_acquire_uint32_t(pthreadpool_atomic_uint32_t * address)53 	static inline uint32_t pthreadpool_load_acquire_uint32_t(
54 		pthreadpool_atomic_uint32_t* address)
55 	{
56 		return __c11_atomic_load(address, __ATOMIC_ACQUIRE);
57 	}
58 
pthreadpool_load_acquire_size_t(pthreadpool_atomic_size_t * address)59 	static inline size_t pthreadpool_load_acquire_size_t(
60 		pthreadpool_atomic_size_t* address)
61 	{
62 		return __c11_atomic_load(address, __ATOMIC_ACQUIRE);
63 	}
64 
pthreadpool_store_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)65 	static inline void pthreadpool_store_relaxed_uint32_t(
66 		pthreadpool_atomic_uint32_t* address,
67 		uint32_t value)
68 	{
69 		__c11_atomic_store(address, value, __ATOMIC_RELAXED);
70 	}
71 
pthreadpool_store_relaxed_size_t(pthreadpool_atomic_size_t * address,size_t value)72 	static inline void pthreadpool_store_relaxed_size_t(
73 		pthreadpool_atomic_size_t* address,
74 		size_t value)
75 	{
76 		__c11_atomic_store(address, value, __ATOMIC_RELAXED);
77 	}
78 
pthreadpool_store_relaxed_void_p(pthreadpool_atomic_void_p * address,void * value)79 	static inline void pthreadpool_store_relaxed_void_p(
80 		pthreadpool_atomic_void_p* address,
81 		void* value)
82 	{
83 		__c11_atomic_store(address, value, __ATOMIC_RELAXED);
84 	}
85 
pthreadpool_store_release_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)86 	static inline void pthreadpool_store_release_uint32_t(
87 		pthreadpool_atomic_uint32_t* address,
88 		uint32_t value)
89 	{
90 		__c11_atomic_store(address, value, __ATOMIC_RELEASE);
91 	}
92 
pthreadpool_store_release_size_t(pthreadpool_atomic_size_t * address,size_t value)93 	static inline void pthreadpool_store_release_size_t(
94 		pthreadpool_atomic_size_t* address,
95 		size_t value)
96 	{
97 		__c11_atomic_store(address, value, __ATOMIC_RELEASE);
98 	}
99 
pthreadpool_decrement_fetch_relaxed_size_t(pthreadpool_atomic_size_t * address)100 	static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
101 		pthreadpool_atomic_size_t* address)
102 	{
103 		return __c11_atomic_fetch_sub(address, 1, __ATOMIC_RELAXED) - 1;
104 	}
105 
pthreadpool_decrement_fetch_release_size_t(pthreadpool_atomic_size_t * address)106 	static inline size_t pthreadpool_decrement_fetch_release_size_t(
107 		pthreadpool_atomic_size_t* address)
108 	{
109 		return __c11_atomic_fetch_sub(address, 1, __ATOMIC_RELEASE) - 1;
110 	}
111 
pthreadpool_decrement_fetch_acquire_release_size_t(pthreadpool_atomic_size_t * address)112 	static inline size_t pthreadpool_decrement_fetch_acquire_release_size_t(
113 		pthreadpool_atomic_size_t* address)
114 	{
115 		return __c11_atomic_fetch_sub(address, 1, __ATOMIC_ACQ_REL) - 1;
116 	}
117 
pthreadpool_try_decrement_relaxed_size_t(pthreadpool_atomic_size_t * value)118 	static inline bool pthreadpool_try_decrement_relaxed_size_t(
119 		pthreadpool_atomic_size_t* value)
120 	{
121 		size_t actual_value = __c11_atomic_load(value, __ATOMIC_RELAXED);
122 		while (actual_value != 0) {
123 			if (__c11_atomic_compare_exchange_weak(
124 				value, &actual_value, actual_value - 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED))
125 			{
126 				return true;
127 			}
128 		}
129 		return false;
130 	}
131 
pthreadpool_fence_acquire()132 	static inline void pthreadpool_fence_acquire() {
133 		__c11_atomic_thread_fence(__ATOMIC_ACQUIRE);
134 	}
135 
pthreadpool_fence_release()136 	static inline void pthreadpool_fence_release() {
137 		__c11_atomic_thread_fence(__ATOMIC_RELEASE);
138 	}
139 #elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__)
140 	#include <stdatomic.h>
141 
142 	typedef _Atomic(uint32_t) pthreadpool_atomic_uint32_t;
143 	typedef _Atomic(size_t)   pthreadpool_atomic_size_t;
144 	typedef _Atomic(void*)    pthreadpool_atomic_void_p;
145 
pthreadpool_load_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address)146 	static inline uint32_t pthreadpool_load_relaxed_uint32_t(
147 		pthreadpool_atomic_uint32_t* address)
148 	{
149 		return atomic_load_explicit(address, memory_order_relaxed);
150 	}
151 
pthreadpool_load_relaxed_size_t(pthreadpool_atomic_size_t * address)152 	static inline size_t pthreadpool_load_relaxed_size_t(
153 		pthreadpool_atomic_size_t* address)
154 	{
155 		return atomic_load_explicit(address, memory_order_relaxed);
156 	}
157 
pthreadpool_load_relaxed_void_p(pthreadpool_atomic_void_p * address)158 	static inline void* pthreadpool_load_relaxed_void_p(
159 		pthreadpool_atomic_void_p* address)
160 	{
161 		return atomic_load_explicit(address, memory_order_relaxed);
162 	}
163 
pthreadpool_load_acquire_uint32_t(pthreadpool_atomic_uint32_t * address)164 	static inline uint32_t pthreadpool_load_acquire_uint32_t(
165 		pthreadpool_atomic_uint32_t* address)
166 	{
167 		return atomic_load_explicit(address, memory_order_acquire);
168 	}
169 
pthreadpool_load_acquire_size_t(pthreadpool_atomic_size_t * address)170 	static inline size_t pthreadpool_load_acquire_size_t(
171 		pthreadpool_atomic_size_t* address)
172 	{
173 		return atomic_load_explicit(address, memory_order_acquire);
174 	}
175 
pthreadpool_store_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)176 	static inline void pthreadpool_store_relaxed_uint32_t(
177 		pthreadpool_atomic_uint32_t* address,
178 		uint32_t value)
179 	{
180 		atomic_store_explicit(address, value, memory_order_relaxed);
181 	}
182 
pthreadpool_store_relaxed_size_t(pthreadpool_atomic_size_t * address,size_t value)183 	static inline void pthreadpool_store_relaxed_size_t(
184 		pthreadpool_atomic_size_t* address,
185 		size_t value)
186 	{
187 		atomic_store_explicit(address, value, memory_order_relaxed);
188 	}
189 
pthreadpool_store_relaxed_void_p(pthreadpool_atomic_void_p * address,void * value)190 	static inline void pthreadpool_store_relaxed_void_p(
191 		pthreadpool_atomic_void_p* address,
192 		void* value)
193 	{
194 		atomic_store_explicit(address, value, memory_order_relaxed);
195 	}
196 
pthreadpool_store_release_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)197 	static inline void pthreadpool_store_release_uint32_t(
198 		pthreadpool_atomic_uint32_t* address,
199 		uint32_t value)
200 	{
201 		atomic_store_explicit(address, value, memory_order_release);
202 	}
203 
pthreadpool_store_release_size_t(pthreadpool_atomic_size_t * address,size_t value)204 	static inline void pthreadpool_store_release_size_t(
205 		pthreadpool_atomic_size_t* address,
206 		size_t value)
207 	{
208 		atomic_store_explicit(address, value, memory_order_release);
209 	}
210 
pthreadpool_decrement_fetch_relaxed_size_t(pthreadpool_atomic_size_t * address)211 	static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
212 		pthreadpool_atomic_size_t* address)
213 	{
214 		return atomic_fetch_sub_explicit(address, 1, memory_order_relaxed) - 1;
215 	}
216 
pthreadpool_decrement_fetch_release_size_t(pthreadpool_atomic_size_t * address)217 	static inline size_t pthreadpool_decrement_fetch_release_size_t(
218 		pthreadpool_atomic_size_t* address)
219 	{
220 		return atomic_fetch_sub_explicit(address, 1, memory_order_release) - 1;
221 	}
222 
pthreadpool_decrement_fetch_acquire_release_size_t(pthreadpool_atomic_size_t * address)223 	static inline size_t pthreadpool_decrement_fetch_acquire_release_size_t(
224 		pthreadpool_atomic_size_t* address)
225 	{
226 		return atomic_fetch_sub_explicit(address, 1, memory_order_acq_rel) - 1;
227 	}
228 
pthreadpool_try_decrement_relaxed_size_t(pthreadpool_atomic_size_t * value)229 	static inline bool pthreadpool_try_decrement_relaxed_size_t(
230 		pthreadpool_atomic_size_t* value)
231 	{
232 		#if defined(__clang__) && (defined(__arm__) || defined(__aarch64__))
233 			size_t actual_value;
234 			do {
235 				actual_value = __builtin_arm_ldrex((const volatile size_t*) value);
236 				if (actual_value == 0) {
237 					__builtin_arm_clrex();
238 					return false;
239 				}
240 			} while (__builtin_arm_strex(actual_value - 1, (volatile size_t*) value) != 0);
241 			return true;
242 		#else
243 			size_t actual_value = pthreadpool_load_relaxed_size_t(value);
244 			while (actual_value != 0) {
245 				if (atomic_compare_exchange_weak_explicit(
246 					value, &actual_value, actual_value - 1, memory_order_relaxed, memory_order_relaxed))
247 				{
248 					return true;
249 				}
250 			}
251 			return false;
252 		#endif
253 	}
254 
pthreadpool_fence_acquire()255 	static inline void pthreadpool_fence_acquire() {
256 		atomic_thread_fence(memory_order_acquire);
257 	}
258 
pthreadpool_fence_release()259 	static inline void pthreadpool_fence_release() {
260 		atomic_thread_fence(memory_order_release);
261 	}
262 #elif defined(__GNUC__)
263 	typedef uint32_t volatile pthreadpool_atomic_uint32_t;
264 	typedef size_t volatile   pthreadpool_atomic_size_t;
265 	typedef void* volatile    pthreadpool_atomic_void_p;
266 
pthreadpool_load_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address)267 	static inline uint32_t pthreadpool_load_relaxed_uint32_t(
268 		pthreadpool_atomic_uint32_t* address)
269 	{
270 		return *address;
271 	}
272 
pthreadpool_load_relaxed_size_t(pthreadpool_atomic_size_t * address)273 	static inline size_t pthreadpool_load_relaxed_size_t(
274 		pthreadpool_atomic_size_t* address)
275 	{
276 		return *address;
277 	}
278 
pthreadpool_load_relaxed_void_p(pthreadpool_atomic_void_p * address)279 	static inline void* pthreadpool_load_relaxed_void_p(
280 		pthreadpool_atomic_void_p* address)
281 	{
282 		return *address;
283 	}
284 
pthreadpool_load_acquire_uint32_t(pthreadpool_atomic_uint32_t * address)285 	static inline uint32_t pthreadpool_load_acquire_uint32_t(
286 		pthreadpool_atomic_uint32_t* address)
287 	{
288 		return *address;
289 	}
290 
pthreadpool_load_acquire_size_t(pthreadpool_atomic_size_t * address)291 	static inline size_t pthreadpool_load_acquire_size_t(
292 		pthreadpool_atomic_size_t* address)
293 	{
294 		return *address;
295 	}
296 
pthreadpool_store_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)297 	static inline void pthreadpool_store_relaxed_uint32_t(
298 		pthreadpool_atomic_uint32_t* address,
299 		uint32_t value)
300 	{
301 		*address = value;
302 	}
303 
pthreadpool_store_relaxed_size_t(pthreadpool_atomic_size_t * address,size_t value)304 	static inline void pthreadpool_store_relaxed_size_t(
305 		pthreadpool_atomic_size_t* address,
306 		size_t value)
307 	{
308 		*address = value;
309 	}
310 
pthreadpool_store_relaxed_void_p(pthreadpool_atomic_void_p * address,void * value)311 	static inline void pthreadpool_store_relaxed_void_p(
312 		pthreadpool_atomic_void_p* address,
313 		void* value)
314 	{
315 		*address = value;
316 	}
317 
pthreadpool_store_release_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)318 	static inline void pthreadpool_store_release_uint32_t(
319 		pthreadpool_atomic_uint32_t* address,
320 		uint32_t value)
321 	{
322 		*address = value;
323 	}
324 
pthreadpool_store_release_size_t(pthreadpool_atomic_size_t * address,size_t value)325 	static inline void pthreadpool_store_release_size_t(
326 		pthreadpool_atomic_size_t* address,
327 		size_t value)
328 	{
329 		*address = value;
330 	}
331 
pthreadpool_decrement_fetch_relaxed_size_t(pthreadpool_atomic_size_t * address)332 	static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
333 		pthreadpool_atomic_size_t* address)
334 	{
335 		return __sync_sub_and_fetch(address, 1);
336 	}
337 
pthreadpool_decrement_fetch_release_size_t(pthreadpool_atomic_size_t * address)338 	static inline size_t pthreadpool_decrement_fetch_release_size_t(
339 		pthreadpool_atomic_size_t* address)
340 	{
341 		return __sync_sub_and_fetch(address, 1);
342 	}
343 
pthreadpool_decrement_fetch_acquire_release_size_t(pthreadpool_atomic_size_t * address)344 	static inline size_t pthreadpool_decrement_fetch_acquire_release_size_t(
345 		pthreadpool_atomic_size_t* address)
346 	{
347 		return __sync_sub_and_fetch(address, 1);
348 	}
349 
pthreadpool_try_decrement_relaxed_size_t(pthreadpool_atomic_size_t * value)350 	static inline bool pthreadpool_try_decrement_relaxed_size_t(
351 		pthreadpool_atomic_size_t* value)
352 	{
353 		size_t actual_value = *value;
354 		while (actual_value != 0) {
355 			const size_t new_value = actual_value - 1;
356 			const size_t expected_value = actual_value;
357 			actual_value = __sync_val_compare_and_swap(value, expected_value, new_value);
358 			if (actual_value == expected_value) {
359 				return true;
360 			}
361 		}
362 		return false;
363 	}
364 
pthreadpool_fence_acquire()365 	static inline void pthreadpool_fence_acquire() {
366 		__sync_synchronize();
367 	}
368 
pthreadpool_fence_release()369 	static inline void pthreadpool_fence_release() {
370 		__sync_synchronize();
371 	}
372 #elif defined(_MSC_VER) && defined(_M_ARM)
373 	typedef volatile uint32_t pthreadpool_atomic_uint32_t;
374 	typedef volatile size_t   pthreadpool_atomic_size_t;
375 	typedef void *volatile    pthreadpool_atomic_void_p;
376 
pthreadpool_load_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address)377 	static inline uint32_t pthreadpool_load_relaxed_uint32_t(
378 		pthreadpool_atomic_uint32_t* address)
379 	{
380 		return (uint32_t) __iso_volatile_load32((const volatile __int32*) address);
381 	}
382 
pthreadpool_load_relaxed_size_t(pthreadpool_atomic_size_t * address)383 	static inline size_t pthreadpool_load_relaxed_size_t(
384 		pthreadpool_atomic_size_t* address)
385 	{
386 		return (size_t) __iso_volatile_load32((const volatile __int32*) address);
387 	}
388 
pthreadpool_load_relaxed_void_p(pthreadpool_atomic_void_p * address)389 	static inline void* pthreadpool_load_relaxed_void_p(
390 		pthreadpool_atomic_void_p* address)
391 	{
392 		return (void*) __iso_volatile_load32((const volatile __int32*) address);
393 	}
394 
pthreadpool_load_acquire_uint32_t(pthreadpool_atomic_uint32_t * address)395 	static inline uint32_t pthreadpool_load_acquire_uint32_t(
396 		pthreadpool_atomic_uint32_t* address)
397 	{
398 		const uint32_t value = (uint32_t) __iso_volatile_load32((const volatile __int32*) address);
399 		__dmb(_ARM_BARRIER_ISH);
400 		_ReadBarrier();
401 		return value;
402 	}
403 
pthreadpool_load_acquire_size_t(pthreadpool_atomic_size_t * address)404 	static inline size_t pthreadpool_load_acquire_size_t(
405 		pthreadpool_atomic_size_t* address)
406 	{
407 		const size_t value = (size_t) __iso_volatile_load32((const volatile __int32*) address);
408 		__dmb(_ARM_BARRIER_ISH);
409 		_ReadBarrier();
410 		return value;
411 	}
412 
pthreadpool_store_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)413 	static inline void pthreadpool_store_relaxed_uint32_t(
414 		pthreadpool_atomic_uint32_t* address,
415 		uint32_t value)
416 	{
417 		__iso_volatile_store32((volatile __int32*) address, (__int32) value);
418 	}
419 
pthreadpool_store_relaxed_size_t(pthreadpool_atomic_size_t * address,size_t value)420 	static inline void pthreadpool_store_relaxed_size_t(
421 		pthreadpool_atomic_size_t* address,
422 		size_t value)
423 	{
424 		__iso_volatile_store32((volatile __int32*) address, (__int32) value);
425 	}
426 
pthreadpool_store_relaxed_void_p(pthreadpool_atomic_void_p * address,void * value)427 	static inline void pthreadpool_store_relaxed_void_p(
428 		pthreadpool_atomic_void_p* address,
429 		void* value)
430 	{
431 		__iso_volatile_store32((volatile __int32*) address, (__int32) value);
432 	}
433 
pthreadpool_store_release_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)434 	static inline void pthreadpool_store_release_uint32_t(
435 		pthreadpool_atomic_uint32_t* address,
436 		uint32_t value)
437 	{
438 		_WriteBarrier();
439 		__dmb(_ARM_BARRIER_ISH);
440 		__iso_volatile_store32((volatile __int32*) address, (__int32) value);
441 	}
442 
pthreadpool_store_release_size_t(pthreadpool_atomic_size_t * address,size_t value)443 	static inline void pthreadpool_store_release_size_t(
444 		pthreadpool_atomic_size_t* address,
445 		size_t value)
446 	{
447 		_WriteBarrier();
448 		__dmb(_ARM_BARRIER_ISH);
449 		__iso_volatile_store32((volatile __int32*) address, (__int32) value);
450 	}
451 
pthreadpool_decrement_fetch_relaxed_size_t(pthreadpool_atomic_size_t * address)452 	static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
453 		pthreadpool_atomic_size_t* address)
454 	{
455 		return (size_t) _InterlockedDecrement_nf((volatile long*) address);
456 	}
457 
pthreadpool_decrement_fetch_release_size_t(pthreadpool_atomic_size_t * address)458 	static inline size_t pthreadpool_decrement_fetch_release_size_t(
459 		pthreadpool_atomic_size_t* address)
460 	{
461 		return (size_t) _InterlockedDecrement_rel((volatile long*) address);
462 	}
463 
pthreadpool_decrement_fetch_acquire_release_size_t(pthreadpool_atomic_size_t * address)464 	static inline size_t pthreadpool_decrement_fetch_acquire_release_size_t(
465 		pthreadpool_atomic_size_t* address)
466 	{
467 		return (size_t) _InterlockedDecrement((volatile long*) address);
468 	}
469 
pthreadpool_try_decrement_relaxed_size_t(pthreadpool_atomic_size_t * value)470 	static inline bool pthreadpool_try_decrement_relaxed_size_t(
471 		pthreadpool_atomic_size_t* value)
472 	{
473 		size_t actual_value = (size_t) __iso_volatile_load32((const volatile __int32*) value);
474 		while (actual_value != 0) {
475 			const size_t new_value = actual_value - 1;
476 			const size_t expected_value = actual_value;
477 			actual_value = _InterlockedCompareExchange_nf(
478 				(volatile long*) value, (long) new_value, (long) expected_value);
479 			if (actual_value == expected_value) {
480 				return true;
481 			}
482 		}
483 		return false;
484 	}
485 
pthreadpool_fence_acquire()486 	static inline void pthreadpool_fence_acquire() {
487 		__dmb(_ARM_BARRIER_ISH);
488 		_ReadBarrier();
489 	}
490 
pthreadpool_fence_release()491 	static inline void pthreadpool_fence_release() {
492 		_WriteBarrier();
493 		__dmb(_ARM_BARRIER_ISH);
494 	}
495 #elif defined(_MSC_VER) && defined(_M_ARM64)
496 	typedef volatile uint32_t pthreadpool_atomic_uint32_t;
497 	typedef volatile size_t   pthreadpool_atomic_size_t;
498 	typedef void *volatile    pthreadpool_atomic_void_p;
499 
pthreadpool_load_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address)500 	static inline uint32_t pthreadpool_load_relaxed_uint32_t(
501 		pthreadpool_atomic_uint32_t* address)
502 	{
503 		return (uint32_t) __iso_volatile_load32((const volatile __int32*) address);
504 	}
505 
pthreadpool_load_relaxed_size_t(pthreadpool_atomic_size_t * address)506 	static inline size_t pthreadpool_load_relaxed_size_t(
507 		pthreadpool_atomic_size_t* address)
508 	{
509 		return (size_t) __iso_volatile_load64((const volatile __int64*) address);
510 	}
511 
pthreadpool_load_relaxed_void_p(pthreadpool_atomic_void_p * address)512 	static inline void* pthreadpool_load_relaxed_void_p(
513 		pthreadpool_atomic_void_p* address)
514 	{
515 		return (void*) __iso_volatile_load64((const volatile __int64*) address);
516 	}
517 
pthreadpool_load_acquire_uint32_t(pthreadpool_atomic_uint32_t * address)518 	static inline uint32_t pthreadpool_load_acquire_uint32_t(
519 		pthreadpool_atomic_uint32_t* address)
520 	{
521 		return (uint32_t) __ldar32((volatile unsigned __int32*) address);
522 	}
523 
pthreadpool_load_acquire_size_t(pthreadpool_atomic_size_t * address)524 	static inline size_t pthreadpool_load_acquire_size_t(
525 		pthreadpool_atomic_size_t* address)
526 	{
527 		return (size_t) __ldar64((volatile unsigned __int64*) address);
528 	}
529 
pthreadpool_store_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)530 	static inline void pthreadpool_store_relaxed_uint32_t(
531 		pthreadpool_atomic_uint32_t* address,
532 		uint32_t value)
533 	{
534 		__iso_volatile_store32((volatile __int32*) address, (__int32) value);
535 	}
536 
pthreadpool_store_relaxed_size_t(pthreadpool_atomic_size_t * address,size_t value)537 	static inline void pthreadpool_store_relaxed_size_t(
538 		pthreadpool_atomic_size_t* address,
539 		size_t value)
540 	{
541 		__iso_volatile_store64((volatile __int64*) address, (__int64) value);
542 	}
543 
pthreadpool_store_relaxed_void_p(pthreadpool_atomic_void_p * address,void * value)544 	static inline void pthreadpool_store_relaxed_void_p(
545 		pthreadpool_atomic_void_p* address,
546 		void* value)
547 	{
548 		__iso_volatile_store64((volatile __int64*) address, (__int64) value);
549 	}
550 
pthreadpool_store_release_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)551 	static inline void pthreadpool_store_release_uint32_t(
552 		pthreadpool_atomic_uint32_t* address,
553 		uint32_t value)
554 	{
555 		_WriteBarrier();
556 		__stlr32((unsigned __int32 volatile*) address, (unsigned __int32) value);
557 	}
558 
pthreadpool_store_release_size_t(pthreadpool_atomic_size_t * address,size_t value)559 	static inline void pthreadpool_store_release_size_t(
560 		pthreadpool_atomic_size_t* address,
561 		size_t value)
562 	{
563 		_WriteBarrier();
564 		__stlr64((unsigned __int64 volatile*) address, (unsigned __int64) value);
565 	}
566 
pthreadpool_decrement_fetch_relaxed_size_t(pthreadpool_atomic_size_t * address)567 	static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
568 		pthreadpool_atomic_size_t* address)
569 	{
570 		return (size_t) _InterlockedDecrement64_nf((volatile __int64*) address);
571 	}
572 
pthreadpool_decrement_fetch_release_size_t(pthreadpool_atomic_size_t * address)573 	static inline size_t pthreadpool_decrement_fetch_release_size_t(
574 		pthreadpool_atomic_size_t* address)
575 	{
576 		return (size_t) _InterlockedDecrement64_rel((volatile __int64*) address);
577 	}
578 
pthreadpool_decrement_fetch_acquire_release_size_t(pthreadpool_atomic_size_t * address)579 	static inline size_t pthreadpool_decrement_fetch_acquire_release_size_t(
580 		pthreadpool_atomic_size_t* address)
581 	{
582 		return (size_t) _InterlockedDecrement64((volatile __int64*) address);
583 	}
584 
pthreadpool_try_decrement_relaxed_size_t(pthreadpool_atomic_size_t * value)585 	static inline bool pthreadpool_try_decrement_relaxed_size_t(
586 		pthreadpool_atomic_size_t* value)
587 	{
588 		size_t actual_value = (size_t) __iso_volatile_load64((const volatile __int64*) value);
589 		while (actual_value != 0) {
590 			const size_t new_value = actual_value - 1;
591 			const size_t expected_value = actual_value;
592 			actual_value = _InterlockedCompareExchange64_nf(
593 				(volatile __int64*) value, (__int64) new_value, (__int64) expected_value);
594 			if (actual_value == expected_value) {
595 				return true;
596 			}
597 		}
598 		return false;
599 	}
600 
pthreadpool_fence_acquire()601 	static inline void pthreadpool_fence_acquire() {
602 		__dmb(_ARM64_BARRIER_ISHLD);
603 		_ReadBarrier();
604 	}
605 
pthreadpool_fence_release()606 	static inline void pthreadpool_fence_release() {
607 		_WriteBarrier();
608 		__dmb(_ARM64_BARRIER_ISH);
609 	}
610 #elif defined(_MSC_VER) && defined(_M_IX86)
611 	typedef volatile uint32_t pthreadpool_atomic_uint32_t;
612 	typedef volatile size_t   pthreadpool_atomic_size_t;
613 	typedef void *volatile    pthreadpool_atomic_void_p;
614 
pthreadpool_load_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address)615 	static inline uint32_t pthreadpool_load_relaxed_uint32_t(
616 		pthreadpool_atomic_uint32_t* address)
617 	{
618 		return *address;
619 	}
620 
pthreadpool_load_relaxed_size_t(pthreadpool_atomic_size_t * address)621 	static inline size_t pthreadpool_load_relaxed_size_t(
622 		pthreadpool_atomic_size_t* address)
623 	{
624 		return *address;
625 	}
626 
pthreadpool_load_relaxed_void_p(pthreadpool_atomic_void_p * address)627 	static inline void* pthreadpool_load_relaxed_void_p(
628 		pthreadpool_atomic_void_p* address)
629 	{
630 		return *address;
631 	}
632 
pthreadpool_load_acquire_uint32_t(pthreadpool_atomic_uint32_t * address)633 	static inline uint32_t pthreadpool_load_acquire_uint32_t(
634 		pthreadpool_atomic_uint32_t* address)
635 	{
636 		/* x86 loads always have acquire semantics; use only a compiler barrier */
637 		const uint32_t value = *address;
638 		_ReadBarrier();
639 		return value;
640 	}
641 
pthreadpool_load_acquire_size_t(pthreadpool_atomic_size_t * address)642 	static inline size_t pthreadpool_load_acquire_size_t(
643 		pthreadpool_atomic_size_t* address)
644 	{
645 		/* x86 loads always have acquire semantics; use only a compiler barrier */
646 		const size_t value = *address;
647 		_ReadBarrier();
648 		return value;
649 	}
650 
pthreadpool_store_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)651 	static inline void pthreadpool_store_relaxed_uint32_t(
652 		pthreadpool_atomic_uint32_t* address,
653 		uint32_t value)
654 	{
655 		*address = value;
656 	}
657 
pthreadpool_store_relaxed_size_t(pthreadpool_atomic_size_t * address,size_t value)658 	static inline void pthreadpool_store_relaxed_size_t(
659 		pthreadpool_atomic_size_t* address,
660 		size_t value)
661 	{
662 		*address = value;
663 	}
664 
pthreadpool_store_relaxed_void_p(pthreadpool_atomic_void_p * address,void * value)665 	static inline void pthreadpool_store_relaxed_void_p(
666 		pthreadpool_atomic_void_p* address,
667 		void* value)
668 	{
669 		*address = value;
670 	}
671 
pthreadpool_store_release_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)672 	static inline void pthreadpool_store_release_uint32_t(
673 		pthreadpool_atomic_uint32_t* address,
674 		uint32_t value)
675 	{
676 		/* x86 stores always have release semantics; use only a compiler barrier */
677 		_WriteBarrier();
678 		*address = value;
679 	}
680 
pthreadpool_store_release_size_t(pthreadpool_atomic_size_t * address,size_t value)681 	static inline void pthreadpool_store_release_size_t(
682 		pthreadpool_atomic_size_t* address,
683 		size_t value)
684 	{
685 		/* x86 stores always have release semantics; use only a compiler barrier */
686 		_WriteBarrier();
687 		*address = value;
688 	}
689 
pthreadpool_decrement_fetch_relaxed_size_t(pthreadpool_atomic_size_t * address)690 	static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
691 		pthreadpool_atomic_size_t* address)
692 	{
693 		return (size_t) _InterlockedDecrement((volatile long*) address);
694 	}
695 
pthreadpool_decrement_fetch_release_size_t(pthreadpool_atomic_size_t * address)696 	static inline size_t pthreadpool_decrement_fetch_release_size_t(
697 		pthreadpool_atomic_size_t* address)
698 	{
699 		return (size_t) _InterlockedDecrement((volatile long*) address);
700 	}
701 
pthreadpool_decrement_fetch_acquire_release_size_t(pthreadpool_atomic_size_t * address)702 	static inline size_t pthreadpool_decrement_fetch_acquire_release_size_t(
703 		pthreadpool_atomic_size_t* address)
704 	{
705 		return (size_t) _InterlockedDecrement((volatile long*) address);
706 	}
707 
pthreadpool_try_decrement_relaxed_size_t(pthreadpool_atomic_size_t * value)708 	static inline bool pthreadpool_try_decrement_relaxed_size_t(
709 		pthreadpool_atomic_size_t* value)
710 	{
711 		size_t actual_value = *value;
712 		while (actual_value != 0) {
713 			const size_t new_value = actual_value - 1;
714 			const size_t expected_value = actual_value;
715 			actual_value = _InterlockedCompareExchange(
716 				(volatile long*) value, (long) new_value, (long) expected_value);
717 			if (actual_value == expected_value) {
718 				return true;
719 			}
720 		}
721 		return false;
722 	}
723 
pthreadpool_fence_acquire()724 	static inline void pthreadpool_fence_acquire() {
725 		_mm_lfence();
726 	}
727 
pthreadpool_fence_release()728 	static inline void pthreadpool_fence_release() {
729 		_mm_sfence();
730 	}
731 #elif defined(_MSC_VER) && defined(_M_X64)
732 	typedef volatile uint32_t pthreadpool_atomic_uint32_t;
733 	typedef volatile size_t   pthreadpool_atomic_size_t;
734 	typedef void *volatile    pthreadpool_atomic_void_p;
735 
pthreadpool_load_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address)736 	static inline uint32_t pthreadpool_load_relaxed_uint32_t(
737 		pthreadpool_atomic_uint32_t* address)
738 	{
739 		return *address;
740 	}
741 
pthreadpool_load_relaxed_size_t(pthreadpool_atomic_size_t * address)742 	static inline size_t pthreadpool_load_relaxed_size_t(
743 		pthreadpool_atomic_size_t* address)
744 	{
745 		return *address;
746 	}
747 
pthreadpool_load_relaxed_void_p(pthreadpool_atomic_void_p * address)748 	static inline void* pthreadpool_load_relaxed_void_p(
749 		pthreadpool_atomic_void_p* address)
750 	{
751 		return *address;
752 	}
753 
pthreadpool_load_acquire_uint32_t(pthreadpool_atomic_uint32_t * address)754 	static inline uint32_t pthreadpool_load_acquire_uint32_t(
755 		pthreadpool_atomic_uint32_t* address)
756 	{
757 		/* x86-64 loads always have acquire semantics; use only a compiler barrier */
758 		const uint32_t value = *address;
759 		_ReadBarrier();
760 		return value;
761 	}
762 
pthreadpool_load_acquire_size_t(pthreadpool_atomic_size_t * address)763 	static inline size_t pthreadpool_load_acquire_size_t(
764 		pthreadpool_atomic_size_t* address)
765 	{
766 		/* x86-64 loads always have acquire semantics; use only a compiler barrier */
767 		const size_t value = *address;
768 		_ReadBarrier();
769 		return value;
770 	}
771 
pthreadpool_store_relaxed_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)772 	static inline void pthreadpool_store_relaxed_uint32_t(
773 		pthreadpool_atomic_uint32_t* address,
774 		uint32_t value)
775 	{
776 		*address = value;
777 	}
778 
pthreadpool_store_relaxed_size_t(pthreadpool_atomic_size_t * address,size_t value)779 	static inline void pthreadpool_store_relaxed_size_t(
780 		pthreadpool_atomic_size_t* address,
781 		size_t value)
782 	{
783 		*address = value;
784 	}
785 
pthreadpool_store_relaxed_void_p(pthreadpool_atomic_void_p * address,void * value)786 	static inline void pthreadpool_store_relaxed_void_p(
787 		pthreadpool_atomic_void_p* address,
788 		void* value)
789 	{
790 		*address = value;
791 	}
792 
pthreadpool_store_release_uint32_t(pthreadpool_atomic_uint32_t * address,uint32_t value)793 	static inline void pthreadpool_store_release_uint32_t(
794 		pthreadpool_atomic_uint32_t* address,
795 		uint32_t value)
796 	{
797 		/* x86-64 stores always have release semantics; use only a compiler barrier */
798 		_WriteBarrier();
799 		*address = value;
800 	}
801 
pthreadpool_store_release_size_t(pthreadpool_atomic_size_t * address,size_t value)802 	static inline void pthreadpool_store_release_size_t(
803 		pthreadpool_atomic_size_t* address,
804 		size_t value)
805 	{
806 		/* x86-64 stores always have release semantics; use only a compiler barrier */
807 		_WriteBarrier();
808 		*address = value;
809 	}
810 
pthreadpool_decrement_fetch_relaxed_size_t(pthreadpool_atomic_size_t * address)811 	static inline size_t pthreadpool_decrement_fetch_relaxed_size_t(
812 		pthreadpool_atomic_size_t* address)
813 	{
814 		return (size_t) _InterlockedDecrement64((volatile __int64*) address);
815 	}
816 
pthreadpool_decrement_fetch_release_size_t(pthreadpool_atomic_size_t * address)817 	static inline size_t pthreadpool_decrement_fetch_release_size_t(
818 		pthreadpool_atomic_size_t* address)
819 	{
820 		return (size_t) _InterlockedDecrement64((volatile __int64*) address);
821 	}
822 
pthreadpool_decrement_fetch_acquire_release_size_t(pthreadpool_atomic_size_t * address)823 	static inline size_t pthreadpool_decrement_fetch_acquire_release_size_t(
824 		pthreadpool_atomic_size_t* address)
825 	{
826 		return (size_t) _InterlockedDecrement64((volatile __int64*) address);
827 	}
828 
pthreadpool_try_decrement_relaxed_size_t(pthreadpool_atomic_size_t * value)829 	static inline bool pthreadpool_try_decrement_relaxed_size_t(
830 		pthreadpool_atomic_size_t* value)
831 	{
832 		size_t actual_value = *value;
833 		while (actual_value != 0) {
834 			const size_t new_value = actual_value - 1;
835 			const size_t expected_value = actual_value;
836 			actual_value = _InterlockedCompareExchange64(
837 				(volatile __int64*) value, (__int64) new_value, (__int64) expected_value);
838 			if (actual_value == expected_value) {
839 				return true;
840 			}
841 		}
842 		return false;
843 	}
844 
pthreadpool_fence_acquire()845 	static inline void pthreadpool_fence_acquire() {
846 		_mm_lfence();
847 		_ReadBarrier();
848 	}
849 
pthreadpool_fence_release()850 	static inline void pthreadpool_fence_release() {
851 		_WriteBarrier();
852 		_mm_sfence();
853 	}
854 #else
855 	#error "Platform-specific implementation of threadpool-atomics.h required"
856 #endif
857 
/* pthreadpool_yield: hint to the processor that the caller is spin-waiting. */
#if defined(__ARM_ACLE) || defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC))
	static inline void pthreadpool_yield(void) {
		__yield();
	}
#elif defined(__GNUC__) && (defined(__ARM_ARCH) && (__ARM_ARCH >= 7) || (defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6KZ__)) && !defined(__thumb__))
	static inline void pthreadpool_yield(void) {
		__asm__ __volatile__("yield");
	}
#elif defined(__i386__) || defined(__i686__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
	static inline void pthreadpool_yield(void) {
		_mm_pause();
	}
#else
	/* No spin-wait hint on this target; fall back to an acquire fence. */
	static inline void pthreadpool_yield(void) {
		pthreadpool_fence_acquire();
	}
#endif