1 /*
2 * kmp_atomic.cpp -- ATOMIC implementation routines
3 */
4
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "kmp_atomic.h"
14 #include "kmp.h" // TRUE, asm routines prototypes
15
16 typedef unsigned char uchar;
17 typedef unsigned short ushort;
18
19 /*!
20 @defgroup ATOMIC_OPS Atomic Operations
21 These functions are used for implementing the many different varieties of atomic
22 operations.
23
24 The compiler is at liberty to inline atomic operations that are naturally
25 supported by the target architecture. For instance on IA-32 architecture an
26 atomic like this can be inlined
27 @code
28 static int s = 0;
29 #pragma omp atomic
30 s++;
31 @endcode
32 using the single instruction: `lock; incl s`
33
34 However the runtime does provide entrypoints for these operations to support
35 compilers that choose not to inline them. (For instance,
36 `__kmpc_atomic_fixed4_add` could be used to perform the increment above.)
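
For illustration only, a compiler that chooses not to inline the increment
above might instead emit a call along these lines, where `loc_ref` and `gtid`
stand for the source-location and global-thread-id arguments it supplies:
@code
__kmpc_atomic_fixed4_add(loc_ref, gtid, &s, 1); // atomically performs s += 1
@endcode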
37
38 The names of the functions are encoded by using the data type name and the
39 operation name, as in these tables.
40
41 Data Type | Data type encoding
42 -----------|---------------
43 int8_t | `fixed1`
44 uint8_t | `fixed1u`
45 int16_t | `fixed2`
46 uint16_t | `fixed2u`
47 int32_t | `fixed4`
48 uint32_t | `fixed4u`
49 int64_t | `fixed8`
50 uint64_t | `fixed8u`
51 float | `float4`
52 double | `float8`
53 float10 (80-bit 8087 extended float) | `float10`
54 complex<float> | `cmplx4`
55 complex<double> | `cmplx8`
56 complex<float10> | `cmplx10`
57 <br>
58
59 Operation | Operation encoding
60 ----------|-------------------
61 + | add
62 - | sub
63 \* | mul
64 / | div
65 & | andb
66 << | shl
67 \>\> | shr
68 \| | orb
69 ^ | xor
70 && | andl
71 \|\| | orl
72 maximum | max
73 minimum | min
74 .eqv. | eqv
75 .neqv. | neqv
76
77 <br>
78 For non-commutative operations, `_rev` can also be added for the reversed
79 operation. For the functions that capture the result, the suffix `_cpt` is
80 added.
81
82 Update Functions
83 ================
84 The general form of an atomic function that just performs an update (without a
85 `capture`) is
86 @code
87 void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE *
88 lhs, TYPE rhs );
89 @endcode
90 @param id_ref a pointer to the source location
91 @param gtid the global thread id
92 @param lhs a pointer to the left operand
93 @param rhs the right operand
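
As a concrete instance of this pattern (an illustration of the naming scheme,
not a new interface), the `float8` (i.e. `double`) addition entrypoint has the
prototype
@code
void __kmpc_atomic_float8_add(ident_t *id_ref, int gtid, double *lhs,
                              double rhs); // atomically performs *lhs += rhs
@endcode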
94
95 `capture` functions
96 ===================
97 The capture functions perform an atomic update and return a result, which is
98 either the value before the update or the value after it. They take an
99 additional argument to determine which result is returned.
100 Their general form is therefore
101 @code
102 TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE *
103 lhs, TYPE rhs, int flag );
104 @endcode
105 @param id_ref a pointer to the source location
106 @param gtid the global thread id
107 @param lhs a pointer to the left operand
108 @param rhs the right operand
109 @param flag one if the result is to be captured *after* the operation, zero if
110 captured *before*.
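
For example, using the `fixed4` addition capture routine (illustrative usage
only; `id_ref` and `gtid` are the usual source-location and thread-id
arguments, and no concurrent updates to `v` are assumed):
@code
kmp_int32 v = 10;
kmp_int32 after = __kmpc_atomic_fixed4_add_cpt(id_ref, gtid, &v, 5, 1);
// v == 15, after == 15   (value captured *after* the addition)
kmp_int32 before = __kmpc_atomic_fixed4_add_cpt(id_ref, gtid, &v, 5, 0);
// v == 20, before == 15  (value captured *before* the addition)
@endcode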
111
112 The one exception to this is the `complex<float>` type, where the result is
113 not returned; instead it is stored through an extra pointer argument.
114
115 They look like
116 @code
117 void __kmpc_atomic_cmplx4_<op>_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 *
118 lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
119 @endcode
120
121 Read and Write Operations
122 =========================
123 The OpenMP<sup>*</sup> standard now supports atomic operations that simply
124 ensure that the value is read or written atomically, with no modification
125 performed. In many cases on IA-32 architecture these operations can be inlined
126 since the architecture guarantees that no tearing occurs on aligned objects
127 accessed with a single memory operation of up to 64 bits in size.
128
129 The general form of the read operations is
130 @code
131 TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc );
132 @endcode
133
134 For the write operations the form is
135 @code
136 void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs
137 );
138 @endcode
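
For instance, an atomic read and an atomic write of a `double` location `x`
would look like this (illustrative usage, with `id_ref` and `gtid` as above):
@code
double tmp = __kmpc_atomic_float8_rd(id_ref, gtid, &x); // atomic read of x
__kmpc_atomic_float8_wr(id_ref, gtid, &x, 3.5);         // atomic write to x
@endcode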
139
140 Full list of functions
141 ======================
142 This leads to the generation of 376 atomic functions, as follows.
143
144 Functions for integers
145 ---------------------
146 There are versions here for integers of size 1, 2, 4 and 8 bytes, both signed
147 and unsigned (where that matters).
148 @code
149 __kmpc_atomic_fixed1_add
150 __kmpc_atomic_fixed1_add_cpt
151 __kmpc_atomic_fixed1_add_fp
152 __kmpc_atomic_fixed1_andb
153 __kmpc_atomic_fixed1_andb_cpt
154 __kmpc_atomic_fixed1_andl
155 __kmpc_atomic_fixed1_andl_cpt
156 __kmpc_atomic_fixed1_div
157 __kmpc_atomic_fixed1_div_cpt
158 __kmpc_atomic_fixed1_div_cpt_rev
159 __kmpc_atomic_fixed1_div_float8
160 __kmpc_atomic_fixed1_div_fp
161 __kmpc_atomic_fixed1_div_rev
162 __kmpc_atomic_fixed1_eqv
163 __kmpc_atomic_fixed1_eqv_cpt
164 __kmpc_atomic_fixed1_max
165 __kmpc_atomic_fixed1_max_cpt
166 __kmpc_atomic_fixed1_min
167 __kmpc_atomic_fixed1_min_cpt
168 __kmpc_atomic_fixed1_mul
169 __kmpc_atomic_fixed1_mul_cpt
170 __kmpc_atomic_fixed1_mul_float8
171 __kmpc_atomic_fixed1_mul_fp
172 __kmpc_atomic_fixed1_neqv
173 __kmpc_atomic_fixed1_neqv_cpt
174 __kmpc_atomic_fixed1_orb
175 __kmpc_atomic_fixed1_orb_cpt
176 __kmpc_atomic_fixed1_orl
177 __kmpc_atomic_fixed1_orl_cpt
178 __kmpc_atomic_fixed1_rd
179 __kmpc_atomic_fixed1_shl
180 __kmpc_atomic_fixed1_shl_cpt
181 __kmpc_atomic_fixed1_shl_cpt_rev
182 __kmpc_atomic_fixed1_shl_rev
183 __kmpc_atomic_fixed1_shr
184 __kmpc_atomic_fixed1_shr_cpt
185 __kmpc_atomic_fixed1_shr_cpt_rev
186 __kmpc_atomic_fixed1_shr_rev
187 __kmpc_atomic_fixed1_sub
188 __kmpc_atomic_fixed1_sub_cpt
189 __kmpc_atomic_fixed1_sub_cpt_rev
190 __kmpc_atomic_fixed1_sub_fp
191 __kmpc_atomic_fixed1_sub_rev
192 __kmpc_atomic_fixed1_swp
193 __kmpc_atomic_fixed1_wr
194 __kmpc_atomic_fixed1_xor
195 __kmpc_atomic_fixed1_xor_cpt
196 __kmpc_atomic_fixed1u_add_fp
197 __kmpc_atomic_fixed1u_sub_fp
198 __kmpc_atomic_fixed1u_mul_fp
199 __kmpc_atomic_fixed1u_div
200 __kmpc_atomic_fixed1u_div_cpt
201 __kmpc_atomic_fixed1u_div_cpt_rev
202 __kmpc_atomic_fixed1u_div_fp
203 __kmpc_atomic_fixed1u_div_rev
204 __kmpc_atomic_fixed1u_shr
205 __kmpc_atomic_fixed1u_shr_cpt
206 __kmpc_atomic_fixed1u_shr_cpt_rev
207 __kmpc_atomic_fixed1u_shr_rev
208 __kmpc_atomic_fixed2_add
209 __kmpc_atomic_fixed2_add_cpt
210 __kmpc_atomic_fixed2_add_fp
211 __kmpc_atomic_fixed2_andb
212 __kmpc_atomic_fixed2_andb_cpt
213 __kmpc_atomic_fixed2_andl
214 __kmpc_atomic_fixed2_andl_cpt
215 __kmpc_atomic_fixed2_div
216 __kmpc_atomic_fixed2_div_cpt
217 __kmpc_atomic_fixed2_div_cpt_rev
218 __kmpc_atomic_fixed2_div_float8
219 __kmpc_atomic_fixed2_div_fp
220 __kmpc_atomic_fixed2_div_rev
221 __kmpc_atomic_fixed2_eqv
222 __kmpc_atomic_fixed2_eqv_cpt
223 __kmpc_atomic_fixed2_max
224 __kmpc_atomic_fixed2_max_cpt
225 __kmpc_atomic_fixed2_min
226 __kmpc_atomic_fixed2_min_cpt
227 __kmpc_atomic_fixed2_mul
228 __kmpc_atomic_fixed2_mul_cpt
229 __kmpc_atomic_fixed2_mul_float8
230 __kmpc_atomic_fixed2_mul_fp
231 __kmpc_atomic_fixed2_neqv
232 __kmpc_atomic_fixed2_neqv_cpt
233 __kmpc_atomic_fixed2_orb
234 __kmpc_atomic_fixed2_orb_cpt
235 __kmpc_atomic_fixed2_orl
236 __kmpc_atomic_fixed2_orl_cpt
237 __kmpc_atomic_fixed2_rd
238 __kmpc_atomic_fixed2_shl
239 __kmpc_atomic_fixed2_shl_cpt
240 __kmpc_atomic_fixed2_shl_cpt_rev
241 __kmpc_atomic_fixed2_shl_rev
242 __kmpc_atomic_fixed2_shr
243 __kmpc_atomic_fixed2_shr_cpt
244 __kmpc_atomic_fixed2_shr_cpt_rev
245 __kmpc_atomic_fixed2_shr_rev
246 __kmpc_atomic_fixed2_sub
247 __kmpc_atomic_fixed2_sub_cpt
248 __kmpc_atomic_fixed2_sub_cpt_rev
249 __kmpc_atomic_fixed2_sub_fp
250 __kmpc_atomic_fixed2_sub_rev
251 __kmpc_atomic_fixed2_swp
252 __kmpc_atomic_fixed2_wr
253 __kmpc_atomic_fixed2_xor
254 __kmpc_atomic_fixed2_xor_cpt
255 __kmpc_atomic_fixed2u_add_fp
256 __kmpc_atomic_fixed2u_sub_fp
257 __kmpc_atomic_fixed2u_mul_fp
258 __kmpc_atomic_fixed2u_div
259 __kmpc_atomic_fixed2u_div_cpt
260 __kmpc_atomic_fixed2u_div_cpt_rev
261 __kmpc_atomic_fixed2u_div_fp
262 __kmpc_atomic_fixed2u_div_rev
263 __kmpc_atomic_fixed2u_shr
264 __kmpc_atomic_fixed2u_shr_cpt
265 __kmpc_atomic_fixed2u_shr_cpt_rev
266 __kmpc_atomic_fixed2u_shr_rev
267 __kmpc_atomic_fixed4_add
268 __kmpc_atomic_fixed4_add_cpt
269 __kmpc_atomic_fixed4_add_fp
270 __kmpc_atomic_fixed4_andb
271 __kmpc_atomic_fixed4_andb_cpt
272 __kmpc_atomic_fixed4_andl
273 __kmpc_atomic_fixed4_andl_cpt
274 __kmpc_atomic_fixed4_div
275 __kmpc_atomic_fixed4_div_cpt
276 __kmpc_atomic_fixed4_div_cpt_rev
277 __kmpc_atomic_fixed4_div_float8
278 __kmpc_atomic_fixed4_div_fp
279 __kmpc_atomic_fixed4_div_rev
280 __kmpc_atomic_fixed4_eqv
281 __kmpc_atomic_fixed4_eqv_cpt
282 __kmpc_atomic_fixed4_max
283 __kmpc_atomic_fixed4_max_cpt
284 __kmpc_atomic_fixed4_min
285 __kmpc_atomic_fixed4_min_cpt
286 __kmpc_atomic_fixed4_mul
287 __kmpc_atomic_fixed4_mul_cpt
288 __kmpc_atomic_fixed4_mul_float8
289 __kmpc_atomic_fixed4_mul_fp
290 __kmpc_atomic_fixed4_neqv
291 __kmpc_atomic_fixed4_neqv_cpt
292 __kmpc_atomic_fixed4_orb
293 __kmpc_atomic_fixed4_orb_cpt
294 __kmpc_atomic_fixed4_orl
295 __kmpc_atomic_fixed4_orl_cpt
296 __kmpc_atomic_fixed4_rd
297 __kmpc_atomic_fixed4_shl
298 __kmpc_atomic_fixed4_shl_cpt
299 __kmpc_atomic_fixed4_shl_cpt_rev
300 __kmpc_atomic_fixed4_shl_rev
301 __kmpc_atomic_fixed4_shr
302 __kmpc_atomic_fixed4_shr_cpt
303 __kmpc_atomic_fixed4_shr_cpt_rev
304 __kmpc_atomic_fixed4_shr_rev
305 __kmpc_atomic_fixed4_sub
306 __kmpc_atomic_fixed4_sub_cpt
307 __kmpc_atomic_fixed4_sub_cpt_rev
308 __kmpc_atomic_fixed4_sub_fp
309 __kmpc_atomic_fixed4_sub_rev
310 __kmpc_atomic_fixed4_swp
311 __kmpc_atomic_fixed4_wr
312 __kmpc_atomic_fixed4_xor
313 __kmpc_atomic_fixed4_xor_cpt
314 __kmpc_atomic_fixed4u_add_fp
315 __kmpc_atomic_fixed4u_sub_fp
316 __kmpc_atomic_fixed4u_mul_fp
317 __kmpc_atomic_fixed4u_div
318 __kmpc_atomic_fixed4u_div_cpt
319 __kmpc_atomic_fixed4u_div_cpt_rev
320 __kmpc_atomic_fixed4u_div_fp
321 __kmpc_atomic_fixed4u_div_rev
322 __kmpc_atomic_fixed4u_shr
323 __kmpc_atomic_fixed4u_shr_cpt
324 __kmpc_atomic_fixed4u_shr_cpt_rev
325 __kmpc_atomic_fixed4u_shr_rev
326 __kmpc_atomic_fixed8_add
327 __kmpc_atomic_fixed8_add_cpt
328 __kmpc_atomic_fixed8_add_fp
329 __kmpc_atomic_fixed8_andb
330 __kmpc_atomic_fixed8_andb_cpt
331 __kmpc_atomic_fixed8_andl
332 __kmpc_atomic_fixed8_andl_cpt
333 __kmpc_atomic_fixed8_div
334 __kmpc_atomic_fixed8_div_cpt
335 __kmpc_atomic_fixed8_div_cpt_rev
336 __kmpc_atomic_fixed8_div_float8
337 __kmpc_atomic_fixed8_div_fp
338 __kmpc_atomic_fixed8_div_rev
339 __kmpc_atomic_fixed8_eqv
340 __kmpc_atomic_fixed8_eqv_cpt
341 __kmpc_atomic_fixed8_max
342 __kmpc_atomic_fixed8_max_cpt
343 __kmpc_atomic_fixed8_min
344 __kmpc_atomic_fixed8_min_cpt
345 __kmpc_atomic_fixed8_mul
346 __kmpc_atomic_fixed8_mul_cpt
347 __kmpc_atomic_fixed8_mul_float8
348 __kmpc_atomic_fixed8_mul_fp
349 __kmpc_atomic_fixed8_neqv
350 __kmpc_atomic_fixed8_neqv_cpt
351 __kmpc_atomic_fixed8_orb
352 __kmpc_atomic_fixed8_orb_cpt
353 __kmpc_atomic_fixed8_orl
354 __kmpc_atomic_fixed8_orl_cpt
355 __kmpc_atomic_fixed8_rd
356 __kmpc_atomic_fixed8_shl
357 __kmpc_atomic_fixed8_shl_cpt
358 __kmpc_atomic_fixed8_shl_cpt_rev
359 __kmpc_atomic_fixed8_shl_rev
360 __kmpc_atomic_fixed8_shr
361 __kmpc_atomic_fixed8_shr_cpt
362 __kmpc_atomic_fixed8_shr_cpt_rev
363 __kmpc_atomic_fixed8_shr_rev
364 __kmpc_atomic_fixed8_sub
365 __kmpc_atomic_fixed8_sub_cpt
366 __kmpc_atomic_fixed8_sub_cpt_rev
367 __kmpc_atomic_fixed8_sub_fp
368 __kmpc_atomic_fixed8_sub_rev
369 __kmpc_atomic_fixed8_swp
370 __kmpc_atomic_fixed8_wr
371 __kmpc_atomic_fixed8_xor
372 __kmpc_atomic_fixed8_xor_cpt
373 __kmpc_atomic_fixed8u_add_fp
374 __kmpc_atomic_fixed8u_sub_fp
375 __kmpc_atomic_fixed8u_mul_fp
376 __kmpc_atomic_fixed8u_div
377 __kmpc_atomic_fixed8u_div_cpt
378 __kmpc_atomic_fixed8u_div_cpt_rev
379 __kmpc_atomic_fixed8u_div_fp
380 __kmpc_atomic_fixed8u_div_rev
381 __kmpc_atomic_fixed8u_shr
382 __kmpc_atomic_fixed8u_shr_cpt
383 __kmpc_atomic_fixed8u_shr_cpt_rev
384 __kmpc_atomic_fixed8u_shr_rev
385 @endcode
386
387 Functions for floating point
388 ----------------------------
389 There are versions here for floating point numbers of size 4, 8, 10 and 16
390 bytes. (Ten-byte floats are the x87 extended format, but are now rarely used.)
391 @code
392 __kmpc_atomic_float4_add
393 __kmpc_atomic_float4_add_cpt
394 __kmpc_atomic_float4_add_float8
395 __kmpc_atomic_float4_add_fp
396 __kmpc_atomic_float4_div
397 __kmpc_atomic_float4_div_cpt
398 __kmpc_atomic_float4_div_cpt_rev
399 __kmpc_atomic_float4_div_float8
400 __kmpc_atomic_float4_div_fp
401 __kmpc_atomic_float4_div_rev
402 __kmpc_atomic_float4_max
403 __kmpc_atomic_float4_max_cpt
404 __kmpc_atomic_float4_min
405 __kmpc_atomic_float4_min_cpt
406 __kmpc_atomic_float4_mul
407 __kmpc_atomic_float4_mul_cpt
408 __kmpc_atomic_float4_mul_float8
409 __kmpc_atomic_float4_mul_fp
410 __kmpc_atomic_float4_rd
411 __kmpc_atomic_float4_sub
412 __kmpc_atomic_float4_sub_cpt
413 __kmpc_atomic_float4_sub_cpt_rev
414 __kmpc_atomic_float4_sub_float8
415 __kmpc_atomic_float4_sub_fp
416 __kmpc_atomic_float4_sub_rev
417 __kmpc_atomic_float4_swp
418 __kmpc_atomic_float4_wr
419 __kmpc_atomic_float8_add
420 __kmpc_atomic_float8_add_cpt
421 __kmpc_atomic_float8_add_fp
422 __kmpc_atomic_float8_div
423 __kmpc_atomic_float8_div_cpt
424 __kmpc_atomic_float8_div_cpt_rev
425 __kmpc_atomic_float8_div_fp
426 __kmpc_atomic_float8_div_rev
427 __kmpc_atomic_float8_max
428 __kmpc_atomic_float8_max_cpt
429 __kmpc_atomic_float8_min
430 __kmpc_atomic_float8_min_cpt
431 __kmpc_atomic_float8_mul
432 __kmpc_atomic_float8_mul_cpt
433 __kmpc_atomic_float8_mul_fp
434 __kmpc_atomic_float8_rd
435 __kmpc_atomic_float8_sub
436 __kmpc_atomic_float8_sub_cpt
437 __kmpc_atomic_float8_sub_cpt_rev
438 __kmpc_atomic_float8_sub_fp
439 __kmpc_atomic_float8_sub_rev
440 __kmpc_atomic_float8_swp
441 __kmpc_atomic_float8_wr
442 __kmpc_atomic_float10_add
443 __kmpc_atomic_float10_add_cpt
444 __kmpc_atomic_float10_add_fp
445 __kmpc_atomic_float10_div
446 __kmpc_atomic_float10_div_cpt
447 __kmpc_atomic_float10_div_cpt_rev
448 __kmpc_atomic_float10_div_fp
449 __kmpc_atomic_float10_div_rev
450 __kmpc_atomic_float10_mul
451 __kmpc_atomic_float10_mul_cpt
452 __kmpc_atomic_float10_mul_fp
453 __kmpc_atomic_float10_rd
454 __kmpc_atomic_float10_sub
455 __kmpc_atomic_float10_sub_cpt
456 __kmpc_atomic_float10_sub_cpt_rev
457 __kmpc_atomic_float10_sub_fp
458 __kmpc_atomic_float10_sub_rev
459 __kmpc_atomic_float10_swp
460 __kmpc_atomic_float10_wr
461 __kmpc_atomic_float16_add
462 __kmpc_atomic_float16_add_cpt
463 __kmpc_atomic_float16_div
464 __kmpc_atomic_float16_div_cpt
465 __kmpc_atomic_float16_div_cpt_rev
466 __kmpc_atomic_float16_div_rev
467 __kmpc_atomic_float16_max
468 __kmpc_atomic_float16_max_cpt
469 __kmpc_atomic_float16_min
470 __kmpc_atomic_float16_min_cpt
471 __kmpc_atomic_float16_mul
472 __kmpc_atomic_float16_mul_cpt
473 __kmpc_atomic_float16_rd
474 __kmpc_atomic_float16_sub
475 __kmpc_atomic_float16_sub_cpt
476 __kmpc_atomic_float16_sub_cpt_rev
477 __kmpc_atomic_float16_sub_rev
478 __kmpc_atomic_float16_swp
479 __kmpc_atomic_float16_wr
480 @endcode
481
482 Functions for Complex types
483 ---------------------------
484 Functions for complex types whose component floating point variables are of size
485 4, 8, 10 or 16 bytes. The names here are based on the size of the component float,
486 *not* the size of the complex type. So `__kmpc_atomic_cmplx8_add` is an
487 operation on a `complex<double>` or `complex(kind=8)`, *not* `complex<float>`.
488
489 @code
490 __kmpc_atomic_cmplx4_add
491 __kmpc_atomic_cmplx4_add_cmplx8
492 __kmpc_atomic_cmplx4_add_cpt
493 __kmpc_atomic_cmplx4_div
494 __kmpc_atomic_cmplx4_div_cmplx8
495 __kmpc_atomic_cmplx4_div_cpt
496 __kmpc_atomic_cmplx4_div_cpt_rev
497 __kmpc_atomic_cmplx4_div_rev
498 __kmpc_atomic_cmplx4_mul
499 __kmpc_atomic_cmplx4_mul_cmplx8
500 __kmpc_atomic_cmplx4_mul_cpt
501 __kmpc_atomic_cmplx4_rd
502 __kmpc_atomic_cmplx4_sub
503 __kmpc_atomic_cmplx4_sub_cmplx8
504 __kmpc_atomic_cmplx4_sub_cpt
505 __kmpc_atomic_cmplx4_sub_cpt_rev
506 __kmpc_atomic_cmplx4_sub_rev
507 __kmpc_atomic_cmplx4_swp
508 __kmpc_atomic_cmplx4_wr
509 __kmpc_atomic_cmplx8_add
510 __kmpc_atomic_cmplx8_add_cpt
511 __kmpc_atomic_cmplx8_div
512 __kmpc_atomic_cmplx8_div_cpt
513 __kmpc_atomic_cmplx8_div_cpt_rev
514 __kmpc_atomic_cmplx8_div_rev
515 __kmpc_atomic_cmplx8_mul
516 __kmpc_atomic_cmplx8_mul_cpt
517 __kmpc_atomic_cmplx8_rd
518 __kmpc_atomic_cmplx8_sub
519 __kmpc_atomic_cmplx8_sub_cpt
520 __kmpc_atomic_cmplx8_sub_cpt_rev
521 __kmpc_atomic_cmplx8_sub_rev
522 __kmpc_atomic_cmplx8_swp
523 __kmpc_atomic_cmplx8_wr
524 __kmpc_atomic_cmplx10_add
525 __kmpc_atomic_cmplx10_add_cpt
526 __kmpc_atomic_cmplx10_div
527 __kmpc_atomic_cmplx10_div_cpt
528 __kmpc_atomic_cmplx10_div_cpt_rev
529 __kmpc_atomic_cmplx10_div_rev
530 __kmpc_atomic_cmplx10_mul
531 __kmpc_atomic_cmplx10_mul_cpt
532 __kmpc_atomic_cmplx10_rd
533 __kmpc_atomic_cmplx10_sub
534 __kmpc_atomic_cmplx10_sub_cpt
535 __kmpc_atomic_cmplx10_sub_cpt_rev
536 __kmpc_atomic_cmplx10_sub_rev
537 __kmpc_atomic_cmplx10_swp
538 __kmpc_atomic_cmplx10_wr
539 __kmpc_atomic_cmplx16_add
540 __kmpc_atomic_cmplx16_add_cpt
541 __kmpc_atomic_cmplx16_div
542 __kmpc_atomic_cmplx16_div_cpt
543 __kmpc_atomic_cmplx16_div_cpt_rev
544 __kmpc_atomic_cmplx16_div_rev
545 __kmpc_atomic_cmplx16_mul
546 __kmpc_atomic_cmplx16_mul_cpt
547 __kmpc_atomic_cmplx16_rd
548 __kmpc_atomic_cmplx16_sub
549 __kmpc_atomic_cmplx16_sub_cpt
550 __kmpc_atomic_cmplx16_sub_cpt_rev
551 __kmpc_atomic_cmplx16_swp
552 __kmpc_atomic_cmplx16_wr
553 @endcode
554 */
555
556 /*!
557 @ingroup ATOMIC_OPS
558 @{
559 */
560
561 /*
562 * Global vars
563 */
564
565 #ifndef KMP_GOMP_COMPAT
566 int __kmp_atomic_mode = 1; // Intel perf
567 #else
568 int __kmp_atomic_mode = 2; // GOMP compatibility
569 #endif /* KMP_GOMP_COMPAT */
570
571 KMP_ALIGN(128)
572
573 // Control access to all user coded atomics in Gnu compat mode
574 kmp_atomic_lock_t __kmp_atomic_lock;
575 // Control access to all user coded atomics for 1-byte fixed data types
576 kmp_atomic_lock_t __kmp_atomic_lock_1i;
577 // Control access to all user coded atomics for 2-byte fixed data types
578 kmp_atomic_lock_t __kmp_atomic_lock_2i;
579 // Control access to all user coded atomics for 4-byte fixed data types
580 kmp_atomic_lock_t __kmp_atomic_lock_4i;
581 // Control access to all user coded atomics for kmp_real32 data type
582 kmp_atomic_lock_t __kmp_atomic_lock_4r;
583 // Control access to all user coded atomics for 8-byte fixed data types
584 kmp_atomic_lock_t __kmp_atomic_lock_8i;
585 // Control access to all user coded atomics for kmp_real64 data type
586 kmp_atomic_lock_t __kmp_atomic_lock_8r;
587 // Control access to all user coded atomics for complex byte data type
588 kmp_atomic_lock_t __kmp_atomic_lock_8c;
589 // Control access to all user coded atomics for long double data type
590 kmp_atomic_lock_t __kmp_atomic_lock_10r;
591 // Control access to all user coded atomics for _Quad data type
592 kmp_atomic_lock_t __kmp_atomic_lock_16r;
593 // Control access to all user coded atomics for double complex data type
594 kmp_atomic_lock_t __kmp_atomic_lock_16c;
595 // Control access to all user coded atomics for long double complex type
596 kmp_atomic_lock_t __kmp_atomic_lock_20c;
597 // Control access to all user coded atomics for _Quad complex data type
598 kmp_atomic_lock_t __kmp_atomic_lock_32c;
599
600 /* 2007-03-02:
601 Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
602 on *_32 and *_32e. This is just a temporary workaround for the problem. It
603    seems the right solution is to write the OP_CMPXCHG and MIN_MAX_CMPXCHG
604    routines in assembly language. */
605 #define KMP_ATOMIC_VOLATILE volatile
606
607 #if (KMP_ARCH_X86) && KMP_HAVE_QUAD
608
609 static inline Quad_a4_t operator+(Quad_a4_t &lhs, Quad_a4_t &rhs) {
610 return lhs.q + rhs.q;
611 }
612 static inline Quad_a4_t operator-(Quad_a4_t &lhs, Quad_a4_t &rhs) {
613 return lhs.q - rhs.q;
614 }
615 static inline Quad_a4_t operator*(Quad_a4_t &lhs, Quad_a4_t &rhs) {
616 return lhs.q * rhs.q;
617 }
618 static inline Quad_a4_t operator/(Quad_a4_t &lhs, Quad_a4_t &rhs) {
619 return lhs.q / rhs.q;
620 }
621 static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
622 return lhs.q < rhs.q;
623 }
624 static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
625 return lhs.q > rhs.q;
626 }
627
628 static inline Quad_a16_t operator+(Quad_a16_t &lhs, Quad_a16_t &rhs) {
629 return lhs.q + rhs.q;
630 }
631 static inline Quad_a16_t operator-(Quad_a16_t &lhs, Quad_a16_t &rhs) {
632 return lhs.q - rhs.q;
633 }
634 static inline Quad_a16_t operator*(Quad_a16_t &lhs, Quad_a16_t &rhs) {
635 return lhs.q * rhs.q;
636 }
637 static inline Quad_a16_t operator/(Quad_a16_t &lhs, Quad_a16_t &rhs) {
638 return lhs.q / rhs.q;
639 }
640 static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
641 return lhs.q < rhs.q;
642 }
643 static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
644 return lhs.q > rhs.q;
645 }
646
647 static inline kmp_cmplx128_a4_t operator+(kmp_cmplx128_a4_t &lhs,
648 kmp_cmplx128_a4_t &rhs) {
649 return lhs.q + rhs.q;
650 }
651 static inline kmp_cmplx128_a4_t operator-(kmp_cmplx128_a4_t &lhs,
652 kmp_cmplx128_a4_t &rhs) {
653 return lhs.q - rhs.q;
654 }
655 static inline kmp_cmplx128_a4_t operator*(kmp_cmplx128_a4_t &lhs,
656 kmp_cmplx128_a4_t &rhs) {
657 return lhs.q * rhs.q;
658 }
659 static inline kmp_cmplx128_a4_t operator/(kmp_cmplx128_a4_t &lhs,
660 kmp_cmplx128_a4_t &rhs) {
661 return lhs.q / rhs.q;
662 }
663
664 static inline kmp_cmplx128_a16_t operator+(kmp_cmplx128_a16_t &lhs,
665 kmp_cmplx128_a16_t &rhs) {
666 return lhs.q + rhs.q;
667 }
668 static inline kmp_cmplx128_a16_t operator-(kmp_cmplx128_a16_t &lhs,
669 kmp_cmplx128_a16_t &rhs) {
670 return lhs.q - rhs.q;
671 }
672 static inline kmp_cmplx128_a16_t operator*(kmp_cmplx128_a16_t &lhs,
673 kmp_cmplx128_a16_t &rhs) {
674 return lhs.q * rhs.q;
675 }
676 static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,
677 kmp_cmplx128_a16_t &rhs) {
678 return lhs.q / rhs.q;
679 }
680
681 #endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD
682
683 // ATOMIC implementation routines -----------------------------------------
684 // One routine for each operation and operand type.
685 // All routine declarations look like
686 // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
687
688 #define KMP_CHECK_GTID \
689 if (gtid == KMP_GTID_UNKNOWN) { \
690 gtid = __kmp_entry_gtid(); \
691 } // check and get gtid when needed
692
693 // Beginning of a definition (provides name, parameters, debug trace)
694 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
695 // fixed)
696 // OP_ID - operation identifier (add, sub, mul, ...)
697 // TYPE - operands' type
698 #define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
699 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
700 TYPE *lhs, TYPE rhs) { \
701 KMP_DEBUG_ASSERT(__kmp_init_serial); \
702 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
703
704 // ------------------------------------------------------------------------
705 // Lock variables used for critical sections for various size operands
706 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
707 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
708 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
709 #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
710 #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
711 #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
712 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
713 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
714 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
715 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
716 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
717 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
718 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
719
720 // ------------------------------------------------------------------------
721 // Operation on *lhs, rhs bound by critical section
722 // OP - operator (it's supposed to contain an assignment)
723 // LCK_ID - lock identifier
724 // Note: don't check gtid as it should always be valid
725 // 1, 2-byte - expect valid parameter, other - check before this macro
726 #define OP_CRITICAL(OP, LCK_ID) \
727 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
728 \
729 (*lhs) OP(rhs); \
730 \
731 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
732
733 #define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \
734 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
735 (*lhs) = (TYPE)((*lhs)OP(rhs)); \
736 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
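// For illustration, OP_UPDATE_CRITICAL(kmp_real32, +, 4r) expands (roughly) to:
//   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4r, gtid);
//   (*lhs) = (kmp_real32)((*lhs) + (rhs));
//   __kmp_release_atomic_lock(&__kmp_atomic_lock_4r, gtid);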
737
738 // ------------------------------------------------------------------------
739 // For GNU compatibility, we may need to use a critical section,
740 // even though it is not required by the ISA.
741 //
742 // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
743 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
744 // critical section. On Intel(R) 64, all atomic operations are done with fetch
745 // and add or compare and exchange. Therefore, the FLAG parameter to this
746 // macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
747 // require a critical section, where we predict that they will be implemented
748 // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
749 //
750 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
751 // the FLAG parameter should always be 1. If we know that we will be using
752 // a critical section, then we want to make certain that we use the generic
753 // lock __kmp_atomic_lock to protect the atomic update, and not one of the
754 // locks that are specialized based upon the size or type of the data.
755 //
756 // If FLAG is 0, then we are relying on dead code elimination by the build
757 // compiler to get rid of the useless block of code, and save a needless
758 // branch at runtime.
759
760 #ifdef KMP_GOMP_COMPAT
761 #define OP_GOMP_CRITICAL(OP, FLAG) \
762 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
763 KMP_CHECK_GTID; \
764 OP_CRITICAL(OP, 0); \
765 return; \
766 }
767
768 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG) \
769 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
770 KMP_CHECK_GTID; \
771 OP_UPDATE_CRITICAL(TYPE, OP, 0); \
772 return; \
773 }
774 #else
775 #define OP_GOMP_CRITICAL(OP, FLAG)
776 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG)
777 #endif /* KMP_GOMP_COMPAT */
778
779 #if KMP_MIC
780 #define KMP_DO_PAUSE _mm_delay_32(1)
781 #else
782 #define KMP_DO_PAUSE KMP_CPU_PAUSE()
783 #endif /* KMP_MIC */
784
785 // ------------------------------------------------------------------------
786 // Operation on *lhs, rhs using "compare_and_store" routine
787 // TYPE - operands' type
788 // BITS - size in bits, used to distinguish low level calls
789 // OP - operator
790 #define OP_CMPXCHG(TYPE, BITS, OP) \
791 { \
792 TYPE old_value, new_value; \
793 old_value = *(TYPE volatile *)lhs; \
794 new_value = (TYPE)(old_value OP rhs); \
795 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
796 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
797 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
798 KMP_DO_PAUSE; \
799 \
800 old_value = *(TYPE volatile *)lhs; \
801 new_value = (TYPE)(old_value OP rhs); \
802 } \
803 }
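// For illustration, OP_CMPXCHG(kmp_real32, 32, +) implements "*lhs += rhs" as a
// classic compare-and-swap loop: snapshot *lhs into old_value, compute
// new_value = old_value + rhs, and retry (with a pause) whenever the 32-bit
// KMP_COMPARE_AND_STORE_ACQ32 finds that *lhs no longer equals old_value.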
804
805 #if USE_CMPXCHG_FIX
806 // 2007-06-25:
807 // workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
808 // and win_32e are affected (I verified the asm). Compiler ignores the volatile
809 // qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the
810 // compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of
811 // the workaround.
812 #define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
813 { \
814 struct _sss { \
815 TYPE cmp; \
816 kmp_int##BITS *vvv; \
817 }; \
818 struct _sss old_value, new_value; \
819 old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
820 new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
821 *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
822 new_value.cmp = (TYPE)(old_value.cmp OP rhs); \
823 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
824 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
825 *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
826 KMP_DO_PAUSE; \
827 \
828 *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
829 new_value.cmp = (TYPE)(old_value.cmp OP rhs); \
830 } \
831 }
832 // end of the first part of the workaround for C78287
833 #endif // USE_CMPXCHG_FIX
834
835 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
836
837 // ------------------------------------------------------------------------
838 // X86 or X86_64: no alignment problems ====================================
839 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
840 GOMP_FLAG) \
841 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
842 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
843 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
844 KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
845 }
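// For illustration, ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3, 0)
// below generates (roughly):
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid, kmp_int32 *lhs,
//                                 kmp_int32 rhs) {
//     KMP_TEST_THEN_ADD32(lhs, +rhs); // single atomic fetch-and-add, no lock
//   }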
846 // -------------------------------------------------------------------------
847 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
848 GOMP_FLAG) \
849 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
850 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
851 OP_CMPXCHG(TYPE, BITS, OP) \
852 }
853 #if USE_CMPXCHG_FIX
854 // -------------------------------------------------------------------------
855 // workaround for C78287 (complex(kind=4) data type)
856 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
857 MASK, GOMP_FLAG) \
858 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
859 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
860 OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
861 }
862 // end of the second part of the workaround for C78287
863 #endif // USE_CMPXCHG_FIX
864
865 #else
866 // -------------------------------------------------------------------------
867 // Code for other architectures that don't handle unaligned accesses.
868 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
869 GOMP_FLAG) \
870 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
871 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
872 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
873 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
874 KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
875 } else { \
876 KMP_CHECK_GTID; \
877 OP_UPDATE_CRITICAL(TYPE, OP, \
878 LCK_ID) /* unaligned address - use critical */ \
879 } \
880 }
881 // -------------------------------------------------------------------------
882 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
883 GOMP_FLAG) \
884 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
885 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
886 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
887 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
888 } else { \
889 KMP_CHECK_GTID; \
890 OP_UPDATE_CRITICAL(TYPE, OP, \
891 LCK_ID) /* unaligned address - use critical */ \
892 } \
893 }
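// For illustration, with MASK = 3 the test "!((kmp_uintptr_t)lhs & 0x3)" passes
// only for 4-byte-aligned addresses, so aligned updates take the cmpxchg path
// while unaligned ones fall back to the size-specific critical section.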
894 #if USE_CMPXCHG_FIX
895 // -------------------------------------------------------------------------
896 // workaround for C78287 (complex(kind=4) data type)
897 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
898 MASK, GOMP_FLAG) \
899 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
900 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
901 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
902 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
903 } else { \
904 KMP_CHECK_GTID; \
905 OP_UPDATE_CRITICAL(TYPE, OP, \
906 LCK_ID) /* unaligned address - use critical */ \
907 } \
908 }
909 // end of the second part of the workaround for C78287
910 #endif // USE_CMPXCHG_FIX
911 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
912
913 // Routines for ATOMIC 4-byte operands addition and subtraction
914 ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
915 0) // __kmpc_atomic_fixed4_add
916 ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
917 0) // __kmpc_atomic_fixed4_sub
918
919 ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
920 KMP_ARCH_X86) // __kmpc_atomic_float4_add
921 ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
922 KMP_ARCH_X86) // __kmpc_atomic_float4_sub
923
924 // Routines for ATOMIC 8-byte operands addition and subtraction
925 ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
926 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
927 ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
928 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub
929
930 ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
931 KMP_ARCH_X86) // __kmpc_atomic_float8_add
932 ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
933 KMP_ARCH_X86) // __kmpc_atomic_float8_sub
934
935 // ------------------------------------------------------------------------
936 // Entries definition for integer operands
937 // TYPE_ID - operands type and size (fixed4, float4)
938 // OP_ID - operation identifier (add, sub, mul, ...)
939 // TYPE - operand type
940 // BITS - size in bits, used to distinguish low level calls
941 // OP - operator (used in critical section)
942 // LCK_ID - lock identifier, used to possibly distinguish lock variable
943 // MASK - used for alignment check
944
945 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
946 // ------------------------------------------------------------------------
947 // Routines for ATOMIC integer operands, other operators
948 // ------------------------------------------------------------------------
949 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
950 ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
951 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
952 ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
953 0) // __kmpc_atomic_fixed1_andb
954 ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
955 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
956 ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
957 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
958 ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
959 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
960 ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
961 0) // __kmpc_atomic_fixed1_orb
962 ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
963 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
964 ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
965 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
966 ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
967 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
968 ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
969 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
970 ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
971 0) // __kmpc_atomic_fixed1_xor
972 ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
973 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
974 ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
975 0) // __kmpc_atomic_fixed2_andb
976 ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
977 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
978 ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
979 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
980 ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
981 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
982 ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
983 0) // __kmpc_atomic_fixed2_orb
984 ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
985 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
986 ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
987 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
988 ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
989 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
990 ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
991 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
992 ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
993 0) // __kmpc_atomic_fixed2_xor
994 ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
995 0) // __kmpc_atomic_fixed4_andb
996 ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
997 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
998 ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
999 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
1000 ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
1001 KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
1002 ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
1003 0) // __kmpc_atomic_fixed4_orb
1004 ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
1005 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
1006 ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
1007 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
1008 ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
1009 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
1010 ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
1011 0) // __kmpc_atomic_fixed4_xor
1012 ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
1013 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
1014 ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
1015 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
1016 ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
1017 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
1018 ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
1019 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
1020 ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
1021 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
1022 ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
1023 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
1024 ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
1025 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
1026 ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
1027 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
1028 ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
1029 KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
1030 ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
1031 KMP_ARCH_X86) // __kmpc_atomic_float4_div
1032 ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
1033 KMP_ARCH_X86) // __kmpc_atomic_float4_mul
1034 ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
1035 KMP_ARCH_X86) // __kmpc_atomic_float8_div
1036 ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
1037 KMP_ARCH_X86) // __kmpc_atomic_float8_mul
1038 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
1039
1040 /* ------------------------------------------------------------------------ */
1041 /* Routines for C/C++ Reduction operators && and || */
1042
1043 // ------------------------------------------------------------------------
1044 // Need separate macros for &&, || because there is no combined assignment
1045 // TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
1046 #define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1047 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1048 OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1049 OP_CRITICAL(= *lhs OP, LCK_ID) \
1050 }
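// For illustration, passing "= *lhs &&" as OP makes OP_CRITICAL's
// "(*lhs) OP(rhs)" expand to "(*lhs) = *lhs &&(rhs)", which synthesizes the
// combined assignment that "&&" and "||" lack.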
1051
1052 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1053
1054 // ------------------------------------------------------------------------
1055 // X86 or X86_64: no alignment problems ===================================
1056 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1057 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1058 OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1059 OP_CMPXCHG(TYPE, BITS, OP) \
1060 }
1061
1062 #else
1063 // ------------------------------------------------------------------------
1064 // Code for other architectures that don't handle unaligned accesses.
1065 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1066 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1067 OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1068 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1069 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1070 } else { \
1071 KMP_CHECK_GTID; \
1072 OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \
1073 } \
1074 }
1075 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1076
1077 ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
1078 KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
1079 ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
1080 KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
1081 ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
1082 KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
1083 ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
1084 KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
1085 ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
1086 0) // __kmpc_atomic_fixed4_andl
1087 ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
1088 0) // __kmpc_atomic_fixed4_orl
1089 ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
1090 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
1091 ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
1092 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl
1093
1094 /* ------------------------------------------------------------------------- */
1095 /* Routines for Fortran operators that matched no one in C: */
1096 /* MAX, MIN, .EQV., .NEQV. */
1097 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
1098 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */
1099
1100 // -------------------------------------------------------------------------
1101 // MIN and MAX need separate macros
1102 // OP - comparison operator used to check whether any action is needed
1103 #define MIN_MAX_CRITSECT(OP, LCK_ID) \
1104 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1105 \
1106 if (*lhs OP rhs) { /* still need actions? */ \
1107 *lhs = rhs; \
1108 } \
1109 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1110
1111 // -------------------------------------------------------------------------
1112 #ifdef KMP_GOMP_COMPAT
1113 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \
1114 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1115 KMP_CHECK_GTID; \
1116 MIN_MAX_CRITSECT(OP, 0); \
1117 return; \
1118 }
1119 #else
1120 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
1121 #endif /* KMP_GOMP_COMPAT */
1122
1123 // -------------------------------------------------------------------------
1124 #define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
1125 { \
1126 TYPE KMP_ATOMIC_VOLATILE temp_val; \
1127 TYPE old_value; \
1128 temp_val = *lhs; \
1129 old_value = temp_val; \
1130 while (old_value OP rhs && /* still need actions? */ \
1131 !KMP_COMPARE_AND_STORE_ACQ##BITS( \
1132 (kmp_int##BITS *)lhs, \
1133 *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
1134 *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
1135 KMP_CPU_PAUSE(); \
1136 temp_val = *lhs; \
1137 old_value = temp_val; \
1138 } \
1139 }
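// For illustration, MIN_MAX_CMPXCHG(kmp_int32, 32, <) implements "max": the
// loop keeps trying to store rhs only while the current value is still less
// than rhs; once *lhs >= rhs (stored by us or by a racing thread), nothing
// more needs to be written.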
1140
1141 // -------------------------------------------------------------------------
1142 // 1-byte, 2-byte operands - use critical section
1143 #define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1144 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1145 if (*lhs OP rhs) { /* need actions? */ \
1146 GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1147 MIN_MAX_CRITSECT(OP, LCK_ID) \
1148 } \
1149 }
1150
1151 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1152
1153 // -------------------------------------------------------------------------
1154 // X86 or X86_64: no alignment problems ====================================
1155 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1156 GOMP_FLAG) \
1157 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1158 if (*lhs OP rhs) { \
1159 GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1160 MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
1161 } \
1162 }
1163
1164 #else
1165 // -------------------------------------------------------------------------
1166 // Code for other architectures that don't handle unaligned accesses.
1167 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1168 GOMP_FLAG) \
1169 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1170 if (*lhs OP rhs) { \
1171 GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1172 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1173 MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1174 } else { \
1175 KMP_CHECK_GTID; \
1176 MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \
1177 } \
1178 } \
1179 }
1180 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1181
1182 MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
1183 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
1184 MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
1185 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
1186 MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
1187 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
1188 MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
1189 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
1190 MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
1191 0) // __kmpc_atomic_fixed4_max
1192 MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
1193 0) // __kmpc_atomic_fixed4_min
1194 MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
1195 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
1196 MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
1197 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
1198 MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
1199 KMP_ARCH_X86) // __kmpc_atomic_float4_max
1200 MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
1201 KMP_ARCH_X86) // __kmpc_atomic_float4_min
1202 MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
1203 KMP_ARCH_X86) // __kmpc_atomic_float8_max
1204 MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
1205 KMP_ARCH_X86) // __kmpc_atomic_float8_min
1206 #if KMP_HAVE_QUAD
1207 MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
1208 1) // __kmpc_atomic_float16_max
1209 MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
1210 1) // __kmpc_atomic_float16_min
1211 #if (KMP_ARCH_X86)
1212 MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
1213 1) // __kmpc_atomic_float16_max_a16
1214 MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
1215 1) // __kmpc_atomic_float16_min_a16
1216 #endif // (KMP_ARCH_X86)
1217 #endif // KMP_HAVE_QUAD
1218 // ------------------------------------------------------------------------
1219 // Need separate macros for .EQV. because of the need for complement (~)
1220 // OP ignored for critical sections, ^=~ used instead
1221 #define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1222 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1223 OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
1224 OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* send assignment and complement */ \
1225 }
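// For illustration, ".EQV." is realized as "(*lhs) ^= (TYPE) ~(rhs)", using the
// identity a .EQV. b == ~(a ^ b) for bitwise operands; ".NEQV." is a plain xor
// and reuses ATOMIC_CMPXCHG with "^" (see the fixed*_neqv entries below).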
1226
1227 // ------------------------------------------------------------------------
1228 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1229 // ------------------------------------------------------------------------
1230 // X86 or X86_64: no alignment problems ===================================
1231 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1232 GOMP_FLAG) \
1233 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1234 OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
1235 OP_CMPXCHG(TYPE, BITS, OP) \
1236 }
1237 // ------------------------------------------------------------------------
1238 #else
1239 // ------------------------------------------------------------------------
1240 // Code for other architectures that don't handle unaligned accesses.
1241 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1242 GOMP_FLAG) \
1243 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1244 OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) \
1245 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1246 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1247 } else { \
1248 KMP_CHECK_GTID; \
1249 OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* unaligned address - use critical */ \
1250 } \
1251 }
1252 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1253
1254 ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
1255 KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
1256 ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
1257 KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
1258 ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
1259 KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
1260 ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
1261 KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
1262 ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
1263 KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
1264 ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
1265 KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
1266 ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
1267 KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
1268 ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
1269 KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv
1270
1271 // ------------------------------------------------------------------------
1272 // Routines for Extended types: long double, _Quad, complex flavours (use
1273 // critical section)
1274 // TYPE_ID, OP_ID, TYPE - detailed above
1275 // OP - operator
1276 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1277 #define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1278 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1279 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
1280 OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
1281 }
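// For illustration, ATOMIC_CRITICAL(float10, add, long double, +, 10r, 1) below
// generates __kmpc_atomic_float10_add, which performs
// "(*lhs) = (long double)((*lhs) + (rhs))" while holding __kmp_atomic_lock_10r
// (or the generic GOMP lock when GOMP compatibility mode is active).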
1282
1283 /* ------------------------------------------------------------------------- */
1284 // routines for long double type
1285 ATOMIC_CRITICAL(float10, add, long double, +, 10r,
1286 1) // __kmpc_atomic_float10_add
1287 ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
1288 1) // __kmpc_atomic_float10_sub
1289 ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
1290 1) // __kmpc_atomic_float10_mul
1291 ATOMIC_CRITICAL(float10, div, long double, /, 10r,
1292 1) // __kmpc_atomic_float10_div
1293 #if KMP_HAVE_QUAD
1294 // routines for _Quad type
1295 ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
1296 1) // __kmpc_atomic_float16_add
1297 ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
1298 1) // __kmpc_atomic_float16_sub
1299 ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
1300 1) // __kmpc_atomic_float16_mul
1301 ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
1302 1) // __kmpc_atomic_float16_div
1303 #if (KMP_ARCH_X86)
1304 ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
1305 1) // __kmpc_atomic_float16_add_a16
1306 ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
1307 1) // __kmpc_atomic_float16_sub_a16
1308 ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
1309 1) // __kmpc_atomic_float16_mul_a16
1310 ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
1311 1) // __kmpc_atomic_float16_div_a16
1312 #endif // (KMP_ARCH_X86)
1313 #endif // KMP_HAVE_QUAD
1314 // routines for complex types
1315
1316 #if USE_CMPXCHG_FIX
1317 // workaround for C78287 (complex(kind=4) data type)
1318 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
1319 1) // __kmpc_atomic_cmplx4_add
1320 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
1321 1) // __kmpc_atomic_cmplx4_sub
1322 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
1323 1) // __kmpc_atomic_cmplx4_mul
1324 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
1325 1) // __kmpc_atomic_cmplx4_div
1326 // end of the workaround for C78287
1327 #else
1328 ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
1329 ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
1330 ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
1331 ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
1332 #endif // USE_CMPXCHG_FIX
1333
1334 ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
1335 ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
1336 ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
1337 ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
1338 ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
1339 1) // __kmpc_atomic_cmplx10_add
1340 ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
1341 1) // __kmpc_atomic_cmplx10_sub
1342 ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
1343 1) // __kmpc_atomic_cmplx10_mul
1344 ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
1345 1) // __kmpc_atomic_cmplx10_div
1346 #if KMP_HAVE_QUAD
1347 ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
1348 1) // __kmpc_atomic_cmplx16_add
1349 ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
1350 1) // __kmpc_atomic_cmplx16_sub
1351 ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
1352 1) // __kmpc_atomic_cmplx16_mul
1353 ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
1354 1) // __kmpc_atomic_cmplx16_div
1355 #if (KMP_ARCH_X86)
1356 ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
1357 1) // __kmpc_atomic_cmplx16_add_a16
1358 ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1359 1) // __kmpc_atomic_cmplx16_sub_a16
1360 ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
1361 1) // __kmpc_atomic_cmplx16_mul_a16
1362 ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1363 1) // __kmpc_atomic_cmplx16_div_a16
1364 #endif // (KMP_ARCH_X86)
1365 #endif // KMP_HAVE_QUAD
1366
1367 // OpenMP 4.0: x = expr binop x for non-commutative operations.
1368 // Supported only on IA-32 architecture and Intel(R) 64
1369 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1370
1371 // ------------------------------------------------------------------------
1372 // Operation on *lhs, rhs bound by critical section
1373 // OP - operator (it's supposed to contain an assignment)
1374 // LCK_ID - lock identifier
1375 // Note: don't check gtid as it should always be valid
1376 // 1, 2-byte - expect valid parameter, other - check before this macro
1377 #define OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1378 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1379 \
1380 (*lhs) = (TYPE)((rhs)OP(*lhs)); \
1381 \
1382 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1383
1384 #ifdef KMP_GOMP_COMPAT
1385 #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG) \
1386 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1387 KMP_CHECK_GTID; \
1388 OP_CRITICAL_REV(TYPE, OP, 0); \
1389 return; \
1390 }
1391
1392 #else
1393 #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)
1394 #endif /* KMP_GOMP_COMPAT */
1395
1396 // Beginning of a definition (provides name, parameters, debug trace)
1397 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1398 // fixed)
1399 // OP_ID - operation identifier (add, sub, mul, ...)
1400 // TYPE - operands' type
1401 #define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1402 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \
1403 TYPE *lhs, TYPE rhs) { \
1404 KMP_DEBUG_ASSERT(__kmp_init_serial); \
1405 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));
1406
1407 // ------------------------------------------------------------------------
1408 // Operation on *lhs, rhs using "compare_and_store" routine
1409 // TYPE - operands' type
1410 // BITS - size in bits, used to distinguish low level calls
1411 // OP - operator
1412 // Note: temp_val introduced in order to force the compiler to read
1413 // *lhs only once (w/o it the compiler reads *lhs twice)
1414 #define OP_CMPXCHG_REV(TYPE, BITS, OP) \
1415 { \
1416 TYPE KMP_ATOMIC_VOLATILE temp_val; \
1417 TYPE old_value, new_value; \
1418 temp_val = *lhs; \
1419 old_value = temp_val; \
1420 new_value = (TYPE)(rhs OP old_value); \
1421 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
1422 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
1423 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
1424 KMP_DO_PAUSE; \
1425 \
1426 temp_val = *lhs; \
1427 old_value = temp_val; \
1428 new_value = (TYPE)(rhs OP old_value); \
1429 } \
1430 }
1431
1432 // -------------------------------------------------------------------------
1433 #define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \
1434 ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
1435 OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1436 OP_CMPXCHG_REV(TYPE, BITS, OP) \
1437 }
1438
1439 // ------------------------------------------------------------------------
1440 // Entries definition for integer operands
1441 // TYPE_ID - operands type and size (fixed4, float4)
1442 // OP_ID - operation identifier (add, sub, mul, ...)
1443 // TYPE - operand type
1444 // BITS - size in bits, used to distinguish low level calls
1445 // OP - operator (used in critical section)
1446 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1447
1448 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
1449 // ------------------------------------------------------------------------
1450 // Routines for ATOMIC integer operands, other operators
1451 // ------------------------------------------------------------------------
1452 // TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
1453 ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
1454 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
1455 ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
1456 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
1457 ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
1458 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
1459 ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
1460 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
1461 ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
1462 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
1463 ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
1464 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev
1465
1466 ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
1467 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
1468 ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
1469 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
1470 ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
1471 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
1472 ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
1473 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
1474 ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
1475 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
1476 ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
1477 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev
1478
1479 ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
1480 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
1481 ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
1482 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
1483 ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
1484 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
1485 ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
1486 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
1487 ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
1488 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
1489 ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
1490 KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev
1491
1492 ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
1493 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
1494 ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
1495 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
1496 ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
1497 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
1498 ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
1499 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
1500 ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
1501 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
1502 ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
1503 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev
1504
1505 ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
1506 KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
1507 ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
1508 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev
1509
1510 ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
1511 KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
1512 ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
1513 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
1514 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG
1515
1516 // ------------------------------------------------------------------------
1517 // Routines for Extended types: long double, _Quad, complex flavours (use
1518 // critical section)
1519 // TYPE_ID, OP_ID, TYPE - detailed above
1520 // OP - operator
1521 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1522 #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1523 ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
1524 OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1525 OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1526 }
1527
1528 /* ------------------------------------------------------------------------- */
1529 // routines for long double type
1530 ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
1531 1) // __kmpc_atomic_float10_sub_rev
1532 ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
1533 1) // __kmpc_atomic_float10_div_rev
1534 #if KMP_HAVE_QUAD
1535 // routines for _Quad type
1536 ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
1537 1) // __kmpc_atomic_float16_sub_rev
1538 ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
1539 1) // __kmpc_atomic_float16_div_rev
1540 #if (KMP_ARCH_X86)
1541 ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
1542 1) // __kmpc_atomic_float16_sub_a16_rev
1543 ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
1544 1) // __kmpc_atomic_float16_div_a16_rev
1545 #endif // KMP_ARCH_X86
1546 #endif // KMP_HAVE_QUAD
1547
1548 // routines for complex types
1549 ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
1550 1) // __kmpc_atomic_cmplx4_sub_rev
1551 ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
1552 1) // __kmpc_atomic_cmplx4_div_rev
1553 ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
1554 1) // __kmpc_atomic_cmplx8_sub_rev
1555 ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
1556 1) // __kmpc_atomic_cmplx8_div_rev
1557 ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
1558 1) // __kmpc_atomic_cmplx10_sub_rev
1559 ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
1560 1) // __kmpc_atomic_cmplx10_div_rev
1561 #if KMP_HAVE_QUAD
1562 ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
1563 1) // __kmpc_atomic_cmplx16_sub_rev
1564 ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
1565 1) // __kmpc_atomic_cmplx16_div_rev
1566 #if (KMP_ARCH_X86)
1567 ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1568 1) // __kmpc_atomic_cmplx16_sub_a16_rev
1569 ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1570 1) // __kmpc_atomic_cmplx16_div_a16_rev
1571 #endif // KMP_ARCH_X86
1572 #endif // KMP_HAVE_QUAD
1573
1574 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1575 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1576
1577 /* ------------------------------------------------------------------------ */
1578 /* Routines for mixed types of LHS and RHS, when RHS is "larger" */
1579 /* Note: in order to reduce the total number of types combinations */
1580 /* it is supposed that compiler converts RHS to longest floating type,*/
1581 /* that is _Quad, before call to any of these routines */
1582 /* Conversion to _Quad will be done by the compiler during calculation, */
1583 /* conversion back to TYPE - before the assignment, like: */
1584 /* *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */
1585 /* Performance penalty expected because of SW emulation use */
1586 /* ------------------------------------------------------------------------ */
1587
1588 #define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1589 void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
1590 ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \
1591 KMP_DEBUG_ASSERT(__kmp_init_serial); \
1592 KA_TRACE(100, \
1593 ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
1594 gtid));
1595
1596 // -------------------------------------------------------------------------
1597 #define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \
1598 GOMP_FLAG) \
1599 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1600 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
1601 OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
1602 }
1603
1604 // -------------------------------------------------------------------------
1605 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1606 // -------------------------------------------------------------------------
1607 // X86 or X86_64: no alignment problems ====================================
1608 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1609 LCK_ID, MASK, GOMP_FLAG) \
1610 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1611 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1612 OP_CMPXCHG(TYPE, BITS, OP) \
1613 }
1614 // -------------------------------------------------------------------------
1615 #else
1616 // ------------------------------------------------------------------------
1617 // Code for other architectures that don't handle unaligned accesses.
1618 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1619 LCK_ID, MASK, GOMP_FLAG) \
1620 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1621 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1622 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1623 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1624 } else { \
1625 KMP_CHECK_GTID; \
1626 OP_UPDATE_CRITICAL(TYPE, OP, \
1627 LCK_ID) /* unaligned address - use critical */ \
1628 } \
1629 }
1630 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1631
1632 // -------------------------------------------------------------------------
1633 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1634 // -------------------------------------------------------------------------
1635 #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
1636 RTYPE, LCK_ID, MASK, GOMP_FLAG) \
1637 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1638 OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1639 OP_CMPXCHG_REV(TYPE, BITS, OP) \
1640 }
1641 #define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
1642 LCK_ID, GOMP_FLAG) \
1643 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1644 OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
1645 OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
1646 }
1647 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1648
1649 // RHS=float8
1650 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
1651 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
1652 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
1653 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
1654 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
1655 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
1656 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
1657 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
1658 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
1659 0) // __kmpc_atomic_fixed4_mul_float8
1660 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
1661 0) // __kmpc_atomic_fixed4_div_float8
1662 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
1663 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
1664 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
1665 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
1666 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
1667 KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
1668 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
1669 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
1670 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
1671 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
1672 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
1673 KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8
1674
1675 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not
1676 // use them)
1677 #if KMP_HAVE_QUAD
1678 ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
1679 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
1680 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
1681 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
1682 ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
1683 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
1684 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
1685 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
1686 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
1687 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
1688 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
1689 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
1690 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
1691 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
1692 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
1693 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp
1694
1695 ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
1696 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
1697 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
1698 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
1699 ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
1700 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
1701 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
1702 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
1703 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
1704 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
1705 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
1706 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
1707 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
1708 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
1709 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
1710 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp
1711
1712 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
1713 0) // __kmpc_atomic_fixed4_add_fp
1714 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
1715 0) // __kmpc_atomic_fixed4u_add_fp
1716 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
1717 0) // __kmpc_atomic_fixed4_sub_fp
1718 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
1719 0) // __kmpc_atomic_fixed4u_sub_fp
1720 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
1721 0) // __kmpc_atomic_fixed4_mul_fp
1722 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
1723 0) // __kmpc_atomic_fixed4u_mul_fp
1724 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
1725 0) // __kmpc_atomic_fixed4_div_fp
1726 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
1727 0) // __kmpc_atomic_fixed4u_div_fp
1728
1729 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
1730 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
1731 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
1732 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
1733 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
1734 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
1735 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
1736 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
1737 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
1738 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
1739 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
1740 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
1741 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
1742 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
1743 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
1744 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp
1745
1746 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
1747 KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
1748 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
1749 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
1750 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
1751 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
1752 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
1753 KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp
1754
1755 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
1756 KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
1757 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
1758 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
1759 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
1760 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
1761 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
1762 KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp
1763
1764 ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
1765 1) // __kmpc_atomic_float10_add_fp
1766 ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
1767 1) // __kmpc_atomic_float10_sub_fp
1768 ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
1769 1) // __kmpc_atomic_float10_mul_fp
1770 ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
1771 1) // __kmpc_atomic_float10_div_fp
1772
1773 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1774 // Reverse operations
1775 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
1776 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
1777 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
1778 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
1779 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
1780 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
1781 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
1782 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp
1783
1784 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
1785 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
1786 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
1787 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
1788 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
1789 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
1790 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
1791 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp
1792
1793 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1794 0) // __kmpc_atomic_fixed4_sub_rev_fp
1795 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1796 0) // __kmpc_atomic_fixed4u_sub_rev_fp
1797 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
1798 0) // __kmpc_atomic_fixed4_div_rev_fp
1799 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
1800 0) // __kmpc_atomic_fixed4u_div_rev_fp
1801
1802 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1803 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
1804 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1805 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
1806 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
1807 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
1808 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
1809 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp
1810
1811 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
1812 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
1813 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
1814 KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp
1815
1816 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
1817 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
1818 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
1819 KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp
1820
1821 ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
1822 1) // __kmpc_atomic_float10_sub_rev_fp
1823 ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
1824 1) // __kmpc_atomic_float10_div_rev_fp
1825 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1826
1827 #endif // KMP_HAVE_QUAD
1828
1829 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1830 // ------------------------------------------------------------------------
1831 // X86 or X86_64: no alignment problems ====================================
1832 #if USE_CMPXCHG_FIX
1833 // workaround for C78287 (complex(kind=4) data type)
1834 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1835 LCK_ID, MASK, GOMP_FLAG) \
1836 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1837 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1838 OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
1839 }
1840 // end of the second part of the workaround for C78287
1841 #else
1842 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1843 LCK_ID, MASK, GOMP_FLAG) \
1844 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1845 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1846 OP_CMPXCHG(TYPE, BITS, OP) \
1847 }
1848 #endif // USE_CMPXCHG_FIX
1849 #else
1850 // ------------------------------------------------------------------------
1851 // Code for other architectures that don't handle unaligned accesses.
1852 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1853 LCK_ID, MASK, GOMP_FLAG) \
1854 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1855 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1856 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1857 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1858 } else { \
1859 KMP_CHECK_GTID; \
1860 OP_UPDATE_CRITICAL(TYPE, OP, \
1861 LCK_ID) /* unaligned address - use critical */ \
1862 } \
1863 }
1864 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1865
1866 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
1867 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
1868 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
1869 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
1870 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
1871 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
1872 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
1873 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
1874
1875 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1876 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1877
1878 // ------------------------------------------------------------------------
1879 // Atomic READ routines
1880
1881 // ------------------------------------------------------------------------
1882 // Beginning of a definition (provides name, parameters, gebug trace)
1883 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1884 // fixed)
1885 // OP_ID - operation identifier (add, sub, mul, ...)
1886 // TYPE - operands' type
1887 #define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1888 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
1889 TYPE *loc) { \
1890 KMP_DEBUG_ASSERT(__kmp_init_serial); \
1891 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1892
1893 // ------------------------------------------------------------------------
1894 // Operation on *lhs, rhs using "compare_and_store_ret" routine
1895 // TYPE - operands' type
1896 // BITS - size in bits, used to distinguish low level calls
1897 // OP - operator
1898 // Note: temp_val introduced in order to force the compiler to read
1899 // *lhs only once (w/o it the compiler reads *lhs twice)
1900 // TODO: check if it is still necessary
1901 // Return old value regardless of the result of "compare & swap# operation
1902 #define OP_CMPXCHG_READ(TYPE, BITS, OP) \
1903 { \
1904 TYPE KMP_ATOMIC_VOLATILE temp_val; \
1905 union f_i_union { \
1906 TYPE f_val; \
1907 kmp_int##BITS i_val; \
1908 }; \
1909 union f_i_union old_value; \
1910 temp_val = *loc; \
1911 old_value.f_val = temp_val; \
1912 old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \
1913 (kmp_int##BITS *)loc, \
1914 *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \
1915 *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \
1916 new_value = old_value.f_val; \
1917 return new_value; \
1918 }
1919
1920 // -------------------------------------------------------------------------
1921 // Operation on *lhs, rhs bound by critical section
1922 // OP - operator (it's supposed to contain an assignment)
1923 // LCK_ID - lock identifier
1924 // Note: don't check gtid as it should always be valid
1925 // 1, 2-byte - expect valid parameter, other - check before this macro
1926 #define OP_CRITICAL_READ(OP, LCK_ID) \
1927 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1928 \
1929 new_value = (*loc); \
1930 \
1931 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1932
1933 // -------------------------------------------------------------------------
1934 #ifdef KMP_GOMP_COMPAT
1935 #define OP_GOMP_CRITICAL_READ(OP, FLAG) \
1936 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1937 KMP_CHECK_GTID; \
1938 OP_CRITICAL_READ(OP, 0); \
1939 return new_value; \
1940 }
1941 #else
1942 #define OP_GOMP_CRITICAL_READ(OP, FLAG)
1943 #endif /* KMP_GOMP_COMPAT */
1944
1945 // -------------------------------------------------------------------------
1946 #define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1947 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1948 TYPE new_value; \
1949 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1950 new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \
1951 return new_value; \
1952 }
1953 // -------------------------------------------------------------------------
1954 #define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1955 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1956 TYPE new_value; \
1957 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1958 OP_CMPXCHG_READ(TYPE, BITS, OP) \
1959 }
1960 // ------------------------------------------------------------------------
1961 // Routines for Extended types: long double, _Quad, complex flavours (use
1962 // critical section)
1963 // TYPE_ID, OP_ID, TYPE - detailed above
1964 // OP - operator
1965 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1966 #define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1967 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1968 TYPE new_value; \
1969 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \
1970 OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \
1971 return new_value; \
1972 }
1973
1974 // ------------------------------------------------------------------------
1975 // Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return
1976 // value doesn't work.
1977 // Let's return the read value through the additional parameter.
1978 #if (KMP_OS_WINDOWS)
1979
1980 #define OP_CRITICAL_READ_WRK(OP, LCK_ID) \
1981 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1982 \
1983 (*out) = (*loc); \
1984 \
1985 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1986 // ------------------------------------------------------------------------
1987 #ifdef KMP_GOMP_COMPAT
1988 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \
1989 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1990 KMP_CHECK_GTID; \
1991 OP_CRITICAL_READ_WRK(OP, 0); \
1992 }
1993 #else
1994 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
1995 #endif /* KMP_GOMP_COMPAT */
1996 // ------------------------------------------------------------------------
1997 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
1998 void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
1999 TYPE *loc) { \
2000 KMP_DEBUG_ASSERT(__kmp_init_serial); \
2001 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2002
2003 // ------------------------------------------------------------------------
2004 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2005 ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
2006 OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \
2007 OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \
2008 }
2009
2010 #endif // KMP_OS_WINDOWS
2011
2012 // ------------------------------------------------------------------------
2013 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2014 ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
2015 ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
2016 KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
2017 ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
2018 KMP_ARCH_X86) // __kmpc_atomic_float4_rd
2019 ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
2020 KMP_ARCH_X86) // __kmpc_atomic_float8_rd
2021
2022 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic
2023 ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
2024 KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
2025 ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
2026 KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd
2027
2028 ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
2029 1) // __kmpc_atomic_float10_rd
2030 #if KMP_HAVE_QUAD
2031 ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
2032 1) // __kmpc_atomic_float16_rd
2033 #endif // KMP_HAVE_QUAD
2034
2035 // Fix for CQ220361 on Windows* OS
2036 #if (KMP_OS_WINDOWS)
2037 ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
2038 1) // __kmpc_atomic_cmplx4_rd
2039 #else
2040 ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
2041 1) // __kmpc_atomic_cmplx4_rd
2042 #endif // (KMP_OS_WINDOWS)
2043 ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
2044 1) // __kmpc_atomic_cmplx8_rd
2045 ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
2046 1) // __kmpc_atomic_cmplx10_rd
2047 #if KMP_HAVE_QUAD
2048 ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
2049 1) // __kmpc_atomic_cmplx16_rd
2050 #if (KMP_ARCH_X86)
2051 ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
2052 1) // __kmpc_atomic_float16_a16_rd
2053 ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
2054 1) // __kmpc_atomic_cmplx16_a16_rd
2055 #endif // (KMP_ARCH_X86)
2056 #endif // KMP_HAVE_QUAD
2057
2058 // ------------------------------------------------------------------------
2059 // Atomic WRITE routines
2060
2061 #define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2062 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2063 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2064 KMP_XCHG_FIXED##BITS(lhs, rhs); \
2065 }
2066 // ------------------------------------------------------------------------
2067 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2068 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2069 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2070 KMP_XCHG_REAL##BITS(lhs, rhs); \
2071 }
2072
2073 // ------------------------------------------------------------------------
2074 // Operation on *lhs, rhs using "compare_and_store" routine
2075 // TYPE - operands' type
2076 // BITS - size in bits, used to distinguish low level calls
2077 // OP - operator
2078 // Note: temp_val introduced in order to force the compiler to read
2079 // *lhs only once (w/o it the compiler reads *lhs twice)
2080 #define OP_CMPXCHG_WR(TYPE, BITS, OP) \
2081 { \
2082 TYPE KMP_ATOMIC_VOLATILE temp_val; \
2083 TYPE old_value, new_value; \
2084 temp_val = *lhs; \
2085 old_value = temp_val; \
2086 new_value = rhs; \
2087 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2088 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2089 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2090 KMP_CPU_PAUSE(); \
2091 \
2092 temp_val = *lhs; \
2093 old_value = temp_val; \
2094 new_value = rhs; \
2095 } \
2096 }
2097
2098 // -------------------------------------------------------------------------
2099 #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2100 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2101 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2102 OP_CMPXCHG_WR(TYPE, BITS, OP) \
2103 }
2104
2105 // ------------------------------------------------------------------------
2106 // Routines for Extended types: long double, _Quad, complex flavours (use
2107 // critical section)
2108 // TYPE_ID, OP_ID, TYPE - detailed above
2109 // OP - operator
2110 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2111 #define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2112 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2113 OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \
2114 OP_CRITICAL(OP, LCK_ID) /* send assignment */ \
2115 }
2116 // -------------------------------------------------------------------------
2117
2118 ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
2119 KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
2120 ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
2121 KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
2122 ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
2123 KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
2124 #if (KMP_ARCH_X86)
2125 ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
2126 KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2127 #else
2128 ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
2129 KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2130 #endif // (KMP_ARCH_X86)
2131
2132 ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
2133 KMP_ARCH_X86) // __kmpc_atomic_float4_wr
2134 #if (KMP_ARCH_X86)
2135 ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
2136 KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2137 #else
2138 ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
2139 KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2140 #endif // (KMP_ARCH_X86)
2141
2142 ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
2143 1) // __kmpc_atomic_float10_wr
2144 #if KMP_HAVE_QUAD
2145 ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
2146 1) // __kmpc_atomic_float16_wr
2147 #endif // KMP_HAVE_QUAD
2148 ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
2149 ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
2150 1) // __kmpc_atomic_cmplx8_wr
2151 ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
2152 1) // __kmpc_atomic_cmplx10_wr
2153 #if KMP_HAVE_QUAD
2154 ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
2155 1) // __kmpc_atomic_cmplx16_wr
2156 #if (KMP_ARCH_X86)
2157 ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
2158 1) // __kmpc_atomic_float16_a16_wr
2159 ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
2160 1) // __kmpc_atomic_cmplx16_a16_wr
2161 #endif // (KMP_ARCH_X86)
2162 #endif // KMP_HAVE_QUAD
2163
2164 // ------------------------------------------------------------------------
2165 // Atomic CAPTURE routines
2166
2167 // Beginning of a definition (provides name, parameters, gebug trace)
2168 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2169 // fixed)
2170 // OP_ID - operation identifier (add, sub, mul, ...)
2171 // TYPE - operands' type
2172 #define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
2173 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
2174 TYPE *lhs, TYPE rhs, int flag) { \
2175 KMP_DEBUG_ASSERT(__kmp_init_serial); \
2176 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2177
2178 // -------------------------------------------------------------------------
2179 // Operation on *lhs, rhs bound by critical section
2180 // OP - operator (it's supposed to contain an assignment)
2181 // LCK_ID - lock identifier
2182 // Note: don't check gtid as it should always be valid
2183 // 1, 2-byte - expect valid parameter, other - check before this macro
2184 #define OP_CRITICAL_CPT(OP, LCK_ID) \
2185 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2186 \
2187 if (flag) { \
2188 (*lhs) OP rhs; \
2189 new_value = (*lhs); \
2190 } else { \
2191 new_value = (*lhs); \
2192 (*lhs) OP rhs; \
2193 } \
2194 \
2195 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2196 return new_value;
2197
2198 #define OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) \
2199 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2200 \
2201 if (flag) { \
2202 (*lhs) = (TYPE)((*lhs)OP rhs); \
2203 new_value = (*lhs); \
2204 } else { \
2205 new_value = (*lhs); \
2206 (*lhs) = (TYPE)((*lhs)OP rhs); \
2207 } \
2208 \
2209 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2210 return new_value;
2211
2212 // ------------------------------------------------------------------------
2213 #ifdef KMP_GOMP_COMPAT
2214 #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG) \
2215 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2216 KMP_CHECK_GTID; \
2217 OP_UPDATE_CRITICAL_CPT(TYPE, OP, 0); \
2218 }
2219 #else
2220 #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)
2221 #endif /* KMP_GOMP_COMPAT */
2222
2223 // ------------------------------------------------------------------------
2224 // Operation on *lhs, rhs using "compare_and_store" routine
2225 // TYPE - operands' type
2226 // BITS - size in bits, used to distinguish low level calls
2227 // OP - operator
2228 // Note: temp_val introduced in order to force the compiler to read
2229 // *lhs only once (w/o it the compiler reads *lhs twice)
2230 #define OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2231 { \
2232 TYPE KMP_ATOMIC_VOLATILE temp_val; \
2233 TYPE old_value, new_value; \
2234 temp_val = *lhs; \
2235 old_value = temp_val; \
2236 new_value = (TYPE)(old_value OP rhs); \
2237 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2238 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2239 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2240 KMP_CPU_PAUSE(); \
2241 \
2242 temp_val = *lhs; \
2243 old_value = temp_val; \
2244 new_value = (TYPE)(old_value OP rhs); \
2245 } \
2246 if (flag) { \
2247 return new_value; \
2248 } else \
2249 return old_value; \
2250 }
2251
2252 // -------------------------------------------------------------------------
2253 #define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2254 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2255 TYPE new_value; \
2256 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2257 OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2258 }
2259
2260 // -------------------------------------------------------------------------
2261 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2262 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2263 TYPE old_value, new_value; \
2264 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2265 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
2266 old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
2267 if (flag) { \
2268 return old_value OP rhs; \
2269 } else \
2270 return old_value; \
2271 }
2272 // -------------------------------------------------------------------------
2273
2274 ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
2275 0) // __kmpc_atomic_fixed4_add_cpt
2276 ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
2277 0) // __kmpc_atomic_fixed4_sub_cpt
2278 ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
2279 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
2280 ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
2281 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt
2282
2283 ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
2284 KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
2285 ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
2286 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
2287 ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
2288 KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
2289 ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
2290 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt
2291
2292 // ------------------------------------------------------------------------
2293 // Entries definition for integer operands
2294 // TYPE_ID - operands type and size (fixed4, float4)
2295 // OP_ID - operation identifier (add, sub, mul, ...)
2296 // TYPE - operand type
2297 // BITS - size in bits, used to distinguish low level calls
2298 // OP - operator (used in critical section)
2299 // TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
2300 // ------------------------------------------------------------------------
2301 // Routines for ATOMIC integer operands, other operators
2302 // ------------------------------------------------------------------------
2303 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2304 ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
2305 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
2306 ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
2307 0) // __kmpc_atomic_fixed1_andb_cpt
2308 ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
2309 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
2310 ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
2311 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
2312 ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
2313 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
2314 ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
2315 0) // __kmpc_atomic_fixed1_orb_cpt
2316 ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
2317 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
2318 ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
2319 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
2320 ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
2321 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
2322 ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
2323 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
2324 ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
2325 0) // __kmpc_atomic_fixed1_xor_cpt
2326 ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
2327 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
2328 ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
2329 0) // __kmpc_atomic_fixed2_andb_cpt
2330 ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
2331 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
2332 ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
2333 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
2334 ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
2335 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
2336 ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
2337 0) // __kmpc_atomic_fixed2_orb_cpt
2338 ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
2339 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
2340 ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
2341 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
2342 ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
2343 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
2344 ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
2345 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
2346 ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
2347 0) // __kmpc_atomic_fixed2_xor_cpt
2348 ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
2349 0) // __kmpc_atomic_fixed4_andb_cpt
2350 ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
2351 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
2352 ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
2353 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
2354 ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
2355 KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
2356 ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
2357 0) // __kmpc_atomic_fixed4_orb_cpt
2358 ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
2359 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
2360 ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
2361 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
2362 ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
2363 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
2364 ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
2365 0) // __kmpc_atomic_fixed4_xor_cpt
2366 ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
2367 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
2368 ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
2369 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
2370 ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
2371 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
2372 ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
2373 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
2374 ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
2375 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
2376 ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
2377 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
2378 ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
2379 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
2380 ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
2381 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
2382 ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
2383 KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
2384 ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
2385 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
2386 ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
2387 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
2388 ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
2389 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
2390 ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
2391 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
2392 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2393
2394 // CAPTURE routines for mixed types RHS=float16
2395 #if KMP_HAVE_QUAD
2396
2397 // Beginning of a definition (provides name, parameters, gebug trace)
2398 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2399 // fixed)
2400 // OP_ID - operation identifier (add, sub, mul, ...)
2401 // TYPE - operands' type
2402 #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2403 TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
2404 ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \
2405 KMP_DEBUG_ASSERT(__kmp_init_serial); \
2406 KA_TRACE(100, \
2407 ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
2408 gtid));
2409
2410 // -------------------------------------------------------------------------
2411 #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
2412 RTYPE, LCK_ID, MASK, GOMP_FLAG) \
2413 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2414 TYPE new_value; \
2415 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
2416 OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2417 }
2418
2419 // -------------------------------------------------------------------------
2420 #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
2421 LCK_ID, GOMP_FLAG) \
2422 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2423 TYPE new_value; \
2424 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
2425 OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
2426 }
2427
2428 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
2429 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
2430 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
2431 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
2432 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2433 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
2434 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2435 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
2436 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2437 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
2438 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2439 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
2440 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
2441 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
2442 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
2443 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp
2444
2445 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
2446 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
2447 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
2448 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
2449 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2450 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
2451 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2452 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
2453 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2454 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
2455 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2456 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
2457 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
2458 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
2459 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
2460 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp
2461
2462 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2463 0) // __kmpc_atomic_fixed4_add_cpt_fp
2464 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2465 0) // __kmpc_atomic_fixed4u_add_cpt_fp
2466 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2467 0) // __kmpc_atomic_fixed4_sub_cpt_fp
2468 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2469 0) // __kmpc_atomic_fixed4u_sub_cpt_fp
2470 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2471 0) // __kmpc_atomic_fixed4_mul_cpt_fp
2472 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2473 0) // __kmpc_atomic_fixed4u_mul_cpt_fp
2474 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2475 0) // __kmpc_atomic_fixed4_div_cpt_fp
2476 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2477 0) // __kmpc_atomic_fixed4u_div_cpt_fp
2478
2479 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2480 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
2481 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2482 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
2483 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2484 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
2485 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2486 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
2487 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2488 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
2489 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2490 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
2491 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2492 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
2493 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2494 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp
2495
2496 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
2497 KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
2498 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
2499 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
2500 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
2501 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
2502 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
2503 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp
2504
2505 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
2506 KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
2507 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
2508 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
2509 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
2510 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
2511 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
2512 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp
2513
2514 ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
2515 1) // __kmpc_atomic_float10_add_cpt_fp
2516 ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
2517 1) // __kmpc_atomic_float10_sub_cpt_fp
2518 ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
2519 1) // __kmpc_atomic_float10_mul_cpt_fp
2520 ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
2521 1) // __kmpc_atomic_float10_div_cpt_fp
2522
2523 #endif // KMP_HAVE_QUAD
2524
2525 // ------------------------------------------------------------------------
2526 // Routines for C/C++ Reduction operators && and ||
2527
2528 // -------------------------------------------------------------------------
2529 // Operation on *lhs, rhs bound by critical section
2530 // OP - operator (it's supposed to contain an assignment)
2531 // LCK_ID - lock identifier
2532 // Note: don't check gtid as it should always be valid
2533 // 1, 2-byte - expect valid parameter, other - check before this macro
2534 #define OP_CRITICAL_L_CPT(OP, LCK_ID) \
2535 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2536 \
2537 if (flag) { \
2538 new_value OP rhs; \
2539 } else \
2540 new_value = (*lhs); \
2541 \
2542 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2543
2544 // ------------------------------------------------------------------------
2545 #ifdef KMP_GOMP_COMPAT
2546 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \
2547 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2548 KMP_CHECK_GTID; \
2549 OP_CRITICAL_L_CPT(OP, 0); \
2550 return new_value; \
2551 }
2552 #else
2553 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
2554 #endif /* KMP_GOMP_COMPAT */
2555
2556 // ------------------------------------------------------------------------
2557 // Need separate macros for &&, || because there is no combined assignment
2558 #define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2559 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2560 TYPE new_value; \
2561 OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \
2562 OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2563 }
2564
2565 ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
2566 KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
2567 ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
2568 KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
2569 ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
2570 KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
2571 ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
2572 KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
2573 ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
2574 0) // __kmpc_atomic_fixed4_andl_cpt
2575 ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
2576 0) // __kmpc_atomic_fixed4_orl_cpt
2577 ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
2578 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
2579 ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
2580 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
2581
2582 // -------------------------------------------------------------------------
2583 // Routines for Fortran operators that matched no one in C:
2584 // MAX, MIN, .EQV., .NEQV.
2585 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
2586 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
2587
2588 // -------------------------------------------------------------------------
2589 // MIN and MAX need separate macros
2590 // OP - operator to check if we need any actions?
2591 #define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2592 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2593 \
2594 if (*lhs OP rhs) { /* still need actions? */ \
2595 old_value = *lhs; \
2596 *lhs = rhs; \
2597 if (flag) \
2598 new_value = rhs; \
2599 else \
2600 new_value = old_value; \
2601 } else { \
2602 new_value = *lhs; \
2603 } \
2604 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2605 return new_value;
2606
2607 // -------------------------------------------------------------------------
2608 #ifdef KMP_GOMP_COMPAT
2609 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \
2610 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2611 KMP_CHECK_GTID; \
2612 MIN_MAX_CRITSECT_CPT(OP, 0); \
2613 }
2614 #else
2615 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
2616 #endif /* KMP_GOMP_COMPAT */
2617
2618 // -------------------------------------------------------------------------
2619 #define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2620 { \
2621 TYPE KMP_ATOMIC_VOLATILE temp_val; \
2622 /*TYPE old_value; */ \
2623 temp_val = *lhs; \
2624 old_value = temp_val; \
2625 while (old_value OP rhs && /* still need actions? */ \
2626 !KMP_COMPARE_AND_STORE_ACQ##BITS( \
2627 (kmp_int##BITS *)lhs, \
2628 *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2629 *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
2630 KMP_CPU_PAUSE(); \
2631 temp_val = *lhs; \
2632 old_value = temp_val; \
2633 } \
2634 if (flag) \
2635 return rhs; \
2636 else \
2637 return old_value; \
2638 }
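// Illustrative sketch only, assuming the 32-bit max instantiation below: the
// compare-and-store loop above behaves like
//   kmp_int32 old_value = *lhs;
//   while (old_value < rhs &&   /* update still needed? */
//          !KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs,
//                                       *(kmp_int32 *)&old_value,
//                                       *(kmp_int32 *)&rhs)) {
//     KMP_CPU_PAUSE();
//     old_value = *lhs; /* re-read and retry */
//   }
//   return flag ? rhs : old_value;
// i.e. the store is attempted only while rhs would still win the comparison.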
2639
2640 // -------------------------------------------------------------------------
2641 // 1-byte, 2-byte operands - use critical section
2642 #define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2643 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2644 TYPE new_value, old_value; \
2645 if (*lhs OP rhs) { /* need actions? */ \
2646 GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2647 MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2648 } \
2649 return *lhs; \
2650 }
2651
2652 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2653 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2654 TYPE new_value, old_value; \
2655 if (*lhs OP rhs) { \
2656 GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2657 MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2658 } \
2659 return *lhs; \
2660 }
2661
2662 MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
2663 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
2664 MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
2665 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
2666 MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
2667 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
2668 MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
2669 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
2670 MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
2671 0) // __kmpc_atomic_fixed4_max_cpt
2672 MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
2673 0) // __kmpc_atomic_fixed4_min_cpt
2674 MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
2675 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
2676 MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
2677 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
2678 MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
2679 KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
2680 MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
2681 KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
2682 MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
2683 KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
2684 MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
2685 KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
2686 #if KMP_HAVE_QUAD
2687 MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
2688 1) // __kmpc_atomic_float16_max_cpt
2689 MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
2690 1) // __kmpc_atomic_float16_min_cpt
2691 #if (KMP_ARCH_X86)
2692 MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
2693 1) // __kmpc_atomic_float16_max_a16_cpt
2694 MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
1) // __kmpc_atomic_float16_min_a16_cpt
2696 #endif // (KMP_ARCH_X86)
2697 #endif // KMP_HAVE_QUAD
2698
2699 // ------------------------------------------------------------------------
2700 #ifdef KMP_GOMP_COMPAT
2701 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \
2702 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2703 KMP_CHECK_GTID; \
2704 OP_CRITICAL_CPT(OP, 0); \
2705 }
2706 #else
2707 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
2708 #endif /* KMP_GOMP_COMPAT */
2709 // ------------------------------------------------------------------------
2710 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2711 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2712 TYPE new_value; \
2713 OP_GOMP_CRITICAL_EQV_CPT(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
2714 OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2715 }
2716
2717 // ------------------------------------------------------------------------
2718
2719 ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
2720 KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
2721 ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
2722 KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
2723 ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
2724 KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
2725 ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
2726 KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
2727 ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
2728 KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
2729 ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
2730 KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
2731 ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
2732 KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
2733 ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
2734 KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
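// For illustration only: .NEQV. is captured with plain xor, while .EQV. is
// captured as xor with the complement, so an .EQV. capture on a 32-bit
// logical could be lowered (placeholder names) to
//   v = __kmpc_atomic_fixed4_eqv_cpt(&loc, gtid, &x, expr, 0);
// which atomically performs x = x ^ ~expr and returns the pre-update value.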
2735
2736 // ------------------------------------------------------------------------
2737 // Routines for Extended types: long double, _Quad, complex flavours (use
2738 // critical section)
2739 // TYPE_ID, OP_ID, TYPE - detailed above
2740 // OP - operator
2741 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2742 #define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2743 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2744 TYPE new_value; \
2745 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
2746 OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
2747 }
2748
2749 // ------------------------------------------------------------------------
// Workaround for cmplx4. Regular routines that return a value don't work
// on Win_32e, so the captured value is returned through an additional
// parameter instead.
2752 #define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \
2753 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2754 \
2755 if (flag) { \
2756 (*lhs) OP rhs; \
2757 (*out) = (*lhs); \
2758 } else { \
2759 (*out) = (*lhs); \
2760 (*lhs) OP rhs; \
2761 } \
2762 \
2763 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2764 return;
2765 // ------------------------------------------------------------------------
2766
2767 #ifdef KMP_GOMP_COMPAT
2768 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \
2769 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2770 KMP_CHECK_GTID; \
2771 OP_CRITICAL_CPT_WRK(OP## =, 0); \
2772 }
2773 #else
2774 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
2775 #endif /* KMP_GOMP_COMPAT */
2776 // ------------------------------------------------------------------------
2777
2778 #define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2779 void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
2780 TYPE rhs, TYPE *out, int flag) { \
2781 KMP_DEBUG_ASSERT(__kmp_init_serial); \
2782 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2783 // ------------------------------------------------------------------------
2784
2785 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2786 ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2787 OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \
2788 OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \
2789 }
2790 // The end of workaround for cmplx4
2791
2792 /* ------------------------------------------------------------------------- */
2793 // routines for long double type
2794 ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
2795 1) // __kmpc_atomic_float10_add_cpt
2796 ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
2797 1) // __kmpc_atomic_float10_sub_cpt
2798 ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
2799 1) // __kmpc_atomic_float10_mul_cpt
2800 ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
2801 1) // __kmpc_atomic_float10_div_cpt
2802 #if KMP_HAVE_QUAD
2803 // routines for _Quad type
2804 ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
2805 1) // __kmpc_atomic_float16_add_cpt
2806 ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
2807 1) // __kmpc_atomic_float16_sub_cpt
2808 ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
2809 1) // __kmpc_atomic_float16_mul_cpt
2810 ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
2811 1) // __kmpc_atomic_float16_div_cpt
2812 #if (KMP_ARCH_X86)
2813 ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
2814 1) // __kmpc_atomic_float16_add_a16_cpt
2815 ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
2816 1) // __kmpc_atomic_float16_sub_a16_cpt
2817 ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
2818 1) // __kmpc_atomic_float16_mul_a16_cpt
2819 ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
2820 1) // __kmpc_atomic_float16_div_a16_cpt
2821 #endif // (KMP_ARCH_X86)
2822 #endif // KMP_HAVE_QUAD
2823
2824 // routines for complex types
2825
2826 // cmplx4 routines to return void
2827 ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
2828 1) // __kmpc_atomic_cmplx4_add_cpt
2829 ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
2830 1) // __kmpc_atomic_cmplx4_sub_cpt
2831 ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
2832 1) // __kmpc_atomic_cmplx4_mul_cpt
2833 ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
2834 1) // __kmpc_atomic_cmplx4_div_cpt
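// Usage sketch (illustrative names): because these cmplx4 routines return
// void, the captured value comes back through the extra out parameter:
//   kmp_cmplx32 v;
//   __kmpc_atomic_cmplx4_add_cpt(&loc, gtid, &x, rhs, &v, 1);
// atomically updates x with x + rhs and stores the post-update value in v
// (flag == 1); with flag == 0, v receives the pre-update value instead.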
2835
2836 ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
2837 1) // __kmpc_atomic_cmplx8_add_cpt
2838 ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
2839 1) // __kmpc_atomic_cmplx8_sub_cpt
2840 ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
2841 1) // __kmpc_atomic_cmplx8_mul_cpt
2842 ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
2843 1) // __kmpc_atomic_cmplx8_div_cpt
2844 ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
2845 1) // __kmpc_atomic_cmplx10_add_cpt
2846 ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
2847 1) // __kmpc_atomic_cmplx10_sub_cpt
2848 ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
2849 1) // __kmpc_atomic_cmplx10_mul_cpt
2850 ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
2851 1) // __kmpc_atomic_cmplx10_div_cpt
2852 #if KMP_HAVE_QUAD
2853 ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
2854 1) // __kmpc_atomic_cmplx16_add_cpt
2855 ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
2856 1) // __kmpc_atomic_cmplx16_sub_cpt
2857 ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
2858 1) // __kmpc_atomic_cmplx16_mul_cpt
2859 ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
2860 1) // __kmpc_atomic_cmplx16_div_cpt
2861 #if (KMP_ARCH_X86)
2862 ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
2863 1) // __kmpc_atomic_cmplx16_add_a16_cpt
2864 ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
2865 1) // __kmpc_atomic_cmplx16_sub_a16_cpt
2866 ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
2867 1) // __kmpc_atomic_cmplx16_mul_a16_cpt
2868 ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
2869 1) // __kmpc_atomic_cmplx16_div_a16_cpt
2870 #endif // (KMP_ARCH_X86)
2871 #endif // KMP_HAVE_QUAD
2872
// OpenMP 4.0 capture forms with reversed operands, for non-commutative
// operations:
//   v = x = expr binop x;
//   { v = x; x = expr binop x; }
//   { x = expr binop x; v = x; }
// Supported only on IA-32 architecture and Intel(R) 64
2876
2877 // -------------------------------------------------------------------------
2878 // Operation on *lhs, rhs bound by critical section
2879 // OP - operator (it's supposed to contain an assignment)
2880 // LCK_ID - lock identifier
2881 // Note: don't check gtid as it should always be valid
// 1-, 2-byte operands - expect a valid gtid; for other sizes, check before this macro
2883 #define OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
2884 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2885 \
2886 if (flag) { \
2887 /*temp_val = (*lhs);*/ \
2888 (*lhs) = (TYPE)((rhs)OP(*lhs)); \
2889 new_value = (*lhs); \
2890 } else { \
2891 new_value = (*lhs); \
2892 (*lhs) = (TYPE)((rhs)OP(*lhs)); \
2893 } \
2894 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2895 return new_value;
2896
2897 // ------------------------------------------------------------------------
2898 #ifdef KMP_GOMP_COMPAT
2899 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG) \
2900 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2901 KMP_CHECK_GTID; \
2902 OP_CRITICAL_CPT_REV(TYPE, OP, 0); \
2903 }
2904 #else
2905 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG)
2906 #endif /* KMP_GOMP_COMPAT */
2907
2908 // ------------------------------------------------------------------------
2909 // Operation on *lhs, rhs using "compare_and_store" routine
2910 // TYPE - operands' type
2911 // BITS - size in bits, used to distinguish low level calls
2912 // OP - operator
2913 // Note: temp_val introduced in order to force the compiler to read
2914 // *lhs only once (w/o it the compiler reads *lhs twice)
2915 #define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2916 { \
2917 TYPE KMP_ATOMIC_VOLATILE temp_val; \
2918 TYPE old_value, new_value; \
2919 temp_val = *lhs; \
2920 old_value = temp_val; \
2921 new_value = (TYPE)(rhs OP old_value); \
2922 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2923 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2924 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2925 KMP_CPU_PAUSE(); \
2926 \
2927 temp_val = *lhs; \
2928 old_value = temp_val; \
2929 new_value = (TYPE)(rhs OP old_value); \
2930 } \
2931 if (flag) { \
2932 return new_value; \
2933 } else \
2934 return old_value; \
2935 }
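// Illustrative sketch only, assuming the fixed8 subtraction instantiation
// below: the loop above atomically replaces *lhs with rhs - *lhs, roughly
//   kmp_int64 old_value, new_value;
//   do {
//     old_value = *lhs;
//     new_value = rhs - old_value; /* reversed operand order */
//   } while (!KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)lhs,
//                                         *(kmp_int64 *)&old_value,
//                                         *(kmp_int64 *)&new_value));
//   return flag ? new_value : old_value;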
2936
2937 // -------------------------------------------------------------------------
2938 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2939 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2940 TYPE new_value; \
2941 OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
2942 OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2943 }
2944
2945 ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
2946 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
2947 ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
2948 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
2949 ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
2950 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
2951 ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
2952 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
2953 ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
2954 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
2955 ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
2956 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
2957 ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
2958 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
2959 ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
2960 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
2961 ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
2962 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
2963 ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
2964 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
2965 ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
2966 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
2967 ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
2968 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
2969 ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
2970 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
2971 ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
2972 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
2973 ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
2974 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
2975 ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
2976 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
2977 ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
2978 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
2979 ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
2980 KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
2981 ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
2982 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
2983 ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
2984 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
2985 ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
2986 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
2987 ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
2988 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
2989 ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
2990 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
2991 ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
2992 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
2993 ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
2994 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
2995 ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
2996 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
2997 ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
2998 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
2999 ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
3000 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
3001 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
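// For illustration only: a reversed capture such as
//   #pragma omp atomic capture
//   { x = expr - x; v = x; }
// on a double x could be lowered (placeholder names) to
//   v = __kmpc_atomic_float8_sub_cpt_rev(&loc, gtid, &x, expr, 1);
// where flag == 1 selects the post-update value, matching "v = x" after the
// reversed subtraction.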
3002
3003 // ------------------------------------------------------------------------
3004 // Routines for Extended types: long double, _Quad, complex flavours (use
3005 // critical section)
3006 // TYPE_ID, OP_ID, TYPE - detailed above
3007 // OP - operator
3008 // LCK_ID - lock identifier, used to possibly distinguish lock variable
3009 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
3010 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
3011 TYPE new_value; \
3012 /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \
3013 OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
3014 OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
3015 }
3016
3017 /* ------------------------------------------------------------------------- */
3018 // routines for long double type
3019 ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
3020 1) // __kmpc_atomic_float10_sub_cpt_rev
3021 ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
3022 1) // __kmpc_atomic_float10_div_cpt_rev
3023 #if KMP_HAVE_QUAD
3024 // routines for _Quad type
3025 ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
3026 1) // __kmpc_atomic_float16_sub_cpt_rev
3027 ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
3028 1) // __kmpc_atomic_float16_div_cpt_rev
3029 #if (KMP_ARCH_X86)
3030 ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
3031 1) // __kmpc_atomic_float16_sub_a16_cpt_rev
3032 ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
3033 1) // __kmpc_atomic_float16_div_a16_cpt_rev
3034 #endif // (KMP_ARCH_X86)
3035 #endif // KMP_HAVE_QUAD
3036
3037 // routines for complex types
3038
3039 // ------------------------------------------------------------------------
// Workaround for cmplx4. Regular routines that return a value don't work
// on Win_32e, so the captured value is returned through an additional
// parameter instead.
3042 #define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3043 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3044 \
3045 if (flag) { \
3046 (*lhs) = (rhs)OP(*lhs); \
3047 (*out) = (*lhs); \
3048 } else { \
3049 (*out) = (*lhs); \
3050 (*lhs) = (rhs)OP(*lhs); \
3051 } \
3052 \
3053 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3054 return;
3055 // ------------------------------------------------------------------------
3056
3057 #ifdef KMP_GOMP_COMPAT
3058 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \
3059 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3060 KMP_CHECK_GTID; \
3061 OP_CRITICAL_CPT_REV_WRK(OP, 0); \
3062 }
3063 #else
3064 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
3065 #endif /* KMP_GOMP_COMPAT */
3066 // ------------------------------------------------------------------------
3067
3068 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \
3069 GOMP_FLAG) \
3070 ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
3071 OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \
3072 OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3073 }
3074 // The end of workaround for cmplx4
3075
3076 // !!! TODO: check if we need to return void for cmplx4 routines
3077 // cmplx4 routines to return void
3078 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
3079 1) // __kmpc_atomic_cmplx4_sub_cpt_rev
3080 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
3081 1) // __kmpc_atomic_cmplx4_div_cpt_rev
3082
3083 ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
3084 1) // __kmpc_atomic_cmplx8_sub_cpt_rev
3085 ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
3086 1) // __kmpc_atomic_cmplx8_div_cpt_rev
3087 ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
3088 1) // __kmpc_atomic_cmplx10_sub_cpt_rev
3089 ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
3090 1) // __kmpc_atomic_cmplx10_div_cpt_rev
3091 #if KMP_HAVE_QUAD
3092 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
3093 1) // __kmpc_atomic_cmplx16_sub_cpt_rev
3094 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
3095 1) // __kmpc_atomic_cmplx16_div_cpt_rev
3096 #if (KMP_ARCH_X86)
3097 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
3098 1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
3099 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
3100 1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
3101 #endif // (KMP_ARCH_X86)
3102 #endif // KMP_HAVE_QUAD
3103
3104 // Capture reverse for mixed type: RHS=float16
3105 #if KMP_HAVE_QUAD
3106
// Beginning of a definition (provides name, parameters, debug trace)
3108 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
3109 // fixed)
3110 // OP_ID - operation identifier (add, sub, mul, ...)
3111 // TYPE - operands' type
3112 // -------------------------------------------------------------------------
3113 #define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
3114 RTYPE, LCK_ID, MASK, GOMP_FLAG) \
3115 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3116 TYPE new_value; \
3117 OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
3118 OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
3119 }
3120
3121 // -------------------------------------------------------------------------
3122 #define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
3123 LCK_ID, GOMP_FLAG) \
3124 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3125 TYPE new_value; \
3126 OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) /* send assignment */ \
3127 OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) /* send assignment */ \
3128 }
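// Illustrative usage, assuming the mixed-type prototype generated by
// ATOMIC_BEGIN_CPT_MIX earlier in this file (left operand's type, a _Quad
// right operand, and a capture flag):
//   v = __kmpc_atomic_float8_sub_cpt_rev_fp(&loc, gtid, &x, q, 0);
// with double x, v and _Quad q atomically performs x = (double)(q - x) and
// returns the value x held before the update.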
3129
3130 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3131 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
3132 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3133 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
3134 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3135 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
3136 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3137 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp
3138
3139 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
3140 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
3141 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
3142 1,
3143 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
3144 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
3145 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
3146 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
3147 1,
3148 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp
3149
3150 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
3151 3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
3152 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
3153 4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
3154 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
3155 3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
3156 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
3157 4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp
3158
3159 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
3160 7,
3161 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
3162 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
3163 8i, 7,
3164 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
3165 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
3166 7,
3167 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
3168 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
3169 8i, 7,
3170 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp
3171
3172 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
3173 4r, 3,
3174 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
3175 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
3176 4r, 3,
3177 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp
3178
3179 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
3180 8r, 7,
3181 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
3182 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
3183 8r, 7,
3184 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp
3185
3186 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
3187 10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
3188 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
3189 10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp
3190
3191 #endif // KMP_HAVE_QUAD
3192
3193 // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
3194
3195 #define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3196 TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
3197 TYPE rhs) { \
3198 KMP_DEBUG_ASSERT(__kmp_init_serial); \
3199 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3200
3201 #define CRITICAL_SWP(LCK_ID) \
3202 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3203 \
3204 old_value = (*lhs); \
3205 (*lhs) = rhs; \
3206 \
3207 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3208 return old_value;
3209
3210 // ------------------------------------------------------------------------
3211 #ifdef KMP_GOMP_COMPAT
3212 #define GOMP_CRITICAL_SWP(FLAG) \
3213 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3214 KMP_CHECK_GTID; \
3215 CRITICAL_SWP(0); \
3216 }
3217 #else
3218 #define GOMP_CRITICAL_SWP(FLAG)
3219 #endif /* KMP_GOMP_COMPAT */
3220
3221 #define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3222 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3223 TYPE old_value; \
3224 GOMP_CRITICAL_SWP(GOMP_FLAG) \
3225 old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \
3226 return old_value; \
3227 }
3228 // ------------------------------------------------------------------------
3229 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3230 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3231 TYPE old_value; \
3232 GOMP_CRITICAL_SWP(GOMP_FLAG) \
3233 old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \
3234 return old_value; \
3235 }
3236
3237 // ------------------------------------------------------------------------
3238 #define CMPXCHG_SWP(TYPE, BITS) \
3239 { \
3240 TYPE KMP_ATOMIC_VOLATILE temp_val; \
3241 TYPE old_value, new_value; \
3242 temp_val = *lhs; \
3243 old_value = temp_val; \
3244 new_value = rhs; \
3245 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
3246 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
3247 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
3248 KMP_CPU_PAUSE(); \
3249 \
3250 temp_val = *lhs; \
3251 old_value = temp_val; \
3252 new_value = rhs; \
3253 } \
3254 return old_value; \
3255 }
3256
3257 // -------------------------------------------------------------------------
3258 #define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3259 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3260 TYPE old_value; \
3261 GOMP_CRITICAL_SWP(GOMP_FLAG) \
3262 CMPXCHG_SWP(TYPE, BITS) \
3263 }
3264
3265 ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
3266 ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
3267 ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp
3268
3269 ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
3270 KMP_ARCH_X86) // __kmpc_atomic_float4_swp
3271
3272 #if (KMP_ARCH_X86)
3273 ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
3274 KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3275 ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
3276 KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3277 #else
3278 ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3279 ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
3280 KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3281 #endif // (KMP_ARCH_X86)
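// For illustration only: the capture-write form
//   #pragma omp atomic capture
//   { v = x; x = expr; }
// on a 32-bit x could be lowered (placeholder names) to
//   v = __kmpc_atomic_fixed4_swp(&loc, gtid, &x, expr);
// which atomically exchanges *lhs with rhs and returns the old value.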
3282
3283 // ------------------------------------------------------------------------
3284 // Routines for Extended types: long double, _Quad, complex flavours (use
3285 // critical section)
3286 #define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
3287 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3288 TYPE old_value; \
3289 GOMP_CRITICAL_SWP(GOMP_FLAG) \
3290 CRITICAL_SWP(LCK_ID) \
3291 }
3292
3293 // ------------------------------------------------------------------------
3294 // !!! TODO: check if we need to return void for cmplx4 routines
// Workaround for cmplx4. Regular routines that return a value don't work
// on Win_32e, so the captured value is returned through an additional
// parameter instead.
3297
3298 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
3299 void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
3300 TYPE rhs, TYPE *out) { \
3301 KMP_DEBUG_ASSERT(__kmp_init_serial); \
3302 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3303
3304 #define CRITICAL_SWP_WRK(LCK_ID) \
3305 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3306 \
3307 tmp = (*lhs); \
3308 (*lhs) = (rhs); \
3309 (*out) = tmp; \
3310 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3311 return;
3312 // ------------------------------------------------------------------------
3313
3314 #ifdef KMP_GOMP_COMPAT
3315 #define GOMP_CRITICAL_SWP_WRK(FLAG) \
3316 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3317 KMP_CHECK_GTID; \
3318 CRITICAL_SWP_WRK(0); \
3319 }
3320 #else
3321 #define GOMP_CRITICAL_SWP_WRK(FLAG)
3322 #endif /* KMP_GOMP_COMPAT */
3323 // ------------------------------------------------------------------------
3324
3325 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
3326 ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
3327 TYPE tmp; \
3328 GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
3329 CRITICAL_SWP_WRK(LCK_ID) \
3330 }
3331 // The end of workaround for cmplx4
3332
3333 ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
3334 #if KMP_HAVE_QUAD
3335 ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
3336 #endif // KMP_HAVE_QUAD
3337 // cmplx4 routine to return void
3338 ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp
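// Usage sketch (illustrative names): like the cmplx4 capture routines above,
// the cmplx4 swap returns void and writes the old value through the out
// parameter:
//   kmp_cmplx32 v;
//   __kmpc_atomic_cmplx4_swp(&loc, gtid, &x, rhs, &v); // v = old x; x = rhs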
3339
3340 // ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) //
3341 // __kmpc_atomic_cmplx4_swp
3342
3343 ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
3344 ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
3345 #if KMP_HAVE_QUAD
3346 ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
3347 #if (KMP_ARCH_X86)
3348 ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
3349 1) // __kmpc_atomic_float16_a16_swp
3350 ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
3351 1) // __kmpc_atomic_cmplx16_a16_swp
3352 #endif // (KMP_ARCH_X86)
3353 #endif // KMP_HAVE_QUAD
3354
3355 // End of OpenMP 4.0 Capture
3356
3357 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3358
3359 #undef OP_CRITICAL
3360
3361 /* ------------------------------------------------------------------------ */
3362 /* Generic atomic routines */
3363
void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
3366 KMP_DEBUG_ASSERT(__kmp_init_serial);
3367
3368 if (
3369 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3370 FALSE /* must use lock */
3371 #else
3372 TRUE
3373 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3374 ) {
3375 kmp_int8 old_value, new_value;
3376
3377 old_value = *(kmp_int8 *)lhs;
3378 (*f)(&new_value, &old_value, rhs);
3379
3380 /* TODO: Should this be acquire or release? */
3381 while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
3382 *(kmp_int8 *)&new_value)) {
3383 KMP_CPU_PAUSE();
3384
3385 old_value = *(kmp_int8 *)lhs;
3386 (*f)(&new_value, &old_value, rhs);
3387 }
3388
3389 return;
3390 } else {
3391 // All 1-byte data is of integer data type.
3392
3393 #ifdef KMP_GOMP_COMPAT
3394 if (__kmp_atomic_mode == 2) {
3395 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3396 } else
3397 #endif /* KMP_GOMP_COMPAT */
3398 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3399
3400 (*f)(lhs, lhs, rhs);
3401
3402 #ifdef KMP_GOMP_COMPAT
3403 if (__kmp_atomic_mode == 2) {
3404 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3405 } else
3406 #endif /* KMP_GOMP_COMPAT */
3407 __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3408 }
3409 }
3410
void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
3413 if (
3414 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3415 FALSE /* must use lock */
3416 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3417 TRUE /* no alignment problems */
3418 #else
3419 !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
3420 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3421 ) {
3422 kmp_int16 old_value, new_value;
3423
3424 old_value = *(kmp_int16 *)lhs;
3425 (*f)(&new_value, &old_value, rhs);
3426
3427 /* TODO: Should this be acquire or release? */
3428 while (!KMP_COMPARE_AND_STORE_ACQ16(
3429 (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
3430 KMP_CPU_PAUSE();
3431
3432 old_value = *(kmp_int16 *)lhs;
3433 (*f)(&new_value, &old_value, rhs);
3434 }
3435
3436 return;
3437 } else {
3438 // All 2-byte data is of integer data type.
3439
3440 #ifdef KMP_GOMP_COMPAT
3441 if (__kmp_atomic_mode == 2) {
3442 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3443 } else
3444 #endif /* KMP_GOMP_COMPAT */
3445 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3446
3447 (*f)(lhs, lhs, rhs);
3448
3449 #ifdef KMP_GOMP_COMPAT
3450 if (__kmp_atomic_mode == 2) {
3451 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3452 } else
3453 #endif /* KMP_GOMP_COMPAT */
3454 __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3455 }
3456 }
3457
void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
3460 KMP_DEBUG_ASSERT(__kmp_init_serial);
3461
3462 if (
3463 // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
3464 // Gomp compatibility is broken if this routine is called for floats.
3465 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3466 TRUE /* no alignment problems */
3467 #else
3468 !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
3469 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3470 ) {
3471 kmp_int32 old_value, new_value;
3472
3473 old_value = *(kmp_int32 *)lhs;
3474 (*f)(&new_value, &old_value, rhs);
3475
3476 /* TODO: Should this be acquire or release? */
3477 while (!KMP_COMPARE_AND_STORE_ACQ32(
3478 (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
3479 KMP_CPU_PAUSE();
3480
3481 old_value = *(kmp_int32 *)lhs;
3482 (*f)(&new_value, &old_value, rhs);
3483 }
3484
3485 return;
3486 } else {
3487 // Use __kmp_atomic_lock_4i for all 4-byte data,
3488 // even if it isn't of integer data type.
3489
3490 #ifdef KMP_GOMP_COMPAT
3491 if (__kmp_atomic_mode == 2) {
3492 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3493 } else
3494 #endif /* KMP_GOMP_COMPAT */
3495 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3496
3497 (*f)(lhs, lhs, rhs);
3498
3499 #ifdef KMP_GOMP_COMPAT
3500 if (__kmp_atomic_mode == 2) {
3501 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3502 } else
3503 #endif /* KMP_GOMP_COMPAT */
3504 __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3505 }
3506 }
3507
void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
3510 KMP_DEBUG_ASSERT(__kmp_init_serial);
3511 if (
3512
3513 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3514 FALSE /* must use lock */
3515 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3516 TRUE /* no alignment problems */
3517 #else
3518 !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
3519 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3520 ) {
3521 kmp_int64 old_value, new_value;
3522
3523 old_value = *(kmp_int64 *)lhs;
3524 (*f)(&new_value, &old_value, rhs);
3525 /* TODO: Should this be acquire or release? */
3526 while (!KMP_COMPARE_AND_STORE_ACQ64(
3527 (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
3528 KMP_CPU_PAUSE();
3529
3530 old_value = *(kmp_int64 *)lhs;
3531 (*f)(&new_value, &old_value, rhs);
3532 }
3533
3534 return;
3535 } else {
3536 // Use __kmp_atomic_lock_8i for all 8-byte data,
3537 // even if it isn't of integer data type.
3538
3539 #ifdef KMP_GOMP_COMPAT
3540 if (__kmp_atomic_mode == 2) {
3541 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3542 } else
3543 #endif /* KMP_GOMP_COMPAT */
3544 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3545
3546 (*f)(lhs, lhs, rhs);
3547
3548 #ifdef KMP_GOMP_COMPAT
3549 if (__kmp_atomic_mode == 2) {
3550 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3551 } else
3552 #endif /* KMP_GOMP_COMPAT */
3553 __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3554 }
3555 }
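// Illustrative example (not part of the runtime): a compiler using this
// generic entry point supplies a combining callback of the shape
// f(result, lhs, rhs). A hypothetical callback for "x += y" on doubles:
//   static void add_real8(void *out, void *a, void *b) {
//     *(double *)out = *(double *)a + *(double *)b;
//   }
// and the update is then routed as
//   __kmpc_atomic_8(&loc, gtid, &x, &y, add_real8);
// On the lock-free path f is re-invoked inside the compare-and-store retry
// loop above; otherwise it runs under the 8-byte atomic lock.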
3556
void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
3559 KMP_DEBUG_ASSERT(__kmp_init_serial);
3560
3561 #ifdef KMP_GOMP_COMPAT
3562 if (__kmp_atomic_mode == 2) {
3563 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3564 } else
3565 #endif /* KMP_GOMP_COMPAT */
3566 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3567
3568 (*f)(lhs, lhs, rhs);
3569
3570 #ifdef KMP_GOMP_COMPAT
3571 if (__kmp_atomic_mode == 2) {
3572 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3573 } else
3574 #endif /* KMP_GOMP_COMPAT */
3575 __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3576 }
3577
void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
3580 KMP_DEBUG_ASSERT(__kmp_init_serial);
3581
3582 #ifdef KMP_GOMP_COMPAT
3583 if (__kmp_atomic_mode == 2) {
3584 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3585 } else
3586 #endif /* KMP_GOMP_COMPAT */
3587 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3588
3589 (*f)(lhs, lhs, rhs);
3590
3591 #ifdef KMP_GOMP_COMPAT
3592 if (__kmp_atomic_mode == 2) {
3593 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3594 } else
3595 #endif /* KMP_GOMP_COMPAT */
3596 __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3597 }
3598
void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
3601 KMP_DEBUG_ASSERT(__kmp_init_serial);
3602
3603 #ifdef KMP_GOMP_COMPAT
3604 if (__kmp_atomic_mode == 2) {
3605 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3606 } else
3607 #endif /* KMP_GOMP_COMPAT */
3608 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3609
3610 (*f)(lhs, lhs, rhs);
3611
3612 #ifdef KMP_GOMP_COMPAT
3613 if (__kmp_atomic_mode == 2) {
3614 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3615 } else
3616 #endif /* KMP_GOMP_COMPAT */
3617 __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3618 }
3619
void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
3622 KMP_DEBUG_ASSERT(__kmp_init_serial);
3623
3624 #ifdef KMP_GOMP_COMPAT
3625 if (__kmp_atomic_mode == 2) {
3626 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3627 } else
3628 #endif /* KMP_GOMP_COMPAT */
3629 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3630
3631 (*f)(lhs, lhs, rhs);
3632
3633 #ifdef KMP_GOMP_COMPAT
3634 if (__kmp_atomic_mode == 2) {
3635 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3636 } else
3637 #endif /* KMP_GOMP_COMPAT */
3638 __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3639 }
3640
// AC: same two routines as GOMP_atomic_start/end, but these will be called by
// our compiler; duplicated so that pure Intel code does not use third-party
// names
3643 // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
void __kmpc_atomic_start(void) {
3645 int gtid = __kmp_entry_gtid();
3646 KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
3647 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3648 }
3649
void __kmpc_atomic_end(void) {
3651 int gtid = __kmp_get_gtid();
3652 KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
3653 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3654 }
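// Illustrative usage (not part of the runtime): a compiler may bracket an
// atomic update it cannot map to any specialized entry point, e.g.
//   __kmpc_atomic_start();
//   x = some_complicated_update(x, y); /* placeholder */
//   __kmpc_atomic_end();
// which serializes the update under the global __kmp_atomic_lock.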
3655
3656 /*!
3657 @}
3658 */
3659
3660 // end of file
3661