/*
 * kmp_wait_release.h -- Wait/Release implementation
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef KMP_WAIT_RELEASE_H
#define KMP_WAIT_RELEASE_H

#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

/*!
@defgroup WAIT_RELEASE Wait/Release operations

The definitions and functions here implement the lowest level thread
synchronizations of suspending a thread and awakening it. They are used to
build higher level operations such as barriers and fork/join.
*/
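
/*!
A rough usage sketch of the pairing (illustrative only; `loc`, `checker` and
`waiter_thr` are placeholder names, and the concrete flag classes are defined
later in this header). A thread that blocks in wait() relies on some other
thread eventually calling release() on a flag constructed over the same
location.
@code
  // waiting thread: spin/sleep until loc reaches checker
  kmp_flag_64<> f(&loc, checker);
  f.wait(this_thr, TRUE USE_ITT_BUILD_ARG(NULL));

  // releasing thread: bump the same location and wake the waiter if asleep
  kmp_flag_64<> f(&loc, waiter_thr);
  f.release();
@endcode
*/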

/*!
@ingroup WAIT_RELEASE
@{
*/

/*!
 * The flag_type describes the storage used for the flag.
 */
enum flag_type {
  flag32, /**< 32 bit flags */
  flag64, /**< 64 bit flags */
  flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */
};

struct flag_properties {
  unsigned int type : 16;
  unsigned int reserved : 16;
};

/*!
 * Base class for wait/release volatile flag
 */
template <typename P> class kmp_flag_native {
  volatile P *loc;
  flag_properties t;

public:
  typedef P flag_t;
  kmp_flag_native(volatile P *p, flag_type ft)
      : loc(p), t({(unsigned int)ft, 0U}) {}
  volatile P *get() { return loc; }
  void *get_void_p() { return RCAST(void *, CCAST(P *, loc)); }
  void set(volatile P *new_loc) { loc = new_loc; }
  flag_type get_type() { return (flag_type)(t.type); }
  P load() { return *loc; }
  void store(P val) { *loc = val; }
};

/*!
 * Base class for wait/release atomic flag
 */
template <typename P> class kmp_flag {
  std::atomic<P>
      *loc; /**< Pointer to the flag storage that is modified by another thread
             */
  flag_properties t; /**< "Type" of the flag in loc */
public:
  typedef P flag_t;
  kmp_flag(std::atomic<P> *p, flag_type ft)
      : loc(p), t({(unsigned int)ft, 0U}) {}
  /*!
   * @result the pointer to the actual flag
   */
  std::atomic<P> *get() { return loc; }
  /*!
   * @result void* pointer to the actual flag
   */
  void *get_void_p() { return RCAST(void *, loc); }
  /*!
   * @param new_loc in set loc to point at new_loc
   */
  void set(std::atomic<P> *new_loc) { loc = new_loc; }
  /*!
   * @result the flag_type
   */
  flag_type get_type() { return (flag_type)(t.type); }
  /*!
   * @result flag value
   */
  P load() { return loc->load(std::memory_order_acquire); }
  /*!
   * @param val the new flag value to be stored
   */
  void store(P val) { loc->store(val, std::memory_order_release); }
  // Derived classes must provide the following:
  /*
  kmp_info_t * get_waiter(kmp_uint32 i);
  kmp_uint32 get_num_waiters();
  bool done_check();
  bool done_check_val(P old_loc);
  bool notdone_check();
  P internal_release();
  void suspend(int th_gtid);
  void mwait(int th_gtid);
  void resume(int th_gtid);
  P set_sleeping();
  P unset_sleeping();
  bool is_sleeping();
  bool is_any_sleeping();
  bool is_sleeping_val(P old_loc);
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished
                    USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32
                    is_constrained);
  */
};

#if OMPT_SUPPORT
OMPT_NOINLINE
static void __ompt_implicit_task_end(kmp_info_t *this_thr,
                                     ompt_state_t ompt_state,
                                     ompt_data_t *tId) {
  int ds_tid = this_thr->th.th_info.ds.ds_tid;
  if (ompt_state == ompt_state_wait_barrier_implicit) {
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
    void *codeptr = NULL;
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
          codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
          codeptr);
    }
#endif
    if (!KMP_MASTER_TID(ds_tid)) {
      if (ompt_enabled.ompt_callback_implicit_task) {
        int flags = this_thr->th.ompt_thread_info.parallel_flags;
        flags = (flags & ompt_parallel_league) ? ompt_task_initial
                                               : ompt_task_implicit;
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, tId, 0, ds_tid, flags);
      }
      // return to idle state
      this_thr->th.ompt_thread_info.state = ompt_state_idle;
    } else {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
}
#endif

/* Spin wait loop that first does pause/yield, then sleep. A thread that calls
   __kmp_wait_* must make certain that another thread calls __kmp_release
   to wake it back up to prevent deadlocks!

   NOTE: We may not belong to a team at this point. */
template <class C, bool final_spin, bool Cancellable = false,
          bool Sleepable = true>
static inline bool
__kmp_wait_template(kmp_info_t *this_thr,
                    C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
#if USE_ITT_BUILD && USE_ITT_NOTIFY
  volatile void *spin = flag->get();
#endif
  kmp_uint32 spins;
  int th_gtid;
  int tasks_completed = FALSE;
  int oversubscribed;
#if !KMP_USE_MONITOR
  kmp_uint64 poll_count;
  kmp_uint64 hibernate_goal;
#else
  kmp_uint32 hibernate;
#endif

  KMP_FSYNC_SPIN_INIT(spin, NULL);
  if (flag->done_check()) {
    KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
    return false;
  }
  th_gtid = this_thr->th.th_info.ds.ds_gtid;
  if (Cancellable) {
    kmp_team_t *team = this_thr->th.th_team;
    if (team && team->t.t_cancel_request == cancel_parallel)
      return true;
  }
#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif
  KA_TRACE(20,
           ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
#if KMP_STATS_ENABLED
  stats_state_e thread_state = KMP_GET_THREAD_STATE();
#endif

  /* OMPT Behavior:
     THIS function is called from
     __kmp_barrier (2 times) (implicit or explicit barrier in parallel regions)
     these have join / fork behavior

     In these cases, we don't change the state or trigger events in THIS
     function.
     Events are triggered in the calling code (__kmp_barrier):

     state := ompt_state_overhead
         barrier-begin
         barrier-wait-begin
     state := ompt_state_wait_barrier
         call join-barrier-implementation (finally arrive here)
         {}
         call fork-barrier-implementation (finally arrive here)
         {}
     state := ompt_state_overhead
         barrier-wait-end
         barrier-end
     state := ompt_state_work_parallel


     __kmp_fork_barrier (after thread creation, before executing implicit task)
         call fork-barrier-implementation (finally arrive here)
         {} // worker arrive here with state = ompt_state_idle


     __kmp_join_barrier (implicit barrier at end of parallel region)
     state := ompt_state_barrier_implicit
         barrier-begin
         barrier-wait-begin
         call join-barrier-implementation (finally arrive here
                                           final_spin=FALSE)
         {
         }
     __kmp_fork_barrier (implicit barrier at end of parallel region)
         call fork-barrier-implementation (finally arrive here final_spin=TRUE)

     Worker after task-team is finished:
         barrier-wait-end
         barrier-end
         implicit-task-end
         idle-begin
     state := ompt_state_idle

     Before leaving, if state = ompt_state_idle
         idle-end
     state := ompt_state_overhead
  */
#if OMPT_SUPPORT
  ompt_state_t ompt_entry_state;
  ompt_data_t *tId;
  if (ompt_enabled.enabled) {
    ompt_entry_state = this_thr->th.ompt_thread_info.state;
    if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
        KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
      ompt_lw_taskteam_t *team =
          this_thr->th.th_team->t.ompt_serialized_team_info;
      if (team) {
        tId = &(team->ompt_task_info.task_data);
      } else {
        tId = OMPT_CUR_TASK_DATA(this_thr);
      }
    } else {
      tId = &(this_thr->th.ompt_thread_info.task_data);
    }
    if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
                       this_thr->th.th_task_team == NULL)) {
      // implicit task is done. Either no taskqueue, or task-team finished
      __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
    }
  }
#endif

  KMP_INIT_YIELD(spins); // Setup for waiting

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
      __kmp_pause_status == kmp_soft_paused) {
#if KMP_USE_MONITOR
// The worker threads cannot rely on the team struct existing at this point.
// Use the bt values cached in the thread struct instead.
#ifdef KMP_ADJUST_BLOCKTIME
    if (__kmp_pause_status == kmp_soft_paused ||
        (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
      // Force immediate suspend if not set by user and more threads than
      // available procs
      hibernate = 0;
    else
      hibernate = this_thr->th.th_team_bt_intervals;
#else
    hibernate = this_thr->th.th_team_bt_intervals;
#endif /* KMP_ADJUST_BLOCKTIME */

    /* If the blocktime is nonzero, we want to make sure that we spin wait for
       the entirety of the specified #intervals, plus up to one interval more.
       This increment makes certain that this thread doesn't go to sleep too
       soon. */
    if (hibernate != 0)
      hibernate++;

    // Add in the current time value.
    hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
    KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
                  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
                  hibernate - __kmp_global.g.g_time.dt.t_value));
#else
    if (__kmp_pause_status == kmp_soft_paused) {
      // Force immediate suspend
      hibernate_goal = KMP_NOW();
    } else
      hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
    poll_count = 0;
#endif // KMP_USE_MONITOR
  }

  oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);
  KMP_MB();

  // Main wait spin loop
  while (flag->notdone_check()) {
    kmp_task_team_t *task_team = NULL;
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      task_team = this_thr->th.th_task_team;
      /* If the thread's task team pointer is NULL, it means one of 3 things:
         1) A newly-created thread is first being released by
         __kmp_fork_barrier(), and its task team has not been set up yet.
         2) All tasks have been executed to completion.
         3) Tasking is off for this region. This could be because we are in a
         serialized region (perhaps the outer one), or else tasking was manually
         disabled (KMP_TASKING=0). */
      if (task_team != NULL) {
        if (TCR_SYNC_4(task_team->tt.tt_active)) {
          if (KMP_TASKING_ENABLED(task_team))
            flag->execute_tasks(
                this_thr, th_gtid, final_spin,
                &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
          else
            this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        } else {
          KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
#if OMPT_SUPPORT
          // task-team is done now, other cases should be caught above
          if (final_spin && ompt_enabled.enabled)
            __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
#endif
          this_thr->th.th_task_team = NULL;
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        }
      } else {
        this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
      } // if
    } // if

    KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    }

    // If we are oversubscribed, or have waited a bit (and
    // KMP_LIBRARY=throughput), then yield
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);

#if KMP_STATS_ENABLED
    // Check if thread has been signalled to idle state
    // This indicates that the logical "join-barrier" has finished
    if (this_thr->th.th_stats->isIdle() &&
        KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
      KMP_SET_THREAD_STATE(IDLE);
      KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
    }
#endif
    // Check if the barrier surrounding this wait loop has been cancelled
    if (Cancellable) {
      kmp_team_t *team = this_thr->th.th_team;
      if (team && team->t.t_cancel_request == cancel_parallel)
        break;
    }

    // Don't suspend if KMP_BLOCKTIME is set to "infinite"
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        __kmp_pause_status != kmp_soft_paused)
      continue;

    // Don't suspend if there is a likelihood of new tasks being spawned.
    if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
      continue;

#if KMP_USE_MONITOR
    // If we have waited a bit more, fall asleep
    if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
      continue;
#else
    if (KMP_BLOCKING(hibernate_goal, poll_count++))
      continue;
#endif
    // Don't suspend if wait loop designated non-sleepable
    // in template parameters
    if (!Sleepable)
      continue;

    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
        __kmp_pause_status != kmp_soft_paused)
      continue;

#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
    if (__kmp_mwait_enabled || __kmp_umwait_enabled) {
      KF_TRACE(50, ("__kmp_wait_sleep: T#%d using monitor/mwait\n", th_gtid));
      flag->mwait(th_gtid);
    } else {
#endif
      KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
#if KMP_OS_UNIX
      if (final_spin)
        KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
      flag->suspend(th_gtid);
#if KMP_OS_UNIX
      if (final_spin)
        KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
#endif
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
    }
#endif

    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    } else if (__kmp_tasking_mode != tskm_immediate_exec &&
               this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
      this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
    }
    // TODO: If thread is done with work and times out, disband/free
  }

#if OMPT_SUPPORT
  ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
  if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
#if OMPT_OPTIONAL
    if (final_spin) {
      __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
      ompt_exit_state = this_thr->th.ompt_thread_info.state;
    }
#endif
    if (ompt_exit_state == ompt_state_idle) {
      this_thr->th.ompt_thread_info.state = ompt_state_overhead;
    }
  }
#endif
#if KMP_STATS_ENABLED
  // If we were put into idle state, pop that off the state stack
  if (KMP_GET_THREAD_STATE() == IDLE) {
    KMP_POP_PARTITIONED_TIMER();
    KMP_SET_THREAD_STATE(thread_state);
    this_thr->th.th_stats->resetIdleFlag();
  }
#endif

#if KMP_OS_UNIX
  if (final_spin)
    KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
#endif
  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
  if (Cancellable) {
    kmp_team_t *team = this_thr->th.th_team;
    if (team && team->t.t_cancel_request == cancel_parallel) {
      if (tasks_completed) {
        // undo the previous decrement of unfinished_threads so that the
        // thread can decrement at the join barrier with no problem
        kmp_task_team_t *task_team = this_thr->th.th_task_team;
        std::atomic<kmp_int32> *unfinished_threads =
            &(task_team->tt.tt_unfinished_threads);
        KMP_ATOMIC_INC(unfinished_threads);
      }
      return true;
    }
  }
  return false;
}

#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
// Set up a monitor on the flag variable causing the calling thread to wait in
// a less active state until the flag variable is modified.
template <class C>
static inline void __kmp_mwait_template(int th_gtid, C *flag) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_mwait);
  kmp_info_t *th = __kmp_threads[th_gtid];

  KF_TRACE(30, ("__kmp_mwait_template: T#%d enter for flag = %p\n", th_gtid,
                flag->get()));

  // User-level mwait is available
  KMP_DEBUG_ASSERT(__kmp_mwait_enabled || __kmp_umwait_enabled);

  __kmp_suspend_initialize_thread(th);
  __kmp_lock_suspend_mx(th);

  volatile void *spin = flag->get();
  void *cacheline = (void *)(kmp_uint64(spin) & ~(CACHE_LINE - 1));

  if (!flag->done_check()) {
    // Mark thread as no longer active
    th->th.th_active = FALSE;
    if (th->th.th_active_in_pool) {
      th->th.th_active_in_pool = FALSE;
      KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
      KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
    }
    flag->set_sleeping();
    KF_TRACE(50, ("__kmp_mwait_template: T#%d calling monitor\n", th_gtid));
#if KMP_HAVE_UMWAIT
    if (__kmp_umwait_enabled) {
      __kmp_umonitor(cacheline);
    }
#elif KMP_HAVE_MWAIT
    if (__kmp_mwait_enabled) {
      __kmp_mm_monitor(cacheline, 0, 0);
    }
#endif
    // To avoid a race, check flag between 'monitor' and 'mwait'. A write to
    // the address could happen after the last time we checked and before
    // monitoring started, in which case monitor can't detect the change.
    if (flag->done_check())
      flag->unset_sleeping();
    else {
      // if flag changes here, wake-up happens immediately
      TCW_PTR(th->th.th_sleep_loc, (void *)flag);
      __kmp_unlock_suspend_mx(th);
      KF_TRACE(50, ("__kmp_mwait_template: T#%d calling mwait\n", th_gtid));
#if KMP_HAVE_UMWAIT
      if (__kmp_umwait_enabled) {
        __kmp_umwait(1, 100); // to do: enable ctrl via hints, backoff counter
      }
#elif KMP_HAVE_MWAIT
      if (__kmp_mwait_enabled) {
        __kmp_mm_mwait(0, __kmp_mwait_hints);
      }
#endif
      KF_TRACE(50, ("__kmp_mwait_template: T#%d mwait done\n", th_gtid));
      __kmp_lock_suspend_mx(th);
      // Clean up sleep info; doesn't matter how/why this thread stopped waiting
      if (flag->is_sleeping())
        flag->unset_sleeping();
      TCW_PTR(th->th.th_sleep_loc, NULL);
    }
    // Mark thread as active again
    th->th.th_active = TRUE;
    if (TCR_4(th->th.th_in_pool)) {
      KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
      th->th.th_active_in_pool = TRUE;
    }
  } // Drop out to main wait loop to check flag, handle tasks, etc.
  __kmp_unlock_suspend_mx(th);
  KF_TRACE(30, ("__kmp_mwait_template: T#%d exit\n", th_gtid));
}
#endif // KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT

/* Release any threads specified as waiting on the flag by releasing the flag
   and resuming the waiting thread if indicated by the sleep bit(s). A thread
   that calls __kmp_wait_template must call this function to wake up the
   potentially sleeping thread and prevent deadlocks! */
template <class C> static inline void __kmp_release_template(C *flag) {
#ifdef KMP_DEBUG
  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
#endif
  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
  KMP_DEBUG_ASSERT(flag->get());
  KMP_FSYNC_RELEASING(flag->get_void_p());

  flag->internal_release();

  KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),
                 flag->load()));

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    // Only need to check sleep stuff if infinite block time not set.
    // Are *any* threads waiting on flag sleeping?
    if (flag->is_any_sleeping()) {
      for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
        // if sleeping waiter exists at i, sets current_waiter to i inside flag
        kmp_info_t *waiter = flag->get_waiter(i);
        if (waiter) {
          int wait_gtid = waiter->th.th_info.ds.ds_gtid;
          // Wake up thread if needed
          KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
                        "flag(%p) set\n",
                        gtid, wait_gtid, flag->get()));
          flag->resume(wait_gtid); // unsets flag's current_waiter when done
        }
      }
    }
  }
}

template <typename FlagType> struct flag_traits {};

template <> struct flag_traits<kmp_uint32> {
  typedef kmp_uint32 flag_t;
  static const flag_type t = flag32;
  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR32(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND32(f, v);
  }
};

template <> struct flag_traits<kmp_uint64> {
  typedef kmp_uint64 flag_t;
  static const flag_type t = flag64;
  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR64(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND64(f, v);
  }
};

// Basic flag that does not use C11 Atomics
template <typename FlagType, bool Sleepable>
class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; /**< Value to compare the flag against to check whether the
                       flag has been released. */
  kmp_info_t
      *waiting_threads[1]; /**< Array of threads sleeping on this thread. */
  kmp_uint32
      num_waiting_threads; /**< Number of threads sleeping on this thread. */
public:
  kmp_basic_flag_native(volatile FlagType *p)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr)
      : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag_native(volatile FlagType *p, FlagType c)
      : kmp_flag_native<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  /*!
   * @param i in index into waiting_threads
   * @result the thread that is waiting at index i
   */
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  /*!
   * @result num_waiting_threads
   */
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  /*!
   * @param thr in the thread which is now waiting
   *
   * Insert a waiting thread at index 0.
   */
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  /*!
   * @result true if the flag object has been released.
   */
  bool done_check() {
    if (Sleepable)
      return (traits_type::tcr(*(this->get())) & ~KMP_BARRIER_SLEEP_STATE) ==
             checker;
    else
      return traits_type::tcr(*(this->get())) == checker;
  }
  /*!
   * @param old_loc in old value of flag
   * @result true if the flag's old value indicates it was released.
   */
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  /*!
   * @result true if the flag object is not yet released.
   * Used in __kmp_wait_template like:
   * @code
   * while (flag.notdone_check()) { pause(); }
   * @endcode
   */
  bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
  /*!
   * @result Actual flag value before release was applied.
   * Trigger all waiting threads to run by modifying flag to release state.
   */
  void internal_release() {
    (void)traits_type::test_then_add4((volatile FlagType *)this->get());
  }
  /*!
   * @result Actual flag value before sleep bit(s) set.
   * Notes that there is at least one thread sleeping on the flag by setting
   * sleep bit(s).
   */
  FlagType set_sleeping() {
    return traits_type::test_then_or((volatile FlagType *)this->get(),
                                     KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @result Actual flag value before sleep bit(s) cleared.
   * Notes that there are no longer threads sleeping on the flag by clearing
   * sleep bit(s).
   */
  FlagType unset_sleeping() {
    return traits_type::test_then_and((volatile FlagType *)this->get(),
                                      ~KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @param old_loc in old value of flag
   * Test whether there are threads sleeping on the flag's old value in old_loc.
   */
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  /*!
   * Test whether there are threads sleeping on the flag.
   */
  bool is_sleeping() { return is_sleeping_val(*(this->get())); }
  bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};

template <typename FlagType, bool Sleepable>
class kmp_basic_flag : public kmp_flag<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; /**< Value to compare the flag against to check whether the
                       flag has been released. */
  kmp_info_t
      *waiting_threads[1]; /**< Array of threads sleeping on this thread. */
  kmp_uint32
      num_waiting_threads; /**< Number of threads sleeping on this thread. */
public:
  kmp_basic_flag(std::atomic<FlagType> *p)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag(std::atomic<FlagType> *p, kmp_info_t *thr)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag(std::atomic<FlagType> *p, FlagType c)
      : kmp_flag<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  /*!
   * @param i in index into waiting_threads
   * @result the thread that is waiting at index i
   */
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  /*!
   * @result num_waiting_threads
   */
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  /*!
   * @param thr in the thread which is now waiting
   *
   * Insert a waiting thread at index 0.
   */
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  /*!
   * @result true if the flag object has been released.
   */
  bool done_check() {
    if (Sleepable)
      return (this->load() & ~KMP_BARRIER_SLEEP_STATE) == checker;
    else
      return this->load() == checker;
  }
  /*!
   * @param old_loc in old value of flag
   * @result true if the flag's old value indicates it was released.
   */
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  /*!
   * @result true if the flag object is not yet released.
   * Used in __kmp_wait_template like:
   * @code
   * while (flag.notdone_check()) { pause(); }
   * @endcode
   */
  bool notdone_check() { return this->load() != checker; }
  /*!
   * @result Actual flag value before release was applied.
   * Trigger all waiting threads to run by modifying flag to release state.
   */
  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
  /*!
   * @result Actual flag value before sleep bit(s) set.
   * Notes that there is at least one thread sleeping on the flag by setting
   * sleep bit(s).
   */
  FlagType set_sleeping() {
    return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @result Actual flag value before sleep bit(s) cleared.
   * Notes that there are no longer threads sleeping on the flag by clearing
   * sleep bit(s).
   */
  FlagType unset_sleeping() {
    return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  /*!
   * @param old_loc in old value of flag
   * Test whether there are threads sleeping on the flag's old value in old_loc.
   */
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  /*!
   * Test whether there are threads sleeping on the flag.
   */
  bool is_sleeping() { return is_sleeping_val(this->load()); }
  bool is_any_sleeping() { return is_sleeping_val(this->load()); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};

template <bool Cancellable, bool Sleepable>
class kmp_flag_32 : public kmp_basic_flag<kmp_uint32, Sleepable> {
public:
  kmp_flag_32(std::atomic<kmp_uint32> *p)
      : kmp_basic_flag<kmp_uint32, Sleepable>(p) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
      : kmp_basic_flag<kmp_uint32, Sleepable>(p, thr) {}
  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
      : kmp_basic_flag<kmp_uint32, Sleepable>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
  void mwait(int th_gtid) { __kmp_mwait_32(th_gtid, this); }
#endif
  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_32(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  bool wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      return __kmp_wait_template<kmp_flag_32, TRUE, Cancellable, Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      return __kmp_wait_template<kmp_flag_32, FALSE, Cancellable, Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag32; }
};

template <bool Cancellable, bool Sleepable>
class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64, Sleepable> {
public:
  kmp_flag_64(volatile kmp_uint64 *p)
      : kmp_basic_flag_native<kmp_uint64, Sleepable>(p) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
      : kmp_basic_flag_native<kmp_uint64, Sleepable>(p, thr) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
      : kmp_basic_flag_native<kmp_uint64, Sleepable>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
  void mwait(int th_gtid) { __kmp_mwait_64(th_gtid, this); }
#endif
  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_64(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  bool wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    if (final_spin)
      return __kmp_wait_template<kmp_flag_64, TRUE, Cancellable, Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      return __kmp_wait_template<kmp_flag_64, FALSE, Cancellable, Sleepable>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  flag_type get_ptr_type() { return flag64; }
};
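
/*!
A minimal sketch of how kmp_flag_64 is typically driven (illustrative only;
the th_bar/b_go fields and KMP_BARRIER_STATE_BUMP follow their use in the
on-core flag below and in the runtime's barrier code, and `waiter_thr` is a
placeholder; treat this as a sketch, not the exact call sites):
@code
  // waiting side: sleep on this thread's go flag until it is bumped
  kmp_flag_64<> flag(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                     (kmp_uint64)KMP_BARRIER_STATE_BUMP);
  flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(NULL));

  // releasing side: construct over the waiter's go flag so that
  // __kmp_release_template can find and resume it if it is sleeping
  kmp_flag_64<> flag(&waiter_thr->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                     waiter_thr);
  flag.release();
@endcode
*/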

// Hierarchical 64-bit on-core barrier instantiation
class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
  kmp_uint64 checker;
  kmp_info_t *waiting_threads[1];
  kmp_uint32 num_waiting_threads;
  kmp_uint32
      offset; /**< Portion of flag that is of interest for an operation. */
  bool flag_switch; /**< Indicates a switch in flag location. */
  enum barrier_type bt; /**< Barrier type. */
  kmp_info_t *this_thr; /**< Thread that may be redirected to different flag
                           location. */
#if USE_ITT_BUILD
  void *
      itt_sync_obj; /**< ITT object that must be passed to new flag location. */
#endif
  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
    return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
  }

public:
  kmp_flag_oncore(volatile kmp_uint64 *p)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        offset(idx), flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
                  enum barrier_type bar_t,
                  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
      : kmp_flag_native<kmp_uint64>(p, flag_oncore), checker(c),
        num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
        this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  bool done_check_val(kmp_uint64 old_loc) {
    return byteref(&old_loc, offset) == checker;
  }
  bool done_check() { return done_check_val(*get()); }
  bool notdone_check() {
    // Calculate flag_switch
    if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
      flag_switch = true;
    if (byteref(get(), offset) != 1 && !flag_switch)
      return true;
    else if (flag_switch) {
      this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
      kmp_flag_64<> flag(&this_thr->th.th_bar[bt].bb.b_go,
                         (kmp_uint64)KMP_BARRIER_STATE_BUMP);
      __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    return false;
  }
  void internal_release() {
    // Other threads can write their own bytes simultaneously.
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
      byteref(get(), offset) = 1;
    } else {
      kmp_uint64 mask = 0;
      byteref(&mask, offset) = 1;
      KMP_TEST_THEN_OR64(get(), mask);
    }
  }
  kmp_uint64 set_sleeping() {
    return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
  }
  kmp_uint64 unset_sleeping() {
    return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(kmp_uint64 old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(*get()); }
  bool is_any_sleeping() { return is_sleeping_val(*get()); }
  void wait(kmp_info_t *this_thr, int final_spin) {
    if (final_spin)
      __kmp_wait_template<kmp_flag_oncore, TRUE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
    else
      __kmp_wait_template<kmp_flag_oncore, FALSE>(
          this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
#if KMP_HAVE_MWAIT || KMP_HAVE_UMWAIT
  void mwait(int th_gtid) { __kmp_mwait_oncore(th_gtid, this); }
#endif
  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_oncore(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bt; }
  flag_type get_ptr_type() { return flag_oncore; }
};
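
/*!
Illustrative note on the layout used by kmp_flag_oncore (an interpretation of
byteref()/offset above, not a verbatim excerpt from the source): one 64-bit
word is treated as an array of per-child bytes, so each child releases only
the byte at its own offset, and done_check() compares just that byte against
checker.
@code
  // hypothetical standalone illustration, assuming offset == 2, checker == 1
  kmp_uint64 word = 0;
  unsigned char *bytes = RCAST(unsigned char *, &word);
  bytes[2] = 1; // what internal_release() does for the child at offset 2
  // done_check() for that child now reduces to: bytes[2] == 1
@endcode
*/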

// Used to wake up threads, volatile void* flag is usually the th_sleep_loc
// associated with int gtid.
static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
  if (!flag)
    return;

  switch (RCAST(kmp_flag_64<> *, CCAST(void *, flag))->get_type()) {
  case flag32:
    __kmp_resume_32(gtid, (kmp_flag_32<> *)NULL);
    break;
  case flag64:
    __kmp_resume_64(gtid, (kmp_flag_64<> *)NULL);
    break;
  case flag_oncore:
    __kmp_resume_oncore(gtid, (kmp_flag_oncore *)NULL);
    break;
  }
}

/*!
@}
*/

#endif // KMP_WAIT_RELEASE_H