/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

/* Static scheduling initialization.

   NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however
   it may change values between parallel regions. __kmp_max_nth
   is the largest value __kmp_nth may take, 1 is the smallest. */

#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif

#if KMP_STATS_ENABLED
#define KMP_STATS_LOOP_END(stat)                                               \
  {                                                                            \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif

static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
static inline void check_loc(ident_t *&loc) {
  if (loc == NULL)
    loc = &loc_stub; // may need to report location info to ittnotify
}

template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /* this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop;

  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
    // *plower = *pupper - incr;
    // let compiler bypass the illegal loop (like for(i=1;i<10;i--))
    // THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
    // ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  // Although there are schedule enumerations above kmp_ord_upper which are not
  // schedules for "distribute", the only useful ones are dynamic, and those
  // cannot be seen here, since this code path is only executed for static
  // schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    tid = th->th.th_team->t.t_master_tid;
    team = th->th.th_team->t.t_parent;
  } else {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }
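  // Worked example (hypothetical values): *plower=0, *pupper=9, incr=2 takes
  // the (incr > 0) branch and gives trip_count = (9 - 0) / 2 + 1 = 5, i.e.
  // the iterations {0, 2, 4, 6, 8}.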

#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        *plower = *pupper + incr;
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
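        // Balanced split example (hypothetical values): trip_count=10, nth=4
        // gives small_chunk=2 and extras=2, so tids 0..3 receive 3, 3, 2, 2
        // iterations respectively.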
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
  case kmp_sch_static_chunked: {
    ST span;
    if (chunk < 1) {
      chunk = 1;
    }
    span = chunk * incr;
    *pstride = span * nth;
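    // Round-robin example (hypothetical values): chunk=2, nth=3, incr=1 with
    // lower=0 assigns this thread its first chunk below; *pstride = 6 then
    // lets the compiler-generated loop advance tid 0 through {0,1}, {6,7},
    // ..., tid 1 through {2,3}, {8,9}, ..., and so on.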
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk) % nth);
    break;
  }
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    chunk = (span + chunk - 1) & ~(chunk - 1);
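    // Note that this bit-mask rounding of span up to a multiple of chunk is
    // only exact when chunk is a power of two.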

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    check_loc(loc);
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}

template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only masters of some teams get single iteration, other threads get
    // nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
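    // Example (hypothetical values): trip_count=3, nteams=4 gives the master
    // thread of each of teams 0..2 a single iteration, while team 3 and all
    // non-master threads get an empty range.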
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}

template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and
  // stride for next chunks calculation.
  // Last iteration flag set for the team that will execute
  // the last iteration of the loop.
  // The routine is called for dist_schedule(static,chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops maintained by compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0     - compile-time check
      //   for(i=10;i<0;--i) // incr < 0     - compile-time check
      // Compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
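  // Example (hypothetical values): lower=0, upper=15, incr=1, chunk=4,
  // nteams=2 gives team 0 the first chunk [0,3] and team 1 [4,7]; *p_st = 8
  // advances them to [8,11] and [12,15], and team 1 owns the last iteration.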
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}

//------------------------------------------------------------------------------
extern "C" {
/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedtype Scheduling type
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound
@param pstride Pointer to the stride
@param incr Loop increment
@param chunk The chunk size

The four functions here are identical apart from the argument types.

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk size.

@{
*/
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
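
/* A minimal sketch of the compiler-generated calling sequence this entry
   point serves, assuming a canonical loop "for (int i = 0; i < n; ++i)
   body(i);" lowered with schedule(static). Illustrative only; actual codegen
   varies by compiler and version:

     kmp_int32 last = 0, lower = 0, upper = n - 1, stride = 1;
     __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lower,
                              &upper, &stride, 1, 1);
     for (kmp_int32 i = lower; i <= upper; ++i)
       body(i); // execute this thread's contiguous block of iterations
     __kmpc_for_static_fini(&loc, gtid);
*/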

/*!
See @ref __kmpc_for_static_init_4
*/
void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
See @ref __kmpc_for_static_init_4
*/
void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int64 *plower,
                              kmp_int64 *pupper, kmp_int64 *pstride,
                              kmp_int64 incr, kmp_int64 chunk) {
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

/*!
See @ref __kmpc_for_static_init_4
*/
void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
/*!
@}
*/

/*!
@ingroup WORK_SHARING
@param loc Source code location
@param gtid Global thread id of this thread
@param schedule Scheduling type for the parallel loop
@param plastiter Pointer to the "last iteration" flag
@param plower Pointer to the lower bound
@param pupper Pointer to the upper bound of loop chunk
@param pupperD Pointer to the upper bound of dist_chunk
@param pstride Pointer to the stride for parallel loop
@param incr Loop increment
@param chunk The chunk size for the parallel loop

The four functions here are identical apart from the argument types.

The functions compute the upper and lower bounds and strides to be used for the
set of iterations to be executed by the current thread from the statically
scheduled loop that is described by the initial values of the bounds, strides,
increment and chunks for parallel loop and distribute constructs.

@{
*/
void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}
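
/* A minimal sketch of how a compiler might drive this entry point for
   "#pragma omp distribute parallel for dist_schedule(static)" with a
   statically scheduled inner loop. Illustrative only; real codegen differs:

     kmp_int32 last = 0, lb = 0, ub = n - 1, ubD, st = 1;
     __kmpc_dist_for_static_init_4(&loc, gtid, kmp_sch_static, &last, &lb,
                                   &ub, &ubD, &st, 1, 0);
     for (kmp_int32 i = lb; i <= ub; ++i)
       body(i); // thread's share of the team's chunk; ubD holds the team's
                // upper bound
*/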

/*!
See @ref __kmpc_dist_for_static_init_4
*/
void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}

/*!
See @ref __kmpc_dist_for_static_init_4
*/
void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr, chunk);
}

/*!
See @ref __kmpc_dist_for_static_init_4
*/
void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr, chunk);
}
/*!
@}
*/

//------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
// Transfer call to template< type T >
// __kmp_team_static_init( ident_t *loc, int gtid,
//     int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

/*!
@ingroup WORK_SHARING
@{
@param loc Source location
@param gtid Global thread id
@param p_last Pointer to the last iteration flag
@param p_lb Pointer to the lower bound
@param p_ub Pointer to the upper bound
@param p_st Step (or increment if you prefer)
@param incr Loop increment
@param chunk The chunk size to block with

The functions compute the upper and lower bounds and stride to be used for the
set of iterations to be executed by the current team from the statically
scheduled loop that is described by the initial values of the bounds, stride,
increment and chunk for the distribute construct as part of composite
distribute parallel loop construct. These functions are all identical apart
from the types of the arguments.
*/

void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int32 *p_lb, kmp_int32 *p_ub,
                               kmp_int32 *p_st, kmp_int32 incr,
                               kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
See @ref __kmpc_team_static_init_4
*/
void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                                kmp_int32 *p_st, kmp_int32 incr,
                                kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

/*!
See @ref __kmpc_team_static_init_4
*/
void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int64 *p_lb, kmp_int64 *p_ub,
                               kmp_int64 *p_st, kmp_int64 incr,
                               kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

/*!
See @ref __kmpc_team_static_init_4
*/
void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                                kmp_int64 *p_st, kmp_int64 incr,
                                kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}
/*!
@}
*/

} // extern "C"