1 //===------------- task.h - NVPTX OpenMP tasks support ----------- CUDA -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Task implementation support.
10 //
11 // explicit task structure uses
12 // omptarget_nvptx task
13 // kmp_task
14 //
15 // where kmp_task is
16 // - klegacy_TaskDescr <- task pointer
17 // shared -> X
18 // routine
19 // part_id
20 // descr
21 // - private (of size given by task_alloc call). Accessed by
22 // task+sizeof(klegacy_TaskDescr)
23 // * private data *
24 // - shared: X. Accessed by shared ptr in klegacy_TaskDescr
25 // * pointer table to shared variables *
26 // - end
27 //
28 //===----------------------------------------------------------------------===//
29
30 #include "common/omptarget.h"
31
// Allocate and initialise the descriptor of an explicit task.
//
// One SafeMalloc'ed buffer is laid out as
//   [ omptarget_nvptx_TaskDescr | kmp_TaskDescr + private data | shared table ]
// where the middle part is sizeOfTaskInclPrivate bytes plus padding and the
// last part is sizeOfSharedTable bytes.
//
// loc, global_tid and flag are unused (in this implementation every task is
// executed immediately). taskSub is stored as the task entry point.
//
// Returns the address of the kmp_TaskDescr embedded in the buffer, with its
// sharedPointerTable, sub and destructors fields set.
EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc(
    kmp_Ident *loc, uint32_t global_tid, int32_t flag,
    size_t sizeOfTaskInclPrivate, size_t sizeOfSharedTable,
    kmp_TaskFctPtr taskSub) {
  PRINT(LD_IO,
        "call __kmpc_omp_task_alloc(size priv&struct %lld, shared %lld, "
        "fct 0x%llx)\n",
        (long long)sizeOfTaskInclPrivate, (long long)sizeOfSharedTable,
        (unsigned long long)taskSub);

  // Pad task+private so that the shared table which follows it starts on a
  // pointer-size boundary (the original intent: a multiple of 8 bytes).
  const size_t paddedPrivSize =
      sizeOfTaskInclPrivate + PadBytes(sizeOfTaskInclPrivate, sizeof(void *));

  ASSERT(LT_FUSSY, sizeof(omptarget_nvptx_TaskDescr) % sizeof(void *) == 0,
         "need task descr of size %d to be a multiple of %d\n",
         (int)sizeof(omptarget_nvptx_TaskDescr), (int)sizeof(void *));

  // Total footprint: runtime task descriptor + kmp part + shared table.
  const size_t allocSize =
      sizeof(omptarget_nvptx_TaskDescr) + paddedPrivSize + sizeOfSharedTable;
  omptarget_nvptx_ExplicitTaskDescr *explicitDescr =
      (omptarget_nvptx_ExplicitTaskDescr *)SafeMalloc(
          allocSize, "explicit task descriptor");

  // The kmp descriptor must sit directly behind the runtime task descriptor;
  // the other entry points rely on that to get back to the buffer start.
  kmp_TaskDescr *kmpDescr = &explicitDescr->kmpTaskDescr;
  ASSERT0(LT_FUSSY,
          (uint64_t)kmpDescr ==
              (uint64_t)ADD_BYTES(explicitDescr,
                                  sizeof(omptarget_nvptx_TaskDescr)),
          "bad size assumptions");

  // Initialise the kmp descriptor: the shared pointer table lives right after
  // the (padded) task+private region.
  kmpDescr->sharedPointerTable = (void *)((char *)kmpDescr + paddedPrivSize);
  kmpDescr->sub = taskSub;
  kmpDescr->destructors = NULL;

  PRINT(LD_TASK, "return with task descr kmp: 0x%llx, omptarget-nvptx 0x%llx\n",
        (unsigned long long)kmpDescr, (unsigned long long)explicitDescr);

  return kmpDescr;
}
71
// Run an explicit task that has no dependences.
//
// Forwards to __kmpc_omp_task_with_deps with empty dependence lists and
// returns whatever that call returns.
EXTERN int32_t __kmpc_omp_task(kmp_Ident *loc, uint32_t global_tid,
                               kmp_TaskDescr *newKmpTaskDescr) {
  const int32_t status = __kmpc_omp_task_with_deps(
      loc, global_tid, newKmpTaskDescr, /*depNum=*/0, /*depList=*/0,
      /*noAliasDepNum=*/0, /*noAliasDepList=*/0);
  return status;
}
77
// Execute an explicit task right away on the calling thread.
//
// The dependence arguments (depNum, depList, noAliasDepNum, noAliasDepList)
// and global_tid are not consulted. The task's descriptor is installed as the
// thread's top-level task descriptor for the duration of the call to the task
// routine, the parent descriptor is reinstated afterwards, and the explicit
// task descriptor buffer is freed.
//
// newKmpTaskDescr is expected to be the kmp descriptor embedded in an
// omptarget_nvptx_ExplicitTaskDescr; it must not be used after this returns.
// Always returns 0.
EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Ident *loc, uint32_t global_tid,
                                         kmp_TaskDescr *newKmpTaskDescr,
                                         int32_t depNum, void *depList,
                                         int32_t noAliasDepNum,
                                         void *noAliasDepList) {
  PRINT(LD_IO, "call to __kmpc_omp_task_with_deps(task 0x%llx)\n",
        P64(newKmpTaskDescr));
  ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
          "Runtime must be initialized.");

  // Step 1: walk back from the kmp descriptor to the enclosing explicit task
  // descriptor, whose first member is the runtime task descriptor.
  omptarget_nvptx_ExplicitTaskDescr *explicitDescr =
      (omptarget_nvptx_ExplicitTaskDescr *)SUB_BYTES(
          newKmpTaskDescr, sizeof(omptarget_nvptx_TaskDescr));
  ASSERT0(LT_FUSSY, &explicitDescr->kmpTaskDescr == newKmpTaskDescr,
          "bad assumptions");
  omptarget_nvptx_TaskDescr *taskDescr = &explicitDescr->taskDescr;
  ASSERT0(LT_FUSSY, (uint64_t)taskDescr == (uint64_t)explicitDescr,
          "bad assumptions");

  // Step 2: push a new context. The new descriptor is derived from the
  // current top-level one and then becomes the top-level descriptor itself.
  int threadId = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
  omptarget_nvptx_TaskDescr *parentDescr = getMyTopTaskDescriptor(threadId);
  taskDescr->CopyForExplicitTask(parentDescr);
  omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(threadId,
                                                             taskDescr);

  // Step 3: invoke the task routine.
  PRINT(LD_TASK, "call task sub 0x%llx(task descr 0x%llx)\n",
        (unsigned long long)newKmpTaskDescr->sub,
        (unsigned long long)newKmpTaskDescr);
  newKmpTaskDescr->sub(0, newKmpTaskDescr);
  PRINT(LD_TASK, "return from call task sub 0x%llx()\n",
        (unsigned long long)newKmpTaskDescr->sub);

  // Step 4: pop the context by reinstating the parent descriptor.
  omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(threadId,
                                                             parentDescr);

  // Step 5: release the whole explicit task descriptor buffer.
  SafeFree(explicitDescr, "explicit task descriptor");
  return 0;
}
119
// Enter a task whose body is executed inline by the caller.
//
// Only performs the "push context" half of task execution: the task's
// descriptor is derived from the thread's current top-level descriptor and
// then installed as the new top-level one. Popping the context and freeing
// the descriptor are left to __kmpc_omp_task_complete_if0. global_tid is not
// consulted.
EXTERN void __kmpc_omp_task_begin_if0(kmp_Ident *loc, uint32_t global_tid,
                                      kmp_TaskDescr *newKmpTaskDescr) {
  PRINT(LD_IO, "call to __kmpc_omp_task_begin_if0(task 0x%llx)\n",
        (unsigned long long)newKmpTaskDescr);
  ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
          "Runtime must be initialized.");

  // Step 1: recover the explicit task descriptor that encloses the kmp one.
  omptarget_nvptx_ExplicitTaskDescr *explicitDescr =
      (omptarget_nvptx_ExplicitTaskDescr *)SUB_BYTES(
          newKmpTaskDescr, sizeof(omptarget_nvptx_TaskDescr));
  ASSERT0(LT_FUSSY, &explicitDescr->kmpTaskDescr == newKmpTaskDescr,
          "bad assumptions");
  omptarget_nvptx_TaskDescr *taskDescr = &explicitDescr->taskDescr;
  ASSERT0(LT_FUSSY, (uint64_t)taskDescr == (uint64_t)explicitDescr,
          "bad assumptions");

  // Step 2: push the new context and make it the top-level descriptor.
  int threadId = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
  omptarget_nvptx_TaskDescr *parentDescr = getMyTopTaskDescriptor(threadId);
  taskDescr->CopyForExplicitTask(parentDescr);
  omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(threadId,
                                                             taskDescr);

  // Step 3: nothing to call, the task body is inline in the caller.
  // Steps 4 and 5 (pop context, free) happen in the matching complete call.
}
145
// Leave a task whose body was executed inline by the caller.
//
// Counterpart of __kmpc_omp_task_begin_if0: reinstates the task's previous
// descriptor as the thread's top-level descriptor and frees the explicit task
// descriptor buffer. newKmpTaskDescr must not be used afterwards. global_tid
// is not consulted.
EXTERN void __kmpc_omp_task_complete_if0(kmp_Ident *loc, uint32_t global_tid,
                                         kmp_TaskDescr *newKmpTaskDescr) {
  PRINT(LD_IO, "call to __kmpc_omp_task_complete_if0(task 0x%llx)\n",
        (unsigned long long)newKmpTaskDescr);
  ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
          "Runtime must be initialized.");

  // Step 1: recover the explicit task descriptor that encloses the kmp one.
  omptarget_nvptx_ExplicitTaskDescr *explicitDescr =
      (omptarget_nvptx_ExplicitTaskDescr *)SUB_BYTES(
          newKmpTaskDescr, sizeof(omptarget_nvptx_TaskDescr));
  ASSERT0(LT_FUSSY, &explicitDescr->kmpTaskDescr == newKmpTaskDescr,
          "bad assumptions");
  omptarget_nvptx_TaskDescr *taskDescr = &explicitDescr->taskDescr;
  ASSERT0(LT_FUSSY, (uint64_t)taskDescr == (uint64_t)explicitDescr,
          "bad assumptions");

  // Step 2: the descriptor to go back to is the one recorded as "previous"
  // in the task's own descriptor.
  omptarget_nvptx_TaskDescr *parentDescr = taskDescr->GetPrevTaskDescr();

  // Step 3: nothing to call, the task body was inline in the caller.

  // Step 4: pop the context.
  int threadId = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
  omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(threadId,
                                                             parentDescr);

  // Step 5: release the whole explicit task descriptor buffer.
  SafeFree(explicitDescr, "explicit task descriptor");
}
171
// Wait for task dependences: emits a trace message and returns.
//
// None of the arguments are consulted; there is never anything to wait for
// because all tasks in this runtime are executed as final.
EXTERN void __kmpc_omp_wait_deps(kmp_Ident *loc, uint32_t global_tid,
                                 int32_t depNum, void *depList,
                                 int32_t noAliasDepNum,
                                 void *noAliasDepList) {
  PRINT0(LD_IO, "call to __kmpc_omp_wait_deps(..)\n");
}
178
// Start of a taskgroup region: emits a trace message and returns.
//
// Arguments are not consulted; no bookkeeping is needed because all tasks in
// this runtime are executed as final.
EXTERN void __kmpc_taskgroup(kmp_Ident *loc, uint32_t global_tid) {
  PRINT0(LD_IO, "call to __kmpc_taskgroup(..)\n");
}
183
// End of a taskgroup region: emits a trace message and returns.
//
// Arguments are not consulted; there are no outstanding tasks to wait for
// because all tasks in this runtime are executed as final.
EXTERN void __kmpc_end_taskgroup(kmp_Ident *loc, uint32_t global_tid) {
  PRINT0(LD_IO, "call to __kmpc_end_taskgroup(..)\n");
}
188
// Taskyield: emits a trace message and reports 0.
//
// Arguments are not consulted. Tasks are executed immediately in this
// runtime, so there is no other task to yield to.
EXTERN int32_t __kmpc_omp_taskyield(kmp_Ident *loc, uint32_t global_tid,
                                    int end_part) {
  PRINT0(LD_IO, "call to __kmpc_taskyield()\n");
  const int32_t status = 0;
  return status;
}
195
// Taskwait: emits a trace message and reports 0.
//
// Arguments are not consulted. There are never pending child tasks to wait
// for because all tasks in this runtime are executed as final.
EXTERN int32_t __kmpc_omp_taskwait(kmp_Ident *loc, uint32_t global_tid) {
  PRINT0(LD_IO, "call to __kmpc_taskwait()\n");
  const int32_t status = 0;
  return status;
}
201
// Run a taskloop as a single, immediately executed task.
//
// If *lb > *ub the iteration space is treated as empty: no task routine is
// run and the task descriptor is released. Otherwise the entire loop is
// handed to one task through __kmpc_omp_task_with_deps, which also releases
// the descriptor once the task routine has returned.
//
// if_val, st, nogroup, sched, grainsize and task_dup are not consulted.
// newKmpTaskDescr is assumed to come from __kmpc_omp_task_alloc (the
// execution path makes the same assumption) and must not be used after this
// call returns.
EXTERN void __kmpc_taskloop(kmp_Ident *loc, uint32_t global_tid,
                            kmp_TaskDescr *newKmpTaskDescr, int if_val,
                            uint64_t *lb, uint64_t *ub, int64_t st, int nogroup,
                            int32_t sched, uint64_t grainsize, void *task_dup) {

  // skip task entirely if empty iteration space
  if (*lb > *ub) {
    // The descriptor buffer is normally freed at the end of
    // __kmpc_omp_task_with_deps. That call is skipped on this path, so the
    // buffer has to be released here or it is leaked. The buffer starts
    // sizeof(omptarget_nvptx_TaskDescr) bytes before the kmp descriptor.
    SafeFree(SUB_BYTES(newKmpTaskDescr, sizeof(omptarget_nvptx_TaskDescr)),
             "explicit task descriptor");
    return;
  }

  // the compiler has already stored lb and ub in the kmp_TaskDescr structure
  // as we are using a single task to execute the entire loop, we can leave
  // the initial task_t untouched

  __kmpc_omp_task_with_deps(loc, global_tid, newKmpTaskDescr, 0, 0, 0, 0);
}
217