1 //===--------- support.cu - GPU OpenMP support functions --------- CUDA -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Wrapper implementation to some functions natively supported by the GPU.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "common/support.h"
14 #include "common/debug.h"
15 #include "common/omptarget.h"
16
17 ////////////////////////////////////////////////////////////////////////////////
18 // Execution Parameters
19 ////////////////////////////////////////////////////////////////////////////////
20
// Store the execution mode and the runtime mode in execution_param.
// Whatever was stored before is discarded: the value is first overwritten
// with EMode and the RMode bits are then OR-ed in.
DEVICE void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode) {
  // Deliberately kept as two separate updates (plain store, then merge) so
  // the sequence of writes to execution_param is unchanged.
  execution_param = EMode;
  execution_param |= RMode;
}
25
// True when the bits of execution_param selected by ModeMask equal Generic.
DEVICE bool isGenericMode() {
  return Generic == (execution_param & ModeMask);
}
27
// True when the bits of execution_param selected by ModeMask equal Spmd.
DEVICE bool isSPMDMode() {
  return Spmd == (execution_param & ModeMask);
}
29
// True when the bits of execution_param selected by RuntimeMask equal
// RuntimeUninitialized.
DEVICE bool isRuntimeUninitialized() {
  return RuntimeUninitialized == (execution_param & RuntimeMask);
}
33
// True when the bits of execution_param selected by RuntimeMask equal
// RuntimeInitialized.
DEVICE bool isRuntimeInitialized() {
  return RuntimeInitialized == (execution_param & RuntimeMask);
}
37
38 ////////////////////////////////////////////////////////////////////////////////
39 // Execution Modes based on location parameter fields
40 ////////////////////////////////////////////////////////////////////////////////
41
// Decide whether the code associated with `loc` executes in SPMD mode.
//
// The flags in loc->reserved_2 are consulted first:
//   KMP_IDENT_SPMD_MODE set                          -> SPMD
//   SPMD flag clear, KMP_IDENT_SIMPLE_RT_MODE clear  -> not SPMD
//   SPMD flag clear, KMP_IDENT_SIMPLE_RT_MODE set    -> undefined state
// When `loc` is null, or its flags are in the undefined state, the answer is
// taken from the current execution parameters via isSPMDMode().
DEVICE bool checkSPMDMode(kmp_Ident *loc) {
  if (loc) {
    // The SPMD flag is conclusive on its own: we cannot be in the undefined
    // state when it is set.
    if (loc->reserved_2 & KMP_IDENT_SPMD_MODE)
      return true;

    // Non-SPMD with the simple-runtime flag clear (i.e. runtime required) is
    // a valid combination, so the flags can be trusted.
    if (!(loc->reserved_2 & KMP_IDENT_SIMPLE_RT_MODE))
      return false;
  }

  // No location information, or the flags are in the undefined state.
  return isSPMDMode();
}
59
// Generic mode is reported exactly when checkSPMDMode(loc) reports false.
DEVICE bool checkGenericMode(kmp_Ident *loc) {
  const bool InSPMD = checkSPMDMode(loc);
  return !InSPMD;
}
63
// Decide whether the code associated with `loc` runs without an initialized
// runtime.
//
// The flags in loc->reserved_2 are consulted first:
//   KMP_IDENT_SIMPLE_RT_MODE clear                   -> false
//   SIMPLE_RT_MODE set, KMP_IDENT_SPMD_MODE set      -> true
//   SIMPLE_RT_MODE set, KMP_IDENT_SPMD_MODE clear    -> undefined state
// When `loc` is null, or its flags are in the undefined state, the answer is
// taken from the current execution parameters via isRuntimeUninitialized().
DEVICE bool checkRuntimeUninitialized(kmp_Ident *loc) {
  if (loc) {
    // Simple-runtime flag clear means the runtime is required, which rules
    // out the undefined state; the runtime is not reported as uninitialized.
    if (!(loc->reserved_2 & KMP_IDENT_SIMPLE_RT_MODE))
      return false;

    // Simple-runtime flag set (runtime not required) together with the SPMD
    // flag is a well-defined combination.
    if (loc->reserved_2 & KMP_IDENT_SPMD_MODE)
      return true;
  }

  // Either there is no location information, or we hit the undefined state:
  // non-SPMD together with "no runtime required". That combination cannot
  // actually be requested and is used to mark orphaned functions.
  return isRuntimeUninitialized();
}
86
// The runtime is reported as initialized exactly when
// checkRuntimeUninitialized(loc) reports false.
DEVICE bool checkRuntimeInitialized(kmp_Ident *loc) {
  const bool Uninitialized = checkRuntimeUninitialized(loc);
  return !Uninitialized;
}
90
91 ////////////////////////////////////////////////////////////////////////////////
92 // support: get info from machine
93 ////////////////////////////////////////////////////////////////////////////////
94
95 ////////////////////////////////////////////////////////////////////////////////
96 //
97 // Calls to the Generic Scheme Implementation Layer (assuming 1D layout)
98 //
99 ////////////////////////////////////////////////////////////////////////////////
100
// The master thread is the first thread (lane) of the last warp in the block.
// Thread ids are 0-indexed.
// E.g.: NumThreads ==   33  ->  master id  32
//       NumThreads ==   64  ->  master id  32
//       NumThreads ==   97  ->  master id  96
//       NumThreads == 1024  ->  master id 992
//
// The id of the last thread is rounded down to a multiple of WARPSIZE by
// clearing its low bits; this relies on WARPSIZE being a power of two.
//
// Called in Generic Execution Mode only.
DEVICE int GetMasterThreadID() {
  const int LastThreadId = GetNumberOfThreadsInBlock() - 1;
  const int WarpStartMask = ~(WARPSIZE - 1);
  return LastThreadId & WarpStartMask;
}
110
// The last warp is reserved for the master; the other warps are workers.
// The worker count is therefore the master's thread id, i.e. the number of
// threads that come before it.
// Called in Generic Execution Mode only.
DEVICE int GetNumberOfWorkersInTeam() {
  const int MasterId = GetMasterThreadID();
  return MasterId;
}
114
115 ////////////////////////////////////////////////////////////////////////////////
116 // get thread id in team
117
// Returns the logical id of the calling thread within its block.
//
// May be called in a parallel region by the workers or in a serial region by
// the master. Outside SPMD mode, any thread whose id is at or above
// GetMasterThreadID() is reported as 0, because the master is a shadow for
// the first worker. In SPMD mode the hardware thread id is returned as is.
DEVICE int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode) {
  const int HwThreadId = GetThreadIdInBlock();
  // A comparison plus select is used instead of a modulo operation.
  // GetMasterThreadID() is only evaluated when not in SPMD mode.
  const bool InMasterWarp =
      !isSPMDExecutionMode && HwThreadId >= GetMasterThreadID();
  return InMasterWarp ? 0 : HwThreadId;
}
131
132 ////////////////////////////////////////////////////////////////////////////////
133 //
134 // OpenMP Thread Support Layer
135 //
136 ////////////////////////////////////////////////////////////////////////////////
137
// omp_thread_num: OpenMP thread number of the caller.
//
// threadId            - index passed to GetTopLevelTaskDescr() when the task
//                       descriptor has to be consulted (non-SPMD path only).
// isSPMDExecutionMode - true when executing in SPMD mode.
DEVICE int GetOmpThreadId(int threadId, bool isSPMDExecutionMode) {
  // Bits below OMP_ACTIVE_PARALLEL_LEVEL hold the nesting count that
  // IncParallelLevel/DecParallelLevel maintain for this warp.
  const int NestingLevel =
      parallelLevel[GetWarpId()] & (OMP_ACTIVE_PARALLEL_LEVEL - 1);

  // Nested more than one level deep: the caller is thread 0.
  if (NestingLevel > 1)
    return 0;

  // SPMD: the OpenMP thread number is the thread id within the block.
  if (isSPMDExecutionMode)
    return GetThreadIdInBlock();

  // Otherwise ask the top-level task descriptor of this thread.
  omptarget_nvptx_TaskDescr *TopTaskDescr =
      omptarget_nvptx_threadPrivateContext->GetTopLevelTaskDescr(threadId);
  ASSERT0(LT_FUSSY, TopTaskDescr, "expected a top task descr");
  return TopTaskDescr->ThreadId();
}
153
// omp_num_threads: number of OpenMP threads in the current region.
//
// Returns 1 unless the calling warp's parallel level is exactly
// OMP_ACTIVE_PARALLEL_LEVEL + 1 (the value IncParallelLevel produces for a
// single, active parallel level). In that case the result is the block size
// in SPMD mode and threadsInTeam otherwise.
DEVICE int GetNumberOfOmpThreads(bool isSPMDExecutionMode) {
  const int WarpLevel = parallelLevel[GetWarpId()];
  if (WarpLevel != OMP_ACTIVE_PARALLEL_LEVEL + 1)
    return 1;

  if (isSPMDExecutionMode)
    return GetNumberOfThreadsInBlock();

  return threadsInTeam;
}
168
169 ////////////////////////////////////////////////////////////////////////////////
170 // Team id linked to OpenMP
171
// omp_team_num: the team number is the block id within the kernel, under the
// assumption of one block per team.
DEVICE int GetOmpTeamId() {
  const int BlockId = GetBlockIdInKernel();
  return BlockId;
}
176
// omp_num_teams: the number of teams is the number of blocks in the kernel,
// under the assumption of one block per team.
DEVICE int GetNumberOfOmpTeams() {
  const int NumBlocks = GetNumberOfBlocksInKernel();
  return NumBlocks;
}
181
182 ////////////////////////////////////////////////////////////////////////////////
183 // Masters
184
// Returns 1 when ompThreadId is 0 (the team master) and 0 otherwise.
DEVICE int IsTeamMaster(int ompThreadId) {
  return ompThreadId == 0 ? 1 : 0;
}
186
187 ////////////////////////////////////////////////////////////////////////////////
188 // Parallel level
189
// Raise the parallel level recorded for the calling warp.
//
// ActiveParallel - when true, OMP_ACTIVE_PARALLEL_LEVEL is added on top of the
//                  unit increment.
// Mask           - lanes taking part in the call; used for both warp syncs
//                  and to elect the single lane that performs the update.
DEVICE void IncParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
  __kmpc_impl_syncwarp(Mask);

  // Count the participating lanes selected by __kmpc_impl_lanemask_lt()
  // (presumably the lanes numbered below the caller). The lane for which that
  // count is zero updates the counter on behalf of the whole warp.
  const unsigned LanesBefore =
      __kmpc_impl_popc(Mask & __kmpc_impl_lanemask_lt());
  if (LanesBefore == 0) {
    const int Step = 1 + (ActiveParallel ? OMP_ACTIVE_PARALLEL_LEVEL : 0);
    parallelLevel[GetWarpId()] += Step;
    // Fence after the update and before the closing warp sync.
    __kmpc_impl_threadfence();
  }

  __kmpc_impl_syncwarp(Mask);
}
201
// Lower the parallel level recorded for the calling warp; the inverse of the
// adjustment made when the level was raised.
//
// ActiveParallel - when true, OMP_ACTIVE_PARALLEL_LEVEL is subtracted on top
//                  of the unit decrement.
// Mask           - lanes taking part in the call; used for both warp syncs
//                  and to elect the single lane that performs the update.
DEVICE void DecParallelLevel(bool ActiveParallel, __kmpc_impl_lanemask_t Mask) {
  __kmpc_impl_syncwarp(Mask);

  // Count the participating lanes selected by __kmpc_impl_lanemask_lt()
  // (presumably the lanes numbered below the caller). The lane for which that
  // count is zero updates the counter on behalf of the whole warp.
  const unsigned LanesBefore =
      __kmpc_impl_popc(Mask & __kmpc_impl_lanemask_lt());
  if (LanesBefore == 0) {
    const int Step = 1 + (ActiveParallel ? OMP_ACTIVE_PARALLEL_LEVEL : 0);
    parallelLevel[GetWarpId()] -= Step;
    // Fence after the update and before the closing warp sync.
    __kmpc_impl_threadfence();
  }

  __kmpc_impl_syncwarp(Mask);
}
213
214 ////////////////////////////////////////////////////////////////////////////////
215 // get OpenMP number of procs
216
// Get the number of processors reported for the device.
// In SPMD mode this is the number of threads in the block; otherwise it is
// the number of worker threads in the team (the master warp is excluded).
DEVICE int GetNumberOfProcsInDevice(bool isSPMDExecutionMode) {
  if (isSPMDExecutionMode)
    return GetNumberOfThreadsInBlock();
  return GetNumberOfWorkersInTeam();
}
223
// Number of processors in the team; identical to the value reported by
// GetNumberOfProcsInDevice() for the same execution mode.
DEVICE int GetNumberOfProcsInTeam(bool isSPMDExecutionMode) {
  const int NumProcs = GetNumberOfProcsInDevice(isSPMDExecutionMode);
  return NumProcs;
}
227
228 ////////////////////////////////////////////////////////////////////////////////
229 // Memory
230 ////////////////////////////////////////////////////////////////////////////////
231
// Compute the number of padding bytes that must be appended to `size` so that
// the total becomes a multiple of `alignment`.
//
// size      - current size in bytes.
// alignment - required alignment; must be a non-zero power of 2.
//
// Returns a value in [0, alignment - 1]; 0 when `size` is already aligned.
// E.g. size == 5, alignment == 8 yields 3.
DEVICE unsigned long PadBytes(unsigned long size,
                              unsigned long alignment) // must be a power of 2
{
  // The classic (x & (x - 1)) == 0 test is also satisfied by x == 0, which is
  // not a power of 2 and would turn the mask below into all-ones. Reject it
  // explicitly.
  ASSERT(LT_FUSSY, alignment != 0 && (alignment & (alignment - 1)) == 0,
         "alignment %lu is not a power of 2\n", alignment);
  // ~size + 1 is the two's-complement negation of size; keeping only the low
  // bits gives (-size) mod alignment, i.e. the distance to the next multiple.
  return (~(unsigned long)size + 1) & (alignment - 1);
}
240
// Allocate `size` bytes through __kmpc_impl_malloc and log the request.
//
// size - number of bytes requested.
// msg  - short description of the allocation, used only in the debug trace.
//
// Returns the pointer produced by __kmpc_impl_malloc unchanged. Despite the
// name, no success check is performed here; callers must be prepared to
// validate the result themselves.
DEVICE void *SafeMalloc(size_t size, const char *msg) {
  void *Allocation = __kmpc_impl_malloc(size);
  PRINT(LD_MEM, "malloc data of size %llu for %s: 0x%llx\n",
        (unsigned long long)size, msg, (unsigned long long)Allocation);
  return Allocation;
}
248
// Log and release `ptr` through __kmpc_impl_free.
//
// ptr - pointer to release.
// msg - short description of the allocation, used only in the debug trace.
//
// Always returns NULL, which lets callers clear their pointer in the same
// statement: p = SafeFree(p, "...").
DEVICE void *SafeFree(void *ptr, const char *msg) {
  // The trace is emitted before the memory is handed back.
  PRINT(LD_MEM, "free data ptr 0x%llx for %s\n", (unsigned long long)ptr, msg);
  __kmpc_impl_free(ptr);
  return NULL;
}
254
255 ////////////////////////////////////////////////////////////////////////////////
256 // Teams Reduction Scratchpad Helpers
257 ////////////////////////////////////////////////////////////////////////////////
258
// Returns ReductionScratchpadPtr viewed as a pointer to unsigned int, i.e. the
// timestamp lives at the very start of the teams-reduction scratchpad.
DEVICE unsigned int *GetTeamsReductionTimestamp() {
  unsigned int *Timestamp = static_cast<unsigned int *>(ReductionScratchpadPtr);
  return Timestamp;
}
262
// Returns the address 256 bytes past ReductionScratchpadPtr, as a char
// pointer.
// NOTE(review): the skipped 256-byte prefix presumably holds the timestamp
// returned by GetTeamsReductionTimestamp() plus padding - confirm.
DEVICE char *GetTeamsReductionScratchpad() {
  char *ScratchpadBase = static_cast<char *>(ReductionScratchpadPtr);
  return ScratchpadBase + 256;
}
266
267