1 //===--- omptarget.cu - OpenMP GPU initialization ---------------- CUDA -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the initialization code for the GPU
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "common/omptarget.h"
14 #include "target_impl.h"
15
16 ////////////////////////////////////////////////////////////////////////////////
17 // global data tables
18 ////////////////////////////////////////////////////////////////////////////////
19
// Array of MAX_SM state queues; each queue hands out
// omptarget_nvptx_ThreadPrivateContext objects. Only declared here (extern).
// The init/deinit entry points in this file select an element with
// `__kmpc_impl_smid() % MAX_SM` and Dequeue()/Enqueue() a context on it.
extern DEVICE omptarget_nvptx_Queue<omptarget_nvptx_ThreadPrivateContext,
                                    OMP_STATE_COUNT>
    omptarget_nvptx_device_State[MAX_SM];
23
24 ////////////////////////////////////////////////////////////////////////////////
25 // init entry points
26 ////////////////////////////////////////////////////////////////////////////////
27
// Set up the runtime state for a kernel executing in generic (non-SPMD) mode.
//
// The caller must be the thread whose id equals GetMasterThreadID(); this is
// checked by an LT_FUSSY assertion.
//
// ThreadLimit        - value recorded in `threadLimit`.
// RequiresOMPRuntime - asserted to be non-zero: generic mode always runs with
//                      an initialized runtime.
EXTERN void __kmpc_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime) {
  PRINT(LD_IO, "call to __kmpc_kernel_init with version %f\n",
        OMPTARGET_NVPTX_VERSION);
  ASSERT0(LT_FUSSY, RequiresOMPRuntime,
          "Generic always requires initialized runtime.");
  setExecutionParameters(Generic, RuntimeInitialized);

  // Zero all MAX_THREADS_PER_TEAM / WARPSIZE entries of parallelLevel.
  for (int I = 0; I < MAX_THREADS_PER_TEAM / WARPSIZE; ++I)
    parallelLevel[I] = 0;

  int threadIdInBlock = GetThreadIdInBlock();
  ASSERT0(LT_FUSSY, threadIdInBlock == GetMasterThreadID(),
          "__kmpc_kernel_init() must be called by team master warp only!");
  PRINT0(LD_IO, "call to __kmpc_kernel_init for master\n");

  // Get a state object from the queue. The slot is remembered in usedSlotIdx
  // so that __kmpc_kernel_deinit() can return the object to the same queue.
  int slot = __kmpc_impl_smid() % MAX_SM;
  usedSlotIdx = slot;
  omptarget_nvptx_threadPrivateContext =
      omptarget_nvptx_device_State[slot].Dequeue();

  // Init thread private data for the calling thread.
  int threadId = GetLogicalThreadIdInBlock(/*isSPMDExecutionMode=*/false);
  omptarget_nvptx_threadPrivateContext->InitThreadPrivateContext(threadId);

  // Init team context.
  omptarget_nvptx_TeamDescr &currTeamDescr = getMyTeamDescriptor();
  currTeamDescr.InitTeamDescr();
  // This thread will start execution, so its top-level task descriptor has to
  // point to the level zero task descriptor, which InitTeamDescr() has just
  // initialized.
  omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(
      threadId, currTeamDescr.LevelZeroTaskDescr());

  // Record the number of threads in the block and the requested thread limit.
  nThreads = GetNumberOfThreadsInBlock();
  threadLimit = ThreadLimit;
  __kmpc_impl_target_init();
}
68
// Tear down a generic-mode kernel: return the state object obtained in
// __kmpc_kernel_init() to its queue and clear the work function.
//
// IsOMPRuntimeInitialized - asserted to be non-zero (LT_FUSSY).
EXTERN void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized) {
  PRINT0(LD_IO, "call to __kmpc_kernel_deinit\n");
  ASSERT0(LT_FUSSY, IsOMPRuntimeInitialized,
          "Generic always requires initialized runtime.");

  // Hand the omp state object back to the queue it was taken from, so that
  // another team can use it.
  omptarget_nvptx_device_State[usedSlotIdx].Enqueue(
      omptarget_nvptx_threadPrivateContext);

  // All work is done: a null work function tells the workers to terminate.
  omptarget_nvptx_workFn = 0;
}
80
// Set up the runtime state for a kernel executing in SPMD mode.
//
// Every thread of the block is expected to call this function: each path
// through it executes exactly one __kmpc_impl_syncthreads().
//
// ThreadLimit        - only used in the debug trace emitted at the end.
// RequiresOMPRuntime - when zero, only the execution parameters, usedSlotIdx
//                      and parallelLevel are set up; the team and task
//                      descriptors are left untouched.
EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime) {
  PRINT0(LD_IO, "call to __kmpc_spmd_kernel_init\n");

  setExecutionParameters(Spmd, RequiresOMPRuntime ? RuntimeInitialized
                                                  : RuntimeUninitialized);

  // Initial parallel level: 1, marked active when the block has more than one
  // thread.
  const auto initialParallelLevel =
      1 + (GetNumberOfThreadsInBlock() > 1 ? OMP_ACTIVE_PARALLEL_LEVEL : 0);

  int threadId = GetThreadIdInBlock();
  if (threadId == 0) {
    // Thread 0 also records the state-queue slot used by this team.
    usedSlotIdx = __kmpc_impl_smid() % MAX_SM;
    parallelLevel[0] = initialParallelLevel;
  } else if (GetLaneId() == 0) {
    // The first lane of every other warp sets that warp's entry.
    parallelLevel[GetWarpId()] = initialParallelLevel;
  }

  if (!RequiresOMPRuntime) {
    // Runtime is not required - exit.
    __kmpc_impl_syncthreads();
    return;
  }

  //
  // Team Context Initialization.
  //
  // In SPMD mode there is no master thread so use any cuda thread for team
  // context initialization.
  if (threadId == 0) {
    // Get a state object from the queue.
    omptarget_nvptx_threadPrivateContext =
        omptarget_nvptx_device_State[usedSlotIdx].Dequeue();

    // Init team context.
    getMyTeamDescriptor().InitTeamDescr();
  }
  // Wait until thread 0 has finished the team setup above before any thread
  // goes on to use it.
  __kmpc_impl_syncthreads();

  omptarget_nvptx_TeamDescr &currTeamDescr = getMyTeamDescriptor();

  //
  // Initialize task descr for each thread.
  //
  omptarget_nvptx_TaskDescr *newTaskDescr =
      omptarget_nvptx_threadPrivateContext->Level1TaskDescr(threadId);
  ASSERT0(LT_FUSSY, newTaskDescr, "expected a task descr");
  newTaskDescr->InitLevelOneTaskDescr(currTeamDescr.LevelZeroTaskDescr());
  // Install the new descriptor as this thread's top-level task descriptor.
  omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(threadId,
                                                             newTaskDescr);

  PRINT(LD_PAR,
        "thread will execute parallel region with id %d in a team of "
        "%d threads\n",
        (int)newTaskDescr->ThreadId(), (int)ThreadLimit);
}
138
// Tear down an SPMD-mode kernel.
//
// RequiresOMPRuntime - when zero the function does nothing. Otherwise all
//                      threads synchronize and thread 0 returns the team's
//                      state object to its queue.
EXTERN void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime) {
  // The per-thread task descriptor stacks are deliberately not popped: there
  // are no more parallel regions in SPMD mode.
  if (RequiresOMPRuntime) {
    __kmpc_impl_syncthreads();
    if (GetThreadIdInBlock() == 0) {
      // Make the omp state object available to another team.
      omptarget_nvptx_device_State[usedSlotIdx].Enqueue(
          omptarget_nvptx_threadPrivateContext);
    }
  }
}
154
// Query the execution mode of the current target region: the result of
// isSPMDMode() is returned converted to int8_t (non-zero means SPMD mode).
EXTERN int8_t __kmpc_is_spmd_exec_mode() {
  PRINT0(LD_IO | LD_PAR, "call to __kmpc_is_spmd_exec_mode\n");
  const int8_t inSpmdMode = isSPMDMode();
  return inSpmdMode;
}
160