1 //===----- CGOpenMPRuntimeNVPTX.h - Interface to OpenMP NVPTX Runtimes ----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This provides a class for OpenMP runtime code generation specialized to NVPTX 11 // targets. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H 16 #define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H 17 18 #include "CGOpenMPRuntime.h" 19 #include "CodeGenFunction.h" 20 #include "clang/AST/StmtOpenMP.h" 21 #include "llvm/IR/CallSite.h" 22 23 namespace clang { 24 namespace CodeGen { 25 26 class CGOpenMPRuntimeNVPTX : public CGOpenMPRuntime { 27 public: 28 class EntryFunctionState { 29 public: 30 llvm::BasicBlock *ExitBB; 31 EntryFunctionState()32 EntryFunctionState() : ExitBB(nullptr){}; 33 }; 34 35 class WorkerFunctionState { 36 public: 37 llvm::Function *WorkerFn; 38 const CGFunctionInfo *CGFI; 39 40 WorkerFunctionState(CodeGenModule &CGM); 41 42 private: 43 void createWorkerFunction(CodeGenModule &CGM); 44 }; 45 46 /// \brief Helper for target entry function. Guide the master and worker 47 /// threads to their respective locations. 48 void emitEntryHeader(CodeGenFunction &CGF, EntryFunctionState &EST, 49 WorkerFunctionState &WST); 50 51 /// \brief Signal termination of OMP execution. 52 void emitEntryFooter(CodeGenFunction &CGF, EntryFunctionState &EST); 53 54 private: 55 // 56 // NVPTX calls. 57 // 58 59 /// \brief Get the GPU warp size. 60 llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF); 61 62 /// \brief Get the id of the current thread on the GPU. 63 llvm::Value *getNVPTXThreadID(CodeGenFunction &CGF); 64 65 // \brief Get the maximum number of threads in a block of the GPU. 66 llvm::Value *getNVPTXNumThreads(CodeGenFunction &CGF); 67 68 /// \brief Get barrier to synchronize all threads in a block. 69 void getNVPTXCTABarrier(CodeGenFunction &CGF); 70 71 // \brief Synchronize all GPU threads in a block. 72 void syncCTAThreads(CodeGenFunction &CGF); 73 74 // 75 // OMP calls. 76 // 77 78 /// \brief Get the thread id of the OMP master thread. 79 /// The master thread id is the first thread (lane) of the last warp in the 80 /// GPU block. Warp size is assumed to be some power of 2. 81 /// Thread id is 0 indexed. 82 /// E.g: If NumThreads is 33, master id is 32. 83 /// If NumThreads is 64, master id is 32. 84 /// If NumThreads is 1024, master id is 992. 85 llvm::Value *getMasterThreadID(CodeGenFunction &CGF); 86 87 // 88 // Private state and methods. 89 // 90 91 // Master-worker control state. 92 // Number of requested OMP threads in parallel region. 93 llvm::GlobalVariable *ActiveWorkers; 94 // Outlined function for the workers to execute. 95 llvm::GlobalVariable *WorkID; 96 97 /// \brief Initialize master-worker control state. 98 void initializeEnvironment(); 99 100 /// \brief Emit the worker function for the current target region. 101 void emitWorkerFunction(WorkerFunctionState &WST); 102 103 /// \brief Helper for worker function. Emit body of worker loop. 104 void emitWorkerLoop(CodeGenFunction &CGF, WorkerFunctionState &WST); 105 106 /// \brief Returns specified OpenMP runtime function for the current OpenMP 107 /// implementation. Specialized for the NVPTX device. 108 /// \param Function OpenMP runtime function. 109 /// \return Specified function. 110 llvm::Constant *createNVPTXRuntimeFunction(unsigned Function); 111 112 // 113 // Base class overrides. 114 // 115 116 /// \brief Creates offloading entry for the provided entry ID \a ID, 117 /// address \a Addr and size \a Size. 118 void createOffloadEntry(llvm::Constant *ID, llvm::Constant *Addr, 119 uint64_t Size) override; 120 121 /// \brief Emit outlined function for 'target' directive on the NVPTX 122 /// device. 123 /// \param D Directive to emit. 124 /// \param ParentName Name of the function that encloses the target region. 125 /// \param OutlinedFn Outlined function value to be defined by this call. 126 /// \param OutlinedFnID Outlined function ID value to be defined by this call. 127 /// \param IsOffloadEntry True if the outlined function is an offload entry. 128 /// An outlined function may not be an entry if, e.g. the if clause always 129 /// evaluates to false. 130 void emitTargetOutlinedFunction(const OMPExecutableDirective &D, 131 StringRef ParentName, 132 llvm::Function *&OutlinedFn, 133 llvm::Constant *&OutlinedFnID, 134 bool IsOffloadEntry, 135 const RegionCodeGenTy &CodeGen) override; 136 137 public: 138 explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM); 139 140 /// \brief This function ought to emit, in the general case, a call to 141 // the openmp runtime kmpc_push_num_teams. In NVPTX backend it is not needed 142 // as these numbers are obtained through the PTX grid and block configuration. 143 /// \param NumTeams An integer expression of teams. 144 /// \param ThreadLimit An integer expression of threads. 145 void emitNumTeamsClause(CodeGenFunction &CGF, const Expr *NumTeams, 146 const Expr *ThreadLimit, SourceLocation Loc) override; 147 148 /// \brief Emits inlined function for the specified OpenMP parallel 149 // directive but an inlined function for teams. 150 /// \a D. This outlined function has type void(*)(kmp_int32 *ThreadID, 151 /// kmp_int32 BoundID, struct context_vars*). 152 /// \param D OpenMP directive. 153 /// \param ThreadIDVar Variable for thread id in the current OpenMP region. 154 /// \param InnermostKind Kind of innermost directive (for simple directives it 155 /// is a directive itself, for combined - its innermost directive). 156 /// \param CodeGen Code generation sequence for the \a D directive. 157 llvm::Value * 158 emitParallelOrTeamsOutlinedFunction(const OMPExecutableDirective &D, 159 const VarDecl *ThreadIDVar, 160 OpenMPDirectiveKind InnermostKind, 161 const RegionCodeGenTy &CodeGen) override; 162 163 /// \brief Emits code for teams call of the \a OutlinedFn with 164 /// variables captured in a record which address is stored in \a 165 /// CapturedStruct. 166 /// \param OutlinedFn Outlined function to be run by team masters. Type of 167 /// this function is void(*)(kmp_int32 *, kmp_int32, struct context_vars*). 168 /// \param CapturedVars A pointer to the record with the references to 169 /// variables used in \a OutlinedFn function. 170 /// 171 void emitTeamsCall(CodeGenFunction &CGF, const OMPExecutableDirective &D, 172 SourceLocation Loc, llvm::Value *OutlinedFn, 173 ArrayRef<llvm::Value *> CapturedVars) override; 174 }; 175 176 } // CodeGen namespace. 177 } // clang namespace. 178 179 #endif // LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIMENVPTX_H 180