1 //===----------- device.h - Target independent OpenMP target RTL ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Declarations for managing devices that are handled by RTL plugins. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #ifndef _OMPTARGET_DEVICE_H 14 #define _OMPTARGET_DEVICE_H 15 16 #include <cassert> 17 #include <cstddef> 18 #include <list> 19 #include <map> 20 #include <memory> 21 #include <mutex> 22 #include <set> 23 #include <vector> 24 25 #include "rtl.h" 26 27 // Forward declarations. 28 struct RTLInfoTy; 29 struct __tgt_bin_desc; 30 struct __tgt_target_table; 31 struct __tgt_async_info; 32 class MemoryManagerTy; 33 34 using map_var_info_t = void *; 35 36 // enum for OMP_TARGET_OFFLOAD; keep in sync with kmp.h definition 37 enum kmp_target_offload_kind { 38 tgt_disabled = 0, 39 tgt_default = 1, 40 tgt_mandatory = 2 41 }; 42 typedef enum kmp_target_offload_kind kmp_target_offload_kind_t; 43 44 /// Map between host data and target data. 45 struct HostDataToTargetTy { 46 uintptr_t HstPtrBase; // host info. 47 uintptr_t HstPtrBegin; 48 uintptr_t HstPtrEnd; // non-inclusive. 49 map_var_info_t HstPtrName; // Optional source name of mapped variable. 50 51 uintptr_t TgtPtrBegin; // target info. 52 53 private: 54 /// use mutable to allow modification via std::set iterator which is const. 55 mutable uint64_t RefCount; 56 static const uint64_t INFRefCount = ~(uint64_t)0; 57 58 public: 59 HostDataToTargetTy(uintptr_t BP, uintptr_t B, uintptr_t E, uintptr_t TB, 60 map_var_info_t Name = nullptr, bool IsINF = false) HstPtrBaseHostDataToTargetTy61 : HstPtrBase(BP), HstPtrBegin(B), HstPtrEnd(E), HstPtrName(Name), 62 TgtPtrBegin(TB), RefCount(IsINF ? INFRefCount : 1) {} 63 getRefCountHostDataToTargetTy64 uint64_t getRefCount() const { 65 return RefCount; 66 } 67 resetRefCountHostDataToTargetTy68 uint64_t resetRefCount() const { 69 if (RefCount != INFRefCount) 70 RefCount = 1; 71 72 return RefCount; 73 } 74 incRefCountHostDataToTargetTy75 uint64_t incRefCount() const { 76 if (RefCount != INFRefCount) { 77 ++RefCount; 78 assert(RefCount < INFRefCount && "refcount overflow"); 79 } 80 81 return RefCount; 82 } 83 decRefCountHostDataToTargetTy84 uint64_t decRefCount() const { 85 if (RefCount != INFRefCount) { 86 assert(RefCount > 0 && "refcount underflow"); 87 --RefCount; 88 } 89 90 return RefCount; 91 } 92 isRefCountInfHostDataToTargetTy93 bool isRefCountInf() const { 94 return RefCount == INFRefCount; 95 } 96 }; 97 98 typedef uintptr_t HstPtrBeginTy; 99 inline bool operator<(const HostDataToTargetTy &lhs, const HstPtrBeginTy &rhs) { 100 return lhs.HstPtrBegin < rhs; 101 } 102 inline bool operator<(const HstPtrBeginTy &lhs, const HostDataToTargetTy &rhs) { 103 return lhs < rhs.HstPtrBegin; 104 } 105 inline bool operator<(const HostDataToTargetTy &lhs, 106 const HostDataToTargetTy &rhs) { 107 return lhs.HstPtrBegin < rhs.HstPtrBegin; 108 } 109 110 typedef std::set<HostDataToTargetTy, std::less<>> HostDataToTargetListTy; 111 112 struct LookupResult { 113 struct { 114 unsigned IsContained : 1; 115 unsigned ExtendsBefore : 1; 116 unsigned ExtendsAfter : 1; 117 } Flags; 118 119 HostDataToTargetListTy::iterator Entry; 120 LookupResultLookupResult121 LookupResult() : Flags({0,0,0}), Entry() {} 122 }; 123 124 /// Map for shadow pointers 125 struct ShadowPtrValTy { 126 void *HstPtrVal; 127 void *TgtPtrAddr; 128 void *TgtPtrVal; 129 }; 130 typedef std::map<void *, ShadowPtrValTy> ShadowPtrListTy; 131 132 /// 133 struct PendingCtorDtorListsTy { 134 std::list<void *> PendingCtors; 135 std::list<void *> PendingDtors; 136 }; 137 typedef std::map<__tgt_bin_desc *, PendingCtorDtorListsTy> 138 PendingCtorsDtorsPerLibrary; 139 140 struct DeviceTy { 141 int32_t DeviceID; 142 RTLInfoTy *RTL; 143 int32_t RTLDeviceID; 144 145 bool IsInit; 146 std::once_flag InitFlag; 147 bool HasPendingGlobals; 148 149 HostDataToTargetListTy HostDataToTargetMap; 150 PendingCtorsDtorsPerLibrary PendingCtorsDtors; 151 152 ShadowPtrListTy ShadowPtrMap; 153 154 std::mutex DataMapMtx, PendingGlobalsMtx, ShadowMtx; 155 156 // NOTE: Once libomp gains full target-task support, this state should be 157 // moved into the target task in libomp. 158 std::map<int32_t, uint64_t> LoopTripCnt; 159 160 /// Memory manager 161 std::unique_ptr<MemoryManagerTy> MemoryManager; 162 163 DeviceTy(RTLInfoTy *RTL); 164 165 // The existence of mutexes makes DeviceTy non-copyable. We need to 166 // provide a copy constructor and an assignment operator explicitly. 167 DeviceTy(const DeviceTy &D); 168 169 DeviceTy &operator=(const DeviceTy &D); 170 171 ~DeviceTy(); 172 173 // Return true if data can be copied to DstDevice directly 174 bool isDataExchangable(const DeviceTy& DstDevice); 175 176 uint64_t getMapEntryRefCnt(void *HstPtrBegin); 177 LookupResult lookupMapping(void *HstPtrBegin, int64_t Size); 178 void *getOrAllocTgtPtr(void *HstPtrBegin, void *HstPtrBase, int64_t Size, 179 map_var_info_t HstPtrName, bool &IsNew, 180 bool &IsHostPtr, bool IsImplicit, bool UpdateRefCount, 181 bool HasCloseModifier, bool HasPresentModifier); 182 void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size); 183 void *getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool &IsLast, 184 bool UpdateRefCount, bool &IsHostPtr, 185 bool MustContain = false); 186 int deallocTgtPtr(void *TgtPtrBegin, int64_t Size, bool ForceDelete, 187 bool HasCloseModifier = false); 188 int associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size); 189 int disassociatePtr(void *HstPtrBegin); 190 191 // calls to RTL 192 int32_t initOnce(); 193 __tgt_target_table *load_binary(void *Img); 194 195 // device memory allocation/deallocation routines 196 /// Allocates \p Size bytes on the device and returns the address/nullptr when 197 /// succeeds/fails. \p HstPtr is an address of the host data which the 198 /// allocated target data will be associated with. If it is unknown, the 199 /// default value of \p HstPtr is nullptr. Note: this function doesn't do 200 /// pointer association. Actually, all the __tgt_rtl_data_alloc 201 /// implementations ignore \p HstPtr. 202 void *allocData(int64_t Size, void *HstPtr = nullptr); 203 /// Deallocates memory which \p TgtPtrBegin points at and returns 204 /// OFFLOAD_SUCCESS/OFFLOAD_FAIL when succeeds/fails. 205 int32_t deleteData(void *TgtPtrBegin); 206 207 // Data transfer. When AsyncInfoPtr is nullptr, the transfer will be 208 // synchronous. 209 // Copy data from host to device 210 int32_t submitData(void *TgtPtrBegin, void *HstPtrBegin, int64_t Size, 211 __tgt_async_info *AsyncInfoPtr); 212 // Copy data from device back to host 213 int32_t retrieveData(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size, 214 __tgt_async_info *AsyncInfoPtr); 215 // Copy data from current device to destination device directly 216 int32_t dataExchange(void *SrcPtr, DeviceTy &DstDev, void *DstPtr, 217 int64_t Size, __tgt_async_info *AsyncInfo); 218 219 int32_t runRegion(void *TgtEntryPtr, void **TgtVarsPtr, ptrdiff_t *TgtOffsets, 220 int32_t TgtVarsSize, __tgt_async_info *AsyncInfoPtr); 221 int32_t runTeamRegion(void *TgtEntryPtr, void **TgtVarsPtr, 222 ptrdiff_t *TgtOffsets, int32_t TgtVarsSize, 223 int32_t NumTeams, int32_t ThreadLimit, 224 uint64_t LoopTripCount, __tgt_async_info *AsyncInfoPtr); 225 226 /// Synchronize device/queue/event based on \p AsyncInfoPtr and return 227 /// OFFLOAD_SUCCESS/OFFLOAD_FAIL when succeeds/fails. 228 int32_t synchronize(__tgt_async_info *AsyncInfoPtr); 229 230 private: 231 // Call to RTL 232 void init(); // To be called only via DeviceTy::initOnce() 233 }; 234 235 /// Map between Device ID (i.e. openmp device id) and its DeviceTy. 236 typedef std::vector<DeviceTy> DevicesTy; 237 238 extern bool device_is_ready(int device_num); 239 240 /// Struct for the data required to handle plugins 241 struct PluginManager { 242 /// RTLs identified on the host 243 RTLsTy RTLs; 244 245 /// Devices associated with RTLs 246 DevicesTy Devices; 247 std::mutex RTLsMtx; ///< For RTLs and Devices 248 249 /// Translation table retreived from the binary 250 HostEntriesBeginToTransTableTy HostEntriesBeginToTransTable; 251 std::mutex TrlTblMtx; ///< For Translation Table 252 253 /// Map from ptrs on the host to an entry in the Translation Table 254 HostPtrToTableMapTy HostPtrToTableMap; 255 std::mutex TblMapMtx; ///< For HostPtrToTableMap 256 257 // Store target policy (disabled, mandatory, default) 258 kmp_target_offload_kind_t TargetOffloadPolicy = tgt_default; 259 std::mutex TargetOffloadMtx; ///< For TargetOffloadPolicy 260 }; 261 262 extern PluginManager *PM; 263 264 #endif 265