Lines Matching refs:DeviceId
147 void resizeStreamPool(const int DeviceId, const size_t NewSize) { in resizeStreamPool() argument
148 std::vector<CUstream> &Pool = StreamPool[DeviceId]; in resizeStreamPool()
152 CUresult Err = cuCtxSetCurrent(DeviceData[DeviceId].Context); in resizeStreamPool()
215 CUstream getStream(const int DeviceId) { in getStream() argument
216 const std::lock_guard<std::mutex> Lock(*StreamMtx[DeviceId]); in getStream()
217 int &Id = NextStreamId[DeviceId]; in getStream()
219 if (Id == StreamPool[DeviceId].size()) { in getStream()
221 resizeStreamPool(DeviceId, Id * 2); in getStream()
223 return StreamPool[DeviceId][Id++]; in getStream()
240 void returnStream(const int DeviceId, CUstream Stream) { in returnStream() argument
241 const std::lock_guard<std::mutex> Lock(*StreamMtx[DeviceId]); in returnStream()
242 int &Id = NextStreamId[DeviceId]; in returnStream()
244 StreamPool[DeviceId][--Id] = Stream; in returnStream()
247 bool initializeDeviceStreamPool(const int DeviceId) { in initializeDeviceStreamPool() argument
248 assert(StreamPool[DeviceId].empty() && "stream pool has been initialized"); in initializeDeviceStreamPool()
250 resizeStreamPool(DeviceId, EnvNumInitialStreams); in initializeDeviceStreamPool()
253 if (StreamPool[DeviceId].size() != EnvNumInitialStreams) in initializeDeviceStreamPool()
257 for (CUstream &S : StreamPool[DeviceId]) in initializeDeviceStreamPool()
283 void addOffloadEntry(const int DeviceId, const __tgt_offload_entry entry) { in addOffloadEntry() argument
284 FuncOrGblEntryTy &E = DeviceData[DeviceId].FuncGblEntries.back(); in addOffloadEntry()
289 const __tgt_offload_entry *getOffloadEntry(const int DeviceId, in getOffloadEntry() argument
292 DeviceData[DeviceId].FuncGblEntries.back().Entries) in getOffloadEntry()
300 __tgt_target_table *getOffloadEntriesTable(const int DeviceId) { in getOffloadEntriesTable() argument
301 FuncOrGblEntryTy &E = DeviceData[DeviceId].FuncGblEntries.back(); in getOffloadEntriesTable()
314 void clearOffloadEntriesTable(const int DeviceId) { in clearOffloadEntriesTable() argument
315 DeviceData[DeviceId].FuncGblEntries.emplace_back(); in clearOffloadEntriesTable()
316 FuncOrGblEntryTy &E = DeviceData[DeviceId].FuncGblEntries.back(); in clearOffloadEntriesTable()
321 CUstream getStream(const int DeviceId, __tgt_async_info *AsyncInfoPtr) const { in getStream() argument
325 AsyncInfoPtr->Queue = StreamManager->getStream(DeviceId); in getStream()
397 bool isValidDeviceId(const int DeviceId) const { in isValidDeviceId()
398 return DeviceId >= 0 && DeviceId < NumberOfDevices; in isValidDeviceId()
405 int initDevice(const int DeviceId) { in initDevice() argument
408 DP("Getting device %d\n", DeviceId); in initDevice()
409 CUresult Err = cuDeviceGet(&Device, DeviceId); in initDevice()
437 Err = cuDevicePrimaryCtxRetain(&DeviceData[DeviceId].Context, Device); in initDevice()
441 Err = cuCtxSetCurrent(DeviceData[DeviceId].Context); in initDevice()
446 if (!StreamManager->initializeDeviceStreamPool(DeviceId)) in initDevice()
456 DeviceData[DeviceId].BlocksPerGrid = DeviceRTLTy::DefaultNumTeams; in initDevice()
459 DeviceData[DeviceId].BlocksPerGrid = MaxGridDimX; in initDevice()
464 DeviceData[DeviceId].BlocksPerGrid = DeviceRTLTy::HardTeamLimit; in initDevice()
474 DeviceData[DeviceId].ThreadsPerBlock = DeviceRTLTy::DefaultNumThreads; in initDevice()
477 DeviceData[DeviceId].ThreadsPerBlock = MaxBlockDimX; in initDevice()
482 DeviceData[DeviceId].ThreadsPerBlock = DeviceRTLTy::HardThreadLimit; in initDevice()
491 DeviceData[DeviceId].WarpSize = 32; in initDevice()
494 DeviceData[DeviceId].WarpSize = WarpSize; in initDevice()
498 if (EnvTeamLimit > 0 && DeviceData[DeviceId].BlocksPerGrid > EnvTeamLimit) { in initDevice()
501 DeviceData[DeviceId].BlocksPerGrid = EnvTeamLimit; in initDevice()
504 INFO(DeviceId, in initDevice()
507 DeviceData[DeviceId].BlocksPerGrid, in initDevice()
508 DeviceData[DeviceId].ThreadsPerBlock, DeviceData[DeviceId].WarpSize); in initDevice()
514 DeviceData[DeviceId].NumTeams = EnvNumTeams; in initDevice()
516 DeviceData[DeviceId].NumTeams = DeviceRTLTy::DefaultNumTeams; in initDevice()
521 if (DeviceData[DeviceId].NumTeams > DeviceData[DeviceId].BlocksPerGrid) { in initDevice()
523 DeviceData[DeviceId].BlocksPerGrid); in initDevice()
524 DeviceData[DeviceId].NumTeams = DeviceData[DeviceId].BlocksPerGrid; in initDevice()
528 DeviceData[DeviceId].NumThreads = DeviceRTLTy::DefaultNumThreads; in initDevice()
531 if (DeviceData[DeviceId].NumThreads > in initDevice()
532 DeviceData[DeviceId].ThreadsPerBlock) { in initDevice()
534 DeviceData[DeviceId].ThreadsPerBlock); in initDevice()
535 DeviceData[DeviceId].NumTeams = DeviceData[DeviceId].ThreadsPerBlock; in initDevice()
541 __tgt_target_table *loadBinary(const int DeviceId, in loadBinary() argument
544 CUresult Err = cuCtxSetCurrent(DeviceData[DeviceId].Context); in loadBinary()
549 clearOffloadEntriesTable(DeviceId); in loadBinary()
566 std::list<KernelTy> &KernelsList = DeviceData[DeviceId].KernelsList; in loadBinary()
618 addOffloadEntry(DeviceId, Entry); in loadBinary()
676 addOffloadEntry(DeviceId, Entry); in loadBinary()
719 return getOffloadEntriesTable(DeviceId); in loadBinary()
722 void *dataAlloc(const int DeviceId, const int64_t Size) const { in dataAlloc() argument
726 CUresult Err = cuCtxSetCurrent(DeviceData[DeviceId].Context); in dataAlloc()
738 int dataSubmit(const int DeviceId, const void *TgtPtr, const void *HstPtr, in dataSubmit() argument
742 CUresult Err = cuCtxSetCurrent(DeviceData[DeviceId].Context); in dataSubmit()
746 CUstream Stream = getStream(DeviceId, AsyncInfoPtr); in dataSubmit()
760 int dataRetrieve(const int DeviceId, void *HstPtr, const void *TgtPtr, in dataRetrieve() argument
764 CUresult Err = cuCtxSetCurrent(DeviceData[DeviceId].Context); in dataRetrieve()
768 CUstream Stream = getStream(DeviceId, AsyncInfoPtr); in dataRetrieve()
833 int dataDelete(const int DeviceId, void *TgtPtr) const { in dataDelete() argument
834 CUresult Err = cuCtxSetCurrent(DeviceData[DeviceId].Context); in dataDelete()
845 int runTargetTeamRegion(const int DeviceId, void *TgtEntryPtr, void **TgtArgs, in runTargetTeamRegion() argument
850 CUresult Err = cuCtxSetCurrent(DeviceData[DeviceId].Context); in runTargetTeamRegion()
871 DP("Adding master warp: +%d threads\n", DeviceData[DeviceId].WarpSize); in runTargetTeamRegion()
872 CudaThreadsPerBlock += DeviceData[DeviceId].WarpSize; in runTargetTeamRegion()
876 DeviceData[DeviceId].NumThreads); in runTargetTeamRegion()
877 CudaThreadsPerBlock = DeviceData[DeviceId].NumThreads; in runTargetTeamRegion()
880 if (CudaThreadsPerBlock > DeviceData[DeviceId].ThreadsPerBlock) { in runTargetTeamRegion()
882 DeviceData[DeviceId].ThreadsPerBlock); in runTargetTeamRegion()
883 CudaThreadsPerBlock = DeviceData[DeviceId].ThreadsPerBlock; in runTargetTeamRegion()
928 DP("Using default number of teams %d\n", DeviceData[DeviceId].NumTeams); in runTargetTeamRegion()
929 CudaBlocksPerGrid = DeviceData[DeviceId].NumTeams; in runTargetTeamRegion()
931 } else if (TeamNum > DeviceData[DeviceId].BlocksPerGrid) { in runTargetTeamRegion()
933 DeviceData[DeviceId].BlocksPerGrid); in runTargetTeamRegion()
934 CudaBlocksPerGrid = DeviceData[DeviceId].BlocksPerGrid; in runTargetTeamRegion()
940 INFO(DeviceId, in runTargetTeamRegion()
943 (getOffloadEntry(DeviceId, TgtEntryPtr)) in runTargetTeamRegion()
944 ? getOffloadEntry(DeviceId, TgtEntryPtr)->name in runTargetTeamRegion()
949 CUstream Stream = getStream(DeviceId, AsyncInfo); in runTargetTeamRegion()
963 int synchronize(const int DeviceId, __tgt_async_info *AsyncInfoPtr) const { in synchronize() argument
978 DeviceId, reinterpret_cast<CUstream>(AsyncInfoPtr->Queue)); in synchronize()