/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_CUDA_COMMON_H_
#define MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_CUDA_COMMON_H_

#include <algorithm>
#include "runtime/device/gpu/gpu_device_manager.h"

// Device-side assertion: aborts kernel execution with file/line/function
// information when the condition does not hold.
#define CUDA_KERNEL_ASSERT(cond)                                                        \
  if (!(cond)) {                                                                        \
    __assert_fail(#cond, __FILE__, static_cast<unsigned int>(__LINE__), __FUNCTION__);  \
  }
namespace mindspore {
namespace device {
namespace gpu {
// Singleton that queries the current CUDA device once and caches the
// launch-related properties (max threads per block, multiprocessor count,
// compute capability major version, shared memory per block).
class CudaCommon {
 public:
  inline int threads_num() const { return threads_per_block_; }
  inline int major_sm() const { return major_sm_; }
  // Number of blocks needed to cover total_threads, capped at the
  // device's multiprocessor count.
  inline int blocks_num(const int total_threads) const {
    return std::min(((total_threads - 1) / threads_per_block_) + 1, max_blocks_);
  }
  size_t share_memory_size() const { return max_share_memory_; }
  void set_check_sm(const bool &flag) { check_sm_ = flag; }
  bool check_sm() const { return check_sm_; }

  static CudaCommon &GetInstance() {
    static CudaCommon instance;
    return instance;
  }

 private:
  CudaCommon() {
    uint32_t device_id = GPUDeviceManager::GetInstance().cur_device_id();
    cudaDeviceProp prop;
    (void)cudaGetDeviceProperties(&prop, device_id);
    threads_per_block_ = prop.maxThreadsPerBlock;
    max_blocks_ = prop.multiProcessorCount;
    major_sm_ = prop.major;
    max_share_memory_ = prop.sharedMemPerBlock;
  }
  ~CudaCommon() = default;
  CudaCommon(const CudaCommon &) = delete;
  CudaCommon &operator=(const CudaCommon &) = delete;

  int max_blocks_;
  int threads_per_block_;
  int major_sm_;
  size_t max_share_memory_;
  bool check_sm_{true};
};
// Convenience macros for building kernel launch configurations from the
// cached device properties.
#define GET_BLOCKS(total_threads) mindspore::device::gpu::CudaCommon::GetInstance().blocks_num(total_threads)
#define GET_THREADS mindspore::device::gpu::CudaCommon::GetInstance().threads_num()
#define GET_MAJOR_SM mindspore::device::gpu::CudaCommon::GetInstance().major_sm()
#define SHARED_MEM_PER_BLOCK mindspore::device::gpu::CudaCommon::GetInstance().share_memory_size()
// Minimum and recommended compute capability (major SM version).
#define MINIUM_SM 6
#define RECOMMEND_SM 7
}  // namespace gpu
}  // namespace device
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_CUDA_COMMON_H_
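
// ---------------------------------------------------------------------------
// Illustrative usage (sketch only, not part of the original header): a
// hypothetical element-wise kernel whose launch configuration is derived from
// the GET_BLOCKS/GET_THREADS macros above. SquareKernel and LaunchSquare are
// assumed names for illustration, and the include path is inferred from the
// header guard; neither is defined in this file.
//
//   #include "runtime/device/gpu/cuda_common.h"
//
//   __global__ void SquareKernel(const float *in, float *out, int n) {
//     CUDA_KERNEL_ASSERT(in != nullptr && out != nullptr);
//     // Grid-stride loop: GET_BLOCKS caps the block count at the
//     // multiprocessor count, so each thread may handle several elements.
//     for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += blockDim.x * gridDim.x) {
//       out[i] = in[i] * in[i];
//     }
//   }
//
//   void LaunchSquare(const float *in, float *out, int n, cudaStream_t stream) {
//     SquareKernel<<<GET_BLOCKS(n), GET_THREADS, 0, stream>>>(in, out, n);
//   }
// ---------------------------------------------------------------------------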