/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_OPS_CUDA_COMMON_H_
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_OPS_CUDA_COMMON_H_
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
#ifdef _MSC_VER
// MSVC does not provide the POSIX `uint` typedef that some kernels rely on.
#define uint unsigned int
#endif
namespace mindspore {
namespace device {
namespace gpu {
// Thin per-device wrapper around GPUdeviceInfo: exposes launch-configuration
// queries (thread/block counts, SM version, CUDA capability, shared memory)
// for the current context device.
class CudaCommon {
 public:
  inline size_t threads_num() const { return CUDA_THREADS(device_id_); }
  inline size_t threads_num(size_t size) const { return CUDA_THREADS_MAXSIZE(device_id_, size); }
  inline size_t major_sm() const { return CUDA_MAJOR_SM(device_id_); }
  inline float cuda_cap() const { return CUDA_CAP(device_id_); }
  inline size_t blocks_num(const size_t total_threads) const { return CUDA_BLOCKS(device_id_, total_threads); }
  size_t share_memory_size() const { return CUDA_SHARED_MEM_PER_BLOCK(device_id_); }
  void set_check_sm(const bool &flag) { GPUdeviceInfo::GetInstance(device_id_)->set_check_sm(flag); }
  bool check_sm() const { return GPUdeviceInfo::GetInstance(device_id_)->check_sm(); }
  uint32_t get_ctx_device_id() const { return device_id_; }

  static CudaCommon &GetInstance();

 private:
  CudaCommon();
  ~CudaCommon() = default;
  CudaCommon(const CudaCommon &) = delete;
  CudaCommon &operator=(const CudaCommon &) = delete;

  uint32_t device_id_;
};

// Convenience macros used by kernel implementations to query the launch
// configuration for the current context device.
#ifndef GET_BLOCKS
#define GET_BLOCKS(total_threads) mindspore::device::gpu::CudaCommon::GetInstance().blocks_num(total_threads)
#endif
#ifndef GET_THREADS
#define GET_THREADS mindspore::device::gpu::CudaCommon::GetInstance().threads_num()
#endif
#define GET_THREADS_MAXSIZE(size) mindspore::device::gpu::CudaCommon::GetInstance().threads_num(size)
#define GET_MAJOR_SM mindspore::device::gpu::CudaCommon::GetInstance().major_sm()
#define GET_CUDA_CAP mindspore::device::gpu::CudaCommon::GetInstance().cuda_cap()
#define SHARED_MEM_PER_BLOCK mindspore::device::gpu::CudaCommon::GetInstance().share_memory_size()
#define GET_CTX_DEVICE_ID mindspore::device::gpu::CudaCommon::GetInstance().get_ctx_device_id()
}  // namespace gpu
}  // namespace device
}  // namespace mindspore
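// Illustrative usage (a sketch, not part of this header's API): kernel
// implementations typically size their launches with the macros above via a
// grid-stride loop. The names ExampleFillKernel/ExampleFill below are
// hypothetical and exist only to demonstrate the pattern.
//
//   template <typename T>
//   __global__ void ExampleFillKernel(T *output, size_t size) {
//     // Grid-stride loop: any grid size covers all `size` elements.
//     for (size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < size;
//          i += blockDim.x * gridDim.x) {
//       output[i] = T(0);
//     }
//   }
//
//   template <typename T>
//   void ExampleFill(T *output, size_t size, cudaStream_t stream) {
//     ExampleFillKernel<<<GET_BLOCKS(size), GET_THREADS, 0, stream>>>(output, size);
//   }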
#ifdef _MSC_VER
// Some CUDA ops (such as cum_minmax) call IsNan with integer types, which MSVC
// does not support, so trivial overloads are provided here: an integer value is
// never NaN.
__device__ __forceinline__ bool IsNan(const int8_t &x) { return false; }
__device__ __forceinline__ bool IsNan(const int16_t &x) { return false; }
__device__ __forceinline__ bool IsNan(const int32_t &x) { return false; }
__device__ __forceinline__ bool IsNan(const int64_t &x) { return false; }
__device__ __forceinline__ bool IsNan(const uint8_t &x) { return false; }
__device__ __forceinline__ bool IsNan(const uint16_t &x) { return false; }
__device__ __forceinline__ bool IsNan(const uint32_t &x) { return false; }
__device__ __forceinline__ bool IsNan(const uint64_t &x) { return false; }
#endif  // _MSC_VER

#endif  // MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_OPS_CUDA_COMMON_H_