• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2022 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_OPS_CUDA_DEVICE_INFO_H_
18 #define MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_OPS_CUDA_DEVICE_INFO_H_
19 
20 #include <cudnn.h>
21 #include <cublas_v2.h>
22 #include <algorithm>
23 #include <cusolverDn.h>
24 #include <memory>
25 #include <mutex>
26 #include <string>
27 #ifdef _MSC_VER
28 #include <cassert>
29 #else
30 #include <assert.h>
31 #endif
32 
// CUDA_LIB_EXPORT controls symbol visibility for the cuda_ops library:
//   - MSVC: dllexport while building the DLL itself, dllimport for consumers.
//   - Other compilers (GCC/Clang): default ELF visibility, i.e. exported.
#ifdef _MSC_VER
#ifdef BUILDING_CUDA_OPS_DLL
#define CUDA_LIB_EXPORT __declspec(dllexport)
#else
#define CUDA_LIB_EXPORT __declspec(dllimport)
#endif
#else
#define CUDA_LIB_EXPORT __attribute__((visibility("default")))
#endif  // _MSC_VER
42 
// Windows toolchains do not provide the POSIX `uint` typedef that some kernel
// code relies on; define it as a macro there. Guarded so that any existing
// definition (e.g. from another SDK header) wins.
#ifdef _WIN32
#ifndef uint
#define uint unsigned int
#endif
#endif
48 
// Device-side assertion: aborts the kernel with a diagnostic when `cond` is
// false. Both branches are wrapped in `do { } while (0)` so the macro behaves
// as a single statement; the previous bare `if (!(cond)) { ... }` form would
// silently capture a following `else` (dangling-else) when used as
// `if (x) CUDA_KERNEL_ASSERT(c); else ...`.
#ifndef _MSC_VER
#define CUDA_KERNEL_ASSERT(cond)                                                         \
  do {                                                                                   \
    if (!(cond)) {                                                                       \
      __assert_fail(#cond, __FILE__, static_cast<unsigned int>(__LINE__), __FUNCTION__); \
    }                                                                                    \
  } while (0)
#else
// MSVC has no __assert_fail; fall back to the standard assert. assert(0) is
// kept (instead of assert(cond)) to preserve the original unconditional-fire
// behavior once the condition check has already failed.
#define CUDA_KERNEL_ASSERT(cond) \
  do {                           \
    if (!(cond)) {               \
      assert(0);                 \
    }                            \
  } while (0)
#endif
60 namespace mindspore {
61 namespace device {
62 namespace gpu {
63 class GPUdeviceInfo {
64  public:
65   explicit GPUdeviceInfo(const uint32_t device_id);
66   ~GPUdeviceInfo();
properties()67   inline const cudaDeviceProp &properties() const { return prop_; }
name()68   inline const std::string &name() const { return name_; }
threads_num()69   inline size_t threads_num() const { return threads_per_block_; }
threads_num(size_t size)70   inline size_t threads_num(size_t size) const { return std::min(size, threads_per_block_); }
major_sm()71   inline size_t major_sm() const { return major_sm_; }
minor_sm()72   inline size_t minor_sm() const { return minor_sm_; }
cuda_cap()73   inline float cuda_cap() const { return static_cast<float>(major_sm_ * 10 + minor_sm_) / 10.0; }
blocks_num(const size_t total_threads)74   inline size_t blocks_num(const size_t total_threads) const {
75     return std::min(static_cast<size_t>(((total_threads - 1) / threads_per_block_) + 1), max_blocks_);
76   }
blocks_num(const size_t total_threads,const size_t block_size)77   inline size_t blocks_num(const size_t total_threads, const size_t block_size) const {
78     size_t valid_block_size = std::min(block_size, threads_per_block_);
79     return std::min(static_cast<size_t>(((total_threads - 1) / valid_block_size) + 1), max_blocks_);
80   }
blocks_max_num(size_t size)81   inline size_t blocks_max_num(size_t size) const { return std::min(size, max_blocks_); }
grids_max_size()82   inline dim3 grids_max_size() const { return max_grid_size_; }
share_memory_size()83   size_t share_memory_size() const { return max_share_memory_; }
set_check_sm(const bool & flag)84   void set_check_sm(const bool &flag) { check_sm_ = flag; }
check_sm()85   bool check_sm() const { return check_sm_; }
86 
87   static std::shared_ptr<GPUdeviceInfo> GetInstance(uint32_t device_id);
88 
89  private:
90   GPUdeviceInfo(const GPUdeviceInfo &) = delete;
91   GPUdeviceInfo &operator=(const GPUdeviceInfo &) = delete;
92 
93   cudaDeviceProp prop_;
94   std::string name_;
95   size_t max_blocks_;
96   size_t threads_per_block_;
97   size_t major_sm_;
98   size_t minor_sm_;
99   size_t max_share_memory_;
100   bool check_sm_{true};
101   dim3 max_grid_size_;
102 #ifndef _MSC_VER
103   static pthread_rwlock_t rwlock_;
104 #else
105   static std::mutex instanceLock;
106 #endif
107 };
108 
// Convenience wrappers around the per-device GPUdeviceInfo singleton, used by
// kernel launch code to pick grid/block sizes and query device limits.
#define CUDA_BLOCKS(device_id, total_threads) \
  mindspore::device::gpu::GPUdeviceInfo::GetInstance(device_id)->blocks_num(total_threads)
#define CUDA_BLOCKS_CAL(device_id, total_threads, block_size) \
  mindspore::device::gpu::GPUdeviceInfo::GetInstance(device_id)->blocks_num(total_threads, block_size)
#define CUDA_THREADS(device_id) mindspore::device::gpu::GPUdeviceInfo::GetInstance(device_id)->threads_num()
#define CUDA_THREADS_MAXSIZE(device_id, size) \
  mindspore::device::gpu::GPUdeviceInfo::GetInstance(device_id)->threads_num(size)
#define CUDA_BLOCKS_MAXSIZE(device_id, size) \
  mindspore::device::gpu::GPUdeviceInfo::GetInstance(device_id)->blocks_max_num(size)
#define CUDA_MAJOR_SM(device_id) mindspore::device::gpu::GPUdeviceInfo::GetInstance(device_id)->major_sm()
#define CUDA_CAP(device_id) mindspore::device::gpu::GPUdeviceInfo::GetInstance(device_id)->cuda_cap()
#define CUDA_SHARED_MEM_PER_BLOCK(device_id) \
  mindspore::device::gpu::GPUdeviceInfo::GetInstance(device_id)->share_memory_size()
#define CUDA_GRIDS_MAXSIZE(device_id) mindspore::device::gpu::GPUdeviceInfo::GetInstance(device_id)->grids_max_size()

// SM (compute-capability major version) thresholds and the minimum supported
// capability. NOTE(review): "MINIUM_SM" looks like a typo for "MINIMUM_SM",
// but the name is kept — other translation units reference this macro.
#define MINIUM_SM 6
#define RECOMMEND_SM 7
#define AMPER_ARCH_SM 8
#define SUPPORTED_CAP 5.3
128 }  // namespace gpu
129 }  // namespace device
130 }  // namespace mindspore
131 
GetCudaStatus()132 inline cudaError_t GetCudaStatus() {
133   cudaError_t status = cudaGetLastError();
134   if (status != cudaSuccess) {
135     return status;
136   }
137   return cudaSuccess;
138 }
139 
140 #endif  // MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_OPS_CUDA_DEVICE_INFO_H_
141