1 /**
2 * Copyright 2022 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_OPS_CUDA_DEVICE_INFO_H_
18 #define MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_OPS_CUDA_DEVICE_INFO_H_
19
20 #include <cudnn.h>
21 #include <cublas_v2.h>
22 #include <algorithm>
23 #include <cusolverDn.h>
24 #include <memory>
25 #include <mutex>
26 #include <string>
27 #ifdef _MSC_VER
28 #include <cassert>
29 #else
30 #include <assert.h>
31 #endif
32
33 #ifdef _MSC_VER
34 #ifdef BUILDING_CUDA_OPS_DLL
35 #define CUDA_LIB_EXPORT __declspec(dllexport)
36 #else
37 #define CUDA_LIB_EXPORT __declspec(dllimport)
38 #endif
39 #else
40 #define CUDA_LIB_EXPORT __attribute__((visibility("default")))
41 #endif // _MSC_VER
42
43 #ifdef _WIN32
44 #ifndef uint
45 #define uint unsigned int
46 #endif
47 #endif
48
// Device-/host-side assertion macro usable inside CUDA kernels.
// Both branches are wrapped in do { } while (0) so the macro expands to a
// single statement: the original bare `if (!(cond)) { ... }` form either
// failed to compile after `CUDA_KERNEL_ASSERT(c);` in an if/else, or silently
// captured the caller's `else` (dangling-else).
#ifndef _MSC_VER
#define CUDA_KERNEL_ASSERT(cond)                                                         \
  do {                                                                                   \
    if (!(cond)) {                                                                       \
      __assert_fail(#cond, __FILE__, static_cast<unsigned int>(__LINE__), __FUNCTION__); \
    }                                                                                    \
  } while (0)
#else
// MSVC has no __assert_fail; fall back to assert(0) (no-op under NDEBUG,
// matching the original behavior — cond is still evaluated).
#define CUDA_KERNEL_ASSERT(cond) \
  do {                           \
    if (!(cond)) {               \
      assert(0);                 \
    }                            \
  } while (0)
#endif
60 namespace mindspore {
61 namespace device {
62 namespace gpu {
63 class GPUdeviceInfo {
64 public:
65 explicit GPUdeviceInfo(const uint32_t device_id);
66 ~GPUdeviceInfo();
properties()67 inline const cudaDeviceProp &properties() const { return prop_; }
name()68 inline const std::string &name() const { return name_; }
threads_num()69 inline size_t threads_num() const { return threads_per_block_; }
threads_num(size_t size)70 inline size_t threads_num(size_t size) const { return std::min(size, threads_per_block_); }
major_sm()71 inline size_t major_sm() const { return major_sm_; }
minor_sm()72 inline size_t minor_sm() const { return minor_sm_; }
cuda_cap()73 inline float cuda_cap() const { return static_cast<float>(major_sm_ * 10 + minor_sm_) / 10.0; }
blocks_num(const size_t total_threads)74 inline size_t blocks_num(const size_t total_threads) const {
75 return std::min(static_cast<size_t>(((total_threads - 1) / threads_per_block_) + 1), max_blocks_);
76 }
blocks_num(const size_t total_threads,const size_t block_size)77 inline size_t blocks_num(const size_t total_threads, const size_t block_size) const {
78 size_t valid_block_size = std::min(block_size, threads_per_block_);
79 return std::min(static_cast<size_t>(((total_threads - 1) / valid_block_size) + 1), max_blocks_);
80 }
blocks_max_num(size_t size)81 inline size_t blocks_max_num(size_t size) const { return std::min(size, max_blocks_); }
grids_max_size()82 inline dim3 grids_max_size() const { return max_grid_size_; }
share_memory_size()83 size_t share_memory_size() const { return max_share_memory_; }
set_check_sm(const bool & flag)84 void set_check_sm(const bool &flag) { check_sm_ = flag; }
check_sm()85 bool check_sm() const { return check_sm_; }
86
87 static std::shared_ptr<GPUdeviceInfo> GetInstance(uint32_t device_id);
88
89 private:
90 GPUdeviceInfo(const GPUdeviceInfo &) = delete;
91 GPUdeviceInfo &operator=(const GPUdeviceInfo &) = delete;
92
93 cudaDeviceProp prop_;
94 std::string name_;
95 size_t max_blocks_;
96 size_t threads_per_block_;
97 size_t major_sm_;
98 size_t minor_sm_;
99 size_t max_share_memory_;
100 bool check_sm_{true};
101 dim3 max_grid_size_;
102 #ifndef _MSC_VER
103 static pthread_rwlock_t rwlock_;
104 #else
105 static std::mutex instanceLock;
106 #endif
107 };
108
// Convenience wrappers: each queries the per-device GPUdeviceInfo singleton
// for launch geometry / capability values.
#define CUDA_BLOCKS(device_id, total_threads) \
  mindspore::device::gpu::GPUdeviceInfo::GetInstance(device_id)->blocks_num(total_threads)
#define CUDA_BLOCKS_CAL(device_id, total_threads, block_size) \
  mindspore::device::gpu::GPUdeviceInfo::GetInstance(device_id)->blocks_num(total_threads, block_size)
#define CUDA_THREADS(device_id) mindspore::device::gpu::GPUdeviceInfo::GetInstance(device_id)->threads_num()
#define CUDA_THREADS_MAXSIZE(device_id, size) \
  mindspore::device::gpu::GPUdeviceInfo::GetInstance(device_id)->threads_num(size)
#define CUDA_BLOCKS_MAXSIZE(device_id, size) \
  mindspore::device::gpu::GPUdeviceInfo::GetInstance(device_id)->blocks_max_num(size)
#define CUDA_MAJOR_SM(device_id) mindspore::device::gpu::GPUdeviceInfo::GetInstance(device_id)->major_sm()
#define CUDA_CAP(device_id) mindspore::device::gpu::GPUdeviceInfo::GetInstance(device_id)->cuda_cap()
#define CUDA_SHARED_MEM_PER_BLOCK(device_id) \
  mindspore::device::gpu::GPUdeviceInfo::GetInstance(device_id)->share_memory_size()
#define CUDA_GRIDS_MAXSIZE(device_id) mindspore::device::gpu::GPUdeviceInfo::GetInstance(device_id)->grids_max_size()

// SM (compute-capability major version) thresholds used by capability checks.
// NOTE(review): "MINIUM" looks like a typo for "MINIMUM", but the macro name is
// public API — renaming would break existing users.
#define MINIUM_SM 6
#define RECOMMEND_SM 7
#define AMPER_ARCH_SM 8
// Minimum supported capability as a decimal value (major.minor), cf. cuda_cap().
#define SUPPORTED_CAP 5.3
128 } // namespace gpu
129 } // namespace device
130 } // namespace mindspore
131
GetCudaStatus()132 inline cudaError_t GetCudaStatus() {
133 cudaError_t status = cudaGetLastError();
134 if (status != cudaSuccess) {
135 return status;
136 }
137 return cudaSuccess;
138 }
139
140 #endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_CUDA_IMPL_CUDA_OPS_CUDA_DEVICE_INFO_H_
141