/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
#ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_CUDA_COMMON_H_
#define MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_CUDA_COMMON_H_

#include <algorithm>
#include <cstddef>
#include <cstdint>

#include "runtime/device/gpu/gpu_device_manager.h"
22 
// Device-side assertion: traps the kernel via __assert_fail when `cond` is
// false, reporting the failing expression, file, line, and function.
// Wrapped in do { } while (0) so the macro behaves as a single statement:
// the previous bare `if` form broke `if (x) CUDA_KERNEL_ASSERT(c); else ...`
// (dangling-else / stray-semicolon hazard at call sites).
#define CUDA_KERNEL_ASSERT(cond)                                                         \
  do {                                                                                   \
    if (!(cond)) {                                                                       \
      __assert_fail(#cond, __FILE__, static_cast<unsigned int>(__LINE__), __FUNCTION__); \
    }                                                                                    \
  } while (0)
27 namespace mindspore {
28 namespace device {
29 namespace gpu {
30 class CudaCommon {
31  public:
threads_num()32   inline int threads_num() const { return threads_per_block_; }
major_sm()33   inline int major_sm() const { return major_sm_; }
blocks_num(const int total_threads)34   inline int blocks_num(const int total_threads) const {
35     return std::min(((total_threads - 1) / threads_per_block_) + 1, max_blocks_);
36   }
share_memory_size()37   size_t share_memory_size() const { return max_share_memory_; }
set_check_sm(const bool & flag)38   void set_check_sm(const bool &flag) { check_sm_ = flag; }
check_sm()39   bool check_sm() const { return check_sm_; }
40 
GetInstance()41   static CudaCommon &GetInstance() {
42     static CudaCommon instance;
43     return instance;
44   }
45 
46  private:
CudaCommon()47   CudaCommon() {
48     uint32_t device_id = GPUDeviceManager::GetInstance().cur_device_id();
49     cudaDeviceProp prop;
50     (void)cudaGetDeviceProperties(&prop, device_id);
51     threads_per_block_ = prop.maxThreadsPerBlock;
52     max_blocks_ = prop.multiProcessorCount;
53     major_sm_ = prop.major;
54     max_share_memory_ = prop.sharedMemPerBlock;
55   }
56   ~CudaCommon() = default;
57   CudaCommon(const CudaCommon &) = delete;
58   CudaCommon &operator=(const CudaCommon &) = delete;
59 
60   int max_blocks_;
61   int threads_per_block_;
62   int major_sm_;
63   size_t max_share_memory_;
64   bool check_sm_{true};
65 };
// Launch-configuration helpers: read the cached device limits from the
// CudaCommon singleton (first use triggers the device-property query).
#define GET_BLOCKS(total_threads) mindspore::device::gpu::CudaCommon::GetInstance().blocks_num(total_threads)
#define GET_THREADS mindspore::device::gpu::CudaCommon::GetInstance().threads_num()
#define GET_MAJOR_SM mindspore::device::gpu::CudaCommon::GetInstance().major_sm()
#define SHARED_MEM_PER_BLOCK mindspore::device::gpu::CudaCommon::GetInstance().share_memory_size()
// Minimum / recommended SM major versions for this backend (Pascal / Volta).
// NOTE(review): "MINIUM" looks like a typo for MINIMUM, but the macro is part
// of the public interface and cannot be renamed here.
#define MINIUM_SM 6
#define RECOMMEND_SM 7
72 }  // namespace gpu
73 }  // namespace device
74 }  // namespace mindspore
75 
76 #endif  // MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_CUDA_COMMON_H_
77