1 /**
2 * Copyright 2022 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_device_info.h"
18 #ifndef _MSC_VER
19 #include <pthread.h>
20 #endif
21 #include <unordered_map>
22
23 namespace mindspore {
24 namespace device {
25 namespace gpu {
26 #ifndef _MSC_VER
27 pthread_rwlock_t GPUdeviceInfo::rwlock_;
28 #else
29 std::mutex GPUdeviceInfo::instanceLock;
30 #endif
31
32 #ifndef _MSC_VER
GetInstance(uint32_t device_id)33 std::shared_ptr<GPUdeviceInfo> GPUdeviceInfo::GetInstance(uint32_t device_id) {
34 static std::unordered_map<uint32_t, std::shared_ptr<GPUdeviceInfo>> instances;
35 // read lock
36 std::shared_ptr<GPUdeviceInfo> gpu_device_info{nullptr};
37 pthread_rwlock_rdlock(&rwlock_);
38 auto iter = instances.find(device_id);
39 if (iter != instances.end()) {
40 gpu_device_info = iter->second;
41 }
42 pthread_rwlock_unlock(&rwlock_);
43
44 if (gpu_device_info == nullptr) {
45 // write lock
46 gpu_device_info = std::make_shared<GPUdeviceInfo>(device_id);
47 pthread_rwlock_wrlock(&rwlock_);
48 instances.emplace(device_id, gpu_device_info);
49 pthread_rwlock_unlock(&rwlock_);
50 }
51 return gpu_device_info;
52 }
53 #else
GetInstance(uint32_t device_id)54 std::shared_ptr<GPUdeviceInfo> GPUdeviceInfo::GetInstance(uint32_t device_id) {
55 static std::unordered_map<uint32_t, std::shared_ptr<GPUdeviceInfo>> instances;
56 std::shared_ptr<GPUdeviceInfo> gpu_device_info{nullptr};
57 std::lock_guard<std::mutex> lk(instanceLock);
58 auto iter = instances.find(device_id);
59 if (iter != instances.end()) {
60 gpu_device_info = iter->second;
61 }
62 if (gpu_device_info == nullptr) {
63 gpu_device_info = std::make_shared<GPUdeviceInfo>(device_id);
64 instances.emplace(device_id, gpu_device_info);
65 }
66 return gpu_device_info;
67 }
68 #endif
69
GPUdeviceInfo(const uint32_t device_id)70 GPUdeviceInfo::GPUdeviceInfo(const uint32_t device_id) {
71 (void)cudaGetDeviceProperties(&prop_, device_id);
72 name_ = prop_.name;
73 threads_per_block_ = prop_.maxThreadsPerBlock;
74 max_blocks_ = prop_.multiProcessorCount;
75 major_sm_ = prop_.major;
76 minor_sm_ = prop_.minor;
77 max_share_memory_ = prop_.sharedMemPerBlock;
78 const size_t x_index = 0;
79 const size_t y_index = 1;
80 const size_t z_index = 2;
81 max_grid_size_.x = prop_.maxGridSize[x_index];
82 max_grid_size_.y = prop_.maxGridSize[y_index];
83 max_grid_size_.z = prop_.maxGridSize[z_index];
84 #ifndef _MSC_VER
85 pthread_rwlock_init(&rwlock_, nullptr);
86 #endif
87 }
~GPUdeviceInfo()88 GPUdeviceInfo::~GPUdeviceInfo() {
89 #ifndef _MSC_VER
90 pthread_rwlock_destroy(&rwlock_);
91 #endif
92 }
93 } // namespace gpu
94 } // namespace device
95 } // namespace mindspore
96