• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2019 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "runtime/device/gpu/cuda_driver.h"
18 #include <iostream>
19 #include "utils/log_adapter.h"
20 #include "utils/convert_utils.h"
21 
22 namespace mindspore {
23 namespace device {
24 namespace gpu {
AllocDeviceMem(size_t size,DeviceMemPtr * addr)25 size_t CudaDriver::AllocDeviceMem(size_t size, DeviceMemPtr *addr) {
26   size_t retreat_count = 0;
27   auto ret = cudaMalloc(reinterpret_cast<void **>(addr), size);
28   // If free memory is not enough, then retry with mem_malloc_retry_rate_.
29   while (ret == cudaErrorMemoryAllocation) {
30     size = FloatToSize(size * mem_malloc_retry_rate_);
31     size = (size / mem_malloc_align_size_) * mem_malloc_align_size_;
32     ret = cudaMalloc(reinterpret_cast<void **>(addr), size);
33     retreat_count++;
34     if (retreat_count > mem_malloc_retry_conut_max_) {
35       break;
36     }
37   }
38 
39   if (ret != cudaSuccess) {
40     MS_LOG(ERROR) << "cudaMalloc failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
41     return 0;
42   }
43   return size;
44 }
45 
FreeDeviceMem(const DeviceMemPtr & addr)46 bool CudaDriver::FreeDeviceMem(const DeviceMemPtr &addr) {
47   auto ret = cudaFree(addr);
48   if (ret != cudaSuccess) {
49     MS_LOG(ERROR) << "cudaFree failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
50     return false;
51   }
52   return true;
53 }
54 
AllocHostPinnedMem(size_t size,void ** addr)55 size_t CudaDriver::AllocHostPinnedMem(size_t size, void **addr) {
56   if (size == 0) {
57     MS_LOG(EXCEPTION) << "The memory allocate size is 0";
58   }
59   auto ret = cudaHostAlloc(addr, size, cudaHostAllocDefault);
60   if (ret != cudaSuccess) {
61     MS_LOG(ERROR) << "cudaHostAlloc failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
62     return 0;
63   }
64   return size;
65 }
66 
FreeHostPinnedMem(void * addr)67 void CudaDriver::FreeHostPinnedMem(void *addr) {
68   if (addr) {
69     auto ret = cudaFreeHost(addr);
70     if (ret != cudaSuccess) {
71       MS_LOG(EXCEPTION) << "cudaFreeHost failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
72     }
73   }
74 }
75 
CopyHostMemToDevice(const DeviceMemPtr & dst,const void * src,size_t size)76 bool CudaDriver::CopyHostMemToDevice(const DeviceMemPtr &dst, const void *src, size_t size) {
77   auto ret = cudaMemcpy(dst, src, size, cudaMemcpyHostToDevice);
78   if (ret != cudaSuccess) {
79     MS_LOG(ERROR) << "cudaMemcpy failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
80     return false;
81   }
82   return true;
83 }
84 
CopyDeviceMemToHost(const HostMemPtr & dst,const DeviceMemPtr & src,size_t size)85 bool CudaDriver::CopyDeviceMemToHost(const HostMemPtr &dst, const DeviceMemPtr &src, size_t size) {
86   auto ret = cudaMemcpy(dst, src, size, cudaMemcpyDeviceToHost);
87   if (ret != cudaSuccess) {
88     MS_LOG(ERROR) << "cudaMemcpy failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
89     return false;
90   }
91   return true;
92 }
93 
CopyHostMemToDeviceAsync(const DeviceMemPtr & dst,const void * src,size_t size,CudaDeviceStream stream)94 bool CudaDriver::CopyHostMemToDeviceAsync(const DeviceMemPtr &dst, const void *src, size_t size,
95                                           CudaDeviceStream stream) {
96   auto ret = cudaMemcpyAsync(dst, src, size, cudaMemcpyHostToDevice, (cudaStream_t)stream);
97   if (ret != cudaSuccess) {
98     MS_LOG(ERROR) << "cudaMemcpyAsync failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
99     return false;
100   }
101   return true;
102 }
103 
CopyDeviceMemToHostAsync(const HostMemPtr & dst,const DeviceMemPtr & src,size_t size,CudaDeviceStream stream)104 bool CudaDriver::CopyDeviceMemToHostAsync(const HostMemPtr &dst, const DeviceMemPtr &src, size_t size,
105                                           CudaDeviceStream stream) {
106   auto ret = cudaMemcpyAsync(dst, src, size, cudaMemcpyDeviceToHost, (cudaStream_t)stream);
107   if (ret != cudaSuccess) {
108     MS_LOG(ERROR) << "cudaMemcpyAsync failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
109     return false;
110   }
111   return true;
112 }
113 
CopyDeviceMemToDeviceAsync(const DeviceMemPtr & dst,const DeviceMemPtr & src,size_t size,CudaDeviceStream stream)114 bool CudaDriver::CopyDeviceMemToDeviceAsync(const DeviceMemPtr &dst, const DeviceMemPtr &src, size_t size,
115                                             CudaDeviceStream stream) {
116   auto ret = cudaMemcpyAsync(dst, src, size, cudaMemcpyDeviceToDevice, (cudaStream_t)stream);
117   if (ret != cudaSuccess) {
118     MS_LOG(ERROR) << "cudaMemcpyAsync failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
119     return false;
120   }
121   return true;
122 }
123 
total_mem_size()124 size_t CudaDriver::total_mem_size() {
125   size_t free;
126   size_t total;
127   auto ret = cudaMemGetInfo(&free, &total);
128   if (ret != cudaSuccess) {
129     MS_LOG(ERROR) << "cudaMemGetInfo failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
130     return 0;
131   }
132   return total;
133 }
134 
free_mem_size()135 size_t CudaDriver::free_mem_size() {
136   size_t free;
137   size_t total;
138   auto ret = cudaMemGetInfo(&free, &total);
139   if (ret != cudaSuccess) {
140     MS_LOG(ERROR) << "cudaMemGetInfo failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
141     return 0;
142   }
143 
144   return free;
145 }
146 
CreateStream(CudaDeviceStream * stream)147 bool CudaDriver::CreateStream(CudaDeviceStream *stream) {
148   auto ret = cudaStreamCreateWithFlags(reinterpret_cast<CUstream_st **>(stream), cudaStreamNonBlocking);
149   if (ret != cudaSuccess) {
150     MS_LOG(ERROR) << "cudaStreamCreate failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
151     return false;
152   }
153   return true;
154 }
155 
DestroyStream(const CudaDeviceStream & stream)156 bool CudaDriver::DestroyStream(const CudaDeviceStream &stream) {
157   auto ret = cudaStreamDestroy((cudaStream_t)stream);
158   if (ret != cudaSuccess) {
159     MS_LOG(ERROR) << "cudaStreamDestroy failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
160     return false;
161   }
162   return true;
163 }
164 
SyncStream(const CudaDeviceStream & stream)165 bool CudaDriver::SyncStream(const CudaDeviceStream &stream) {
166   auto ret = cudaStreamSynchronize((cudaStream_t)stream);
167   if (ret != cudaSuccess) {
168     MS_LOG(ERROR) << "cudaStreamSynchronize failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
169     return false;
170   }
171   return true;
172 }
173 
CreateEvent(CudaDeviceEvent * event,unsigned int flag)174 bool CudaDriver::CreateEvent(CudaDeviceEvent *event, unsigned int flag) {
175   auto ret = cudaEventCreateWithFlags(reinterpret_cast<cudaEvent_t *>(event), flag);
176   if (ret != cudaSuccess) {
177     MS_LOG(ERROR) << "cudaEventCreateWithFlags failed, ret[" << static_cast<int>(ret) << "], "
178                   << cudaGetErrorString(ret);
179     return false;
180   }
181   return true;
182 }
183 
DestroyEvent(const CudaDeviceEvent & event)184 bool CudaDriver::DestroyEvent(const CudaDeviceEvent &event) {
185   auto ret = cudaEventDestroy((cudaEvent_t)event);
186   if (ret != cudaSuccess) {
187     MS_LOG(ERROR) << "cudaEventDestroy failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
188     return false;
189   }
190   return true;
191 }
192 
RecordEvent(CudaDeviceEvent event,CudaDeviceStream stream)193 bool CudaDriver::RecordEvent(CudaDeviceEvent event, CudaDeviceStream stream) {
194   auto ret = cudaEventRecord((cudaEvent_t)event, (cudaStream_t)stream);
195   if (ret != cudaSuccess) {
196     MS_LOG(ERROR) << "cudaEventRecord failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
197     return false;
198   }
199   return true;
200 }
201 
SyncEvent(const CudaDeviceEvent & event)202 bool CudaDriver::SyncEvent(const CudaDeviceEvent &event) {
203   auto ret = cudaEventSynchronize((cudaEvent_t)event);
204   if (ret != cudaSuccess) {
205     MS_LOG(ERROR) << "cudaEventSynchronize failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
206     return false;
207   }
208   return true;
209 }
210 
QueryEvent(const CudaDeviceEvent & event)211 bool CudaDriver::QueryEvent(const CudaDeviceEvent &event) {
212   auto ret = cudaEventQuery((cudaEvent_t)event);
213   if (ret == cudaSuccess) {
214     return true;
215   } else if (ret == cudaErrorNotReady) {
216     return false;
217   } else {
218     MS_LOG(ERROR) << "cudaEventQuery failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
219     return false;
220   }
221 }
222 
ElapsedTime(float * cost_time,const CudaDeviceEvent & start,const CudaDeviceEvent & end)223 bool CudaDriver::ElapsedTime(float *cost_time, const CudaDeviceEvent &start, const CudaDeviceEvent &end) {
224   auto ret = cudaEventElapsedTime(cost_time, (cudaEvent_t)start, (cudaEvent_t)end);
225   if (ret == cudaSuccess) {
226     return true;
227   } else {
228     MS_LOG(ERROR) << "cudaEventElapsedTime failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
229     return false;
230   }
231 }
232 
device_count()233 int CudaDriver::device_count() {
234   int dev_count;
235   auto ret = cudaGetDeviceCount(&dev_count);
236   if (ret != cudaSuccess) {
237     MS_LOG(ERROR) << "cudaGetDeviceCount failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
238   }
239   return dev_count;
240 }
241 
SetDevice(int index)242 bool CudaDriver::SetDevice(int index) {
243   auto ret = cudaSetDevice(index);
244   if (ret != cudaSuccess) {
245     MS_LOG(ERROR)
246       << "SetDevice for id:" << index << " failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret)
247       << ". Please make sure that the 'device_id' set in context is in the range:[0, total number of GPU). "
248          "If the environment variable 'CUDA_VISIBLE_DEVICES' is set, the total number of GPU will be the number set "
249          "in the environment variable 'CUDA_VISIBLE_DEVICES'. For example, if export CUDA_VISIBLE_DEVICES=4,5,6, the "
250          "'device_id' can be 0,1,2 at the moment, 'device_id' starts from 0, and 'device_id'=0 means using GPU of "
251          "number 4.";
252     return false;
253   }
254   return true;
255 }
256 }  // namespace gpu
257 }  // namespace device
258 }  // namespace mindspore
259