1 /**
2 * Copyright 2019 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "runtime/device/gpu/cuda_driver.h"
18 #include <iostream>
19 #include "utils/log_adapter.h"
20 #include "utils/convert_utils.h"
21
22 namespace mindspore {
23 namespace device {
24 namespace gpu {
AllocDeviceMem(size_t size,DeviceMemPtr * addr)25 size_t CudaDriver::AllocDeviceMem(size_t size, DeviceMemPtr *addr) {
26 size_t retreat_count = 0;
27 auto ret = cudaMalloc(reinterpret_cast<void **>(addr), size);
28 // If free memory is not enough, then retry with mem_malloc_retry_rate_.
29 while (ret == cudaErrorMemoryAllocation) {
30 size = FloatToSize(size * mem_malloc_retry_rate_);
31 size = (size / mem_malloc_align_size_) * mem_malloc_align_size_;
32 ret = cudaMalloc(reinterpret_cast<void **>(addr), size);
33 retreat_count++;
34 if (retreat_count > mem_malloc_retry_conut_max_) {
35 break;
36 }
37 }
38
39 if (ret != cudaSuccess) {
40 MS_LOG(ERROR) << "cudaMalloc failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
41 return 0;
42 }
43 return size;
44 }
45
FreeDeviceMem(const DeviceMemPtr & addr)46 bool CudaDriver::FreeDeviceMem(const DeviceMemPtr &addr) {
47 auto ret = cudaFree(addr);
48 if (ret != cudaSuccess) {
49 MS_LOG(ERROR) << "cudaFree failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
50 return false;
51 }
52 return true;
53 }
54
AllocHostPinnedMem(size_t size,void ** addr)55 size_t CudaDriver::AllocHostPinnedMem(size_t size, void **addr) {
56 if (size == 0) {
57 MS_LOG(EXCEPTION) << "The memory allocate size is 0";
58 }
59 auto ret = cudaHostAlloc(addr, size, cudaHostAllocDefault);
60 if (ret != cudaSuccess) {
61 MS_LOG(ERROR) << "cudaHostAlloc failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
62 return 0;
63 }
64 return size;
65 }
66
FreeHostPinnedMem(void * addr)67 void CudaDriver::FreeHostPinnedMem(void *addr) {
68 if (addr) {
69 auto ret = cudaFreeHost(addr);
70 if (ret != cudaSuccess) {
71 MS_LOG(EXCEPTION) << "cudaFreeHost failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
72 }
73 }
74 }
75
CopyHostMemToDevice(const DeviceMemPtr & dst,const void * src,size_t size)76 bool CudaDriver::CopyHostMemToDevice(const DeviceMemPtr &dst, const void *src, size_t size) {
77 auto ret = cudaMemcpy(dst, src, size, cudaMemcpyHostToDevice);
78 if (ret != cudaSuccess) {
79 MS_LOG(ERROR) << "cudaMemcpy failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
80 return false;
81 }
82 return true;
83 }
84
CopyDeviceMemToHost(const HostMemPtr & dst,const DeviceMemPtr & src,size_t size)85 bool CudaDriver::CopyDeviceMemToHost(const HostMemPtr &dst, const DeviceMemPtr &src, size_t size) {
86 auto ret = cudaMemcpy(dst, src, size, cudaMemcpyDeviceToHost);
87 if (ret != cudaSuccess) {
88 MS_LOG(ERROR) << "cudaMemcpy failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
89 return false;
90 }
91 return true;
92 }
93
CopyHostMemToDeviceAsync(const DeviceMemPtr & dst,const void * src,size_t size,CudaDeviceStream stream)94 bool CudaDriver::CopyHostMemToDeviceAsync(const DeviceMemPtr &dst, const void *src, size_t size,
95 CudaDeviceStream stream) {
96 auto ret = cudaMemcpyAsync(dst, src, size, cudaMemcpyHostToDevice, (cudaStream_t)stream);
97 if (ret != cudaSuccess) {
98 MS_LOG(ERROR) << "cudaMemcpyAsync failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
99 return false;
100 }
101 return true;
102 }
103
CopyDeviceMemToHostAsync(const HostMemPtr & dst,const DeviceMemPtr & src,size_t size,CudaDeviceStream stream)104 bool CudaDriver::CopyDeviceMemToHostAsync(const HostMemPtr &dst, const DeviceMemPtr &src, size_t size,
105 CudaDeviceStream stream) {
106 auto ret = cudaMemcpyAsync(dst, src, size, cudaMemcpyDeviceToHost, (cudaStream_t)stream);
107 if (ret != cudaSuccess) {
108 MS_LOG(ERROR) << "cudaMemcpyAsync failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
109 return false;
110 }
111 return true;
112 }
113
CopyDeviceMemToDeviceAsync(const DeviceMemPtr & dst,const DeviceMemPtr & src,size_t size,CudaDeviceStream stream)114 bool CudaDriver::CopyDeviceMemToDeviceAsync(const DeviceMemPtr &dst, const DeviceMemPtr &src, size_t size,
115 CudaDeviceStream stream) {
116 auto ret = cudaMemcpyAsync(dst, src, size, cudaMemcpyDeviceToDevice, (cudaStream_t)stream);
117 if (ret != cudaSuccess) {
118 MS_LOG(ERROR) << "cudaMemcpyAsync failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
119 return false;
120 }
121 return true;
122 }
123
total_mem_size()124 size_t CudaDriver::total_mem_size() {
125 size_t free;
126 size_t total;
127 auto ret = cudaMemGetInfo(&free, &total);
128 if (ret != cudaSuccess) {
129 MS_LOG(ERROR) << "cudaMemGetInfo failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
130 return 0;
131 }
132 return total;
133 }
134
free_mem_size()135 size_t CudaDriver::free_mem_size() {
136 size_t free;
137 size_t total;
138 auto ret = cudaMemGetInfo(&free, &total);
139 if (ret != cudaSuccess) {
140 MS_LOG(ERROR) << "cudaMemGetInfo failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
141 return 0;
142 }
143
144 return free;
145 }
146
CreateStream(CudaDeviceStream * stream)147 bool CudaDriver::CreateStream(CudaDeviceStream *stream) {
148 auto ret = cudaStreamCreateWithFlags(reinterpret_cast<CUstream_st **>(stream), cudaStreamNonBlocking);
149 if (ret != cudaSuccess) {
150 MS_LOG(ERROR) << "cudaStreamCreate failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
151 return false;
152 }
153 return true;
154 }
155
DestroyStream(const CudaDeviceStream & stream)156 bool CudaDriver::DestroyStream(const CudaDeviceStream &stream) {
157 auto ret = cudaStreamDestroy((cudaStream_t)stream);
158 if (ret != cudaSuccess) {
159 MS_LOG(ERROR) << "cudaStreamDestroy failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
160 return false;
161 }
162 return true;
163 }
164
SyncStream(const CudaDeviceStream & stream)165 bool CudaDriver::SyncStream(const CudaDeviceStream &stream) {
166 auto ret = cudaStreamSynchronize((cudaStream_t)stream);
167 if (ret != cudaSuccess) {
168 MS_LOG(ERROR) << "cudaStreamSynchronize failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
169 return false;
170 }
171 return true;
172 }
173
CreateEvent(CudaDeviceEvent * event,unsigned int flag)174 bool CudaDriver::CreateEvent(CudaDeviceEvent *event, unsigned int flag) {
175 auto ret = cudaEventCreateWithFlags(reinterpret_cast<cudaEvent_t *>(event), flag);
176 if (ret != cudaSuccess) {
177 MS_LOG(ERROR) << "cudaEventCreateWithFlags failed, ret[" << static_cast<int>(ret) << "], "
178 << cudaGetErrorString(ret);
179 return false;
180 }
181 return true;
182 }
183
DestroyEvent(const CudaDeviceEvent & event)184 bool CudaDriver::DestroyEvent(const CudaDeviceEvent &event) {
185 auto ret = cudaEventDestroy((cudaEvent_t)event);
186 if (ret != cudaSuccess) {
187 MS_LOG(ERROR) << "cudaEventDestroy failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
188 return false;
189 }
190 return true;
191 }
192
RecordEvent(CudaDeviceEvent event,CudaDeviceStream stream)193 bool CudaDriver::RecordEvent(CudaDeviceEvent event, CudaDeviceStream stream) {
194 auto ret = cudaEventRecord((cudaEvent_t)event, (cudaStream_t)stream);
195 if (ret != cudaSuccess) {
196 MS_LOG(ERROR) << "cudaEventRecord failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
197 return false;
198 }
199 return true;
200 }
201
SyncEvent(const CudaDeviceEvent & event)202 bool CudaDriver::SyncEvent(const CudaDeviceEvent &event) {
203 auto ret = cudaEventSynchronize((cudaEvent_t)event);
204 if (ret != cudaSuccess) {
205 MS_LOG(ERROR) << "cudaEventSynchronize failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
206 return false;
207 }
208 return true;
209 }
210
QueryEvent(const CudaDeviceEvent & event)211 bool CudaDriver::QueryEvent(const CudaDeviceEvent &event) {
212 auto ret = cudaEventQuery((cudaEvent_t)event);
213 if (ret == cudaSuccess) {
214 return true;
215 } else if (ret == cudaErrorNotReady) {
216 return false;
217 } else {
218 MS_LOG(ERROR) << "cudaEventQuery failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
219 return false;
220 }
221 }
222
ElapsedTime(float * cost_time,const CudaDeviceEvent & start,const CudaDeviceEvent & end)223 bool CudaDriver::ElapsedTime(float *cost_time, const CudaDeviceEvent &start, const CudaDeviceEvent &end) {
224 auto ret = cudaEventElapsedTime(cost_time, (cudaEvent_t)start, (cudaEvent_t)end);
225 if (ret == cudaSuccess) {
226 return true;
227 } else {
228 MS_LOG(ERROR) << "cudaEventElapsedTime failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
229 return false;
230 }
231 }
232
device_count()233 int CudaDriver::device_count() {
234 int dev_count;
235 auto ret = cudaGetDeviceCount(&dev_count);
236 if (ret != cudaSuccess) {
237 MS_LOG(ERROR) << "cudaGetDeviceCount failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret);
238 }
239 return dev_count;
240 }
241
SetDevice(int index)242 bool CudaDriver::SetDevice(int index) {
243 auto ret = cudaSetDevice(index);
244 if (ret != cudaSuccess) {
245 MS_LOG(ERROR)
246 << "SetDevice for id:" << index << " failed, ret[" << static_cast<int>(ret) << "], " << cudaGetErrorString(ret)
247 << ". Please make sure that the 'device_id' set in context is in the range:[0, total number of GPU). "
248 "If the environment variable 'CUDA_VISIBLE_DEVICES' is set, the total number of GPU will be the number set "
249 "in the environment variable 'CUDA_VISIBLE_DEVICES'. For example, if export CUDA_VISIBLE_DEVICES=4,5,6, the "
250 "'device_id' can be 0,1,2 at the moment, 'device_id' starts from 0, and 'device_id'=0 means using GPU of "
251 "number 4.";
252 return false;
253 }
254 return true;
255 }
256 } // namespace gpu
257 } // namespace device
258 } // namespace mindspore
259