1 /**
2 * Copyright 2019 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_GPU_COMMON_H_
18 #define MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_GPU_COMMON_H_
19
20 #include <iostream>
21 #include <vector>
22 #include <algorithm>
23 #include <map>
24 #include "utils/log_adapter.h"
25 #include "utils/trace_base.h"
26 #include "include/curand.h"
27
28 namespace mindspore {
29 namespace device {
30 namespace gpu {
// Evaluates a boolean op `expression`; on failure logs `message` at EXCEPTION
// severity (MS_LOG(EXCEPTION) is expected to throw — see utils/log_adapter.h).
// Fix: the old message appended " | Error Number: " << success, but `success`
// is always false (prints 0) inside this branch, so the fragment was
// meaningless and misleading; it has been dropped.
#define CHECK_OP_RET_WITH_EXCEPT(expression, message) \
  {                                                   \
    bool success = (expression);                      \
    if (!success) {                                   \
      MS_LOG(EXCEPTION) << "Op Error: " << message;   \
    }                                                 \
  }

// Same check as CHECK_OP_RET_WITH_EXCEPT, but logs at ERROR severity and
// lets execution continue.
#define CHECK_OP_RET_WITH_ERROR(expression, message) \
  {                                                  \
    bool success = (expression);                     \
    if (!success) {                                  \
      MS_LOG(ERROR) << "Op Error: " << message;      \
    }                                                \
  }
46
// Evaluates `expression`; on failure logs `message` at ERROR severity and
// makes the ENCLOSING function return false — only usable inside functions
// whose return type accepts `false`.
#define CHECK_RET_WITH_RETURN_ERROR(expression, message) \
  {                                                      \
    bool success = (expression);                         \
    if (!success) {                                      \
      MS_LOG(ERROR) << message;                          \
      return false;                                      \
    }                                                    \
  }
55
// Evaluates a CUDA runtime call; on failure logs `message`, the numeric
// cudaError_t, its readable form (cudaGetErrorString), and the source lines of
// `node`, then continues. `node` has .lock() called on it — presumably a
// std::weak_ptr to the kernel node; confirm at call sites.
#define CHECK_CUDA_RET_WITH_ERROR(node, expression, message) \
  { \
    cudaError_t status = (expression); \
    if (status != cudaSuccess) { \
      MS_LOG(ERROR) << "CUDA Error: " << message << " | Error Number: " << status << " " << cudaGetErrorString(status) \
                    << trace::DumpSourceLines(node.lock()); \
    } \
  }

// Same as CHECK_CUDA_RET_WITH_ERROR, but without dumping node source lines.
#define CHECK_CUDA_RET_WITH_ERROR_NOTRACE(expression, message) \
  { \
    cudaError_t status = (expression); \
    if (status != cudaSuccess) { \
      MS_LOG(ERROR) << "CUDA Error: " << message << " | Error Number: " << status << " " \
                    << cudaGetErrorString(status); \
    } \
  }
73
// Evaluates a CUDA runtime call; on failure logs at ERROR severity and makes
// the ENCLOSING function return false (no node trace).
#define CHECK_CUDA_RET_WITH_RETURN_ERROR_NOTRACE(expression, message) \
  { \
    cudaError_t status = (expression); \
    if (status != cudaSuccess) { \
      MS_LOG(ERROR) << "CUDA Error: " << message << " | Error Number: " << status << " " \
                    << cudaGetErrorString(status); \
      return false; \
    } \
  }
83
// Evaluates a CUDA runtime call; on failure logs the error plus the source
// lines of `node` (a weak pointer — only .lock() is used) at EXCEPTION
// severity, which is expected to throw (see utils/log_adapter.h).
#define CHECK_CUDA_RET_WITH_EXCEPT(node, expression, message) \
  { \
    cudaError_t status = (expression); \
    if (status != cudaSuccess) { \
      MS_LOG(EXCEPTION) << "CUDA Error: " << message << " | Error Number: " << status << " " \
                        << cudaGetErrorString(status) << trace::DumpSourceLines(node.lock()); \
    } \
  }

// Same as CHECK_CUDA_RET_WITH_EXCEPT, but without dumping node source lines.
#define CHECK_CUDA_RET_WITH_EXCEPT_NOTRACE(expression, message) \
  { \
    cudaError_t status = (expression); \
    if (status != cudaSuccess) { \
      MS_LOG(EXCEPTION) << "CUDA Error: " << message << " | Error Number: " << status << " " \
                        << cudaGetErrorString(status); \
    } \
  }
101
// Evaluates a cuDNN call; on failure logs the cudnnStatus_t, its readable form
// (cudnnGetErrorString), and the source lines of `node` (a weak pointer) at
// EXCEPTION severity, which is expected to throw.
#define CHECK_CUDNN_RET_WITH_EXCEPT(node, expression, message) \
  { \
    cudnnStatus_t status = (expression); \
    if (status != CUDNN_STATUS_SUCCESS) { \
      MS_LOG(EXCEPTION) << "cuDNN Error: " << message << " | Error Number: " << status << " " \
                        << cudnnGetErrorString(status) << trace::DumpSourceLines(node.lock()); \
    } \
  }

// Same as CHECK_CUDNN_RET_WITH_EXCEPT, but without dumping node source lines.
#define CHECK_CUDNN_RET_WITH_EXCEPT_NOTRACE(expression, message) \
  { \
    cudnnStatus_t status = (expression); \
    if (status != CUDNN_STATUS_SUCCESS) { \
      MS_LOG(EXCEPTION) << "cuDNN Error: " << message << " | Error Number: " << status << " " \
                        << cudnnGetErrorString(status); \
    } \
  }

// Evaluates a cuDNN call; on failure logs at ERROR severity (no trace) and
// continues.
#define CHECK_CUDNN_RET_WITH_ERROR_NOTRACE(expression, message) \
  { \
    cudnnStatus_t status = (expression); \
    if (status != CUDNN_STATUS_SUCCESS) { \
      MS_LOG(ERROR) << "cuDNN Error: " << message << " | Error Number: " << status << " " \
                    << cudnnGetErrorString(status); \
    } \
  }

// Evaluates a cuDNN call; on failure logs at ERROR severity with the node's
// source lines, then continues.
#define CHECK_CUDNN_RET_WITH_ERROR(node, expression, message) \
  { \
    cudnnStatus_t status = (expression); \
    if (status != CUDNN_STATUS_SUCCESS) { \
      MS_LOG(ERROR) << "cuDNN Error: " << message << " | Error Number: " << status << " " \
                    << cudnnGetErrorString(status) << trace::DumpSourceLines(node.lock()); \
    } \
  }
137
// Evaluates a cuBLAS call; on failure logs at EXCEPTION severity (expected to
// throw). Only the numeric cublasStatus_t is printed — cuBLAS has no
// cublasGetErrorString counterpart in this API surface.
#define CHECK_CUBLAS_RET_WITH_EXCEPT_NOTRACE(expression, message) \
  { \
    cublasStatus_t status = (expression); \
    if (status != CUBLAS_STATUS_SUCCESS) { \
      MS_LOG(EXCEPTION) << "cuBLAS Error: " << message << " | Error Number: " << status; \
    } \
  }

// Same as the NOTRACE variant, but also dumps the source lines of `node`
// (a weak pointer — only .lock() is used).
#define CHECK_CUBLAS_RET_WITH_EXCEPT(node, expression, message) \
  { \
    cublasStatus_t status = (expression); \
    if (status != CUBLAS_STATUS_SUCCESS) { \
      MS_LOG(EXCEPTION) << "cuBLAS Error: " << message << " | Error Number: " << status \
                        << trace::DumpSourceLines(node.lock()); \
    } \
  }

// Evaluates a cuBLAS call; on failure logs at ERROR severity and continues.
#define CHECK_CUBLAS_RET_WITH_ERROR(expression, message) \
  { \
    cublasStatus_t status = (expression); \
    if (status != CUBLAS_STATUS_SUCCESS) { \
      MS_LOG(ERROR) << "cuBLAS Error: " << message << " | Error Number: " << status; \
    } \
  }
162
// Evaluates a cuSOLVER call; on failure logs at EXCEPTION severity (expected
// to throw). Only the numeric cusolverStatus_t is printed.
#define CHECK_CUSOLVER_RET_WITH_EXCEPT_NOTRACE(expression, message) \
  { \
    cusolverStatus_t status = (expression); \
    if (status != CUSOLVER_STATUS_SUCCESS) { \
      MS_LOG(EXCEPTION) << "cusolver Error: " << message << " | Error Number: " << status; \
    } \
  }

// Same as the NOTRACE variant, but also dumps the source lines of `node`
// (a weak pointer — only .lock() is used).
// Fix: removed a stray semicolon that produced a leftover empty statement
// inside the if-block.
#define CHECK_CUSOLVER_RET_WITH_EXCEPT(node, expression, message) \
  { \
    cusolverStatus_t status = (expression); \
    if (status != CUSOLVER_STATUS_SUCCESS) { \
      MS_LOG(EXCEPTION) << "cusolver Error: " << message << " | Error Number: " << status \
                        << trace::DumpSourceLines(node.lock()); \
    } \
  }

// Evaluates a cuSOLVER call; on failure logs at ERROR severity and continues.
#define CHECK_CUSOLVER_RET_WITH_ERROR(expression, message) \
  { \
    cusolverStatus_t status = (expression); \
    if (status != CUSOLVER_STATUS_SUCCESS) { \
      MS_LOG(ERROR) << "cusolver Error: " << message << " | Error Number: " << status; \
    } \
  }
188
// Evaluates an NCCL call; on failure logs the result and the source lines of
// `node` (a weak pointer) at EXCEPTION severity, which is expected to throw.
// NOTE(review): `result` is declared `int` and compared against ncclSuccess —
// presumably to avoid requiring the NCCL header here; confirm this stays in
// sync with ncclResult_t.
#define CHECK_NCCL_RET_WITH_EXCEPT(node, expression, message) \
  { \
    int result = (expression); \
    if (result != ncclSuccess) { \
      MS_LOG(EXCEPTION) << "NCCL Error: " << message << " | Error Number: " << result \
                        << trace::DumpSourceLines(node.lock()); \
    } \
  }
197
// Suppresses unused-variable compiler warnings by explicitly discarding `var`.
#define VARIABLE_NOT_USED(var) \
  { (void)(var); }
200
// Returns true when `input_shape` denotes a "null" tensor, i.e. a non-scalar
// shape containing a zero dimension (kernels should then just emit a null
// output). An empty shape (rank 0) means a scalar input and is NOT null.
inline bool CheckNullInput(const std::vector<size_t> &input_shape) {
  // std::any_of returns false for an empty range, so the scalar case needs no
  // separate size() check (the previous outer guard was redundant).
  return std::any_of(input_shape.begin(), input_shape.end(), [](size_t dim) { return dim == 0; });
}
#define CHECK_NULL_INPUT(input_shape) mindspore::device::gpu::CheckNullInput(input_shape)
212
CurandGetErrorString(curandStatus_t status)213 inline const char *CurandGetErrorString(curandStatus_t status) {
214 switch (status) {
215 case CURAND_STATUS_VERSION_MISMATCH:
216 return "Header file and linked library version do not match.";
217 case CURAND_STATUS_NOT_INITIALIZED:
218 return "Generator not initialized.";
219 case CURAND_STATUS_ALLOCATION_FAILED:
220 return "Memory allocation failed.";
221 case CURAND_STATUS_TYPE_ERROR:
222 return "Generator is wrong type.";
223 case CURAND_STATUS_OUT_OF_RANGE:
224 return "Argument out of range.";
225 case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
226 return "Length requested is not a multiple of dimension.";
227 case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
228 return "GPU does not have double precision required by MRG32k3a.";
229 case CURAND_STATUS_LAUNCH_FAILURE:
230 return "Kernel launch failure.";
231 case CURAND_STATUS_PREEXISTING_FAILURE:
232 return "Preexisting failure on library entry.";
233 case CURAND_STATUS_INITIALIZATION_FAILED:
234 return "Initialization of CUDA failed.";
235 case CURAND_STATUS_ARCH_MISMATCH:
236 return "Architecture mismatch, GPU does not support requested feature.";
237 case CURAND_STATUS_INTERNAL_ERROR:
238 return "Internal library error.";
239 default:
240 return "Unknown the curandStatus.";
241 }
242 }
243
// Evaluates a cuRAND call; on failure logs the numeric curandStatus_t plus its
// readable description (CurandGetErrorString above) at EXCEPTION severity,
// which is expected to throw.
#define CHECK_CURAND_RET_WITH_EXCEPT(expression, message) \
  { \
    curandStatus_t status = (expression); \
    if (status != CURAND_STATUS_SUCCESS) { \
      MS_LOG(EXCEPTION) << "CUDA curand Error: " << message << " | curandStatus: " << status << " " \
                        << mindspore::device::gpu::CurandGetErrorString(status); \
    } \
  }
252 } // namespace gpu
253 } // namespace device
254 } // namespace mindspore
255
256 #endif // MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_GPU_COMMON_H_
257