• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2019 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_GPU_COMMON_H_
18 #define MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_GPU_COMMON_H_
19 
20 #include <iostream>
21 #include <vector>
22 #include <algorithm>
23 #include <map>
24 #include "utils/log_adapter.h"
25 #include "utils/trace_base.h"
26 #include "include/curand.h"
27 
28 namespace mindspore {
29 namespace device {
30 namespace gpu {
// Evaluates `expression` once, converted to bool, and reports through MS_LOG(EXCEPTION) when it is false.
// `message` is pasted unparenthesized into the `<<` chain, so it may itself be a `<<` sequence.
// The "Error Number" field streams the bool result itself, which is always false on this path.
// Expands to a brace block that declares a local named `success`; `expression` must not rely on a caller
// variable of that name. NOTE(review): MS_LOG is defined outside this file; EXCEPTION presumably throws.
#define CHECK_OP_RET_WITH_EXCEPT(expression, message)      \
  {                                                        \
    const bool success = (expression);                     \
    if (!success) {                                        \
      MS_LOG(EXCEPTION) << "Op Error: " << message         \
                        << " | Error Number: " << success; \
    }                                                      \
  }
38 
// Evaluates `expression` once, converted to bool, and logs through MS_LOG(ERROR) when it is false.
// The macro itself neither returns nor throws, so control continues after the log statement.
// `message` is pasted unparenthesized into the `<<` chain. The "Error Number" field streams the bool
// result itself, which is always false on this path.
// Expands to a brace block that declares a local named `success`.
#define CHECK_OP_RET_WITH_ERROR(expression, message)   \
  {                                                    \
    const bool success = (expression);                 \
    if (!success) {                                    \
      MS_LOG(ERROR) << "Op Error: " << message         \
                    << " | Error Number: " << success; \
    }                                                  \
  }
46 
// Evaluates `expression` once, converted to bool. When it is false, logs `message` through MS_LOG(ERROR)
// and executes `return false;` in the enclosing function, which therefore must have a return type that
// accepts `false`. `message` is pasted unparenthesized into the `<<` chain.
// Expands to a brace block that declares a local named `success`.
#define CHECK_RET_WITH_RETURN_ERROR(expression, message) \
  {                                                      \
    const bool success = (expression);                   \
    if (!success) {                                      \
      MS_LOG(ERROR) << message;                          \
      return false;                                      \
    }                                                    \
  }
55 
// Evaluates the CUDA call `expression` once. For any result other than cudaSuccess it logs, through
// MS_LOG(ERROR): `message`, the status value, cudaGetErrorString(status) and
// trace::DumpSourceLines(node.lock()). The macro itself neither returns nor throws.
// `node` must provide `.lock()` (presumably a weak pointer to the kernel node — confirm at call sites).
// Expands to a brace block that declares a local named `status`.
#define CHECK_CUDA_RET_WITH_ERROR(node, expression, message)            \
  {                                                                     \
    const cudaError_t status = (expression);                            \
    if (status != cudaSuccess) {                                        \
      MS_LOG(ERROR) << "CUDA Error: " << message << " | Error Number: " \
                    << status << " " << cudaGetErrorString(status)      \
                    << trace::DumpSourceLines(node.lock());             \
    }                                                                   \
  }
64 
// Evaluates the CUDA call `expression` once. For any result other than cudaSuccess it logs, through
// MS_LOG(ERROR): `message`, the status value and cudaGetErrorString(status). No source trace is appended.
// The macro itself neither returns nor throws.
// Expands to a brace block that declares a local named `status`.
#define CHECK_CUDA_RET_WITH_ERROR_NOTRACE(expression, message)          \
  {                                                                     \
    const cudaError_t status = (expression);                            \
    if (status != cudaSuccess) {                                        \
      MS_LOG(ERROR) << "CUDA Error: " << message << " | Error Number: " \
                    << status << " " << cudaGetErrorString(status);     \
    }                                                                   \
  }
73 
// Evaluates the CUDA call `expression` once. For any result other than cudaSuccess it logs, through
// MS_LOG(ERROR): `message`, the status value and cudaGetErrorString(status), then executes
// `return false;` in the enclosing function (whose return type must accept `false`).
// Expands to a brace block that declares a local named `status`.
#define CHECK_CUDA_RET_WITH_RETURN_ERROR_NOTRACE(expression, message)   \
  {                                                                     \
    const cudaError_t status = (expression);                            \
    if (status != cudaSuccess) {                                        \
      MS_LOG(ERROR) << "CUDA Error: " << message << " | Error Number: " \
                    << status << " " << cudaGetErrorString(status);     \
      return false;                                                     \
    }                                                                   \
  }
83 
// Evaluates the CUDA call `expression` once. For any result other than cudaSuccess it reports, through
// MS_LOG(EXCEPTION): `message`, the status value, cudaGetErrorString(status) and
// trace::DumpSourceLines(node.lock()).
// `node` must provide `.lock()` (presumably a weak pointer to the kernel node — confirm at call sites).
// Expands to a brace block that declares a local named `status`.
#define CHECK_CUDA_RET_WITH_EXCEPT(node, expression, message)               \
  {                                                                         \
    const cudaError_t status = (expression);                                \
    if (status != cudaSuccess) {                                            \
      MS_LOG(EXCEPTION) << "CUDA Error: " << message << " | Error Number: " \
                        << status << " " << cudaGetErrorString(status)      \
                        << trace::DumpSourceLines(node.lock());             \
    }                                                                       \
  }
92 
// Evaluates the CUDA call `expression` once. For any result other than cudaSuccess it reports, through
// MS_LOG(EXCEPTION): `message`, the status value and cudaGetErrorString(status). No source trace is
// appended. Expands to a brace block that declares a local named `status`.
#define CHECK_CUDA_RET_WITH_EXCEPT_NOTRACE(expression, message)             \
  {                                                                         \
    const cudaError_t status = (expression);                                \
    if (status != cudaSuccess) {                                            \
      MS_LOG(EXCEPTION) << "CUDA Error: " << message << " | Error Number: " \
                        << status << " " << cudaGetErrorString(status);     \
    }                                                                       \
  }
101 
// Evaluates the cuDNN call `expression` once. For any result other than CUDNN_STATUS_SUCCESS it reports,
// through MS_LOG(EXCEPTION): `message`, the status value, cudnnGetErrorString(status) and
// trace::DumpSourceLines(node.lock()).
// `node` must provide `.lock()` (presumably a weak pointer to the kernel node — confirm at call sites).
// Expands to a brace block that declares a local named `status`.
#define CHECK_CUDNN_RET_WITH_EXCEPT(node, expression, message)               \
  {                                                                          \
    const cudnnStatus_t status = (expression);                               \
    if (status != CUDNN_STATUS_SUCCESS) {                                    \
      MS_LOG(EXCEPTION) << "cuDNN Error: " << message << " | Error Number: " \
                        << status << " " << cudnnGetErrorString(status)      \
                        << trace::DumpSourceLines(node.lock());              \
    }                                                                        \
  }
110 
// Evaluates the cuDNN call `expression` once. For any result other than CUDNN_STATUS_SUCCESS it reports,
// through MS_LOG(EXCEPTION): `message`, the status value and cudnnGetErrorString(status). No source trace
// is appended. Expands to a brace block that declares a local named `status`.
#define CHECK_CUDNN_RET_WITH_EXCEPT_NOTRACE(expression, message)             \
  {                                                                          \
    const cudnnStatus_t status = (expression);                               \
    if (status != CUDNN_STATUS_SUCCESS) {                                    \
      MS_LOG(EXCEPTION) << "cuDNN Error: " << message << " | Error Number: " \
                        << status << " " << cudnnGetErrorString(status);     \
    }                                                                        \
  }
119 
// Evaluates the cuDNN call `expression` once. For any result other than CUDNN_STATUS_SUCCESS it logs,
// through MS_LOG(ERROR): `message`, the status value and cudnnGetErrorString(status). No source trace is
// appended. The macro itself neither returns nor throws.
// Expands to a brace block that declares a local named `status`.
#define CHECK_CUDNN_RET_WITH_ERROR_NOTRACE(expression, message)          \
  {                                                                      \
    const cudnnStatus_t status = (expression);                           \
    if (status != CUDNN_STATUS_SUCCESS) {                                \
      MS_LOG(ERROR) << "cuDNN Error: " << message << " | Error Number: " \
                    << status << " " << cudnnGetErrorString(status);     \
    }                                                                    \
  }
128 
// Evaluates the cuDNN call `expression` once. For any result other than CUDNN_STATUS_SUCCESS it logs,
// through MS_LOG(ERROR): `message`, the status value, cudnnGetErrorString(status) and
// trace::DumpSourceLines(node.lock()). The macro itself neither returns nor throws.
// `node` must provide `.lock()` (presumably a weak pointer to the kernel node — confirm at call sites).
// Expands to a brace block that declares a local named `status`.
#define CHECK_CUDNN_RET_WITH_ERROR(node, expression, message)            \
  {                                                                      \
    const cudnnStatus_t status = (expression);                           \
    if (status != CUDNN_STATUS_SUCCESS) {                                \
      MS_LOG(ERROR) << "cuDNN Error: " << message << " | Error Number: " \
                    << status << " " << cudnnGetErrorString(status)      \
                    << trace::DumpSourceLines(node.lock());              \
    }                                                                    \
  }
137 
// Evaluates the cuBLAS call `expression` once. For any result other than CUBLAS_STATUS_SUCCESS it
// reports `message` and the status value through MS_LOG(EXCEPTION). Only the numeric status is streamed;
// no error string or source trace is appended.
// Expands to a brace block that declares a local named `status`.
#define CHECK_CUBLAS_RET_WITH_EXCEPT_NOTRACE(expression, message) \
  {                                                               \
    const cublasStatus_t status = (expression);                   \
    if (status != CUBLAS_STATUS_SUCCESS) {                        \
      MS_LOG(EXCEPTION) << "cuBLAS Error: " << message            \
                        << " | Error Number: " << status;         \
    }                                                             \
  }
145 
// Evaluates the cuBLAS call `expression` once. For any result other than CUBLAS_STATUS_SUCCESS it
// reports, through MS_LOG(EXCEPTION): `message`, the status value and
// trace::DumpSourceLines(node.lock()).
// `node` must provide `.lock()` (presumably a weak pointer to the kernel node — confirm at call sites).
// Expands to a brace block that declares a local named `status`.
#define CHECK_CUBLAS_RET_WITH_EXCEPT(node, expression, message) \
  {                                                             \
    const cublasStatus_t status = (expression);                 \
    if (status != CUBLAS_STATUS_SUCCESS) {                      \
      MS_LOG(EXCEPTION) << "cuBLAS Error: " << message          \
                        << " | Error Number: " << status        \
                        << trace::DumpSourceLines(node.lock()); \
    }                                                           \
  }
154 
// Evaluates the cuBLAS call `expression` once. For any result other than CUBLAS_STATUS_SUCCESS it logs
// `message` and the status value through MS_LOG(ERROR). The macro itself neither returns nor throws.
// Expands to a brace block that declares a local named `status`.
#define CHECK_CUBLAS_RET_WITH_ERROR(expression, message)  \
  {                                                       \
    const cublasStatus_t status = (expression);           \
    if (status != CUBLAS_STATUS_SUCCESS) {                \
      MS_LOG(ERROR) << "cuBLAS Error: " << message        \
                    << " | Error Number: " << status;     \
    }                                                     \
  }
162 
// Evaluates the cuSOLVER call `expression` once. For any result other than CUSOLVER_STATUS_SUCCESS it
// reports `message` and the status value through MS_LOG(EXCEPTION). No source trace is appended.
// Expands to a brace block that declares a local named `status`.
#define CHECK_CUSOLVER_RET_WITH_EXCEPT_NOTRACE(expression, message) \
  {                                                                 \
    const cusolverStatus_t status = (expression);                   \
    if (status != CUSOLVER_STATUS_SUCCESS) {                        \
      MS_LOG(EXCEPTION) << "cusolver Error: " << message            \
                        << " | Error Number: " << status;           \
    }                                                               \
  }
170 
// Evaluates the cuSOLVER call `expression` once. For any result other than CUSOLVER_STATUS_SUCCESS it
// reports, through MS_LOG(EXCEPTION): `message`, the status value and
// trace::DumpSourceLines(node.lock()).
// `node` must provide `.lock()` (presumably a weak pointer to the kernel node — confirm at call sites).
// Expands to a brace block that declares a local named `status`.
#define CHECK_CUSOLVER_RET_WITH_EXCEPT(node, expression, message)                         \
  {                                                                                       \
    cusolverStatus_t status = (expression);                                               \
    if (status != CUSOLVER_STATUS_SUCCESS) {                                              \
      MS_LOG(EXCEPTION) << "cusolver Error: " << message << " | Error Number: " << status \
                        << trace::DumpSourceLines(node.lock());                           \
    }                                                                                     \
  }
180 
// Evaluates the cuSOLVER call `expression` once. For any result other than CUSOLVER_STATUS_SUCCESS it
// logs `message` and the status value through MS_LOG(ERROR). The macro itself neither returns nor throws.
// Expands to a brace block that declares a local named `status`.
#define CHECK_CUSOLVER_RET_WITH_ERROR(expression, message) \
  {                                                        \
    const cusolverStatus_t status = (expression);          \
    if (status != CUSOLVER_STATUS_SUCCESS) {               \
      MS_LOG(ERROR) << "cusolver Error: " << message       \
                    << " | Error Number: " << status;      \
    }                                                      \
  }
188 
// Evaluates the NCCL call `expression` once, storing the result as an int. For any result other than
// ncclSuccess it reports, through MS_LOG(EXCEPTION): `message`, the numeric result and
// trace::DumpSourceLines(node.lock()).
// `node` must provide `.lock()` (presumably a weak pointer to the kernel node — confirm at call sites).
// Expands to a brace block that declares a local named `result`.
#define CHECK_NCCL_RET_WITH_EXCEPT(node, expression, message)   \
  {                                                             \
    const int result = (expression);                            \
    if (result != ncclSuccess) {                                \
      MS_LOG(EXCEPTION) << "NCCL Error: " << message            \
                        << " | Error Number: " << result        \
                        << trace::DumpSourceLines(node.lock()); \
    }                                                           \
  }
197 
// Marks `var` as intentionally unused: the argument is evaluated and its value discarded.
// Expands to a brace block rather than a bare expression.
#define VARIABLE_NOT_USED(var) \
  { static_cast<void>(var); }
200 
// Reports whether `input_shape` describes a null input.
// An empty shape vector means a scalar input and is not null. A non-empty shape is null as soon as any
// dimension equals 0; callers can then just return a null output.
// Returns true for a null input, false otherwise.
inline bool CheckNullInput(const std::vector<size_t> &input_shape) {
  // std::any_of yields false on an empty range, so the scalar case needs no separate branch.
  return std::any_of(input_shape.begin(), input_shape.end(), [](size_t dim) { return dim == 0; });
}
// Convenience wrapper that calls CheckNullInput through its fully qualified name.
#define CHECK_NULL_INPUT(input_shape) mindspore::device::gpu::CheckNullInput(input_shape)
212 
CurandGetErrorString(curandStatus_t status)213 inline const char *CurandGetErrorString(curandStatus_t status) {
214   switch (status) {
215     case CURAND_STATUS_VERSION_MISMATCH:
216       return "Header file and linked library version do not match.";
217     case CURAND_STATUS_NOT_INITIALIZED:
218       return "Generator not initialized.";
219     case CURAND_STATUS_ALLOCATION_FAILED:
220       return "Memory allocation failed.";
221     case CURAND_STATUS_TYPE_ERROR:
222       return "Generator is wrong type.";
223     case CURAND_STATUS_OUT_OF_RANGE:
224       return "Argument out of range.";
225     case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
226       return "Length requested is not a multiple of dimension.";
227     case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
228       return "GPU does not have double precision required by MRG32k3a.";
229     case CURAND_STATUS_LAUNCH_FAILURE:
230       return "Kernel launch failure.";
231     case CURAND_STATUS_PREEXISTING_FAILURE:
232       return "Preexisting failure on library entry.";
233     case CURAND_STATUS_INITIALIZATION_FAILED:
234       return "Initialization of CUDA failed.";
235     case CURAND_STATUS_ARCH_MISMATCH:
236       return "Architecture mismatch, GPU does not support requested feature.";
237     case CURAND_STATUS_INTERNAL_ERROR:
238       return "Internal library error.";
239     default:
240       return "Unknown the curandStatus.";
241   }
242 }
243 
// Evaluates the cuRAND call `expression` once. For any result other than CURAND_STATUS_SUCCESS it
// reports, through MS_LOG(EXCEPTION): `message`, the status value and the description returned by
// mindspore::device::gpu::CurandGetErrorString(status). The helper is named with full qualification so
// the macro can be expanded from any namespace.
// Expands to a brace block that declares a local named `status`.
#define CHECK_CURAND_RET_WITH_EXCEPT(expression, message)                        \
  {                                                                              \
    const curandStatus_t status = (expression);                                  \
    if (status != CURAND_STATUS_SUCCESS) {                                       \
      MS_LOG(EXCEPTION) << "CUDA curand Error: " << message                      \
                        << " | curandStatus: " << status << " "                  \
                        << mindspore::device::gpu::CurandGetErrorString(status); \
    }                                                                            \
  }
252 }  // namespace gpu
253 }  // namespace device
254 }  // namespace mindspore
255 
256 #endif  // MINDSPORE_CCSRC_RUNTIME_DEVICE_GPU_GPU_COMMON_H_
257