/**
 * Copyright 2020-2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "plugin/device/gpu/kernel/gpu_kernel.h"
#include <tuple>
#include <set>
#include <numeric>

namespace mindspore {
namespace kernel {
namespace {
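// Warns when float16 inputs are used on a device whose SM major version is below RECOMMEND_SM, and raises
// an exception when it is below MINIUM_SM. The warning is reported at most once: set_check_sm(false)
// disables subsequent checks.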
void CheckDeviceSm(const KernelAttr &kernel_attr) {
  const int major_sm = GET_MAJOR_SM;
  if (!mindspore::device::gpu::CudaCommon::GetInstance().check_sm() || major_sm >= RECOMMEND_SM) {
    return;
  }

  for (size_t i = 0; i < kernel_attr.GetInputSize(); ++i) {
    if (kernel_attr.GetInputAttr(i).dtype != kNumberTypeFloat16) {
      continue;
    }

    if (major_sm < MINIUM_SM) {
      MS_LOG(EXCEPTION) << "Half precision ops can only be used on devices whose compute capability is >= "
                        << MINIUM_SM << ", but the current device's compute capability is " << major_sm;
    }
    MS_LOG(WARNING) << "It is recommended to use devices with a compute capability >= " << RECOMMEND_SM
                    << ", but the current device's compute capability is " << major_sm;
    mindspore::device::gpu::CudaCommon::GetInstance().set_check_sm(false);
    return;
  }
}
}  // namespace

bool NativeGpuKernelMod::GpuCheckSupport(const std::string &kernel_name, const KernelAttr &kernel_attr) {
  return kernel::Factory<NativeGpuKernelMod>::Instance().Create(kernel_name)->CheckSupport(kernel_name, kernel_attr);
}

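// Returns the supported KernelAttr list for the given kernel name, caching the result of GetOpSupport()
// in support_map_ so it is only queried once per kernel.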
std::vector<KernelAttr> NativeGpuKernelMod::GetAllSupportedList(const std::string &kernel_name) {
  auto iter = support_map_.find(kernel_name);
  if (iter == support_map_.end()) {
    auto kernel_support = GetOpSupport();
    (void)support_map_.emplace(kernel_name, kernel_support);
  }
  return support_map_[kernel_name];
}

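// Checks whether kernel_attr_to_check matches one of the kernel's registered attributes. If the first
// registered attribute carries the skip-check flag, the attribute is accepted unconditionally.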
bool NativeGpuKernelMod::CheckSupport(const std::string &kernel_name, const KernelAttr &kernel_attr_to_check) {
  CheckDeviceSm(kernel_attr_to_check);
  auto kernel_attrs = GetAllSupportedList(kernel_name);
  bool is_match;
  std::tie(is_match, std::ignore) = MatchKernelAttr(kernel_attr_to_check, kernel_attrs);

  if (kernel_attrs[0].GetSkipCheck()) {
    is_match = true;
  }
  return is_match;
}

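// Tries to rescue an unsupported int64 attribute by reducing it to int32: every int64 input/output whose
// registered counterpart is int32 is recorded, the attribute is rewritten with the reduced types, and the
// rewritten attribute is re-checked against the kernel's support list.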
NativeGpuKernelMod::ReducePrecisonRes NativeGpuKernelMod::ReducePrecisionCheck(const std::string &kernel_name,
                                                                               const KernelAttr &kernel_attr_to_check) {
  std::vector<ReduceDetail> input_reduce_index;
  std::vector<ReduceDetail> output_reduce_index;
  std::vector<KernelAttr> kernel_attr_list = this->GetOpSupport();

  const TypeId from_precision = kNumberTypeInt64;
  const TypeId to_precision = kNumberTypeInt32;
  for (size_t attr_index = 0; attr_index < kernel_attr_list.size(); ++attr_index) {
    auto &cur_kernel_attr = kernel_attr_list[attr_index];
    auto attr_size = cur_kernel_attr.GetInputSize();
    MS_EXCEPTION_IF_ZERO("kernel attr input size", attr_size);
    for (size_t iidx = 0; iidx < kernel_attr_to_check.GetInputSize(); iidx++) {
      auto cur_input_attr = kernel_attr_to_check.GetInputAttr(iidx);
      const auto &type_id = cur_input_attr.dtype;
      if (type_id == from_precision && cur_kernel_attr.GetInputAttr(iidx % attr_size).dtype == to_precision) {
        (void)input_reduce_index.emplace_back(iidx, from_precision, to_precision);
        MS_LOG(INFO) << "Kernel [" << kernel_name << "] does not support int64, cast input " << iidx << " to int32.";
      }
    }
    for (size_t oidx = 0; oidx < kernel_attr_to_check.GetOutputSize(); oidx++) {
      auto cur_output_attr = kernel_attr_to_check.GetOutputAttr(oidx);
      const auto &type_id = cur_output_attr.dtype;
      if (type_id == from_precision && cur_kernel_attr.GetOutputAttr(oidx % attr_size).dtype == to_precision) {
        (void)output_reduce_index.emplace_back(oidx, from_precision, to_precision);
        MS_LOG(INFO) << "Kernel [" << kernel_name << "] does not support int64, cast output " << oidx << " to int32.";
      }
    }
  }

  if (input_reduce_index.empty() && output_reduce_index.empty()) {
    return std::make_tuple(false, input_reduce_index, output_reduce_index);
  }

  auto reduce_kernel_attr = kernel_attr_to_check;
  const size_t kTwo = 2;
  for (const auto &reduce_item : input_reduce_index) {
    auto reduce_idx = std::get<0>(reduce_item);
    auto cur_attr = reduce_kernel_attr.GetInputAttr(reduce_idx);
    reduce_kernel_attr.SetInputAttr(reduce_idx, std::get<kTwo>(reduce_item), cur_attr.format);
  }
  for (const auto &reduce_item : output_reduce_index) {
    auto reduce_idx = std::get<0>(reduce_item);
    auto cur_attr = reduce_kernel_attr.GetOutputAttr(reduce_idx);
    reduce_kernel_attr.SetOutputAttr(reduce_idx, std::get<kTwo>(reduce_item), cur_attr.format);
  }

  MS_LOG(INFO) << "Kernel [" << kernel_name << "] reduce precision attr: " << reduce_kernel_attr;
  return std::make_tuple(CheckSupport(kernel_name, reduce_kernel_attr), input_reduce_index, output_reduce_index);
}

mindspore::HashMap<std::string, std::vector<KernelAttr>> NativeGpuKernelMod::support_map_{};

std::vector<void *> ConvertPtrs(const std::vector<KernelTensor *> &input_ptrs) {
  std::vector<void *> out_ptrs;
  std::transform(input_ptrs.begin(), input_ptrs.end(), std::back_inserter(out_ptrs),
                 [](const auto &cur_addr) { return (cur_addr == nullptr) ? nullptr : cur_addr->device_ptr(); });
  return out_ptrs;
}

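// Expands a shape of at most 4 dimensions to exactly 4 dimensions by left-padding with 1s, e.g. (C, H, W)
// becomes (1, C, H, W). Returns false for shapes with more than 4 dimensions.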
bool ShapeNdTo4d(const ShapeVector &src, ShapeVector *dst) {
  const size_t nd_maximum_size = 4;
  if (src.size() > nd_maximum_size) {
    MS_LOG(ERROR) << src.size() << "-D data is not supported!";
    return false;
  }

  dst->push_back(src.size() < kShapeIndex4th ? 1 : src[src.size() - kShapeIndex4th]);
  dst->push_back(src.size() < kShapeIndex3rd ? 1 : src[src.size() - kShapeIndex3rd]);
  dst->push_back(src.size() < kShapeIndex2nd ? 1 : src[src.size() - kShapeIndex2nd]);
  dst->push_back(src.size() == 0 ? 1 : src[src.size() - kShapeIndex1st]);
  return true;
}

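// Maps an axis index between the NCHW (default) and NHWC layouts; if the two formats already agree, the
// axis is returned unchanged.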
int AxisTransform(const std::string &origin_data_format, const std::string &cal_format, int axis) {
  if (((origin_data_format == kOpFormat_DEFAULT) || (origin_data_format == kOpFormat_NCHW)) &&
      (cal_format == kOpFormat_NHWC)) {
    return kNCHWToNHWCAxisMap[axis];
  } else if (((cal_format == kOpFormat_DEFAULT) || (cal_format == kOpFormat_NCHW)) &&
             (origin_data_format == kOpFormat_NHWC)) {
    return kNHWCToNCHWAxisMap[axis];
  } else {
    return axis;
  }
}

void ShapeNCHW2NHWC(ShapeVector *shape) {
  std::swap((*shape)[kShapeIndex1st], (*shape)[kShapeIndex3rd]);
  std::swap((*shape)[kShapeIndex2nd], (*shape)[kShapeIndex1st]);
}

void ShapeNCDHW2NDHWC(ShapeVector *shape) {
  std::swap((*shape)[kShapeIndex1st], (*shape)[kShapeIndex2nd]);
  std::swap((*shape)[kShapeIndex2nd], (*shape)[kShapeIndex3rd]);
  std::swap((*shape)[kShapeIndex3rd], (*shape)[kShapeIndex4th]);
}

////////// old: string format ///////////
void SetDimA(const ShapeVector &shape, int *dimA, size_t len, const std::string &format) {
  if (shape.size() != len) {
    MS_EXCEPTION(ValueError) << "Invalid size of input shape " << shape.size() << "-D with dimA " << len << "-D.";
  }
  if (Anyone(format, "NCHW", "DefaultFormat", "NCDHW")) {
    for (size_t i = 0; i < len; ++i) {
      dimA[i] = LongToInt(shape[i]);
    }
  } else if (format == "NHWC") {
    dimA[0] = LongToInt(shape[0]);
    dimA[kShapeIndex1st] = LongToInt(shape[kShapeIndex3rd]);
    dimA[kShapeIndex2nd] = LongToInt(shape[kShapeIndex1st]);
    dimA[kShapeIndex3rd] = LongToInt(shape[kShapeIndex2nd]);
  } else {
    MS_LOG(ERROR) << "Unsupported data format " << format;
  }
}

void SetStrideA(const ShapeVector &shape, int *strideA, size_t len, const std::string &format) {
  if (shape.size() != len) {
    MS_EXCEPTION(ValueError) << "Invalid size of input shape " << shape.size() << "-D with strideA " << len << "-D.";
  }
  if (Anyone(format, "NCHW", "DefaultFormat", "NCDHW")) {
    for (size_t i = 0; i < len; ++i) {
      strideA[i] = LongToInt(accumulate(shape.begin() + i + 1, shape.end(), 1, std::multiplies<int64_t>()));
    }
  } else if (format == "NHWC") {
    strideA[0] = LongToInt(shape[kShapeIndex1st] * shape[kShapeIndex2nd] * shape[kShapeIndex3rd]);
    strideA[1] = 1;
    strideA[kShapeIndex2nd] = LongToInt(shape[kShapeIndex2nd] * shape[kShapeIndex3rd]);
    strideA[kShapeIndex3rd] = LongToInt(shape[kShapeIndex3rd]);
  } else {
    MS_LOG(ERROR) << "Unsupported data format " << format;
  }
}

void SetNCHW(const ShapeVector &shape, int *n, int *c, int *h, int *w, const std::string &format) {
  if (Anyone(format, "NCHW", "DefaultFormat")) {
    *n = LongToInt(shape[0]);
    *c = LongToInt(shape[kShapeIndex1st]);
    *h = LongToInt(shape[kShapeIndex2nd]);
    *w = LongToInt(shape[kShapeIndex3rd]);
  } else if (format == "NHWC") {
    *n = LongToInt(shape[0]);
    *c = LongToInt(shape[kShapeIndex3rd]);
    *h = LongToInt(shape[kShapeIndex1st]);
    *w = LongToInt(shape[kShapeIndex2nd]);
  } else {
    MS_LOG(ERROR) << "Unsupported data format " << format;
  }
}

void SetNCDHW(const ShapeVector &shape, int *n, int *c, int *d, int *h, int *w, const std::string &format) {
  if (Anyone(format, "NCDHW", "DefaultFormat")) {
    *n = LongToInt(shape[0]);
    *c = LongToInt(shape[kShapeIndex1st]);
    *d = LongToInt(shape[kShapeIndex2nd]);
    *h = LongToInt(shape[kShapeIndex3rd]);
    *w = LongToInt(shape[kShapeIndex4th]);
  } else if (format == "NDHWC") {
    *n = LongToInt(shape[0]);
    *c = LongToInt(shape[kShapeIndex4th]);
    *d = LongToInt(shape[kShapeIndex1st]);
    *h = LongToInt(shape[kShapeIndex2nd]);
    *w = LongToInt(shape[kShapeIndex3rd]);
  } else {
    MS_LOG(ERROR) << "Unsupported data format " << format;
  }
}
////////////////////////////////////////
////////// new: enum format ///////////
void SetDimA(const ShapeVector &shape, int *dimA, size_t len, const mindspore::Format &format) {
  if (shape.size() != len) {
    MS_EXCEPTION(ValueError) << "Invalid size of input shape " << shape.size() << "-D with dimA " << len << "-D.";
  }
  if (Anyone(format, mindspore::Format::NCHW, mindspore::Format::DEFAULT_FORMAT, mindspore::Format::NCDHW)) {
    for (size_t i = 0; i < len; ++i) {
      dimA[i] = LongToInt(shape[i]);
    }
  } else if (format == mindspore::Format::NHWC) {
    dimA[0] = LongToInt(shape[0]);
    dimA[kShapeIndex1st] = LongToInt(shape[kShapeIndex3rd]);
    dimA[kShapeIndex2nd] = LongToInt(shape[kShapeIndex1st]);
    dimA[kShapeIndex3rd] = LongToInt(shape[kShapeIndex2nd]);
  } else {
    MS_LOG(ERROR) << "Unsupported data format " << mindspore::FormatEnumToString(format);
  }
}

void SetStrideA(const ShapeVector &shape, int *strideA, size_t len, const mindspore::Format &format) {
  if (shape.size() != len) {
    MS_EXCEPTION(ValueError) << "Invalid size of input shape " << shape.size() << "-D with strideA " << len << "-D.";
  }
  if (Anyone(format, mindspore::Format::NCHW, mindspore::Format::DEFAULT_FORMAT, mindspore::Format::NCDHW)) {
    for (size_t i = 0; i < len; ++i) {
      strideA[i] = LongToInt(accumulate(shape.begin() + i + 1, shape.end(), 1, std::multiplies<int64_t>()));
    }
  } else if (format == mindspore::Format::NHWC) {
    strideA[0] = LongToInt(shape[kShapeIndex1st] * shape[kShapeIndex2nd] * shape[kShapeIndex3rd]);
    strideA[1] = 1;
    strideA[kShapeIndex2nd] = LongToInt(shape[kShapeIndex2nd] * shape[kShapeIndex3rd]);
    strideA[kShapeIndex3rd] = LongToInt(shape[kShapeIndex3rd]);
  } else {
    MS_LOG(ERROR) << "Unsupported data format " << mindspore::FormatEnumToString(format);
  }
}

void SetNCHW(const ShapeVector &shape, int *n, int *c, int *h, int *w, const mindspore::Format &format) {
  if (Anyone(format, mindspore::Format::NCHW, mindspore::Format::DEFAULT_FORMAT)) {
    *n = LongToInt(shape[0]);
    *c = LongToInt(shape[kShapeIndex1st]);
    *h = LongToInt(shape[kShapeIndex2nd]);
    *w = LongToInt(shape[kShapeIndex3rd]);
  } else if (format == mindspore::Format::NHWC) {
    *n = LongToInt(shape[0]);
    *c = LongToInt(shape[kShapeIndex3rd]);
    *h = LongToInt(shape[kShapeIndex1st]);
    *w = LongToInt(shape[kShapeIndex2nd]);
  } else {
    MS_LOG(ERROR) << "Unsupported data format " << mindspore::FormatEnumToString(format);
  }
}

void SetNCDHW(const ShapeVector &shape, int *n, int *c, int *d, int *h, int *w, const mindspore::Format &format) {
  if (Anyone(format, mindspore::Format::NCDHW, mindspore::Format::DEFAULT_FORMAT)) {
    *n = LongToInt(shape[0]);
    *c = LongToInt(shape[kShapeIndex1st]);
    *d = LongToInt(shape[kShapeIndex2nd]);
    *h = LongToInt(shape[kShapeIndex3rd]);
    *w = LongToInt(shape[kShapeIndex4th]);
  } else if (format == mindspore::Format::NDHWC) {
    *n = LongToInt(shape[0]);
    *c = LongToInt(shape[kShapeIndex4th]);
    *d = LongToInt(shape[kShapeIndex1st]);
    *h = LongToInt(shape[kShapeIndex2nd]);
    *w = LongToInt(shape[kShapeIndex3rd]);
  } else {
    MS_LOG(ERROR) << "Unsupported data format " << mindspore::FormatEnumToString(format);
  }
}
///////////////////////////////////////

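// cudnnOpTensor only supports one-sided broadcasting: at least one of the two inputs must have exactly the
// output shape, while the other may broadcast. Returns false when both inputs differ from the output shape.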
bool CheckBroadcast4TensorOp(const std::vector<int> &A, const std::vector<int> &B, const std::vector<int> &Out) {
  if (A != Out && B != Out) {
    MS_LOG(ERROR) << "Double-sided broadcast is not supported by cuDNN's cudnnOpTensor:\n"
                     "input A must match every dimension of the destination tensor outC, and each dimension of "
                     "input B must either match the corresponding dimension of outC or be equal to 1.";
    return false;
  }
  return true;
}

bool CheckTensorSize(const std::initializer_list<ShapeVector> &shapes) {
  for (auto shape : shapes) {
    int64_t total_size = std::accumulate(shape.begin(), shape.end(), int64_t(1), std::multiplies<int64_t>());
    if (total_size >= SHAPE_SIZE_LIMIT) {
      MS_LOG(ERROR) << "The total size of the tensor is " << total_size << " elements (" << shape
                    << "), which exceeds the limit of 2 Giga-elements.";
      return false;
    }
  }
  return true;
}

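// Fills a cuDNN Nd tensor descriptor from a shape of at least 3 dimensions, computing fully-packed
// (contiguous) strides, and logs the cuDNN error string on failure.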
bool CudnnSetTensorNdDescriptor(const ShapeVector &shape, cudnnTensorDescriptor_t descriptor, cudnnDataType_t data_type,
                                const std::string &node_name) {
  if (shape.size() < 3) {
    MS_LOG(ERROR) << "cudnnSetTensorNdDescriptor does not support " << shape.size() << "-D shapes.";
    return false;
  }
  const int nbDims = shape.size();
  std::unique_ptr<int[]> dim = std::make_unique<int[]>(nbDims);
  std::unique_ptr<int[]> stride = std::make_unique<int[]>(nbDims);

  for (int i = 0; i < nbDims; i++) {
    dim[i] = LongToInt(shape[i]);
    stride[i] = 1;
  }

  for (int i = nbDims - 2; i >= 0; i--) {
    stride[i] = stride[i + 1] * LongToInt(shape[i + 1]);
  }

  cudnnStatus_t status = cudnnSetTensorNdDescriptor(descriptor, data_type, nbDims, dim.get(), stride.get());
  if (status != CUDNN_STATUS_SUCCESS) {
    MS_LOG(ERROR) << "cuDNN Error: cudnnSetTensorNdDescriptor failed | Error Number: " << status << " "
                  << cudnnGetErrorString(status);
    return false;
  }
  return true;
}

bool GetCudnnDataType(const std::string &Type, cudnnDataType_t *out_type) {
  auto type = kCudnnDtypeMap.find(Type);
  if (type == kCudnnDtypeMap.end()) {
    MS_LOG(ERROR) << Type << " is not supported.";
    return false;
  }
  *out_type = type->second;
  return true;
}

bool GetCudaDataType(const std::string &Type, cudaDataType_t *out_type) {
  auto type = kCudaDtypeMap.find(Type);
  if (type == kCudaDtypeMap.end()) {
    MS_LOG(ERROR) << Type << " is not supported.";
    return false;
  }
  *out_type = type->second;
  return true;
}

bool ShapeEqual(const ShapeVector &s1, const ShapeVector &s2) {
  return std::equal(s1.begin(), s1.end(), s2.begin(), s2.end());
}
}  // namespace kernel
}  // namespace mindspore
389