/**
 * Copyright 2020-2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "plugin/device/gpu/kernel/gpu_kernel.h"
#include <tuple>
#include <set>
#include <numeric>

namespace mindspore {
namespace kernel {
namespace {
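// When SM checking is enabled and the device's major SM version is below RECOMMEND_SM, inspect the
// float16 inputs: raise an exception below MINIUM_SM, otherwise warn once and disable further checks.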
void CheckDeviceSm(const KernelAttr &kernel_attr) {
  const int major_sm = GET_MAJOR_SM;
  if (!mindspore::device::gpu::CudaCommon::GetInstance().check_sm() || major_sm >= RECOMMEND_SM) {
    return;
  }

  for (size_t i = 0; i < kernel_attr.GetInputSize(); ++i) {
    if (kernel_attr.GetInputAttr(i).dtype != kNumberTypeFloat16) {
      continue;
    }

    if (major_sm < MINIUM_SM) {
      MS_LOG(EXCEPTION) << "Half precision ops can only be used on devices whose compute capability is >= "
                        << MINIUM_SM << ", but the current device's compute capability is " << major_sm;
    }
    MS_LOG(WARNING) << "It is recommended to use devices with a compute capability >= " << RECOMMEND_SM
                    << ", but the current device's compute capability is " << major_sm;
    mindspore::device::gpu::CudaCommon::GetInstance().set_check_sm(false);
    return;
  }
}
}  // namespace

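// Creates the registered GPU kernel named kernel_name and asks it whether it supports the given attribute.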
bool NativeGpuKernelMod::GpuCheckSupport(const std::string &kernel_name, const KernelAttr &kernel_attr) {
  return kernel::Factory<NativeGpuKernelMod>::Instance().Create(kernel_name)->CheckSupport(kernel_name, kernel_attr);
}

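// Returns all kernel attributes supported by kernel_name, caching the result of GetOpSupport() in support_map_.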
std::vector<KernelAttr> NativeGpuKernelMod::GetAllSupportedList(const std::string &kernel_name) {
  auto iter = support_map_.find(kernel_name);
  if (iter == support_map_.end()) {
    auto kernel_support = GetOpSupport();
    (void)support_map_.emplace(kernel_name, kernel_support);
  }
  return support_map_[kernel_name];
}

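// Checks the device SM version and matches the requested attribute against the supported list;
// kernels whose first attribute sets SkipCheck always match.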
bool NativeGpuKernelMod::CheckSupport(const std::string &kernel_name, const KernelAttr &kernel_attr_to_check) {
  CheckDeviceSm(kernel_attr_to_check);
  auto kernel_attrs = GetAllSupportedList(kernel_name);
  bool is_match;
  std::tie(is_match, std::ignore) = MatchKernelAttr(kernel_attr_to_check, kernel_attrs);

  if (kernel_attrs[0].GetSkipCheck()) {
    is_match = true;
  }
  return is_match;
}

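// Collects the input/output indices whose int64 types can fall back to the kernel's int32 registration,
// builds the reduced-precision attribute, and re-checks support with it.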
NativeGpuKernelMod::ReducePrecisonRes NativeGpuKernelMod::ReducePrecisionCheck(const std::string &kernel_name,
                                                                               const KernelAttr &kernel_attr_to_check) {
  std::vector<ReduceDetail> input_reduce_index;
  std::vector<ReduceDetail> output_reduce_index;
  std::vector<KernelAttr> kernel_attr_list = this->GetOpSupport();

  const TypeId from_precision = kNumberTypeInt64;
  const TypeId to_precision = kNumberTypeInt32;
  for (size_t attr_index = 0; attr_index < kernel_attr_list.size(); ++attr_index) {
    auto &cur_kernel_attr = kernel_attr_list[attr_index];
    auto attr_size = cur_kernel_attr.GetInputSize();
    MS_EXCEPTION_IF_ZERO("kernel attr input size", attr_size);
    for (size_t iidx = 0; iidx < kernel_attr_to_check.GetInputSize(); iidx++) {
      auto cur_input_attr = kernel_attr_to_check.GetInputAttr(iidx);
      const auto &type_id = cur_input_attr.dtype;
      if (type_id == from_precision && cur_kernel_attr.GetInputAttr(iidx % attr_size).dtype == to_precision) {
        (void)input_reduce_index.emplace_back(iidx, from_precision, to_precision);
        MS_LOG(INFO) << "Kernel [" << kernel_name << "] does not support int64, cast input " << iidx << " to int32.";
      }
    }
    for (size_t oidx = 0; oidx < kernel_attr_to_check.GetOutputSize(); oidx++) {
      auto cur_output_attr = kernel_attr_to_check.GetOutputAttr(oidx);
      const auto &type_id = cur_output_attr.dtype;
      if (type_id == from_precision && cur_kernel_attr.GetOutputAttr(oidx % attr_size).dtype == to_precision) {
        (void)output_reduce_index.emplace_back(oidx, from_precision, to_precision);
        MS_LOG(INFO) << "Kernel [" << kernel_name << "] does not support int64, cast output " << oidx << " to int32.";
      }
    }
  }

  if (input_reduce_index.empty() && output_reduce_index.empty()) {
    return std::make_tuple(false, input_reduce_index, output_reduce_index);
  }

  auto reduce_kernel_attr = kernel_attr_to_check;
  const size_t kTwo = 2;
  for (const auto &reduce_item : input_reduce_index) {
    auto reduce_idx = std::get<0>(reduce_item);
    auto cur_attr = reduce_kernel_attr.GetInputAttr(reduce_idx);
    reduce_kernel_attr.SetInputAttr(reduce_idx, std::get<kTwo>(reduce_item), cur_attr.format);
  }
  for (const auto &reduce_item : output_reduce_index) {
    auto reduce_idx = std::get<0>(reduce_item);
    auto cur_attr = reduce_kernel_attr.GetOutputAttr(reduce_idx);
    reduce_kernel_attr.SetOutputAttr(reduce_idx, std::get<kTwo>(reduce_item), cur_attr.format);
  }

  MS_LOG(INFO) << "Kernel [" << kernel_name << "] reduce precision attr: " << reduce_kernel_attr;
  return std::make_tuple(CheckSupport(kernel_name, reduce_kernel_attr), input_reduce_index, output_reduce_index);
}

mindspore::HashMap<std::string, std::vector<KernelAttr>> NativeGpuKernelMod::support_map_{};

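// Extracts the raw device pointers from kernel tensors, preserving nullptr entries.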
std::vector<void *> ConvertPtrs(const std::vector<KernelTensor *> &input_ptrs) {
  std::vector<void *> out_ptrs;
  std::transform(input_ptrs.begin(), input_ptrs.end(), std::back_inserter(out_ptrs),
                 [](const auto &cur_addr) { return (cur_addr == nullptr) ? nullptr : cur_addr->device_ptr(); });
  return out_ptrs;
}

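// Expands a shape of at most 4 dimensions to exactly 4-D by left-padding with 1s.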
bool ShapeNdTo4d(const ShapeVector &src, ShapeVector *dst) {
  const size_t nd_maximum_size = 4;
  if (src.size() > nd_maximum_size) {
    MS_LOG(ERROR) << src.size() << "-D data is not supported!";
    return false;
  }

  dst->push_back(src.size() < kShapeIndex4th ? 1 : src[src.size() - kShapeIndex4th]);
  dst->push_back(src.size() < kShapeIndex3rd ? 1 : src[src.size() - kShapeIndex3rd]);
  dst->push_back(src.size() < kShapeIndex2nd ? 1 : src[src.size() - kShapeIndex2nd]);
  dst->push_back(src.size() == 0 ? 1 : src[src.size() - kShapeIndex1st]);
  return true;
}

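// Maps an axis index between NCHW (or the default format) and NHWC; other combinations return the axis unchanged.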
int AxisTransform(const std::string &origin_data_format, const std::string &cal_format, int axis) {
  if (((origin_data_format == kOpFormat_DEFAULT) || (origin_data_format == kOpFormat_NCHW)) &&
      (cal_format == kOpFormat_NHWC)) {
    return kNCHWToNHWCAxisMap[axis];
  } else if (((cal_format == kOpFormat_DEFAULT) || (cal_format == kOpFormat_NCHW)) &&
             (origin_data_format == kOpFormat_NHWC)) {
    return kNHWCToNCHWAxisMap[axis];
  } else {
    return axis;
  }
}

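// Permutes a 4-D shape in place from NCHW to NHWC.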
void ShapeNCHW2NHWC(ShapeVector *shape) {
  std::swap((*shape)[kShapeIndex1st], (*shape)[kShapeIndex3rd]);
  std::swap((*shape)[kShapeIndex2nd], (*shape)[kShapeIndex1st]);
}

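// Permutes a 5-D shape in place from NCDHW to NDHWC.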
void ShapeNCDHW2NDHWC(ShapeVector *shape) {
  std::swap((*shape)[kShapeIndex1st], (*shape)[kShapeIndex2nd]);
  std::swap((*shape)[kShapeIndex2nd], (*shape)[kShapeIndex3rd]);
  std::swap((*shape)[kShapeIndex3rd], (*shape)[kShapeIndex4th]);
}

////////// old: string format ///////////
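// Copies the shape into dimA in channel-first order, reordering NHWC inputs to NCHW.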
void SetDimA(const ShapeVector &shape, int *dimA, size_t len, const std::string &format) {
  if (shape.size() != len) {
    MS_EXCEPTION(ValueError) << "Invalid size of input shape " << shape.size() << "-D with dimA " << len << "-D.";
  }
  if (Anyone(format, "NCHW", "DefaultFormat", "NCDHW")) {
    for (size_t i = 0; i < len; ++i) {
      dimA[i] = LongToInt(shape[i]);
    }
  } else if (format == "NHWC") {
    dimA[0] = LongToInt(shape[0]);
    dimA[kShapeIndex1st] = LongToInt(shape[kShapeIndex3rd]);
    dimA[kShapeIndex2nd] = LongToInt(shape[kShapeIndex1st]);
    dimA[kShapeIndex3rd] = LongToInt(shape[kShapeIndex2nd]);
  } else {
    MS_LOG(ERROR) << "Unsupported data format " << format;
  }
}

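// Fills strideA with the element strides implied by the shape for the given format.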
void SetStrideA(const ShapeVector &shape, int *strideA, size_t len, const std::string &format) {
  if (shape.size() != len) {
    MS_EXCEPTION(ValueError) << "Invalid size of input shape " << shape.size() << "-D with strideA " << len << "-D.";
  }
  if (Anyone(format, "NCHW", "DefaultFormat", "NCDHW")) {
    for (size_t i = 0; i < len; ++i) {
      strideA[i] = LongToInt(accumulate(shape.begin() + i + 1, shape.end(), 1, std::multiplies<int64_t>()));
    }
  } else if (format == "NHWC") {
    strideA[0] = LongToInt(shape[kShapeIndex1st] * shape[kShapeIndex2nd] * shape[kShapeIndex3rd]);
    strideA[1] = 1;
    strideA[kShapeIndex2nd] = LongToInt(shape[kShapeIndex2nd] * shape[kShapeIndex3rd]);
    strideA[kShapeIndex3rd] = LongToInt(shape[kShapeIndex3rd]);
  } else {
    MS_LOG(ERROR) << "Unsupported data format " << format;
  }
}

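// Unpacks a 4-D shape into N, C, H and W according to the format string.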
void SetNCHW(const ShapeVector &shape, int *n, int *c, int *h, int *w, const std::string &format) {
  if (Anyone(format, "NCHW", "DefaultFormat")) {
    *n = LongToInt(shape[0]);
    *c = LongToInt(shape[kShapeIndex1st]);
    *h = LongToInt(shape[kShapeIndex2nd]);
    *w = LongToInt(shape[kShapeIndex3rd]);
  } else if (format == "NHWC") {
    *n = LongToInt(shape[0]);
    *c = LongToInt(shape[kShapeIndex3rd]);
    *h = LongToInt(shape[kShapeIndex1st]);
    *w = LongToInt(shape[kShapeIndex2nd]);
  } else {
    MS_LOG(ERROR) << "Unsupported data format " << format;
  }
}

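// Unpacks a 5-D shape into N, C, D, H and W according to the format string.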
void SetNCDHW(const ShapeVector &shape, int *n, int *c, int *d, int *h, int *w, const std::string &format) {
  if (Anyone(format, "NCDHW", "DefaultFormat")) {
    *n = LongToInt(shape[0]);
    *c = LongToInt(shape[kShapeIndex1st]);
    *d = LongToInt(shape[kShapeIndex2nd]);
    *h = LongToInt(shape[kShapeIndex3rd]);
    *w = LongToInt(shape[kShapeIndex4th]);
  } else if (format == "NDHWC") {
    *n = LongToInt(shape[0]);
    *c = LongToInt(shape[kShapeIndex4th]);
    *d = LongToInt(shape[kShapeIndex1st]);
    *h = LongToInt(shape[kShapeIndex2nd]);
    *w = LongToInt(shape[kShapeIndex3rd]);
  } else {
    MS_LOG(ERROR) << "Unsupported data format " << format;
  }
}
////////////////////////////////////////
////////// new: enum format ///////////
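// The overloads below mirror the string-format helpers above but take the mindspore::Format enum.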
void SetDimA(const ShapeVector &shape, int *dimA, size_t len, const mindspore::Format &format) {
  if (shape.size() != len) {
    MS_EXCEPTION(ValueError) << "Invalid size of input shape " << shape.size() << "-D with dimA " << len << "-D.";
  }
  if (Anyone(format, mindspore::Format::NCHW, mindspore::Format::DEFAULT_FORMAT, mindspore::Format::NCDHW)) {
    for (size_t i = 0; i < len; ++i) {
      dimA[i] = LongToInt(shape[i]);
    }
  } else if (format == mindspore::Format::NHWC) {
    dimA[0] = LongToInt(shape[0]);
    dimA[kShapeIndex1st] = LongToInt(shape[kShapeIndex3rd]);
    dimA[kShapeIndex2nd] = LongToInt(shape[kShapeIndex1st]);
    dimA[kShapeIndex3rd] = LongToInt(shape[kShapeIndex2nd]);
  } else {
    MS_LOG(ERROR) << "Unsupported data format " << mindspore::FormatEnumToString(format);
  }
}

void SetStrideA(const ShapeVector &shape, int *strideA, size_t len, const mindspore::Format &format) {
  if (shape.size() != len) {
    MS_EXCEPTION(ValueError) << "Invalid size of input shape " << shape.size() << "-D with strideA " << len << "-D.";
  }
  if (Anyone(format, mindspore::Format::NCHW, mindspore::Format::DEFAULT_FORMAT, mindspore::Format::NCDHW)) {
    for (size_t i = 0; i < len; ++i) {
      strideA[i] = LongToInt(accumulate(shape.begin() + i + 1, shape.end(), 1, std::multiplies<int64_t>()));
    }
  } else if (format == mindspore::Format::NHWC) {
    strideA[0] = LongToInt(shape[kShapeIndex1st] * shape[kShapeIndex2nd] * shape[kShapeIndex3rd]);
    strideA[1] = 1;
    strideA[kShapeIndex2nd] = LongToInt(shape[kShapeIndex2nd] * shape[kShapeIndex3rd]);
    strideA[kShapeIndex3rd] = LongToInt(shape[kShapeIndex3rd]);
  } else {
    MS_LOG(ERROR) << "Unsupported data format " << mindspore::FormatEnumToString(format);
  }
}

void SetNCHW(const ShapeVector &shape, int *n, int *c, int *h, int *w, const mindspore::Format &format) {
  if (Anyone(format, mindspore::Format::NCHW, mindspore::Format::DEFAULT_FORMAT)) {
    *n = LongToInt(shape[0]);
    *c = LongToInt(shape[kShapeIndex1st]);
    *h = LongToInt(shape[kShapeIndex2nd]);
    *w = LongToInt(shape[kShapeIndex3rd]);
  } else if (format == mindspore::Format::NHWC) {
    *n = LongToInt(shape[0]);
    *c = LongToInt(shape[kShapeIndex3rd]);
    *h = LongToInt(shape[kShapeIndex1st]);
    *w = LongToInt(shape[kShapeIndex2nd]);
  } else {
    MS_LOG(ERROR) << "Unsupported data format " << mindspore::FormatEnumToString(format);
  }
}

void SetNCDHW(const ShapeVector &shape, int *n, int *c, int *d, int *h, int *w, const mindspore::Format &format) {
  if (Anyone(format, mindspore::Format::NCDHW, mindspore::Format::DEFAULT_FORMAT)) {
    *n = LongToInt(shape[0]);
    *c = LongToInt(shape[kShapeIndex1st]);
    *d = LongToInt(shape[kShapeIndex2nd]);
    *h = LongToInt(shape[kShapeIndex3rd]);
    *w = LongToInt(shape[kShapeIndex4th]);
  } else if (format == mindspore::Format::NDHWC) {
    *n = LongToInt(shape[0]);
    *c = LongToInt(shape[kShapeIndex4th]);
    *d = LongToInt(shape[kShapeIndex1st]);
    *h = LongToInt(shape[kShapeIndex2nd]);
    *w = LongToInt(shape[kShapeIndex3rd]);
  } else {
    MS_LOG(ERROR) << "Unsupported data format " << mindspore::FormatEnumToString(format);
  }
}
///////////////////////////////////////

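// cudnnOpTensor cannot broadcast both operands, so at least one of A and B must already match Out.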
bool CheckBroadcast4TensorOp(const std::vector<int> &A, const std::vector<int> &B, const std::vector<int> &Out) {
  if (A != Out && B != Out) {
    MS_LOG(ERROR) << "Double-sided broadcast is not supported by cudnnOpTensor:\n"
                     "each dimension of inputA must match the corresponding dimension of the destination tensor "
                     "outC, and each dimension of inputB must match the corresponding dimension of outC or be "
                     "equal to 1.";
    return false;
  }
  return true;
}

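// Verifies that the element count of each shape stays below SHAPE_SIZE_LIMIT (2 giga-elements).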
bool CheckTensorSize(const std::initializer_list<ShapeVector> &shapes) {
  for (auto shape : shapes) {
    int64_t total_size = std::accumulate(shape.begin(), shape.end(), int64_t(1), std::multiplies<int64_t>());
    if (total_size >= SHAPE_SIZE_LIMIT) {
      MS_LOG(ERROR) << "The tensor has " << total_size << " elements (shape " << shape
                    << "), which exceeds the limit of 2 giga-elements.";
      return false;
    }
  }
  return true;
}

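// Builds packed (contiguous) strides for the shape and applies them with cudnnSetTensorNdDescriptor;
// shapes with fewer than 3 dimensions are rejected.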
bool CudnnSetTensorNdDescriptor(const ShapeVector &shape, cudnnTensorDescriptor_t descriptor, cudnnDataType_t data_type,
                                const std::string &node_name) {
  if (shape.size() < 3) {
    MS_LOG(ERROR) << "cudnnSetTensorNdDescriptor does not support " << shape.size() << "-D shapes.";
    return false;
  }
  const int nbDims = shape.size();
  std::unique_ptr<int[]> dim = std::make_unique<int[]>(nbDims);
  std::unique_ptr<int[]> stride = std::make_unique<int[]>(nbDims);

  for (int i = 0; i < nbDims; i++) {
    dim[i] = LongToInt(shape[i]);
    stride[i] = 1;
  }

  for (int i = nbDims - 2; i >= 0; i--) {
    stride[i] = stride[i + 1] * LongToInt(shape[i + 1]);
  }

  cudnnStatus_t status = cudnnSetTensorNdDescriptor(descriptor, data_type, nbDims, dim.get(), stride.get());
  if (status != CUDNN_STATUS_SUCCESS) {
    MS_LOG(ERROR) << "cuDNN Error: cudnnSetTensorNdDescriptor failed | Error Number: " << status << " "
                  << cudnnGetErrorString(status);
    return false;
  }
  return true;
}

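// Translate a MindSpore type name into the corresponding cuDNN / CUDA data type enum.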
bool GetCudnnDataType(const std::string &Type, cudnnDataType_t *out_type) {
  auto type = kCudnnDtypeMap.find(Type);
  if (type == kCudnnDtypeMap.end()) {
    MS_LOG(ERROR) << Type << " is not supported.";
    return false;
  }
  *out_type = type->second;
  return true;
}

bool GetCudaDataType(const std::string &Type, cudaDataType_t *out_type) {
  auto type = kCudaDtypeMap.find(Type);
  if (type == kCudaDtypeMap.end()) {
    MS_LOG(ERROR) << Type << " is not supported.";
    return false;
  }
  *out_type = type->second;
  return true;
}

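// Returns true when the two shapes have the same rank and identical dimensions.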
bool ShapeEqual(const ShapeVector &s1, const ShapeVector &s2) {
  return std::equal(s1.begin(), s1.end(), s2.begin(), s2.end());
}
}  // namespace kernel
}  // namespace mindspore