• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2019-2022 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
#include "plugin/device/gpu/kernel/gpu_kernel_factory.h"

#include <utility>

#include "utils/ms_utils.h"
#include "include/backend/kernel_info.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "kernel/common_utils.h"
23 
24 namespace mindspore {
25 namespace kernel {
GetInstance()26 NativeGpuKernelModFactory &NativeGpuKernelModFactory::GetInstance() {
27   static NativeGpuKernelModFactory instance;
28   return instance;
29 }
30 
Register(const std::string & kernel_name,const KernelAttr & kernel_attr,NativeGpuKernelModCreater && creator)31 void NativeGpuKernelModFactory::Register(const std::string &kernel_name, const KernelAttr &kernel_attr,
32                                          NativeGpuKernelModCreater &&creator) {
33   map_kernel_name_to_creater_[kernel_name].emplace_back(kernel_attr, creator);
34 }
35 
CheckIOParam(const std::string & kernel_name,const KernelBuildInfo * kernel_info,std::vector<std::pair<KernelAttr,NativeGpuKernelModCreater>> * iter_second,size_t attr_index)36 bool NativeGpuKernelModFactory::CheckIOParam(const std::string &kernel_name, const KernelBuildInfo *kernel_info,
37                                              std::vector<std::pair<KernelAttr, NativeGpuKernelModCreater>> *iter_second,
38                                              size_t attr_index) {
39   if (kernel_info->GetInputNum() != iter_second->at(attr_index).first.GetInputSize()) {
40     if (!iter_second->at(attr_index).first.GetAllSame()) {
41       return false;
42     }
43   }
44   if (kernel_info->GetOutputNum() != iter_second->at(attr_index).first.GetOutputSize()) {
45     if (!iter_second->at(attr_index).first.GetAllSame()) {
46       return false;
47     }
48   }
49   return true;
50 }
51 
SupportedTypeList(const std::string & kernel_name)52 std::string NativeGpuKernelModFactory::SupportedTypeList(const std::string &kernel_name) {
53   std::string type_lists = "";
54   auto iter = map_kernel_name_to_creater_.find(kernel_name);
55   if (map_kernel_name_to_creater_.end() == iter) {
56     return type_lists;
57   }
58   for (size_t attr_index = 0; attr_index < (iter->second).size(); ++attr_index) {
59     std::string type_list = "input[";
60     auto attr = (iter->second)[attr_index].first;
61     for (size_t input_index = 0; input_index < attr.GetInputSize(); ++input_index) {
62       type_list = type_list + TypeIdToString(attr.GetInputAttr(input_index).dtype) +
63                   ((input_index == (attr.GetInputSize() - 1)) ? "" : " ");
64     }
65     type_list = type_list + "], output[";
66     for (size_t input_index = 0; input_index < attr.GetOutputSize(); ++input_index) {
67       type_list = type_list + TypeIdToString(attr.GetOutputAttr(input_index).dtype) +
68                   ((input_index == (attr.GetOutputSize() - 1)) ? "" : " ");
69     }
70     type_lists = type_lists + type_list + "]; ";
71   }
72   return type_lists;
73 }
74 
GetGpuSupportedList(const std::string & kernel_name)75 std::vector<KernelAttr> NativeGpuKernelModFactory::GetGpuSupportedList(const std::string &kernel_name) {
76   if (kernel::Factory<kernel::NativeGpuKernelMod>::Instance().IsRegistered(kernel_name)) {
77     return kernel::NativeGpuKernelMod::GetGpuSupportedList(kernel_name);
78   } else {
79     std::vector<KernelAttr> kernel_attr_list;
80     auto iter = map_kernel_name_to_creater_.find(kernel_name);
81     if (map_kernel_name_to_creater_.end() == iter) {
82       return kernel_attr_list;
83     }
84 
85     for (size_t attr_index = 0; attr_index < (iter->second).size(); ++attr_index) {
86       auto attr = (iter->second)[attr_index].first;
87       // Skip the invalid attr.
88       if (attr.GetInputSize() > 0 || attr.GetOutputSize() > 0) {
89         kernel_attr_list.push_back(attr);
90       }
91     }
92 
93     return kernel_attr_list;
94   }
95 }
96 
IsRegistered(const std::string & kernel_name)97 bool NativeGpuKernelModFactory::IsRegistered(const std::string &kernel_name) {
98   // New kernel mod registered.
99   if (kernel::Factory<kernel::NativeGpuKernelMod>::Instance().IsRegistered(kernel_name)) {
100     return true;
101   }
102 
103   // Old kernel mod registered.
104   if (map_kernel_name_to_creater_.find(kernel_name) != map_kernel_name_to_creater_.end()) {
105     return true;
106   }
107 
108   return false;
109 }
110 
ReducePrecision(const std::string & kernel_name,std::shared_ptr<mindspore::kernel::KernelBuildInfo::KernelBuildInfoBuilder> builder)111 bool NativeGpuKernelModFactory::ReducePrecision(
112   const std::string &kernel_name, std::shared_ptr<mindspore::kernel::KernelBuildInfo::KernelBuildInfoBuilder> builder) {
113   MS_EXCEPTION_IF_NULL(builder);
114   auto kernel_info = builder->Build();
115   MS_EXCEPTION_IF_NULL(kernel_info);
116   auto iter = map_kernel_name_to_creater_.find(kernel_name);
117   if (map_kernel_name_to_creater_.end() == iter) {
118     MS_LOG(INFO) << "Not registered GPU kernel: op[" << kernel_name << "]!";
119     return false;
120   }
121   reduce_flag_.first.clear();
122   for (size_t attr_index = 0; attr_index < (iter->second).size(); ++attr_index) {
123     auto attr_size = (&(iter->second))->at(attr_index).first.GetInputSize();
124     for (size_t input_index = 0; input_index < kernel_info->GetInputNum(); input_index++) {
125       if (kernel_info->GetInputDeviceType(input_index) == kNumberTypeInt64 &&
126           (iter->second)[attr_index].first.GetInputAttr(input_index % attr_size).dtype == kNumberTypeInt32) {
127         builder->SetInputDeviceType(kNumberTypeInt32, input_index);
128         reduce_flag_.first.push_back(input_index);
129         MS_LOG(INFO) << "Kernel [" << kernel_name << "] does not support int64, cast input " << input_index
130                      << " to int32.";
131       }
132     }
133     for (size_t output_index = 0; output_index < kernel_info->GetOutputNum(); output_index++) {
134       if (kernel_info->GetOutputDeviceType(output_index) == kNumberTypeInt64 &&
135           (iter->second)[attr_index].first.GetOutputAttr(output_index % attr_size).dtype == kNumberTypeInt32) {
136         builder->SetOutputDeviceType(kNumberTypeInt32, output_index);
137         MS_LOG(INFO) << "Kernel [" << kernel_name << "] does not support int64, cast output " << output_index
138                      << " to int32.";
139       }
140     }
141   }
142   return NativeGpuKernelModFactory::SearchRegistered(kernel_name, builder->Build());
143 }
144 
SetRefMapToKernelInfo(const std::string & kernel_name,size_t index,device::KernelInfo * kernel_info)145 void NativeGpuKernelModFactory::SetRefMapToKernelInfo(const std::string &kernel_name, size_t index,
146                                                       device::KernelInfo *kernel_info) {
147   MS_ERROR_IF_NULL_WO_RET_VAL(kernel_info);
148 
149   auto iter = map_kernel_name_to_creater_.find(kernel_name);
150   if (map_kernel_name_to_creater_.end() == iter) {
151     return;
152   }
153 
154   const auto &kernel_attr = (iter->second)[index].first;
155   if (!kernel_attr.GetOutInRefMap().empty()) {
156     kernel_info->set_ref_map(kernel_attr.GetAllOutInRef(), kernel_attr.GetOutInRefMap());
157   }
158 }
159 
CheckSM(const KernelBuildInfo * kernel_info,const size_t & input_index)160 void NativeGpuKernelModFactory::CheckSM(const KernelBuildInfo *kernel_info, const size_t &input_index) {
161   const int major_sm = GET_MAJOR_SM;
162   const bool check_sm = mindspore::device::gpu::CudaCommon::GetInstance().check_sm();
163   if (check_sm && major_sm < RECOMMEND_SM && kernel_info->GetInputDeviceType(input_index) == kNumberTypeFloat16) {
164     if (major_sm < MINIUM_SM) {
165       MS_LOG(EXCEPTION) << "Half precision ops must be used on Devices which computing capacity is >= " << MINIUM_SM
166                         << ", but the current device's computing capacity is " << major_sm;
167     }
168     MS_LOG(WARNING) << "It is recommended to use devices with a computing capacity >= " << RECOMMEND_SM
169                     << ", but the current device's computing capacity is " << major_sm << ". "
170                     << "In this case, the computation may not be accelerated. Architectures with TensorCores can be "
171                        "used to speed up half precision operations, such as Volta and Ampere.";
172     mindspore::device::gpu::CudaCommon::GetInstance().set_check_sm(false);
173   }
174 }
175 
GpuKernelAttrCheck(const std::string & kernel_name,const KernelBuildInfo * kernel_info)176 std::pair<bool, size_t> NativeGpuKernelModFactory::GpuKernelAttrCheck(const std::string &kernel_name,
177                                                                       const KernelBuildInfo *kernel_info) {
178   auto iter = map_kernel_name_to_creater_.find(kernel_name);
179   if (map_kernel_name_to_creater_.end() == iter) {
180     MS_LOG(INFO) << "Not registered GPU kernel: op[" << kernel_name << "]!";
181     return std::make_pair(false, 0);
182   }
183   if (((iter->second).size() == 1 && (iter->second)[0].first.GetInputSize() == 0) ||
184       (iter->second)[0].first.GetSkipCheck()) {
185     return std::make_pair(true, 0);
186   }
187 
188   for (size_t attr_index = 0; attr_index < (iter->second).size(); ++attr_index) {
189     if (!CheckIOParam(kernel_name, kernel_info, &(iter->second), attr_index)) {
190       continue;
191     }
192     bool flag = true;
193     auto cur_kernel_attr = (iter->second)[attr_index].first;
194     auto attr_size = cur_kernel_attr.GetInputSize();
195     auto input_size = kernel_info->GetInputNum();
196     if (kernel_info->GetInputNum() > 0) {
197       MS_EXCEPTION_IF_ZERO("attr size", attr_size);
198     }
199     std::vector<mindspore::TypeId> input_types;
200     (void)std::transform(kernel_info->GetAllInputDeviceTypes().begin(), kernel_info->GetAllInputDeviceTypes().end(),
201                          std::back_inserter(input_types), [](const TypeId &type) { return type; });
202     flag = !CheckAttrForAllSameInput(input_size, input_types, cur_kernel_attr);
203     if (!flag) {
204       continue;
205     }
206     attr_size = (&(iter->second))->at(attr_index).first.GetOutputSize();
207     if (kernel_info->GetOutputNum() > 0) {
208       MS_EXCEPTION_IF_ZERO("attr size", attr_size);
209     }
210     // data type matching check of all output parameters of kernel
211     for (size_t output_index = 0; output_index < kernel_info->GetOutputNum(); output_index++) {
212       if (kernel_info->GetOutputDeviceType(output_index) !=
213           (iter->second)[attr_index].first.GetOutputAttr(output_index % attr_size).dtype) {
214         flag = false;
215         break;
216       }
217     }
218     // finish data type matching check and return a pair maintain the whether matching is success,
219     // if first is true, second is index of matching KernelAttr and creator pair in vector;
220     if (flag) {
221       size_t match_index = attr_index;
222       return std::make_pair(true, match_index);
223     }
224   }
225   return std::make_pair(false, 0);
226 }
227 
Create(const std::string & kernel_name,const CNodePtr & apply_kernel)228 NativeGpuKernelMod *NativeGpuKernelModFactory::Create(const std::string &kernel_name, const CNodePtr &apply_kernel) {
229   auto kernel_info = dynamic_cast<device::KernelInfo *>(apply_kernel->kernel_info());
230   MS_EXCEPTION_IF_NULL(kernel_info);
231   const KernelBuildInfo *kernel_build_Info = kernel_info->select_kernel_build_info();
232   MS_EXCEPTION_IF_NULL(kernel_build_Info);
233   std::pair<bool, size_t> ret_pair = GpuKernelAttrCheck(kernel_name, kernel_build_Info);
234   if (ret_pair.first) {
235     SetRefMapToKernelInfo(kernel_name, ret_pair.second, kernel_info);
236     return (map_kernel_name_to_creater_.find(kernel_name)->second)[ret_pair.second].second();
237   }
238   return nullptr;
239 }
240 
SearchRegistered(const std::string & kernel_name,const KernelBuildInfoPtr & kernel_build_info)241 bool NativeGpuKernelModFactory::SearchRegistered(const std::string &kernel_name,
242                                                  const KernelBuildInfoPtr &kernel_build_info) {
243   std::pair<bool, size_t> ret_pair = GpuKernelAttrCheck(kernel_name, kernel_build_info.get());
244   return ret_pair.first;
245 }
246 }  // namespace kernel
247 }  // namespace mindspore
248