/**
 * Copyright 2019-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
#include "plugin/device/gpu/kernel/gpu_kernel_factory.h"

#include <utility>

#include "utils/ms_utils.h"
#include "include/backend/kernel_info.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "kernel/common_utils.h"
23
24 namespace mindspore {
25 namespace kernel {
GetInstance()26 NativeGpuKernelModFactory &NativeGpuKernelModFactory::GetInstance() {
27 static NativeGpuKernelModFactory instance;
28 return instance;
29 }
30
Register(const std::string & kernel_name,const KernelAttr & kernel_attr,NativeGpuKernelModCreater && creator)31 void NativeGpuKernelModFactory::Register(const std::string &kernel_name, const KernelAttr &kernel_attr,
32 NativeGpuKernelModCreater &&creator) {
33 map_kernel_name_to_creater_[kernel_name].emplace_back(kernel_attr, creator);
34 }
35
CheckIOParam(const std::string & kernel_name,const KernelBuildInfo * kernel_info,std::vector<std::pair<KernelAttr,NativeGpuKernelModCreater>> * iter_second,size_t attr_index)36 bool NativeGpuKernelModFactory::CheckIOParam(const std::string &kernel_name, const KernelBuildInfo *kernel_info,
37 std::vector<std::pair<KernelAttr, NativeGpuKernelModCreater>> *iter_second,
38 size_t attr_index) {
39 if (kernel_info->GetInputNum() != iter_second->at(attr_index).first.GetInputSize()) {
40 if (!iter_second->at(attr_index).first.GetAllSame()) {
41 return false;
42 }
43 }
44 if (kernel_info->GetOutputNum() != iter_second->at(attr_index).first.GetOutputSize()) {
45 if (!iter_second->at(attr_index).first.GetAllSame()) {
46 return false;
47 }
48 }
49 return true;
50 }
51
SupportedTypeList(const std::string & kernel_name)52 std::string NativeGpuKernelModFactory::SupportedTypeList(const std::string &kernel_name) {
53 std::string type_lists = "";
54 auto iter = map_kernel_name_to_creater_.find(kernel_name);
55 if (map_kernel_name_to_creater_.end() == iter) {
56 return type_lists;
57 }
58 for (size_t attr_index = 0; attr_index < (iter->second).size(); ++attr_index) {
59 std::string type_list = "input[";
60 auto attr = (iter->second)[attr_index].first;
61 for (size_t input_index = 0; input_index < attr.GetInputSize(); ++input_index) {
62 type_list = type_list + TypeIdToString(attr.GetInputAttr(input_index).dtype) +
63 ((input_index == (attr.GetInputSize() - 1)) ? "" : " ");
64 }
65 type_list = type_list + "], output[";
66 for (size_t input_index = 0; input_index < attr.GetOutputSize(); ++input_index) {
67 type_list = type_list + TypeIdToString(attr.GetOutputAttr(input_index).dtype) +
68 ((input_index == (attr.GetOutputSize() - 1)) ? "" : " ");
69 }
70 type_lists = type_lists + type_list + "]; ";
71 }
72 return type_lists;
73 }
74
GetGpuSupportedList(const std::string & kernel_name)75 std::vector<KernelAttr> NativeGpuKernelModFactory::GetGpuSupportedList(const std::string &kernel_name) {
76 if (kernel::Factory<kernel::NativeGpuKernelMod>::Instance().IsRegistered(kernel_name)) {
77 return kernel::NativeGpuKernelMod::GetGpuSupportedList(kernel_name);
78 } else {
79 std::vector<KernelAttr> kernel_attr_list;
80 auto iter = map_kernel_name_to_creater_.find(kernel_name);
81 if (map_kernel_name_to_creater_.end() == iter) {
82 return kernel_attr_list;
83 }
84
85 for (size_t attr_index = 0; attr_index < (iter->second).size(); ++attr_index) {
86 auto attr = (iter->second)[attr_index].first;
87 // Skip the invalid attr.
88 if (attr.GetInputSize() > 0 || attr.GetOutputSize() > 0) {
89 kernel_attr_list.push_back(attr);
90 }
91 }
92
93 return kernel_attr_list;
94 }
95 }
96
IsRegistered(const std::string & kernel_name)97 bool NativeGpuKernelModFactory::IsRegistered(const std::string &kernel_name) {
98 // New kernel mod registered.
99 if (kernel::Factory<kernel::NativeGpuKernelMod>::Instance().IsRegistered(kernel_name)) {
100 return true;
101 }
102
103 // Old kernel mod registered.
104 if (map_kernel_name_to_creater_.find(kernel_name) != map_kernel_name_to_creater_.end()) {
105 return true;
106 }
107
108 return false;
109 }
110
ReducePrecision(const std::string & kernel_name,std::shared_ptr<mindspore::kernel::KernelBuildInfo::KernelBuildInfoBuilder> builder)111 bool NativeGpuKernelModFactory::ReducePrecision(
112 const std::string &kernel_name, std::shared_ptr<mindspore::kernel::KernelBuildInfo::KernelBuildInfoBuilder> builder) {
113 MS_EXCEPTION_IF_NULL(builder);
114 auto kernel_info = builder->Build();
115 MS_EXCEPTION_IF_NULL(kernel_info);
116 auto iter = map_kernel_name_to_creater_.find(kernel_name);
117 if (map_kernel_name_to_creater_.end() == iter) {
118 MS_LOG(INFO) << "Not registered GPU kernel: op[" << kernel_name << "]!";
119 return false;
120 }
121 reduce_flag_.first.clear();
122 for (size_t attr_index = 0; attr_index < (iter->second).size(); ++attr_index) {
123 auto attr_size = (&(iter->second))->at(attr_index).first.GetInputSize();
124 for (size_t input_index = 0; input_index < kernel_info->GetInputNum(); input_index++) {
125 if (kernel_info->GetInputDeviceType(input_index) == kNumberTypeInt64 &&
126 (iter->second)[attr_index].first.GetInputAttr(input_index % attr_size).dtype == kNumberTypeInt32) {
127 builder->SetInputDeviceType(kNumberTypeInt32, input_index);
128 reduce_flag_.first.push_back(input_index);
129 MS_LOG(INFO) << "Kernel [" << kernel_name << "] does not support int64, cast input " << input_index
130 << " to int32.";
131 }
132 }
133 for (size_t output_index = 0; output_index < kernel_info->GetOutputNum(); output_index++) {
134 if (kernel_info->GetOutputDeviceType(output_index) == kNumberTypeInt64 &&
135 (iter->second)[attr_index].first.GetOutputAttr(output_index % attr_size).dtype == kNumberTypeInt32) {
136 builder->SetOutputDeviceType(kNumberTypeInt32, output_index);
137 MS_LOG(INFO) << "Kernel [" << kernel_name << "] does not support int64, cast output " << output_index
138 << " to int32.";
139 }
140 }
141 }
142 return NativeGpuKernelModFactory::SearchRegistered(kernel_name, builder->Build());
143 }
144
SetRefMapToKernelInfo(const std::string & kernel_name,size_t index,device::KernelInfo * kernel_info)145 void NativeGpuKernelModFactory::SetRefMapToKernelInfo(const std::string &kernel_name, size_t index,
146 device::KernelInfo *kernel_info) {
147 MS_ERROR_IF_NULL_WO_RET_VAL(kernel_info);
148
149 auto iter = map_kernel_name_to_creater_.find(kernel_name);
150 if (map_kernel_name_to_creater_.end() == iter) {
151 return;
152 }
153
154 const auto &kernel_attr = (iter->second)[index].first;
155 if (!kernel_attr.GetOutInRefMap().empty()) {
156 kernel_info->set_ref_map(kernel_attr.GetAllOutInRef(), kernel_attr.GetOutInRefMap());
157 }
158 }
159
CheckSM(const KernelBuildInfo * kernel_info,const size_t & input_index)160 void NativeGpuKernelModFactory::CheckSM(const KernelBuildInfo *kernel_info, const size_t &input_index) {
161 const int major_sm = GET_MAJOR_SM;
162 const bool check_sm = mindspore::device::gpu::CudaCommon::GetInstance().check_sm();
163 if (check_sm && major_sm < RECOMMEND_SM && kernel_info->GetInputDeviceType(input_index) == kNumberTypeFloat16) {
164 if (major_sm < MINIUM_SM) {
165 MS_LOG(EXCEPTION) << "Half precision ops must be used on Devices which computing capacity is >= " << MINIUM_SM
166 << ", but the current device's computing capacity is " << major_sm;
167 }
168 MS_LOG(WARNING) << "It is recommended to use devices with a computing capacity >= " << RECOMMEND_SM
169 << ", but the current device's computing capacity is " << major_sm << ". "
170 << "In this case, the computation may not be accelerated. Architectures with TensorCores can be "
171 "used to speed up half precision operations, such as Volta and Ampere.";
172 mindspore::device::gpu::CudaCommon::GetInstance().set_check_sm(false);
173 }
174 }
175
GpuKernelAttrCheck(const std::string & kernel_name,const KernelBuildInfo * kernel_info)176 std::pair<bool, size_t> NativeGpuKernelModFactory::GpuKernelAttrCheck(const std::string &kernel_name,
177 const KernelBuildInfo *kernel_info) {
178 auto iter = map_kernel_name_to_creater_.find(kernel_name);
179 if (map_kernel_name_to_creater_.end() == iter) {
180 MS_LOG(INFO) << "Not registered GPU kernel: op[" << kernel_name << "]!";
181 return std::make_pair(false, 0);
182 }
183 if (((iter->second).size() == 1 && (iter->second)[0].first.GetInputSize() == 0) ||
184 (iter->second)[0].first.GetSkipCheck()) {
185 return std::make_pair(true, 0);
186 }
187
188 for (size_t attr_index = 0; attr_index < (iter->second).size(); ++attr_index) {
189 if (!CheckIOParam(kernel_name, kernel_info, &(iter->second), attr_index)) {
190 continue;
191 }
192 bool flag = true;
193 auto cur_kernel_attr = (iter->second)[attr_index].first;
194 auto attr_size = cur_kernel_attr.GetInputSize();
195 auto input_size = kernel_info->GetInputNum();
196 if (kernel_info->GetInputNum() > 0) {
197 MS_EXCEPTION_IF_ZERO("attr size", attr_size);
198 }
199 std::vector<mindspore::TypeId> input_types;
200 (void)std::transform(kernel_info->GetAllInputDeviceTypes().begin(), kernel_info->GetAllInputDeviceTypes().end(),
201 std::back_inserter(input_types), [](const TypeId &type) { return type; });
202 flag = !CheckAttrForAllSameInput(input_size, input_types, cur_kernel_attr);
203 if (!flag) {
204 continue;
205 }
206 attr_size = (&(iter->second))->at(attr_index).first.GetOutputSize();
207 if (kernel_info->GetOutputNum() > 0) {
208 MS_EXCEPTION_IF_ZERO("attr size", attr_size);
209 }
210 // data type matching check of all output parameters of kernel
211 for (size_t output_index = 0; output_index < kernel_info->GetOutputNum(); output_index++) {
212 if (kernel_info->GetOutputDeviceType(output_index) !=
213 (iter->second)[attr_index].first.GetOutputAttr(output_index % attr_size).dtype) {
214 flag = false;
215 break;
216 }
217 }
218 // finish data type matching check and return a pair maintain the whether matching is success,
219 // if first is true, second is index of matching KernelAttr and creator pair in vector;
220 if (flag) {
221 size_t match_index = attr_index;
222 return std::make_pair(true, match_index);
223 }
224 }
225 return std::make_pair(false, 0);
226 }
227
Create(const std::string & kernel_name,const CNodePtr & apply_kernel)228 NativeGpuKernelMod *NativeGpuKernelModFactory::Create(const std::string &kernel_name, const CNodePtr &apply_kernel) {
229 auto kernel_info = dynamic_cast<device::KernelInfo *>(apply_kernel->kernel_info());
230 MS_EXCEPTION_IF_NULL(kernel_info);
231 const KernelBuildInfo *kernel_build_Info = kernel_info->select_kernel_build_info();
232 MS_EXCEPTION_IF_NULL(kernel_build_Info);
233 std::pair<bool, size_t> ret_pair = GpuKernelAttrCheck(kernel_name, kernel_build_Info);
234 if (ret_pair.first) {
235 SetRefMapToKernelInfo(kernel_name, ret_pair.second, kernel_info);
236 return (map_kernel_name_to_creater_.find(kernel_name)->second)[ret_pair.second].second();
237 }
238 return nullptr;
239 }
240
SearchRegistered(const std::string & kernel_name,const KernelBuildInfoPtr & kernel_build_info)241 bool NativeGpuKernelModFactory::SearchRegistered(const std::string &kernel_name,
242 const KernelBuildInfoPtr &kernel_build_info) {
243 std::pair<bool, size_t> ret_pair = GpuKernelAttrCheck(kernel_name, kernel_build_info.get());
244 return ret_pair.first;
245 }
246 } // namespace kernel
247 } // namespace mindspore
248