1 /**
2 * Copyright 2019 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "backend/kernel_compiler/tbe/tbe_kernel_build.h"
18 #include <memory>
19 #include <map>
20 #include <list>
21 #include <algorithm>
22 #include "base/core_ops.h"
23 #include "frontend/parallel/ops_info/ops_utils.h"
24 #include "backend/session/anf_runtime_algorithm.h"
25 #include "backend/kernel_compiler/tbe/tbe_adapter.h"
26 #include "backend/kernel_compiler/tbe/tbe_convert_utils.h"
27 #include "backend/kernel_compiler/tbe/tbe_dynaminc_shape_util.h"
28 #include "backend/kernel_compiler/tbe/tbe_utils.h"
29 #include "utils/ms_context.h"
30 #include "runtime/dev.h"
31 #include "utils/trace_base.h"
32 #include "utils/convert_utils_base.h"
33 #include "utils/ms_utils.h"
34 #include "runtime/device/ascend/lic_manager.h"
35
36 namespace mindspore {
37 namespace kernel {
38 using mindspore::kernel::tbe::TbeAdapter;
39 using mindspore::kernel::tbe::TbeUtils;
40 namespace {
// ---- Fusion / platform-level json keys ----
constexpr auto kFusionOpList = "op_list";
constexpr auto kFusionKernelNamePrfix = "te_fusion";
constexpr auto kOptional = "optional_";
constexpr auto kOpFormat_FRACTAL_Z = "FRACTAL_Z";
constexpr auto kPlatform = "platform";
constexpr auto kPlatTBE = "TBE";
constexpr auto kGenModel = "gen_model";
constexpr auto kSingle = "single";
constexpr auto kImplPath = "impl_path";
// ---- Per-op json keys (inputs / outputs / attrs sections) ----
constexpr auto kJInputs = "inputs";
constexpr auto kJOutputs = "outputs";
constexpr auto kJAttrs = "attrs";
constexpr auto kJKernelName = "kernel_name";
constexpr auto kJFullName = "full_name";
constexpr auto kJOpInfo = "op_info";
constexpr auto kJDtype = "dtype";
constexpr auto kJtype = "type";
constexpr auto kJName = "name";
constexpr auto kJOriShape = "ori_shape";
constexpr auto kJOriFormat = "ori_format";
constexpr auto kJShape = "shape";
constexpr auto kJFormat = "format";
constexpr auto kJValid = "valid";
constexpr auto kJParamType = "param_type";
constexpr auto kParamDynamic = "dynamic";
constexpr auto kParamRequred = "required";
constexpr auto kJDataType = "data_type";
constexpr auto kJOutputIndex = "output_index";
constexpr auto kJOutputDataDesc = "output_data_desc";
constexpr auto kJOutputDesc = "output_desc";
constexpr auto kJInputDesc = "input_desc";
constexpr auto kJRange = "range";
// ---- Attr value-type tags as declared in op registration info ----
constexpr auto kVTypeInt = "int";
constexpr auto kVTypeStr = "str";
constexpr auto kVTypeBool = "bool";
constexpr auto kVTypeFloat = "float";
constexpr auto kVTypeListInt = "listInt";
constexpr auto kVTypeInt32 = "Int32";
constexpr auto kVTypeInt64 = "Int64";
constexpr auto kVTypeListUInt64 = "listUInt64";
constexpr auto kVTypeListFloat = "listFloat";
constexpr auto kVTypeListListInt = "listListInt";
constexpr auto kJValue = "value";
constexpr auto kJDynIndex = "dyn_index";
constexpr auto kJFuncName = "func_name";
// ---- L1-fusion / buffer-fusion related keys ----
constexpr auto kJL1AddrOffset = "L1_addr_offset";
constexpr auto kJL1FusionType = "L1_fusion_type";
constexpr auto kJL1WorkspaceSize = "L1_workspace_size";
constexpr auto kJAddrType = "addr_type";
constexpr auto kJSliceOffset = "slice_offset";
constexpr auto kJSplitIndex = "split_index";
constexpr auto kJTotalShape = "total_shape";
constexpr auto kJDynamicCompileStatic = "dynamic_compile_static";
constexpr auto kJInt64Mode = "int64mode";
constexpr auto kJValidShape = "valid_shape";
constexpr auto kJModuleName = "module_name";
constexpr auto kJPattern = "pattern";
constexpr auto kJPyModulePath = "py_module_path";
constexpr auto kJAttrDesc = "attr_desc";
// ---- Soc / tuning related keys ----
constexpr auto kJSocVersion = "socVersion";
constexpr auto kAutoTilingMode = "autoTilingMode";
constexpr auto kSOC_VERSION = "SOC_VERSION";
constexpr auto kJIsDynamicShape = "is_dynamic_shape";
constexpr auto kJDynamicIndex = "dynamic_index";
constexpr auto kJSocInfo = "SocInfo";
constexpr auto kNCHWShapeSize = 4;
constexpr auto kJRlTuneSwitch = "rl_tune_switch";
constexpr auto kJRlTuneList = "rl_tune_list";
constexpr auto kJOpTuneSwitch = "op_tune_switch";
constexpr auto kJOpTuneList = "op_tune_list";
constexpr auto kJPassList = "pass_list";

// Default install path of the built-in TBE python operator implementations.
const auto kPyPath = "/usr/local/Ascend/opp/op_impl/built-in/ai_core/tbe";
114
IsNeedChangeDefaultFormat(const CNodePtr & cnode)115 bool IsNeedChangeDefaultFormat(const CNodePtr &cnode) {
116 MS_EXCEPTION_IF_NULL(cnode);
117 if (AnfAlgo::HasNodeAttr(kAttrFormat, cnode->cast<CNodePtr>())) {
118 auto attr = AnfAlgo::GetNodeAttr<std::string>(cnode, kAttrFormat);
119 return attr == kOpFormat_NCDHW;
120 }
121 return false;
122 }
123
SetLicInfo(nlohmann::json * op_info_json)124 void SetLicInfo(nlohmann::json *op_info_json) {
125 MS_EXCEPTION_IF_NULL(op_info_json);
126 (*op_info_json)[kJRlTuneSwitch] = LicManager::GetInstance().GetRlTuneSwitch();
127 (*op_info_json)[kJRlTuneList] = LicManager::GetInstance().GetRlTuneList();
128 (*op_info_json)[kJOpTuneSwitch] = LicManager::GetInstance().GetOpTuneSwitch();
129 (*op_info_json)[kJOpTuneList] = LicManager::GetInstance().GetOpTuneList();
130 (*op_info_json)[kJPassList] = LicManager::GetInstance().GetPassSwitch();
131 }
132
GetOutputShapeForTbeBuild(const AnfNodePtr & anf_node,size_t real_index)133 std::vector<int64_t> GetOutputShapeForTbeBuild(const AnfNodePtr &anf_node, size_t real_index) {
134 MS_EXCEPTION_IF_NULL(anf_node);
135 std::vector<int64_t> shape;
136 auto output_shape = AnfAlgo::GetOutputDetailShape(anf_node, real_index);
137 MS_EXCEPTION_IF_NULL(output_shape);
138 if (output_shape->isa<abstract::Shape>()) {
139 auto shape_ptr = output_shape->cast<abstract::ShapePtr>();
140 MS_EXCEPTION_IF_NULL(shape_ptr);
141 shape = shape_ptr->shape();
142 }
143 if (shape.empty()) {
144 shape.emplace_back(1);
145 }
146 return shape;
147 }
148
GetOutputDeviceShapeForTbeBuild(const kCreaterType creater_type,const AnfNodePtr & anf_node,const size_t real_index)149 std::vector<int64_t> GetOutputDeviceShapeForTbeBuild(const kCreaterType creater_type, const AnfNodePtr &anf_node,
150 const size_t real_index) {
151 MS_EXCEPTION_IF_NULL(anf_node);
152 std::vector<int64_t> shape;
153 if (creater_type == OP_SELECT_FORMAT || creater_type == CHECK_SUPPORTED) {
154 shape = GetOutputShapeForTbeBuild(anf_node, real_index);
155 } else {
156 auto format = AnfAlgo::GetOutputFormat(anf_node, real_index);
157 shape = AnfAlgo::GetOutputDeviceShapeForTbeBuild(anf_node, real_index, format);
158 }
159 if (shape.empty()) {
160 shape.emplace_back(1);
161 }
162 return shape;
163 }
164
GetInputShapeForTbeBuild(const AnfNodePtr & anf_node,size_t real_index)165 std::vector<int64_t> GetInputShapeForTbeBuild(const AnfNodePtr &anf_node, size_t real_index) {
166 MS_EXCEPTION_IF_NULL(anf_node);
167 session::KernelWithIndex kernel_with_index = AnfAlgo::GetPrevNodeOutput(anf_node, real_index);
168 return GetOutputShapeForTbeBuild(kernel_with_index.first, kernel_with_index.second);
169 }
170
GetInputDeviceShapeForTbeBuild(const kCreaterType creater_type,const AnfNodePtr & anf_node,const size_t real_index)171 std::vector<int64_t> GetInputDeviceShapeForTbeBuild(const kCreaterType creater_type, const AnfNodePtr &anf_node,
172 const size_t real_index) {
173 MS_EXCEPTION_IF_NULL(anf_node);
174 std::vector<int64_t> shape;
175 session::KernelWithIndex kernel_with_index = AnfAlgo::GetPrevNodeOutput(anf_node, real_index);
176 if (creater_type == OP_SELECT_FORMAT || creater_type == CHECK_SUPPORTED) {
177 shape = GetOutputShapeForTbeBuild(kernel_with_index.first, kernel_with_index.second);
178 } else {
179 auto format = AnfAlgo::GetInputFormat(anf_node, real_index);
180 shape = AnfAlgo::GetOutputDeviceShapeForTbeBuild(kernel_with_index.first, kernel_with_index.second, format);
181 }
182 if (shape.empty()) {
183 shape.emplace_back(1);
184 }
185 return shape;
186 }
187 } // namespace
// Build the complete compile json for a single TBE kernel: platform/impl info,
// op info (inputs, outputs, attrs), soc info, and a hash-based unique kernel name.
// Side effects: sets json_name_ and json_info_ members. Returns false when input
// or output json generation fails.
bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptr<mindspore::AnfNode> &anf_node,
                                                  nlohmann::json *kernel_json) {
  MS_EXCEPTION_IF_NULL(anf_node);
  MS_EXCEPTION_IF_NULL(kernel_json);
  std::string op_name = AnfAlgo::GetCNodeName(anf_node);
  auto op_info_ptr = mindspore::kernel::tbe::TbeDynamicShapeUtil::FindOp(op_name, anf_node);
  MS_EXCEPTION_IF_NULL(op_info_ptr);
  (*kernel_json)[kPlatform] = kPlatTBE;
  (*kernel_json)[kImplPath] = op_info_ptr->impl_path();
  nlohmann::json op_info_json;
  // License-controlled tuning switches are embedded into the op info.
  SetLicInfo(&op_info_json);
  op_info_json[kJIsDynamicShape] = tbe::TbeDynamicShapeUtil::GetDynamicShapeAttr(anf_node->cast<CNodePtr>());
  auto func_name = op_info_ptr->kernel_name();
  op_info_json[kJName] = func_name;
  op_info_json[kJModuleName] = std::string("impl.") + func_name;
  op_info_json[kJPyModulePath] = kPyPath;
  // generate inputs json
  nlohmann::json inputs_json;
  if (!GenTbeInputsJson(anf_node, op_info_ptr, &inputs_json)) {
    MS_LOG(ERROR) << "Anf Node [" << op_name << "] generate inputs json failed";
    return false;
  }
  op_info_json[kJInputs] = inputs_json;
  // generate outputs json
  nlohmann::json outputs_json;
  if (!GenTbeOutputsJson(anf_node, op_info_ptr, &outputs_json)) {
    MS_LOG(ERROR) << "Anf Node [" << op_name << "] generate outputs json failed";
    return false;
  }
  op_info_json[kJOutputs] = outputs_json;
  // generate attrs json
  nlohmann::json attrs_json;
  GenTbeAttrJson(anf_node, op_info_ptr, &attrs_json);
  op_info_json[kJAttrs] = attrs_json;
  auto soc_version = TbeKernelJsonCreator::GetSocVersion();
  op_info_json[kJSocVersion] = soc_version;
  if (op_info_json[kJIsDynamicShape]) {
    // Dynamic-shape ops get a process-wide monotonically increasing index.
    static int32_t dynamic_index = 0;
    op_info_json[kJDynamicIndex] = dynamic_index++;
  }
  auto context_ptr = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(context_ptr);
  auto tune_mode = context_ptr->get_param<std::string>(MS_CTX_TUNE_MODE);
  // generate soc info json
  nlohmann::json soc_info_json = TbeUtils::GenSocInfo();
  soc_info_json[kAutoTilingMode] = tune_mode;

  // The unique kernel name hashes op info + soc info as they are at this point,
  // BEFORE full_name/Type/kernel_name are added below (they must not perturb the hash).
  std::string json_str = op_info_json.dump() + soc_info_json.dump();
  size_t hash_id = std::hash<std::string>()(json_str);
  auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);

  op_info_json[kJFullName] = anf_node->fullname_with_scope();
  json_name_ = op_name + "_" + std::to_string(hash_id) + "_" + std::to_string(device_id);
  json_info_ = json_str;
  // Map the MindSpore op type to the TBE op type name where the two differ.
  auto iter = tbe::opTypeAdapter.find(op_name);
  if (iter != tbe::opTypeAdapter.end()) {
    op_name = iter->second;
  }
  op_info_json["Type"] = op_name;
  op_info_json["graph_id"] = AnfAlgo::GetGraphId(anf_node.get());
  op_info_json[kJKernelName] = json_name_;
  op_info_json[kGenModel] = kSingle;

  // create attr_desc: the values of all valid attrs, excluding the internal "isRef" flag.
  nlohmann::json attr_desc;
  for (const auto &attr : attrs_json) {
    if (attr[kJName] != "isRef" && attr[kJValid] == true) {
      attr_desc.push_back(attr[kJValue]);
    }
  }
  if (!attr_desc.empty()) {
    op_info_json[kJAttrDesc] = attr_desc;
  }

  // merge json
  soc_info_json[kJSocVersion] = soc_version;
  (*kernel_json)[kJSocInfo] = soc_info_json;
  (*kernel_json)[kJOpInfo] = op_info_json;

  MS_LOG(INFO) << "Operate type:" << creater_type_ << ", full scope name is :" << anf_node->fullname_with_scope()
               << ", json info name is : " << json_name_ << ", kernel json:" << kernel_json->dump();

  return true;
}
272
GenNoneInputDescJson(const std::shared_ptr<OpIOInfo> & input_ptr,size_t input_i,std::vector<nlohmann::json> * const input_list)273 void GenNoneInputDescJson(const std::shared_ptr<OpIOInfo> &input_ptr, size_t input_i,
274 std::vector<nlohmann::json> *const input_list) {
275 MS_EXCEPTION_IF_NULL(input_ptr);
276 MS_EXCEPTION_IF_NULL(input_list);
277 nlohmann::json input_desc_json;
278 auto in_name = input_ptr->name();
279 input_desc_json[kJName] = in_name + std::to_string(input_i);
280 input_desc_json[kJValid] = false;
281 input_list->emplace_back(input_desc_json);
282 }
283
// Build a full (valid) input descriptor json: dtype, host/device shapes, formats,
// param type and dynamic-shape range, and append it to `input_list`.
void TbeKernelJsonCreator::GenValidInputDescJson(const std::shared_ptr<AnfNode> &anf_node, size_t real_input_index,
                                                 bool value, const std::shared_ptr<OpIOInfo> &input_ptr,
                                                 const string &op_input_name, size_t input_i,
                                                 std::vector<nlohmann::json> *const input_list) {
  MS_EXCEPTION_IF_NULL(anf_node);
  MS_EXCEPTION_IF_NULL(input_ptr);
  MS_EXCEPTION_IF_NULL(input_list);
  auto def_format = kOpFormat_NCHW;
  auto dtype = GetDeviceInputType(anf_node, real_input_index);
  auto format = GetDeviceInputFormat(anf_node, real_input_index);
  auto shape = GetInputDeviceShapeForTbeBuild(creater_type_, anf_node, real_input_index);
  auto ori_shape = GetInputShapeForTbeBuild(anf_node, real_input_index);
  // 3D nodes (see IsNeedChangeDefaultFormat) use NCDHW as their default format.
  if (anf_node->isa<CNode>() && IsNeedChangeDefaultFormat(anf_node->cast<CNodePtr>())) {
    def_format = kOpFormat_NCDHW;
  }
  // If the chosen device format is not a recognized 3D format, fall back to NCDHW.
  if (def_format == kOpFormat_NCDHW && k3DFormatSet.find(format) == k3DFormatSet.end()) {
    format = kOpFormat_NCDHW;
  }
  nlohmann::json input_desc_json;
  input_desc_json[kJDtype] = dtype;
  input_desc_json[kJName] = op_input_name + std::to_string(input_i);
  input_desc_json[kJOriShape] = ori_shape;
  input_desc_json[kJOriFormat] = def_format;
  input_desc_json[kJShape] = shape;
  input_desc_json[kJFormat] = format;
  input_desc_json[kJValid] = value;
  input_desc_json[kJAddrType] = 0;
  input_desc_json[kJParamType] = input_ptr->param_type();
  input_desc_json[kJRange] = tbe::TbeDynamicShapeUtil::GetInputDynamicRange(anf_node, real_input_index, format);
  input_list->emplace_back(input_desc_json);
}
315
GenInputDescJson(const std::shared_ptr<AnfNode> & anf_node,size_t real_input_index,bool value,const std::shared_ptr<OpIOInfo> & input_ptr,const string & op_input_name,size_t input_i,std::vector<nlohmann::json> * input_list)316 bool TbeKernelJsonCreator::GenInputDescJson(const std::shared_ptr<AnfNode> &anf_node, size_t real_input_index,
317 bool value, const std::shared_ptr<OpIOInfo> &input_ptr,
318 const string &op_input_name, size_t input_i,
319 std::vector<nlohmann::json> *input_list) {
320 MS_EXCEPTION_IF_NULL(anf_node);
321 MS_EXCEPTION_IF_NULL(input_ptr);
322 MS_EXCEPTION_IF_NULL(input_list);
323 std::string op_name = AnfAlgo::GetCNodeName(anf_node);
324 if (op_name == kDynamicRNNOpName && input_ptr->name() == "seq_length") {
325 GenNoneInputDescJson(input_ptr, input_i, input_list);
326 } else if (op_name == kDynamicGRUV2OpName) {
327 auto none_index = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(anf_node, "placeholder_index");
328 auto item = find(none_index.begin(), none_index.end(), input_ptr->index());
329 if (item != none_index.end()) {
330 GenNoneInputDescJson(input_ptr, input_i, input_list);
331 } else {
332 GenValidInputDescJson(anf_node, real_input_index, value, input_ptr, op_input_name, input_i, input_list);
333 }
334 } else if (input_ptr->name() == "input_indices" && op_name == kTopKOpName) {
335 TbeAdapter::GenTopKV2IndicesTensorInfo(anf_node, real_input_index, input_list, creater_type_);
336 } else {
337 GenValidInputDescJson(anf_node, real_input_index, value, input_ptr, op_input_name, input_i, input_list);
338 }
339 return true;
340 }
341
// Generate descriptors for one registered input slot, which may cover several
// real tensors (dynamic inputs). Advances *real_input_index past the tensors
// consumed. Returns false when the node has fewer real inputs than required.
bool TbeKernelJsonCreator::GenInputList(const std::shared_ptr<AnfNode> &anf_node, size_t input_tensor_num,
                                        const std::shared_ptr<OpIOInfo> &input_ptr, size_t *real_input_index,
                                        string *op_input_name, std::vector<nlohmann::json> *input_list) {
  MS_EXCEPTION_IF_NULL(anf_node);
  MS_EXCEPTION_IF_NULL(input_ptr);
  MS_EXCEPTION_IF_NULL(real_input_index);
  MS_EXCEPTION_IF_NULL(op_input_name);
  MS_EXCEPTION_IF_NULL(input_list);
  std::string op_name = AnfAlgo::GetCNodeName(anf_node);
  auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
  size_t real_input_num = AnfAlgo::GetInputTensorNum(anf_node);
  bool value = true;
  for (size_t input_i = 0; input_i < input_tensor_num; input_i++) {
    if (*real_input_index >= real_input_num) {
      // Ran out of real inputs: optional inputs become placeholders, anything else is an error.
      if (input_ptr->param_type() == "optional") {
        *op_input_name = input_ptr->name() + "_optional_";
        nlohmann::json input_desc_json;
        input_desc_json[kJValid] = false;
        input_desc_json[kJName] = *op_input_name + std::to_string(*real_input_index);
        input_list->emplace_back(input_desc_json);
        continue;
      }
      MS_LOG(ERROR) << "Input num: " << *real_input_index << " is not match op inputs."
                    << "\n trace:" << trace::DumpSourceLines(anf_node);
      return false;
    }
    if (op_name == "BatchNorm") {
      // In training mode BatchNorm's mean/variance inputs are skipped entirely:
      // consume one real input and stop generating descriptors for this slot.
      if (input_ptr->name() == "mean" || input_ptr->name() == "variance") {
        auto attr = primitive->GetAttr("is_training");
        MS_EXCEPTION_IF_NULL(attr);
        bool is_training = GetValue<bool>(attr);
        MS_LOG(INFO) << "Op_name" << op_name << ", tensor_name " << input_ptr->name() << ", is_training "
                     << is_training;
        if (is_training) {
          (*real_input_index)++;
          break;
        }
      }
    }
    bool ret = GenInputDescJson(anf_node, *real_input_index, value, input_ptr, *op_input_name, input_i, input_list);
    // The real input is consumed even when descriptor generation failed.
    (*real_input_index)++;
    if (!ret) {
      return false;
    }
  }
  return true;
}
389
GetInputNameAndRealNum(const std::shared_ptr<AnfNode> & anf_node,const std::shared_ptr<OpIOInfo> & input_ptr,size_t * dyn_input_index,size_t * input_num,std::string * op_input_name)390 bool GetInputNameAndRealNum(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpIOInfo> &input_ptr,
391 size_t *dyn_input_index, size_t *input_num, std::string *op_input_name) {
392 MS_EXCEPTION_IF_NULL(anf_node);
393 MS_EXCEPTION_IF_NULL(input_ptr);
394 MS_EXCEPTION_IF_NULL(dyn_input_index);
395 MS_EXCEPTION_IF_NULL(input_num);
396 MS_EXCEPTION_IF_NULL(op_input_name);
397 auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
398 // for dynamic input number, dyn_input_sizes has the info of dynamic input num for each input.
399 std::vector<int64_t> dyn_input_sizes;
400 if (primitive->GetAttr(kAttrDynInputSizes) != nullptr) {
401 dyn_input_sizes = GetValue<const std::vector<int64_t>>(primitive->GetAttr(kAttrDynInputSizes));
402 }
403
404 if (input_ptr->param_type() == kParamDynamic) {
405 if (*dyn_input_index >= dyn_input_sizes.size()) {
406 MS_LOG(ERROR) << "Dyn input index" << *dyn_input_index << "is over dyn input num" << dyn_input_sizes.size();
407 return false;
408 }
409 *input_num = LongToSize(dyn_input_sizes[*dyn_input_index]);
410 *op_input_name = input_ptr->name() + "_dynamic_";
411 (*dyn_input_index)++;
412 // if optional input is exist
413 } else {
414 *input_num = 1;
415 *op_input_name = input_ptr->name() + "_";
416 }
417 return true;
418 }
419
GenTbeInputsJson(const std::shared_ptr<AnfNode> & anf_node,const std::shared_ptr<OpInfo> & op_info,nlohmann::json * inputs_json)420 bool TbeKernelJsonCreator::GenTbeInputsJson(const std::shared_ptr<AnfNode> &anf_node,
421 const std::shared_ptr<OpInfo> &op_info, nlohmann::json *inputs_json) {
422 MS_EXCEPTION_IF_NULL(anf_node);
423 MS_EXCEPTION_IF_NULL(op_info);
424 MS_EXCEPTION_IF_NULL(inputs_json);
425 std::string op_name = AnfAlgo::GetCNodeName(anf_node);
426 if (op_name == kAtomicAddrCleanOpName) {
427 return true;
428 }
429 std::vector<std::shared_ptr<OpIOInfo>> inputs_ptr = op_info->inputs_ptr();
430 if (inputs_ptr.empty()) {
431 MS_LOG(INFO) << "Apply kernel " << op_name << "registration info has no input info";
432 return true;
433 }
434 auto op_info_input_num = inputs_ptr.size();
435 size_t dyn_input_index = 0;
436 size_t real_input_index = 0;
437 std::vector<std::vector<nlohmann::json>> inputs_list;
438 for (size_t i = 0; i < op_info_input_num; i++) {
439 size_t input_tensor_num;
440 std::shared_ptr<OpIOInfo> input_ptr = inputs_ptr[i];
441 std::string op_input_name;
442 MS_EXCEPTION_IF_NULL(input_ptr);
443 if (!GetInputNameAndRealNum(anf_node, input_ptr, &dyn_input_index, &input_tensor_num, &op_input_name)) {
444 return false;
445 }
446 std::vector<nlohmann::json> input_list;
447 if (!GenInputList(anf_node, input_tensor_num, input_ptr, &real_input_index, &op_input_name, &input_list)) {
448 return false;
449 }
450 inputs_list.emplace_back(input_list);
451 }
452
453 TbeAdapter::InputOrderPass(anf_node, inputs_list, inputs_json);
454 return true;
455 }
456
GenTbeOutputsJson(const std::shared_ptr<AnfNode> & anf_node,const std::shared_ptr<OpInfo> & op_info,nlohmann::json * outputs_json)457 bool TbeKernelJsonCreator::GenTbeOutputsJson(const std::shared_ptr<AnfNode> &anf_node,
458 const std::shared_ptr<OpInfo> &op_info, nlohmann::json *outputs_json) {
459 MS_EXCEPTION_IF_NULL(anf_node);
460 MS_EXCEPTION_IF_NULL(op_info);
461 MS_EXCEPTION_IF_NULL(outputs_json);
462 auto op_name = AnfAlgo::GetCNodeName(anf_node);
463 if (op_name == kAtomicAddrCleanOpName) {
464 return true;
465 }
466 auto outputs_ptr = op_info->outputs_ptr();
467 return GenOutputDescJson(anf_node, outputs_ptr, outputs_json);
468 }
469
// Walk the registered output slots and emit one descriptor list per slot.
// required -> one tensor; dynamic -> all remaining real outputs (only legal as the
// single registered output); otherwise optional -> placeholder when the node has
// fewer real outputs than registered.
bool TbeKernelJsonCreator::GenOutputDescJson(
  const std::shared_ptr<mindspore::AnfNode> &anf_node,
  const std::vector<std::shared_ptr<mindspore::kernel::OpIOInfo>> &outputs_ptr, nlohmann::json *outputs_json) {
  MS_EXCEPTION_IF_NULL(outputs_json);
  size_t output_idx = 0;
  auto op_name = AnfAlgo::GetCNodeName(anf_node);
  size_t real_output_num = AnfAlgo::GetOutputTensorNum(anf_node);

  for (const auto &output_ptr : outputs_ptr) {
    size_t output_obj_num = 0;
    if (output_ptr->param_type() == kParamRequred) {
      output_obj_num = 1;
    } else if (output_ptr->param_type() == kParamDynamic) {
      // A dynamic output absorbs every real output, so it must be the only slot.
      if (outputs_ptr.size() > 1) {
        MS_LOG(ERROR) << "Dynamic output is unsupported multi output!";
        return false;
      }
      output_obj_num = real_output_num;
    } else {
      // Optional output: emit a valid=false placeholder if the node doesn't produce it.
      if (output_idx >= real_output_num) {
        MS_LOG(INFO) << "Op:" << op_name << ", output" << output_ptr->name() << " is optional, output is none.";
        std::vector<nlohmann::json> output_list;
        nlohmann::json output_obj;
        output_obj[kJName] = output_ptr->name();
        output_obj[kJValid] = false;
        output_list.emplace_back(output_obj);
        (*outputs_json).push_back(output_list);
        continue;
      } else {
        output_obj_num = 1;
      }
    }
    std::vector<nlohmann::json> output_list;
    GenOutputList(anf_node, output_obj_num, output_ptr, &output_idx, &output_list);
    (*outputs_json).push_back(output_list);
  }
  return true;
}
508
GenOutputList(const std::shared_ptr<AnfNode> & anf_node,const size_t & output_obj_num,const std::shared_ptr<OpIOInfo> & output_ptr,size_t * output_idx,std::vector<nlohmann::json> * output_list)509 void TbeKernelJsonCreator::GenOutputList(const std::shared_ptr<AnfNode> &anf_node, const size_t &output_obj_num,
510 const std::shared_ptr<OpIOInfo> &output_ptr, size_t *output_idx,
511 std::vector<nlohmann::json> *output_list) {
512 MS_EXCEPTION_IF_NULL(output_idx);
513 MS_EXCEPTION_IF_NULL(output_list);
514 auto def_format = kOpFormat_NCHW;
515 if (anf_node->isa<CNode>() && IsNeedChangeDefaultFormat(anf_node->cast<CNodePtr>())) {
516 def_format = kOpFormat_NCDHW;
517 }
518 for (size_t i = 0; i < output_obj_num; i++) {
519 auto dtype = GetDeviceOutputType(anf_node, *output_idx);
520 auto format = GetDeviceOutputFormat(anf_node, *output_idx);
521
522 std::vector<int64_t> shape = GetOutputDeviceShapeForTbeBuild(creater_type_, anf_node, *output_idx);
523 std::vector<int64_t> ori_shape = GetOutputShapeForTbeBuild(anf_node, *output_idx);
524
525 if (def_format == kOpFormat_NCDHW && k3DFormatSet.find(format) == k3DFormatSet.end()) {
526 format = kOpFormat_NCDHW;
527 }
528 nlohmann::json output_obj;
529 output_obj[kJDtype] = dtype;
530 output_obj[kJShape] = shape;
531 output_obj[kJFormat] = format;
532 output_obj[kJOriShape] = ori_shape;
533 output_obj[kJOriFormat] = def_format;
534 output_obj[kJName] = output_ptr->name();
535 output_obj[kJValid] = true;
536 output_obj[kJAddrType] = 0;
537 output_obj[kJParamType] = output_ptr->param_type();
538 output_obj[kJRange] = tbe::TbeDynamicShapeUtil::GetOutputDynamicRange(anf_node, *output_idx, format);
539 output_list->emplace_back(output_obj);
540 (*output_idx)++;
541 }
542 }
543
// Generate the "attrs" section of the single-op json. For each registered attr:
// prefer the value set on the primitive, then the registration default; otherwise
// the attr is marked invalid (or an exception is raised for required attrs during
// a real single-op build).
void TbeKernelJsonCreator::GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_node,
                                          const std::shared_ptr<OpInfo> &op_info, nlohmann::json *attrs_json) {
  MS_EXCEPTION_IF_NULL(anf_node);
  MS_EXCEPTION_IF_NULL(op_info);
  MS_EXCEPTION_IF_NULL(attrs_json);
  auto attrs_ptr = op_info->attrs_ptr();
  std::string op_name = AnfAlgo::GetCNodeName(anf_node);
  // Some ops have a dedicated attr pass in the adapter; it fully builds attrs_json.
  if (TbeAdapter::RunAttrPass(anf_node, attrs_ptr, attrs_json)) {
    return;
  }
  auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
  MS_EXCEPTION_IF_NULL(primitive);
  for (const auto &attr_ptr : attrs_ptr) {
    std::string attr_name = attr_ptr->name();
    nlohmann::json attr_obj;
    attr_obj[kJName] = attr_name;
    // LayerNorm's epsilon is deliberately skipped during format selection.
    if (op_name == parallel::LAYER_NORM && attr_obj[kJName] == "epsilon" && creater_type_ == OP_SELECT_FORMAT) {
      continue;
    }
    if (primitive->GetAttr(attr_name) != nullptr) {
      auto value = primitive->GetAttr(attr_name);
      std::string type = attr_ptr->type();
      if (!ParseAttrValue(type, value, &attr_obj)) {
        const int kRecursive_level = 2;
        MS_LOG(EXCEPTION) << "Op name: " << op_info->op_name() << " attr: " << attr_name
                          << ", node debug: " << anf_node->DebugString(kRecursive_level);
      }
      attr_obj[kJValid] = true;
    } else {
      // No value on the primitive: fall back to the registration default value.
      auto default_value = attr_ptr->default_value();
      if (!default_value.empty()) {
        std::string type = attr_ptr->type();
        ParseAttrDefaultValue(type, default_value, &attr_obj);
        attr_obj[kJValid] = true;
      } else {
        MS_LOG(INFO) << "op " << op_name << "'s attr \"" << attr_name << "\" should have a default value.";
        if (op_info->impl_path().empty()) {
          attr_obj[kJValid] = false;
        } else {
          // Required attrs must be present when actually building the kernel.
          if (attr_ptr->param_type() == kParamRequred && creater_type_ == SINGLE_BUILD) {
            MS_LOG(EXCEPTION) << "Op name: " << op_info->op_name() << " attr: " << attr_name
                              << " is required, but not set.";
          } else {
            attr_obj[kJValid] = false;
          }
        }
      }
    }
    (*attrs_json).push_back(attr_obj);
  }
}
595
GetSocVersion()596 string TbeKernelJsonCreator::GetSocVersion() {
597 // Get default soc version.
598 static std::string version = "";
599 if (version.empty()) {
600 const int kSocVersionLen = 50;
601 char soc_version[kSocVersionLen] = {0};
602 auto ret = rtGetSocVersion(soc_version, kSocVersionLen);
603 if (ret != RT_ERROR_NONE) {
604 MS_LOG(EXCEPTION) << "GetSocVersion failed.";
605 }
606 // Get soc version from env value.
607 const char *soc_version_env = nullptr;
608 std::string str_soc_version_env = common::GetEnv(kSOC_VERSION);
609 if (!str_soc_version_env.empty()) {
610 soc_version_env = common::SafeCStr(str_soc_version_env);
611 }
612 if (soc_version_env != nullptr) {
613 if (std::strcmp(soc_version, soc_version_env) != 0) {
614 MS_LOG(DEBUG) << "Detected the env SOC_VERSION, so the SocVersion will be changed to " << str_soc_version_env
615 << ".";
616 ret = rtSetSocVersion(soc_version_env);
617 if (ret != RT_ERROR_NONE) {
618 MS_LOG(EXCEPTION) << "SetSocVersion failed, errorno: " << ret;
619 }
620 return soc_version_env;
621 }
622 }
623 version = soc_version;
624 }
625 return version;
626 }
627
ParseListIntAttrValue(const mindspore::ValuePtr & value,nlohmann::json * attr_obj)628 bool ParseListIntAttrValue(const mindspore::ValuePtr &value, nlohmann::json *attr_obj) {
629 std::vector<int64_t> attr_value;
630 auto value_type = value->type();
631 if (!value_type) {
632 MS_LOG(ERROR) << "value_type is null.";
633 return false;
634 }
635 auto value_type_str = value_type->ToString();
636 if (value_type_str == kVTypeInt64) {
637 auto data = GetValue<int64_t>(value);
638 attr_value.push_back(data);
639 } else {
640 auto vec = value->isa<ValueTuple>() ? value->cast<ValueTuplePtr>()->value() : value->cast<ValueListPtr>()->value();
641 if (!vec.empty()) {
642 if (vec[0]->isa<Int32Imm>()) {
643 std::vector<int32_t> attr_value_me = GetValue<std::vector<int32_t>>(value);
644 (void)std::transform(attr_value_me.begin(), attr_value_me.end(), std::back_inserter(attr_value),
645 [](const int &value) { return static_cast<int64_t>(value); });
646 } else {
647 attr_value = GetValue<std::vector<int64_t>>(value);
648 }
649 }
650 }
651 (*attr_obj)[kJValue] = attr_value;
652 return true;
653 }
654
ParseAttrValue(const std::string & type,const mindspore::ValuePtr & value,nlohmann::json * attr_obj)655 bool TbeKernelJsonCreator::ParseAttrValue(const std::string &type, const mindspore::ValuePtr &value,
656 nlohmann::json *attr_obj) {
657 if (!value) {
658 MS_LOG(ERROR) << "value ptr is null.";
659 return false;
660 }
661 if (!attr_obj) {
662 MS_LOG(ERROR) << "attr_obj ptr is null.";
663 return false;
664 }
665 if (type == kVTypeInt) {
666 if (value->isa<Int32Imm>()) {
667 (*attr_obj)[kJValue] = GetValue<int>(value);
668 } else {
669 (*attr_obj)[kJValue] = GetValue<int64_t>(value);
670 }
671 } else if (type == kVTypeInt64) {
672 (*attr_obj)[kJValue] = GetValue<int64_t>(value);
673 } else if (type == kVTypeStr) {
674 auto attr_str_value = GetValue<std::string>(value);
675 if (attr_str_value == kOpFormat_FRAC_Z) {
676 attr_str_value = kOpFormat_FRACTAL_Z;
677 }
678 (*attr_obj)[kJValue] = attr_str_value;
679 } else if (type == kVTypeBool) {
680 (*attr_obj)[kJValue] = GetValue<bool>(value);
681 } else if (type == kVTypeFloat) {
682 (*attr_obj)[kJValue] = GetValue<float>(value);
683 } else if (type == kVTypeListInt) {
684 if (!ParseListIntAttrValue(value, attr_obj)) {
685 return false;
686 }
687 } else if (type == kVTypeListFloat) {
688 std::vector<float> attr_value;
689 auto value_type = value->type();
690 auto value_type_str = value_type->ToString();
691 if (value_type_str == kVTypeFloat) {
692 auto data = GetValue<float>(value);
693 attr_value.push_back(data);
694 } else {
695 attr_value = GetValue<std::vector<float>>(value);
696 }
697 (*attr_obj)[kJValue] = attr_value;
698 } else if (type == kVTypeListUInt64) {
699 (*attr_obj)[kJValue] = GetValue<std::vector<size_t>>(value);
700 } else if (type == kVTypeListListInt) {
701 (*attr_obj)[kJValue] = GetValue<std::vector<std::vector<int64_t>>>(value);
702 } else {
703 MS_LOG(ERROR) << "Type: " << type << "not support";
704 return false;
705 }
706 return true;
707 }
708
ParseAttrDefaultValue(const std::string & type,const std::string & value,nlohmann::json * attr_obj)709 void TbeKernelJsonCreator::ParseAttrDefaultValue(const std::string &type, const std::string &value,
710 nlohmann::json *attr_obj) {
711 MS_EXCEPTION_IF_NULL(attr_obj);
712 if (type == kVTypeInt) {
713 (*attr_obj)[kJValue] = std::stoi(value);
714 } else if (type == kVTypeInt64) {
715 (*attr_obj)[kJValue] = std::stoll(value);
716 } else if (type == kVTypeStr) {
717 (*attr_obj)[kJValue] = value;
718 } else if (type == kVTypeBool) {
719 bool attr_value = false;
720 std::istringstream(value) >> std::boolalpha >> attr_value;
721 (*attr_obj)[kJValue] = attr_value;
722 } else if (type == kVTypeFloat) {
723 (*attr_obj)[kJValue] = std::stof(value);
724 } else if (type == kVTypeListInt) {
725 std::stringstream string_value(value);
726 std::string list_elem;
727 std::vector<int64_t> attr_value;
728 while (std::getline(string_value, list_elem, ',')) {
729 attr_value.push_back(std::stoi(list_elem));
730 }
731 (*attr_obj)[kJValue] = attr_value;
732 } else {
733 MS_LOG(EXCEPTION) << "Type: " << type << "not support";
734 }
735 }
736
GetDeviceInputShape(const AnfNodePtr & anf_node,size_t real_index) const737 std::vector<size_t> TbeKernelJsonCreator::GetDeviceInputShape(const AnfNodePtr &anf_node, size_t real_index) const {
738 MS_EXCEPTION_IF_NULL(anf_node);
739 std::vector<size_t> shape;
740 if (creater_type_ == OP_SELECT_FORMAT || creater_type_ == CHECK_SUPPORTED) {
741 shape = AnfAlgo::GetPrevNodeOutputInferShape(anf_node, real_index);
742 } else {
743 shape = AnfAlgo::GetInputDeviceShape(anf_node, real_index);
744 }
745 if (shape.empty()) {
746 shape.emplace_back(1);
747 }
748 return shape;
749 }
750
GetDeviceInputType(const AnfNodePtr & anf_node,size_t real_index) const751 std::string TbeKernelJsonCreator::GetDeviceInputType(const AnfNodePtr &anf_node, size_t real_index) const {
752 MS_EXCEPTION_IF_NULL(anf_node);
753 TypeId type_id;
754 if (creater_type_ == OP_SELECT_FORMAT) {
755 type_id = AnfAlgo::GetPrevNodeOutputInferDataType(anf_node, real_index);
756 } else {
757 type_id = AnfAlgo::GetInputDeviceDataType(anf_node, real_index);
758 }
759 return tbe::TypeIdToString(type_id);
760 }
761
GetDeviceInputFormat(const AnfNodePtr & anf_node,size_t real_index) const762 std::string TbeKernelJsonCreator::GetDeviceInputFormat(const AnfNodePtr &anf_node, size_t real_index) const {
763 MS_EXCEPTION_IF_NULL(anf_node);
764 std::string format = kOpFormat_NCHW;
765 if (anf_node->isa<CNode>() && IsNeedChangeDefaultFormat(anf_node->cast<CNodePtr>())) {
766 format = kOpFormat_NCDHW;
767 }
768 if (creater_type_ != OP_SELECT_FORMAT && creater_type_ != CHECK_SUPPORTED) {
769 format = AnfAlgo::GetInputFormat(anf_node, real_index);
770 if (format == kOpFormat_FRAC_Z) {
771 format = kOpFormat_FRACTAL_Z;
772 } else if (format == kOpFormat_DEFAULT) {
773 format = kOpFormat_NCHW;
774 }
775 }
776 return format;
777 }
778
GetDeviceOutputShape(const AnfNodePtr & anf_node,size_t real_index) const779 std::vector<size_t> TbeKernelJsonCreator::GetDeviceOutputShape(const AnfNodePtr &anf_node, size_t real_index) const {
780 MS_EXCEPTION_IF_NULL(anf_node);
781 std::vector<size_t> shape;
782 if (creater_type_ == OP_SELECT_FORMAT || creater_type_ == CHECK_SUPPORTED) {
783 shape = AnfAlgo::GetOutputInferShape(anf_node, real_index);
784 } else {
785 shape = AnfAlgo::GetOutputDeviceShape(anf_node, real_index);
786 }
787 if (shape.empty()) {
788 shape.emplace_back(1);
789 }
790 return shape;
791 }
792
GetDeviceOutputType(const AnfNodePtr & anf_node,size_t real_index) const793 std::string TbeKernelJsonCreator::GetDeviceOutputType(const AnfNodePtr &anf_node, size_t real_index) const {
794 MS_EXCEPTION_IF_NULL(anf_node);
795 TypeId type_id;
796 if (creater_type_ == OP_SELECT_FORMAT) {
797 type_id = AnfAlgo::GetOutputInferDataType(anf_node, real_index);
798 } else {
799 type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, real_index);
800 }
801 return tbe::TypeIdToString(type_id);
802 }
803
GetDeviceOutputFormat(const AnfNodePtr & anf_node,size_t real_index) const804 std::string TbeKernelJsonCreator::GetDeviceOutputFormat(const AnfNodePtr &anf_node, size_t real_index) const {
805 MS_EXCEPTION_IF_NULL(anf_node);
806 std::string format = kOpFormat_NCHW;
807 if (anf_node->isa<CNode>() && IsNeedChangeDefaultFormat(anf_node->cast<CNodePtr>())) {
808 format = kOpFormat_NCDHW;
809 }
810 if (creater_type_ != OP_SELECT_FORMAT && creater_type_ != CHECK_SUPPORTED) {
811 format = AnfAlgo::GetOutputFormat(anf_node, real_index);
812 if (format == kOpFormat_FRAC_Z) {
813 format = kOpFormat_FRACTAL_Z;
814 } else if (format == kOpFormat_DEFAULT) {
815 format = kOpFormat_NCHW;
816 }
817 }
818 return format;
819 }
820
GetInputSizeList(const nlohmann::json & input_json,std::vector<size_t> * input_size_list)821 void GetInputSizeList(const nlohmann::json &input_json, std::vector<size_t> *input_size_list) {
822 MS_EXCEPTION_IF_NULL(input_size_list);
823 for (size_t i = 0; i < input_json.size(); i++) {
824 for (size_t m = 0; m < input_json[i].size(); m++) {
825 size_t size_i = 1;
826 if (input_json[i][m][kJValid] == false) {
827 continue;
828 }
829 for (size_t j = 0; j < input_json[i][m][kJShape].size(); ++j) {
830 if (input_json[i][m][kJShape][j] == -1) {
831 auto input_max_shape = input_json[i][m][kJRange];
832 if (j >= input_max_shape.size()) {
833 MS_LOG(EXCEPTION) << "Invalid Dynamic Shape Max Shape";
834 }
835 MS_LOG(INFO) << "Change -1 Shape to Max Shape:" << input_max_shape[j][1];
836 size_i *= LongToSize(input_max_shape[j][1]);
837 continue;
838 }
839 size_i *= static_cast<size_t>(input_json[i][m][kJShape][j]);
840 }
841 std::string dtype = input_json[i][m][kJDtype];
842 size_t nbyte = tbe::GetDtypeNbyte(dtype);
843 size_i *= nbyte;
844 input_size_list->push_back(size_i);
845 }
846 }
847 }
848
GetOutputSizeList(const nlohmann::json & output_json,std::vector<size_t> * output_size_list)849 void GetOutputSizeList(const nlohmann::json &output_json, std::vector<size_t> *output_size_list) {
850 MS_EXCEPTION_IF_NULL(output_size_list);
851 for (size_t i = 0; i < output_json.size(); i++) {
852 for (size_t m = 0; m < output_json[i].size(); m++) {
853 size_t size_i = 1;
854 if (output_json[i][m][kJValid] == false) {
855 std::string output_name = output_json[i][m][kJName];
856 MS_LOG(INFO) << "Output name:" << output_name << " is optional, valid is false.";
857 continue;
858 }
859 for (size_t j = 0; j < output_json[i][m][kJShape].size(); ++j) {
860 if (output_json[i][m][kJShape][j] == -1) {
861 auto output_max_shape = output_json[i][m][kJRange];
862 if (j >= output_max_shape.size()) {
863 MS_LOG(EXCEPTION) << "Invalid Dynamic Shape Max Shape";
864 }
865 MS_LOG(INFO) << "Change -1 Shape to Max Shape:" << output_max_shape[j][1];
866 size_i *= LongToSize(output_max_shape[j][1]);
867 continue;
868 }
869 size_i *= static_cast<size_t>(output_json[i][m][kJShape][j]);
870 }
871 std::string dtype = output_json[i][m][kJDtype];
872 size_t nbyte = tbe::GetDtypeNbyte(dtype);
873 size_i *= nbyte;
874 output_size_list->push_back(size_i);
875 }
876 }
877 }
878
GetIOSize(const nlohmann::json & kernel_json,std::vector<size_t> * input_size_list,std::vector<size_t> * output_size_list)879 bool TbeKernelBuild::GetIOSize(const nlohmann::json &kernel_json, std::vector<size_t> *input_size_list,
880 std::vector<size_t> *output_size_list) {
881 if (input_size_list == nullptr || output_size_list == nullptr) {
882 MS_LOG(ERROR) << "Input size or output size is nullptr";
883 return false;
884 }
885 input_size_list->clear();
886 output_size_list->clear();
887 GetInputSizeList(kernel_json[kJOpInfo][kJInputs], input_size_list);
888 GetOutputSizeList(kernel_json[kJOpInfo][kJOutputs], output_size_list);
889 return true;
890 }
891
GetRealInputSize(const nlohmann::json & input_json,std::vector<size_t> * input_size_list,size_t * size_i)892 void GetRealInputSize(const nlohmann::json &input_json, std::vector<size_t> *input_size_list, size_t *size_i) {
893 for (size_t j = 0; j < input_json[kJShape].size(); ++j) {
894 if (input_json[kJShape][j] == -1) {
895 auto input_max_shape = input_json[kJRange];
896 if (j >= input_max_shape.size()) {
897 MS_LOG(EXCEPTION) << "Invalid Dynamic Shape Max Shape";
898 }
899 MS_LOG(INFO) << "Change -1 Shape to Max Shape:" << input_max_shape[j][1];
900 (*size_i) *= LongToSize(input_max_shape[j][1]);
901 continue;
902 }
903 (*size_i) *= static_cast<size_t>(input_json[kJShape][j]);
904 }
905 std::string dtype = input_json[kJDtype];
906 size_t nbyte = tbe::GetDtypeNbyte(dtype);
907 (*size_i) *= nbyte;
908 input_size_list->push_back((*size_i));
909 }
910
GetInputSizeList2(const nlohmann::json & input_json,std::vector<size_t> * input_size_list)911 void GetInputSizeList2(const nlohmann::json &input_json, std::vector<size_t> *input_size_list) {
912 for (size_t i = 0; i < input_json.size(); i++) {
913 if (input_json[i].is_array()) {
914 for (size_t m = 0; m < input_json[i].size(); m++) {
915 size_t size_i = 1;
916 if (input_json[i][m][kJValid] == false) {
917 continue;
918 }
919 GetRealInputSize(input_json[i][m], input_size_list, &size_i);
920 }
921 } else {
922 size_t size_i = 1;
923 if (input_json[i][kJValid] == false) {
924 continue;
925 }
926 GetRealInputSize(input_json[i], input_size_list, &size_i);
927 }
928 }
929 }
930
GetRealOutputSize(const nlohmann::json & output_json,std::vector<size_t> * output_size_list,size_t * size_i)931 void GetRealOutputSize(const nlohmann::json &output_json, std::vector<size_t> *output_size_list, size_t *size_i) {
932 for (size_t j = 0; j < output_json[kJShape].size(); ++j) {
933 if (output_json[kJShape][j] == -1) {
934 auto output_max_shape = output_json[kJRange];
935 if (j >= output_max_shape.size()) {
936 MS_LOG(EXCEPTION) << "Invalid Dynamic Shape Max Shape";
937 }
938 MS_LOG(INFO) << "Change -1 Shape to Max Shape:" << output_max_shape[j][1];
939 (*size_i) *= LongToSize(output_max_shape[j][1]);
940 continue;
941 }
942 (*size_i) *= static_cast<size_t>(output_json[kJShape][j]);
943 }
944 std::string dtype = output_json[kJDtype];
945 size_t nbyte = tbe::GetDtypeNbyte(dtype);
946 (*size_i) *= nbyte;
947 output_size_list->push_back((*size_i));
948 }
949
GetOutputSizeList2(const nlohmann::json & output_json,std::vector<size_t> * output_size_list)950 void GetOutputSizeList2(const nlohmann::json &output_json, std::vector<size_t> *output_size_list) {
951 for (size_t i = 0; i < output_json.size(); i++) {
952 if (output_json[i].is_array()) {
953 for (size_t m = 0; m < output_json[i].size(); m++) {
954 size_t size_i = 1;
955 if (output_json[i][m][kJValid] == false) {
956 std::string output_name = output_json[i][m][kJName];
957 MS_LOG(INFO) << "Output name:" << output_name << " is optional, valid is false.";
958 continue;
959 }
960 GetRealOutputSize(output_json[i][m], output_size_list, &size_i);
961 }
962 } else {
963 size_t size_i = 1;
964 if (output_json[i][kJValid] == false) {
965 std::string output_name = output_json[i][kJName];
966 MS_LOG(INFO) << "Output name:" << output_name << " is optional, valid is false.";
967 continue;
968 }
969 GetRealOutputSize(output_json[i], output_size_list, &size_i);
970 }
971 }
972 }
973
GetIOSize2(const nlohmann::json & kernel_json,std::vector<size_t> * input_size_list,std::vector<size_t> * output_size_list,const AnfNodePtr & anf_node)974 bool TbeKernelBuild::GetIOSize2(const nlohmann::json &kernel_json, std::vector<size_t> *input_size_list,
975 std::vector<size_t> *output_size_list, const AnfNodePtr &anf_node) {
976 if (input_size_list == nullptr || output_size_list == nullptr) {
977 MS_LOG(ERROR) << "Input size or output size is nullptr";
978 return false;
979 }
980 input_size_list->clear();
981 output_size_list->clear();
982 auto op_list = kernel_json["op_list"];
983 for (size_t i = 0; i < op_list.size(); i++) {
984 auto op_info = op_list[i];
985 if (op_info["type"] != "Data") {
986 GetInputSizeList2(op_info["input_desc"], input_size_list);
987 GetOutputSizeList2(op_info["output_desc"], output_size_list);
988 }
989 }
990 return true;
991 }
992
// Generate the whole fusion-scope json ("op_list" = data inputs followed by compute ops)
// and the accumulated fusion kernel name.  Returns false when the input layers cannot be
// derived or a data-input json cannot be generated.
bool TbeKernelBuild::GenFusionScopeJson(const std::vector<mindspore::AnfNodePtr> &input_nodes,
                                        const std::vector<mindspore::AnfNodePtr> &compute_nodes,
                                        nlohmann::json *fusion_json, std::string *fusion_kernel_name) {
  MS_EXCEPTION_IF_NULL(fusion_json);
  MS_EXCEPTION_IF_NULL(fusion_kernel_name);
  SetLicInfo(fusion_json);
  // get input layer info: one layer (list of data inputs) per compute node, plus the
  // special-shape tags for ops like ReluGradV2/AddN
  std::vector<std::vector<mindspore::AnfNodePtr>> input_layers;
  std::map<const AnfNodePtr, FusionDataType> spec_data_input;
  if (!GetInputLayers(input_nodes, compute_nodes, &input_layers, &spec_data_input)) {
    return false;
  }
  // gen fusion scope_op json
  std::vector<nlohmann::json> compute_list;
  (*fusion_kernel_name) = kFusionKernelNamePrfix;
  // index: fusion build optional-input record, numbered from 0; shared between the
  // compute pass and the data pass so placeholder names stay aligned.
  // NOTE(review): function-local static means the counter is shared across calls and
  // threads — presumably fusion build is single-threaded; confirm before parallelizing.
  static size_t index = 0;
  // layers and compute nodes are parallel sequences (one layer per compute node)
  auto layer_iter = input_layers.begin();
  auto compute_op_iter = compute_nodes.begin();
  for (; compute_op_iter != compute_nodes.end(); ++compute_op_iter, ++layer_iter) {
    nlohmann::json compute_op_str;
    (void)GenFusionComputeJson(*compute_op_iter, &layer_iter, &compute_op_str, fusion_kernel_name, &index);
    compute_list.push_back(compute_op_str);
  }
  index = 0;
  // gen data input json (one "Data" op per collected input, including optional slots)
  std::vector<nlohmann::json> data_list;
  for (const auto &layer : input_layers) {
    for (const auto &data_input : layer) {
      nlohmann::json data_str;
      if (!GenFusionDataInputJson(data_input, spec_data_input, &data_str, &index)) {
        MS_LOG(INFO) << "Fusion error: gen fusion data input json failed.";
        return false;
      }
      data_list.push_back(data_str);
    }
  }
  index = 0;
  // final op_list = data ops first, then compute ops
  data_list.insert(data_list.end(), compute_list.begin(), compute_list.end());
  (*fusion_json)[kFusionOpList] = data_list;
  auto soc_version = TbeKernelJsonCreator::GetSocVersion();
  (*fusion_json)[kJSocVersion] = soc_version;
  return true;
}
1037
GenPreDescJson(nlohmann::json * output_desc)1038 void TbeKernelBuild::GenPreDescJson(nlohmann::json *output_desc) {
1039 MS_EXCEPTION_IF_NULL(output_desc);
1040 (*output_desc)[kJL1AddrOffset] = 0;
1041 (*output_desc)[kJL1FusionType] = -1;
1042 (*output_desc)[kJL1WorkspaceSize] = -1;
1043 (*output_desc)[kJAddrType] = 0;
1044 }
1045
GenFusionComputeCommonJson(const mindspore::CNodePtr & cnode,nlohmann::json * compute_op_str,std::string * fusion_kernel_name)1046 void TbeKernelBuild::GenFusionComputeCommonJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str,
1047 std::string *fusion_kernel_name) {
1048 MS_EXCEPTION_IF_NULL(compute_op_str);
1049 MS_EXCEPTION_IF_NULL(fusion_kernel_name);
1050 MS_EXCEPTION_IF_NULL(cnode);
1051 // gen others
1052 auto origin_type = AnfAlgo::GetCNodeName(cnode);
1053 auto op_info_ptr = tbe::TbeDynamicShapeUtil::FindOp(origin_type, cnode);
1054 // replace special op type for buffer fusion op
1055 auto type = GetRealOpType(origin_type);
1056 (*compute_op_str)[kJtype] = type;
1057 (*compute_op_str)[kJDynamicCompileStatic] = op_info_ptr->dynamic_compile_static();
1058 auto func_name = op_info_ptr->kernel_name();
1059 (*compute_op_str)[kJFuncName] = func_name;
1060 (*compute_op_str)[kJInt64Mode] = false;
1061 (*compute_op_str)[kJModuleName] = std::string("impl.") + func_name;
1062 (*compute_op_str)[kJName] = cnode->fullname_with_scope();
1063 (*compute_op_str)[kJPattern] = GetNodeFusionType(cnode);
1064 (*compute_op_str)[kJPyModulePath] = kPyPath;
1065 (void)(*fusion_kernel_name).append("_");
1066 (void)(*fusion_kernel_name).append(func_name);
1067 // attr_desc
1068 TbeKernelJsonCreator json_creater(SINGLE_BUILD);
1069 nlohmann::json json_attr_args;
1070 json_creater.GenTbeAttrJson(cnode, op_info_ptr, &json_attr_args);
1071 nlohmann::json attr_desc;
1072 for (const auto &attr : json_attr_args) {
1073 if (attr[kJName] != "isRef" && attr[kJValid] == true) {
1074 attr_desc.push_back(attr[kJValue]);
1075 }
1076 }
1077 if (!attr_desc.empty()) {
1078 (*compute_op_str)[kJAttrDesc] = attr_desc;
1079 }
1080 }
1081
GenSuffixDescJson(nlohmann::json * output_desc)1082 void TbeKernelBuild::GenSuffixDescJson(nlohmann::json *output_desc) {
1083 MS_EXCEPTION_IF_NULL(output_desc);
1084 (*output_desc)[kJSliceOffset] = nlohmann::json::array();
1085 (*output_desc)[kJSplitIndex] = 0;
1086 (*output_desc)[kJTotalShape] = nlohmann::json::array();
1087 (*output_desc)[kJValidShape] = nlohmann::json::array();
1088 }
1089
1090 // anf_node: this node is used to get output desc(type\format\shape ...)
1091 // node_out_idx: node output index
1092 // desc_output_idx: this index use to add json
1093 // nlohmann::json *output_desc: for return
1094 // FusionDataType fusion_data_type: speceial process json desc output shape [kFusionAddN, kFusionReLUGradV2]
// Build one tensor desc json (name/dtype/format/shape/...) for the node_out_idx-th
// output of anf_node; desc_output_idx is the index written into the json, which can
// differ when an output appears twice (see GetDescOutputIndex).
void TbeKernelBuild::GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx,
                                 size_t desc_output_idx, nlohmann::json *output_desc, FusionDataType fusion_data_type) {
  GenPreDescJson(output_desc);
  auto def_format = kOpFormat_NCHW;
  if (anf_node->isa<CNode>() && IsNeedChangeDefaultFormat(anf_node->cast<CNodePtr>())) {
    def_format = kOpFormat_NCDHW;
  }
  // data_type
  auto type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, node_out_idx);
  (*output_desc)[kJDataType] = tbe::TypeIdToString(type_id);
  // name: secondary outputs get an "_<idx>" suffix so names stay unique
  std::string output_desc_name = anf_node->fullname_with_scope();
  if (node_out_idx > 0) {
    output_desc_name = output_desc_name + "_" + std::to_string(node_out_idx);
  }
  (*output_desc)[kJName] = output_desc_name;
  // ori_format
  (*output_desc)[kJOriFormat] = def_format;
  // ori_shape: inferred (host) shape; scalar is represented as [1]
  auto ori_shape = AnfAlgo::GetOutputInferShape(anf_node, node_out_idx);
  if (ori_shape.empty()) {
    ori_shape.emplace_back(1);
  }
  (*output_desc)[kJOriShape] = ori_shape;
  (*output_desc)[kJAddrType] = 0;
  // !! Note: output_index, only node's output use it
  (*output_desc)[kJOutputIndex] = desc_output_idx;
  // shape: device shape; scalar is represented as [1]
  auto shape = AnfAlgo::GetOutputDeviceShape(anf_node, node_out_idx);
  if (shape.empty()) {
    shape.emplace_back(1);
  }
  (*output_desc)[kJShape] = shape;
  // !! Note: format: only data node's output use it
  auto format = AnfAlgo::GetOutputFormat(anf_node, node_out_idx);
  if (format == kOpFormat_DEFAULT) {
    // default maps to NCHW only for 4-D tensors, ND otherwise
    format = ori_shape.size() == kNCHWShapeSize ? kOpFormat_NCHW : kOpFormat_ND;
  } else if (format == kOpFormat_FRAC_Z) {
    format = kOpFormat_FRACTAL_Z;
  }
  (*output_desc)[kJFormat] = format;
  // special node: fold a 5-D device shape into 4-D for Add/AddN/ReluGradV2 fusion inputs
  constexpr size_t DIM0 = 0;
  constexpr size_t DIM1 = 1;
  constexpr size_t DIM2 = 2;
  constexpr size_t DIM3 = 3;
  constexpr size_t DIM4 = 4;
  constexpr size_t C0 = 16;
  constexpr size_t kShapeSize5 = 5;
  if ((fusion_data_type == kFusionAddN || fusion_data_type == kFusionAdd) && shape.size() == kShapeSize5) {
    std::vector<size_t> spec_shape = {};
    (void)spec_shape.emplace_back(shape[DIM0]);
    (void)spec_shape.emplace_back(shape[DIM1]);
    // merge H and W into one dim
    (void)spec_shape.emplace_back(shape[DIM2] * shape[DIM3]);
    (void)spec_shape.emplace_back(shape[DIM4]);
    (*output_desc)[kJShape] = spec_shape;
  } else if (fusion_data_type == kFusionReLUGradV2) {
    // NOTE(review): unlike the branch above, this one does not check shape.size() before
    // reading shape[DIM3] — presumably ReluGradV2 inputs are always 5-D here; confirm.
    std::vector<size_t> spec_shape = {};
    (void)spec_shape.emplace_back(shape[DIM0]);
    (void)spec_shape.emplace_back(shape[DIM1]);
    (void)spec_shape.emplace_back(shape[DIM2] * shape[DIM3]);
    // last dim fixed to C0 (16); dtype forced to bool for the mask tensor
    (void)spec_shape.emplace_back(C0);
    (*output_desc)[kJShape] = spec_shape;
    (*output_desc)[kJDataType] = kVTypeBool;
  }
  GenSuffixDescJson(output_desc);
}
1162
// Build both json views of one compute-op output: output_desc keeps the full desc,
// output_data_desc is a copy with "data_type" renamed to "dtype" and "name" removed
// (the form expected inside the compute op's output_desc list).
void TbeKernelBuild::GenFusionOutputDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx,
                                             size_t desc_output_idx, nlohmann::json *output_desc,
                                             nlohmann::json *output_data_desc) {
  MS_EXCEPTION_IF_NULL(output_desc);
  MS_EXCEPTION_IF_NULL(output_data_desc);
  MS_EXCEPTION_IF_NULL(anf_node);
  GenDescJson(anf_node, node_out_idx, desc_output_idx, output_desc);
  *output_data_desc = *output_desc;
  // copy the value under the new key first, then drop the old key and the name
  (*output_data_desc)[kJDtype] = (*output_desc)[kJDataType];
  output_data_desc->erase(kJDataType);
  output_data_desc->erase(kJName);
}
1175
GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> & anf_node,size_t index,size_t output_index,nlohmann::json * output_desc,const size_t out_size)1176 void TbeKernelBuild::GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t index,
1177 size_t output_index, nlohmann::json *output_desc, const size_t out_size) {
1178 (*output_desc)[kJAddrType] = 0;
1179 std::string output_desc_name = anf_node->fullname_with_scope() + "_" + std::to_string(index);
1180 (*output_desc)[kJName] = output_desc_name;
1181 (*output_desc)[kJOutputIndex] = output_index;
1182 std::vector<size_t> shape;
1183 (*output_desc)[kJShape] = shape;
1184 auto type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, out_size - 1);
1185 (*output_desc)[kJDataType] = tbe::TypeIdToString(type_id);
1186 GenSuffixDescJson(output_desc);
1187 }
1188
GetSpecInputLayers(const std::string & op_name,const std::vector<mindspore::AnfNodePtr> & reorder_layer,std::map<const AnfNodePtr,FusionDataType> * spec_data_input)1189 bool TbeKernelBuild::GetSpecInputLayers(const std::string &op_name,
1190 const std::vector<mindspore::AnfNodePtr> &reorder_layer,
1191 std::map<const AnfNodePtr, FusionDataType> *spec_data_input) {
1192 MS_EXCEPTION_IF_NULL(spec_data_input);
1193 if ((op_name == kReluGradV2OpName || op_name == kAddNOpName || op_name == kTensorAddOpName) &&
1194 reorder_layer.empty()) {
1195 MS_LOG(INFO) << "Fusion error: node(" << op_name << " )'s input is null. ";
1196 return false;
1197 }
1198 if (op_name == kReluGradV2OpName) {
1199 (*spec_data_input)[reorder_layer[0]] = kFusionReLUGradV2;
1200 } else if (op_name == kAddNOpName) {
1201 for (const auto &it : reorder_layer) {
1202 (*spec_data_input)[it] = kFusionAddN;
1203 }
1204 } else if (op_name == kTensorAddOpName) {
1205 (*spec_data_input)[reorder_layer[0]] = kFusionAdd;
1206 }
1207 return true;
1208 }
1209
1210 // <input_nodes> : contains parameter/data node, input order may doesn't match tbe input order;
1211 // <compute_nodes> : contains cnode, inputs order may doesn't match tbe input order;
1212 // Special process node list: reference tbe_adapter.cc [except: Conv2DBackpropInput]
GetInputLayers(const std::vector<mindspore::AnfNodePtr> & input_nodes,const std::vector<mindspore::AnfNodePtr> & compute_nodes,std::vector<std::vector<mindspore::AnfNodePtr>> * input_layers,std::map<const AnfNodePtr,FusionDataType> * spec_data_input)1213 bool TbeKernelBuild::GetInputLayers(const std::vector<mindspore::AnfNodePtr> &input_nodes,
1214 const std::vector<mindspore::AnfNodePtr> &compute_nodes,
1215 std::vector<std::vector<mindspore::AnfNodePtr>> *input_layers,
1216 std::map<const AnfNodePtr, FusionDataType> *spec_data_input) {
1217 MS_EXCEPTION_IF_NULL(input_layers);
1218 MS_EXCEPTION_IF_NULL(spec_data_input);
1219 auto result = std::find_if(compute_nodes.begin(), compute_nodes.end(), [](const auto &it) {
1220 auto node_name = AnfAlgo::GetCNodeName(it);
1221 return (node_name == kConv2DBackpropInputOpName || node_name == kConv2DOpName);
1222 });
1223 bool need_spec = (result != compute_nodes.end());
1224 size_t input_size = 0;
1225 for (const auto &compute_node : compute_nodes) {
1226 std::vector<mindspore::AnfNodePtr> layer = {};
1227 std::vector<mindspore::AnfNodePtr> reorder_layer = {};
1228 MS_EXCEPTION_IF_NULL(compute_node);
1229 auto op_name = AnfAlgo::GetCNodeName(compute_node);
1230 auto ccompute_node = compute_node->cast<CNodePtr>();
1231 if (ccompute_node == nullptr) {
1232 MS_LOG(INFO) << "Fusion error: fusion compute node must be cnode";
1233 return false;
1234 }
1235 for (size_t i = 1; i < ccompute_node->inputs().size(); ++i) {
1236 auto input = ccompute_node->input(i);
1237 auto find_iter = std::find(input_nodes.begin(), input_nodes.end(), input);
1238 if (find_iter != input_nodes.end()) {
1239 layer.emplace_back((*find_iter));
1240 }
1241 }
1242 TbeAdapter::FusionDataOrderPass(op_name, layer, &reorder_layer);
1243 if (need_spec) {
1244 if (!GetSpecInputLayers(op_name, reorder_layer, spec_data_input)) {
1245 return false;
1246 }
1247 }
1248 input_size += reorder_layer.size();
1249 input_layers->emplace_back(reorder_layer);
1250 }
1251 if (input_nodes.size() != input_size) {
1252 MS_LOG(INFO) << "Fusion error: fusion scope error, layer input:" << input_size
1253 << ", input_node:" << input_nodes.size();
1254 return false;
1255 }
1256 return true;
1257 }
1258
GenFusionDataInputJson(const std::shared_ptr<mindspore::AnfNode> & data_input,const std::map<const AnfNodePtr,FusionDataType> & spec_data_input,nlohmann::json * data_str,size_t * index)1259 bool TbeKernelBuild::GenFusionDataInputJson(const std::shared_ptr<mindspore::AnfNode> &data_input,
1260 const std::map<const AnfNodePtr, FusionDataType> &spec_data_input,
1261 nlohmann::json *data_str, size_t *index) {
1262 MS_EXCEPTION_IF_NULL(data_str);
1263 MS_EXCEPTION_IF_NULL(index);
1264 std::vector<nlohmann::json> output_desc_list;
1265 // if data_input is null, this is optional input.
1266 if (!data_input) {
1267 auto name = std::string(kOptional) + std::to_string(*index);
1268 (*data_str)[kJName] = name;
1269 nlohmann::json output_desc;
1270 output_desc[kJName] = name;
1271 output_desc[kJDataType] = 0;
1272 output_desc[kJShape] = "NULL";
1273 output_desc_list.push_back(output_desc);
1274 (*index)++;
1275 } else {
1276 FusionDataType fusion_data_type = kFusionNormal;
1277 if (spec_data_input.find(data_input) != spec_data_input.end()) {
1278 fusion_data_type = spec_data_input.at(data_input);
1279 }
1280 auto kernel_idx = AnfAlgo::VisitKernel(data_input, 0);
1281 auto real_node = kernel_idx.first;
1282 size_t real_idx = kernel_idx.second;
1283 // kJOutputDesc
1284 nlohmann::json output_desc;
1285 GenDescJson(real_node, real_idx, real_idx, &output_desc, fusion_data_type);
1286 output_desc_list.push_back(output_desc);
1287 auto full_name = real_node->fullname_with_scope();
1288 if (real_idx > 0) {
1289 full_name = full_name.append("_").append(std::to_string(real_idx));
1290 }
1291 (*data_str)[kJName] = full_name;
1292 }
1293 (*data_str)[kJOutputDesc] = output_desc_list;
1294 (*data_str)[kJtype] = "Data";
1295 return true;
1296 }
1297
IsDynamicInput(const mindspore::CNodePtr & cnode)1298 bool TbeKernelBuild::IsDynamicInput(const mindspore::CNodePtr &cnode) {
1299 MS_EXCEPTION_IF_NULL(cnode);
1300 auto primitive = AnfAlgo::GetCNodePrimitive(cnode);
1301 MS_EXCEPTION_IF_NULL(primitive);
1302 // for dynamic input number, dyn_input_sizes has the info of dynamic input num for each input.
1303 bool ret = false;
1304 std::vector<int64_t> dyn_input_sizes;
1305 auto dynamic_input_attr = primitive->GetAttr(kAttrDynInputSizes);
1306 if (dynamic_input_attr != nullptr) {
1307 dyn_input_sizes = GetValue<const std::vector<int64_t>>(dynamic_input_attr);
1308 auto real_input_size = cnode->inputs().size() - 1;
1309 auto dyn_input_size = dyn_input_sizes.size();
1310 if (dyn_input_size != 1) {
1311 MS_LOG(INFO) << "Fusion error: fusion build not support dyn_input_sizes > 1";
1312 return ret;
1313 }
1314 if (LongToSize(dyn_input_sizes[0]) != real_input_size) {
1315 MS_LOG(INFO) << "Fusion error: dyn_input_size" << dyn_input_sizes[0] << "not equal real_input_size"
1316 << real_input_size;
1317 return ret;
1318 }
1319 ret = true;
1320 }
1321 return ret;
1322 }
1323
GetOptionalInput(const mindspore::CNodePtr & cnode,bool is_dynamic_input)1324 size_t TbeKernelBuild::GetOptionalInput(const mindspore::CNodePtr &cnode, bool is_dynamic_input) {
1325 MS_EXCEPTION_IF_NULL(cnode);
1326 if (is_dynamic_input) {
1327 // Node can not have optional & dynamic input.
1328 return 0;
1329 }
1330 MS_EXCEPTION_IF_NULL(cnode);
1331 auto node_name = AnfAlgo::GetCNodeName(cnode);
1332 auto op_info = tbe::TbeDynamicShapeUtil::FindOp(node_name, cnode);
1333 MS_EXCEPTION_IF_NULL(cnode);
1334 auto node_inputs_size = cnode->inputs().size();
1335 for (auto &input : cnode->inputs()) {
1336 if (HasAbstractMonad(input)) {
1337 node_inputs_size--;
1338 }
1339 }
1340 if (op_info->inputs_ptr().size() < (node_inputs_size - 1)) {
1341 MS_EXCEPTION(ArgumentError) << "op info error, node name:" << cnode->fullname_with_scope();
1342 }
1343 return (op_info->inputs_ptr().size() + 1 - node_inputs_size);
1344 }
1345
GetRealOpType(const std::string & origin_type)1346 std::string TbeKernelBuild::GetRealOpType(const std::string &origin_type) {
1347 static std::map<std::string, std::string> buffer_fussion_op_map = {
1348 {parallel::DEPTHWISE_CONV2D_NATIVE, parallel::DEPTHWISE_CONV2D}};
1349 string result = origin_type;
1350 auto iter = buffer_fussion_op_map.find(origin_type);
1351 if (iter != buffer_fussion_op_map.end()) {
1352 result = iter->second;
1353 }
1354 return result;
1355 }
1356
GetNodeFusionType(const mindspore::CNodePtr & cnode)1357 std::string TbeKernelBuild::GetNodeFusionType(const mindspore::CNodePtr &cnode) {
1358 MS_EXCEPTION_IF_NULL(cnode);
1359 auto node_type = AnfAlgo::GetCNodeName(cnode);
1360 static std::map<std::string, std::string> fusion_type_map = {{kConv2DOpName, "Convolution"},
1361 {kBNTrainingReduceOpName, "bn_reduce"},
1362 {kBNTrainingUpdateOpName, "bn_update"},
1363 {kReluV2OpName, "ElemWise"},
1364 {kTensorAddOpName, "ElemWise"},
1365 {kConv2DBackpropInputOpName, "Conv2d_backprop_input"},
1366 {kConv2DBackpropFilterOpName, "Conv2d_backprop_filter"},
1367 {kDepthwiseConv2dNativeOpName, "DepthwiseConvolution"},
1368 {kAddNOpName, "ElemWise"},
1369 {kReluGradV2OpName, "ElemWise"},
1370 {kRealDivOpName, "ElemWise"},
1371 {kBiasAddOpName, "BiasAdd"}};
1372 auto find = fusion_type_map.find(node_type);
1373 if (find == fusion_type_map.end()) {
1374 MS_LOG(INFO) << "Fusion warning: get node fusion type failed from lists, origin node type: " << node_type;
1375 auto op_info = mindspore::kernel::tbe::TbeDynamicShapeUtil::FindOp(node_type, cnode);
1376 MS_EXCEPTION_IF_NULL(op_info);
1377 return op_info->fusion_type();
1378 } else {
1379 return find->second;
1380 }
1381 }
1382
// Build the input_desc list of a fusion compute op: one desc per real input (monad
// inputs skipped), dyn_index for dynamic inputs, plus placeholder descs for optional
// inputs.  Also appends a nullptr per optional slot to *layer_iter so the later
// data-input pass emits a matching placeholder "Data" op.
bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode,
                                               std::vector<std::vector<mindspore::AnfNodePtr>>::iterator *layer_iter,
                                               std::vector<nlohmann::json> *input_desc_list, size_t *index) {
  MS_EXCEPTION_IF_NULL(cnode);
  MS_EXCEPTION_IF_NULL(input_desc_list);
  MS_EXCEPTION_IF_NULL(layer_iter);
  MS_EXCEPTION_IF_NULL(index);
  std::vector<nlohmann::json> input_desc_list_tmp = {};
  // 1. input json
  bool is_dynamic_input = IsDynamicInput(cnode);
  for (size_t i = 1; i < cnode->inputs().size(); ++i) {
    auto input = cnode->input(i);
    // monad inputs are not real data inputs
    if (HasAbstractMonad(input)) {
      continue;
    }
    auto kernel_idx = AnfAlgo::VisitKernel(input, 0);
    auto real_node = kernel_idx.first;
    size_t real_idx = kernel_idx.second;
    nlohmann::json input_desc;
    GenDescJson(real_node, real_idx, real_idx, &input_desc);
    if (is_dynamic_input) {
      // 2. dynamic input json: record the position inside the dynamic input group
      MS_LOG(INFO) << "Node has dynamic input.";
      input_desc[kJDynIndex] = (i - 1);
    }
    input_desc_list_tmp.emplace_back(input_desc);
  }
  size_t optional_num = GetOptionalInput(cnode, is_dynamic_input);
  if (optional_num > 0) {
    // 3. optional input: placeholder desc per missing slot; the nullptr pushed into the
    // current layer keeps the data-input pass aligned with these placeholders
    for (size_t i = 0; i < optional_num; ++i) {
      nlohmann::json optional_input_desc;
      optional_input_desc[kJName] = std::string(kOptional) + std::to_string(*index);
      optional_input_desc[kJShape] = "NULL";
      (*index)++;
      (*layer_iter)->emplace_back(nullptr);
      input_desc_list_tmp.emplace_back(optional_input_desc);
    }
  }
  // reorder descs to the op's TBE input order
  TbeAdapter::FusionInputOrderPass(cnode, input_desc_list_tmp, input_desc_list);
  return true;
}
1425
GetDescOutputIndex(const std::vector<int64_t> & output_used_nums)1426 std::vector<size_t> TbeKernelBuild::GetDescOutputIndex(const std::vector<int64_t> &output_used_nums) {
1427 std::vector<size_t> desc_output_index = {};
1428 for (size_t idx = 0; idx < output_used_nums.size(); ++idx) {
1429 auto output_use_num_item = output_used_nums[idx];
1430 desc_output_index.emplace_back(idx);
1431 if (output_use_num_item > 1) {
1432 desc_output_index.emplace_back(idx);
1433 }
1434 }
1435 return desc_output_index;
1436 }
1437
GenFusionComputeOutputJson(const mindspore::CNodePtr & cnode,std::vector<nlohmann::json> * output_desc_list,std::vector<nlohmann::json> * output_data_desc_list)1438 bool TbeKernelBuild::GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode,
1439 std::vector<nlohmann::json> *output_desc_list,
1440 std::vector<nlohmann::json> *output_data_desc_list) {
1441 MS_EXCEPTION_IF_NULL(output_desc_list);
1442 MS_EXCEPTION_IF_NULL(output_data_desc_list);
1443 auto output_size = AnfAlgo::GetOutputTensorNum(cnode);
1444 if (AnfAlgo::HasNodeAttr(kAttrOutputUsedNum, cnode)) {
1445 auto output_used_nums = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(cnode, kAttrOutputUsedNum);
1446 if (output_used_nums.size() != output_size) {
1447 MS_LOG(INFO) << "Fusion error: output tenor num(" << output_size << ")"
1448 << " is not match output used num(" << output_used_nums.size() << ")";
1449 return false;
1450 }
1451 auto desc_output_index = GetDescOutputIndex(output_used_nums);
1452 for (size_t i = 0; i < output_size; ++i) {
1453 MS_LOG(INFO) << "Fusion index: " << i << ", desc_output_index: " << desc_output_index[i];
1454 nlohmann::json output_desc;
1455 nlohmann::json output_data_desc;
1456 GenFusionOutputDescJson(cnode, i, desc_output_index[i], &output_desc, &output_data_desc);
1457 output_data_desc_list->emplace_back(output_data_desc);
1458 output_desc_list->emplace_back(output_desc);
1459 }
1460 for (size_t j = output_size; j < desc_output_index.size(); ++j) {
1461 MS_LOG(INFO) << "Fusion index: " << j << ", desc_output_index: " << desc_output_index[j];
1462 nlohmann::json output_desc;
1463 GenReusedOutputDesc(cnode, j, desc_output_index[j], &output_desc, output_size);
1464 output_desc_list->emplace_back(output_desc);
1465 }
1466 } else {
1467 for (size_t i = 0; i < output_size; ++i) {
1468 nlohmann::json output_desc;
1469 nlohmann::json output_data_desc;
1470 GenFusionOutputDescJson(cnode, i, i, &output_desc, &output_data_desc);
1471 output_data_desc_list->emplace_back(output_data_desc);
1472 output_desc_list->emplace_back(output_desc);
1473 }
1474 }
1475 return true;
1476 }
1477
GenFusionComputeJson(const mindspore::AnfNodePtr & compute_node,std::vector<std::vector<mindspore::AnfNodePtr>>::iterator * layer_iter,nlohmann::json * compute_op_str,std::string * fusion_kernel_name,size_t * index)1478 bool TbeKernelBuild::GenFusionComputeJson(const mindspore::AnfNodePtr &compute_node,
1479 std::vector<std::vector<mindspore::AnfNodePtr>>::iterator *layer_iter,
1480 nlohmann::json *compute_op_str, std::string *fusion_kernel_name,
1481 size_t *index) {
1482 MS_EXCEPTION_IF_NULL(compute_node);
1483 auto cnode = compute_node->cast<CNodePtr>();
1484 MS_EXCEPTION_IF_NULL(cnode);
1485 // gen input desc
1486 std::vector<nlohmann::json> input_desc_list;
1487 (void)GenFusionComputeInputJson(cnode, layer_iter, &input_desc_list, index);
1488 (*compute_op_str)[kJInputDesc] = input_desc_list;
1489 // gen output desc
1490 std::vector<nlohmann::json> output_desc_list;
1491 std::vector<nlohmann::json> output_data_desc_list;
1492 if (!GenFusionComputeOutputJson(cnode, &output_desc_list, &output_data_desc_list)) {
1493 MS_LOG(INFO) << "Fusion Error: gen fusion output desc failed, node full name: " << cnode->fullname_with_scope();
1494 return false;
1495 }
1496 (*compute_op_str)[kJOutputDataDesc] = output_data_desc_list;
1497 (*compute_op_str)[kJOutputDesc] = output_desc_list;
1498 // gen common desc
1499 GenFusionComputeCommonJson(cnode, compute_op_str, fusion_kernel_name);
1500 return true;
1501 }
1502
GetIOSizeImpl(const nlohmann::json & desc)1503 size_t TbeKernelBuild::GetIOSizeImpl(const nlohmann::json &desc) {
1504 size_t ret = 1;
1505 for (const auto &shape_item : desc[kJShape]) {
1506 ret *= static_cast<size_t>(shape_item);
1507 }
1508 std::string data_type = desc[kJDataType];
1509 size_t nbyte = tbe::GetDtypeNbyte(data_type);
1510 ret *= nbyte;
1511 return ret;
1512 }
1513
CalInputSize(const nlohmann::json & fusion_op_list,std::vector<size_t> * input_size_list)1514 void TbeKernelBuild::CalInputSize(const nlohmann::json &fusion_op_list, std::vector<size_t> *input_size_list) {
1515 MS_EXCEPTION_IF_NULL(input_size_list);
1516 // cal input size for malloc
1517 for (const auto &op : fusion_op_list) {
1518 if (op[kJtype] == "Data") {
1519 const auto &data_output_desc = op[kJOutputDesc];
1520 for (const auto &data_output : data_output_desc) {
1521 if (data_output[kJShape] == "NULL") {
1522 break;
1523 }
1524 input_size_list->push_back(GetIOSizeImpl(data_output));
1525 }
1526 }
1527 }
1528 }
1529
CalOutputSize(const nlohmann::json & fusion_op_list,const std::vector<mindspore::AnfNodePtr> & output_nodes,std::vector<size_t> * output_size_list)1530 bool TbeKernelBuild::CalOutputSize(const nlohmann::json &fusion_op_list,
1531 const std::vector<mindspore::AnfNodePtr> &output_nodes,
1532 std::vector<size_t> *output_size_list) {
1533 MS_EXCEPTION_IF_NULL(output_size_list);
1534 // cal output size for malloc
1535 for (const auto &output_node : output_nodes) {
1536 auto kernel_idx = AnfAlgo::VisitKernel(output_node, 0);
1537 auto real_node = kernel_idx.first;
1538 size_t real_idx = kernel_idx.second;
1539 auto full_name = real_node->fullname_with_scope();
1540 for (const auto &op : fusion_op_list) {
1541 if (op[kJName] != full_name) {
1542 continue;
1543 }
1544 auto op_output_desces = op[kJOutputDesc];
1545 if (output_node != real_node) {
1546 // tuple_get item
1547 auto output_desc = op_output_desces[real_idx];
1548 if (output_desc[kJShape].empty()) {
1549 MS_LOG(INFO) << "Fusion error: output_desc's shape is empty. real_index " << real_idx;
1550 return false;
1551 }
1552 output_size_list->push_back(GetIOSizeImpl(output_desc));
1553 } else {
1554 for (const auto &output_desc : op_output_desces) {
1555 if (output_desc[kJShape].empty()) {
1556 continue;
1557 }
1558 output_size_list->push_back(GetIOSizeImpl(output_desc));
1559 }
1560 }
1561 }
1562 }
1563 return true;
1564 }
1565
GetIOSize(const nlohmann::json & fusion_op_list,const std::vector<mindspore::AnfNodePtr> & output_nodes,std::vector<size_t> * input_size_list,std::vector<size_t> * output_size_list)1566 bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list,
1567 const std::vector<mindspore::AnfNodePtr> &output_nodes,
1568 std::vector<size_t> *input_size_list, std::vector<size_t> *output_size_list) {
1569 MS_EXCEPTION_IF_NULL(input_size_list);
1570 MS_EXCEPTION_IF_NULL(output_size_list);
1571 input_size_list->clear();
1572 output_size_list->clear();
1573 // cal input size for malloc
1574 CalInputSize(fusion_op_list, input_size_list);
1575 // cal output size for malloc
1576 return CalOutputSize(fusion_op_list, output_nodes, output_size_list);
1577 }
1578 } // namespace kernel
1579 } // namespace mindspore
1580