1 /**
2  * Copyright 2019 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "backend/kernel_compiler/tbe/tbe_kernel_build.h"
18 #include <memory>
19 #include <map>
20 #include <list>
21 #include <algorithm>
22 #include "base/core_ops.h"
23 #include "frontend/parallel/ops_info/ops_utils.h"
24 #include "backend/session/anf_runtime_algorithm.h"
25 #include "backend/kernel_compiler/tbe/tbe_adapter.h"
26 #include "backend/kernel_compiler/tbe/tbe_convert_utils.h"
27 #include "backend/kernel_compiler/tbe/tbe_dynaminc_shape_util.h"
28 #include "backend/kernel_compiler/tbe/tbe_utils.h"
29 #include "utils/ms_context.h"
30 #include "runtime/dev.h"
31 #include "utils/trace_base.h"
32 #include "utils/convert_utils_base.h"
33 #include "utils/ms_utils.h"
34 #include "runtime/device/ascend/lic_manager.h"
35 
36 namespace mindspore {
37 namespace kernel {
38 using mindspore::kernel::tbe::TbeAdapter;
39 using mindspore::kernel::tbe::TbeUtils;
40 namespace {
41 constexpr auto kFusionOpList = "op_list";
42 constexpr auto kFusionKernelNamePrfix = "te_fusion";
43 constexpr auto kOptional = "optional_";
44 constexpr auto kOpFormat_FRACTAL_Z = "FRACTAL_Z";
45 constexpr auto kPlatform = "platform";
46 constexpr auto kPlatTBE = "TBE";
47 constexpr auto kGenModel = "gen_model";
48 constexpr auto kSingle = "single";
49 constexpr auto kImplPath = "impl_path";
50 constexpr auto kJInputs = "inputs";
51 constexpr auto kJOutputs = "outputs";
52 constexpr auto kJAttrs = "attrs";
53 constexpr auto kJKernelName = "kernel_name";
54 constexpr auto kJFullName = "full_name";
55 constexpr auto kJOpInfo = "op_info";
56 constexpr auto kJDtype = "dtype";
57 constexpr auto kJtype = "type";
58 constexpr auto kJName = "name";
59 constexpr auto kJOriShape = "ori_shape";
60 constexpr auto kJOriFormat = "ori_format";
61 constexpr auto kJShape = "shape";
62 constexpr auto kJFormat = "format";
63 constexpr auto kJValid = "valid";
64 constexpr auto kJParamType = "param_type";
65 constexpr auto kParamDynamic = "dynamic";
66 constexpr auto kParamRequred = "required";
67 constexpr auto kJDataType = "data_type";
68 constexpr auto kJOutputIndex = "output_index";
69 constexpr auto kJOutputDataDesc = "output_data_desc";
70 constexpr auto kJOutputDesc = "output_desc";
71 constexpr auto kJInputDesc = "input_desc";
72 constexpr auto kJRange = "range";
73 constexpr auto kVTypeInt = "int";
74 constexpr auto kVTypeStr = "str";
75 constexpr auto kVTypeBool = "bool";
76 constexpr auto kVTypeFloat = "float";
77 constexpr auto kVTypeListInt = "listInt";
78 constexpr auto kVTypeInt32 = "Int32";
79 constexpr auto kVTypeInt64 = "Int64";
80 constexpr auto kVTypeListUInt64 = "listUInt64";
81 constexpr auto kVTypeListFloat = "listFloat";
82 constexpr auto kVTypeListListInt = "listListInt";
83 constexpr auto kJValue = "value";
84 constexpr auto kJDynIndex = "dyn_index";
85 constexpr auto kJFuncName = "func_name";
86 constexpr auto kJL1AddrOffset = "L1_addr_offset";
87 constexpr auto kJL1FusionType = "L1_fusion_type";
88 constexpr auto kJL1WorkspaceSize = "L1_workspace_size";
89 constexpr auto kJAddrType = "addr_type";
90 constexpr auto kJSliceOffset = "slice_offset";
91 constexpr auto kJSplitIndex = "split_index";
92 constexpr auto kJTotalShape = "total_shape";
93 constexpr auto kJDynamicCompileStatic = "dynamic_compile_static";
94 constexpr auto kJInt64Mode = "int64mode";
95 constexpr auto kJValidShape = "valid_shape";
96 constexpr auto kJModuleName = "module_name";
97 constexpr auto kJPattern = "pattern";
98 constexpr auto kJPyModulePath = "py_module_path";
99 constexpr auto kJAttrDesc = "attr_desc";
100 constexpr auto kJSocVersion = "socVersion";
101 constexpr auto kAutoTilingMode = "autoTilingMode";
102 constexpr auto kSOC_VERSION = "SOC_VERSION";
103 constexpr auto kJIsDynamicShape = "is_dynamic_shape";
104 constexpr auto kJDynamicIndex = "dynamic_index";
105 constexpr auto kJSocInfo = "SocInfo";
106 constexpr auto kNCHWShapeSize = 4;
107 constexpr auto kJRlTuneSwitch = "rl_tune_switch";
108 constexpr auto kJRlTuneList = "rl_tune_list";
109 constexpr auto kJOpTuneSwitch = "op_tune_switch";
110 constexpr auto kJOpTuneList = "op_tune_list";
111 constexpr auto kJPassList = "pass_list";
112 
113 const auto kPyPath = "/usr/local/Ascend/opp/op_impl/built-in/ai_core/tbe";
114 
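// Returns true when the node carries a "format" attr equal to NCDHW, i.e. the
// default NCHW format has to be replaced with NCDHW for 3-D operators.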
115 bool IsNeedChangeDefaultFormat(const CNodePtr &cnode) {
116   MS_EXCEPTION_IF_NULL(cnode);
117   if (AnfAlgo::HasNodeAttr(kAttrFormat, cnode->cast<CNodePtr>())) {
118     auto attr = AnfAlgo::GetNodeAttr<std::string>(cnode, kAttrFormat);
119     return attr == kOpFormat_NCDHW;
120   }
121   return false;
122 }
123 
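// Copies the RL/op tuning switches and the pass list from LicManager into the op info json.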
124 void SetLicInfo(nlohmann::json *op_info_json) {
125   MS_EXCEPTION_IF_NULL(op_info_json);
126   (*op_info_json)[kJRlTuneSwitch] = LicManager::GetInstance().GetRlTuneSwitch();
127   (*op_info_json)[kJRlTuneList] = LicManager::GetInstance().GetRlTuneList();
128   (*op_info_json)[kJOpTuneSwitch] = LicManager::GetInstance().GetOpTuneSwitch();
129   (*op_info_json)[kJOpTuneList] = LicManager::GetInstance().GetOpTuneList();
130   (*op_info_json)[kJPassList] = LicManager::GetInstance().GetPassSwitch();
131 }
132 
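// Shape helpers used when building the TBE compile json. An empty shape is
// normalized to {1} so scalars always have a valid shape entry.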
133 std::vector<int64_t> GetOutputShapeForTbeBuild(const AnfNodePtr &anf_node, size_t real_index) {
134   MS_EXCEPTION_IF_NULL(anf_node);
135   std::vector<int64_t> shape;
136   auto output_shape = AnfAlgo::GetOutputDetailShape(anf_node, real_index);
137   MS_EXCEPTION_IF_NULL(output_shape);
138   if (output_shape->isa<abstract::Shape>()) {
139     auto shape_ptr = output_shape->cast<abstract::ShapePtr>();
140     MS_EXCEPTION_IF_NULL(shape_ptr);
141     shape = shape_ptr->shape();
142   }
143   if (shape.empty()) {
144     shape.emplace_back(1);
145   }
146   return shape;
147 }
148 
149 std::vector<int64_t> GetOutputDeviceShapeForTbeBuild(const kCreaterType creater_type, const AnfNodePtr &anf_node,
150                                                      const size_t real_index) {
151   MS_EXCEPTION_IF_NULL(anf_node);
152   std::vector<int64_t> shape;
153   if (creater_type == OP_SELECT_FORMAT || creater_type == CHECK_SUPPORTED) {
154     shape = GetOutputShapeForTbeBuild(anf_node, real_index);
155   } else {
156     auto format = AnfAlgo::GetOutputFormat(anf_node, real_index);
157     shape = AnfAlgo::GetOutputDeviceShapeForTbeBuild(anf_node, real_index, format);
158   }
159   if (shape.empty()) {
160     shape.emplace_back(1);
161   }
162   return shape;
163 }
164 
165 std::vector<int64_t> GetInputShapeForTbeBuild(const AnfNodePtr &anf_node, size_t real_index) {
166   MS_EXCEPTION_IF_NULL(anf_node);
167   session::KernelWithIndex kernel_with_index = AnfAlgo::GetPrevNodeOutput(anf_node, real_index);
168   return GetOutputShapeForTbeBuild(kernel_with_index.first, kernel_with_index.second);
169 }
170 
171 std::vector<int64_t> GetInputDeviceShapeForTbeBuild(const kCreaterType creater_type, const AnfNodePtr &anf_node,
172                                                     const size_t real_index) {
173   MS_EXCEPTION_IF_NULL(anf_node);
174   std::vector<int64_t> shape;
175   session::KernelWithIndex kernel_with_index = AnfAlgo::GetPrevNodeOutput(anf_node, real_index);
176   if (creater_type == OP_SELECT_FORMAT || creater_type == CHECK_SUPPORTED) {
177     shape = GetOutputShapeForTbeBuild(kernel_with_index.first, kernel_with_index.second);
178   } else {
179     auto format = AnfAlgo::GetInputFormat(anf_node, real_index);
180     shape = AnfAlgo::GetOutputDeviceShapeForTbeBuild(kernel_with_index.first, kernel_with_index.second, format);
181   }
182   if (shape.empty()) {
183     shape.emplace_back(1);
184   }
185   return shape;
186 }
187 }  // namespace
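// Generates the compile json for a single TBE kernel. The result is roughly:
//   {
//     "platform": "TBE", "impl_path": ...,
//     "SocInfo": {...},
//     "op_info": {"name": ..., "inputs": [...], "outputs": [...], "attrs": [...],
//                 "kernel_name": <op>_<hash>_<device_id>, ...}
//   }
// json_name_ is derived from a hash of the op info plus soc info, so identical ops
// compiled with the same soc settings map to the same kernel name.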
188 bool TbeKernelJsonCreator::GenTbeSingleKernelJson(const std::shared_ptr<mindspore::AnfNode> &anf_node,
189                                                   nlohmann::json *kernel_json) {
190   MS_EXCEPTION_IF_NULL(anf_node);
191   MS_EXCEPTION_IF_NULL(kernel_json);
192   std::string op_name = AnfAlgo::GetCNodeName(anf_node);
193   auto op_info_ptr = mindspore::kernel::tbe::TbeDynamicShapeUtil::FindOp(op_name, anf_node);
194   MS_EXCEPTION_IF_NULL(op_info_ptr);
195   (*kernel_json)[kPlatform] = kPlatTBE;
196   (*kernel_json)[kImplPath] = op_info_ptr->impl_path();
197   nlohmann::json op_info_json;
198   SetLicInfo(&op_info_json);
199   op_info_json[kJIsDynamicShape] = tbe::TbeDynamicShapeUtil::GetDynamicShapeAttr(anf_node->cast<CNodePtr>());
200   auto func_name = op_info_ptr->kernel_name();
201   op_info_json[kJName] = func_name;
202   op_info_json[kJModuleName] = std::string("impl.") + func_name;
203   op_info_json[kJPyModulePath] = kPyPath;
204   // generate inputs json
205   nlohmann::json inputs_json;
206   if (!GenTbeInputsJson(anf_node, op_info_ptr, &inputs_json)) {
207     MS_LOG(ERROR) << "Anf Node [" << op_name << "] generate inputs json failed";
208     return false;
209   }
210   op_info_json[kJInputs] = inputs_json;
211   // generate outputs json
212   nlohmann::json outputs_json;
213   if (!GenTbeOutputsJson(anf_node, op_info_ptr, &outputs_json)) {
214     MS_LOG(ERROR) << "Anf Node [" << op_name << "] generate outputs json failed";
215     return false;
216   }
217   op_info_json[kJOutputs] = outputs_json;
218   // generate attrs json
219   nlohmann::json attrs_json;
220   GenTbeAttrJson(anf_node, op_info_ptr, &attrs_json);
221   op_info_json[kJAttrs] = attrs_json;
222   auto soc_version = TbeKernelJsonCreator::GetSocVersion();
223   op_info_json[kJSocVersion] = soc_version;
224   if (op_info_json[kJIsDynamicShape]) {
225     static int32_t dynamic_index = 0;
226     op_info_json[kJDynamicIndex] = dynamic_index++;
227   }
228   auto context_ptr = MsContext::GetInstance();
229   MS_EXCEPTION_IF_NULL(context_ptr);
230   auto tune_mode = context_ptr->get_param<std::string>(MS_CTX_TUNE_MODE);
231   // generate soc info json
232   nlohmann::json soc_info_json = TbeUtils::GenSocInfo();
233   soc_info_json[kAutoTilingMode] = tune_mode;
234 
235   std::string json_str = op_info_json.dump() + soc_info_json.dump();
236   size_t hash_id = std::hash<std::string>()(json_str);
237   auto device_id = context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID);
238 
239   op_info_json[kJFullName] = anf_node->fullname_with_scope();
240   json_name_ = op_name + "_" + std::to_string(hash_id) + "_" + std::to_string(device_id);
241   json_info_ = json_str;
242   auto iter = tbe::opTypeAdapter.find(op_name);
243   if (iter != tbe::opTypeAdapter.end()) {
244     op_name = iter->second;
245   }
246   op_info_json["Type"] = op_name;
247   op_info_json["graph_id"] = AnfAlgo::GetGraphId(anf_node.get());
248   op_info_json[kJKernelName] = json_name_;
249   op_info_json[kGenModel] = kSingle;
250 
251   // create attr_desc
252   nlohmann::json attr_desc;
253   for (const auto &attr : attrs_json) {
254     if (attr[kJName] != "isRef" && attr[kJValid] == true) {
255       attr_desc.push_back(attr[kJValue]);
256     }
257   }
258   if (!attr_desc.empty()) {
259     op_info_json[kJAttrDesc] = attr_desc;
260   }
261 
262   // merge json
263   soc_info_json[kJSocVersion] = soc_version;
264   (*kernel_json)[kJSocInfo] = soc_info_json;
265   (*kernel_json)[kJOpInfo] = op_info_json;
266 
267   MS_LOG(INFO) << "Operate type:" << creater_type_ << ", full scope name is :" << anf_node->fullname_with_scope()
268                << ", json info name is : " << json_name_ << ", kernel json:" << kernel_json->dump();
269 
270   return true;
271 }
272 
273 void GenNoneInputDescJson(const std::shared_ptr<OpIOInfo> &input_ptr, size_t input_i,
274                           std::vector<nlohmann::json> *const input_list) {
275   MS_EXCEPTION_IF_NULL(input_ptr);
276   MS_EXCEPTION_IF_NULL(input_list);
277   nlohmann::json input_desc_json;
278   auto in_name = input_ptr->name();
279   input_desc_json[kJName] = in_name + std::to_string(input_i);
280   input_desc_json[kJValid] = false;
281   input_list->emplace_back(input_desc_json);
282 }
283 
284 void TbeKernelJsonCreator::GenValidInputDescJson(const std::shared_ptr<AnfNode> &anf_node, size_t real_input_index,
285                                                  bool value, const std::shared_ptr<OpIOInfo> &input_ptr,
286                                                  const string &op_input_name, size_t input_i,
287                                                  std::vector<nlohmann::json> *const input_list) {
288   MS_EXCEPTION_IF_NULL(anf_node);
289   MS_EXCEPTION_IF_NULL(input_ptr);
290   MS_EXCEPTION_IF_NULL(input_list);
291   auto def_format = kOpFormat_NCHW;
292   auto dtype = GetDeviceInputType(anf_node, real_input_index);
293   auto format = GetDeviceInputFormat(anf_node, real_input_index);
294   auto shape = GetInputDeviceShapeForTbeBuild(creater_type_, anf_node, real_input_index);
295   auto ori_shape = GetInputShapeForTbeBuild(anf_node, real_input_index);
296   if (anf_node->isa<CNode>() && IsNeedChangeDefaultFormat(anf_node->cast<CNodePtr>())) {
297     def_format = kOpFormat_NCDHW;
298   }
299   if (def_format == kOpFormat_NCDHW && k3DFormatSet.find(format) == k3DFormatSet.end()) {
300     format = kOpFormat_NCDHW;
301   }
302   nlohmann::json input_desc_json;
303   input_desc_json[kJDtype] = dtype;
304   input_desc_json[kJName] = op_input_name + std::to_string(input_i);
305   input_desc_json[kJOriShape] = ori_shape;
306   input_desc_json[kJOriFormat] = def_format;
307   input_desc_json[kJShape] = shape;
308   input_desc_json[kJFormat] = format;
309   input_desc_json[kJValid] = value;
310   input_desc_json[kJAddrType] = 0;
311   input_desc_json[kJParamType] = input_ptr->param_type();
312   input_desc_json[kJRange] = tbe::TbeDynamicShapeUtil::GetInputDynamicRange(anf_node, real_input_index, format);
313   input_list->emplace_back(input_desc_json);
314 }
315 
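// Dispatches per-input desc generation: some inputs (e.g. DynamicRNN's seq_length and
// DynamicGRUV2 placeholder inputs) are emitted as invalid "None" descs, TopK's
// input_indices gets a special tensor desc, everything else goes through GenValidInputDescJson.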
316 bool TbeKernelJsonCreator::GenInputDescJson(const std::shared_ptr<AnfNode> &anf_node, size_t real_input_index,
317                                             bool value, const std::shared_ptr<OpIOInfo> &input_ptr,
318                                             const string &op_input_name, size_t input_i,
319                                             std::vector<nlohmann::json> *input_list) {
320   MS_EXCEPTION_IF_NULL(anf_node);
321   MS_EXCEPTION_IF_NULL(input_ptr);
322   MS_EXCEPTION_IF_NULL(input_list);
323   std::string op_name = AnfAlgo::GetCNodeName(anf_node);
324   if (op_name == kDynamicRNNOpName && input_ptr->name() == "seq_length") {
325     GenNoneInputDescJson(input_ptr, input_i, input_list);
326   } else if (op_name == kDynamicGRUV2OpName) {
327     auto none_index = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(anf_node, "placeholder_index");
328     auto item = find(none_index.begin(), none_index.end(), input_ptr->index());
329     if (item != none_index.end()) {
330       GenNoneInputDescJson(input_ptr, input_i, input_list);
331     } else {
332       GenValidInputDescJson(anf_node, real_input_index, value, input_ptr, op_input_name, input_i, input_list);
333     }
334   } else if (input_ptr->name() == "input_indices" && op_name == kTopKOpName) {
335     TbeAdapter::GenTopKV2IndicesTensorInfo(anf_node, real_input_index, input_list, creater_type_);
336   } else {
337     GenValidInputDescJson(anf_node, real_input_index, value, input_ptr, op_input_name, input_i, input_list);
338   }
339   return true;
340 }
341 
342 bool TbeKernelJsonCreator::GenInputList(const std::shared_ptr<AnfNode> &anf_node, size_t input_tensor_num,
343                                         const std::shared_ptr<OpIOInfo> &input_ptr, size_t *real_input_index,
344                                         string *op_input_name, std::vector<nlohmann::json> *input_list) {
345   MS_EXCEPTION_IF_NULL(anf_node);
346   MS_EXCEPTION_IF_NULL(input_ptr);
347   MS_EXCEPTION_IF_NULL(real_input_index);
348   MS_EXCEPTION_IF_NULL(op_input_name);
349   MS_EXCEPTION_IF_NULL(input_list);
350   std::string op_name = AnfAlgo::GetCNodeName(anf_node);
351   auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
352   size_t real_input_num = AnfAlgo::GetInputTensorNum(anf_node);
353   bool value = true;
354   for (size_t input_i = 0; input_i < input_tensor_num; input_i++) {
355     if (*real_input_index >= real_input_num) {
356       if (input_ptr->param_type() == "optional") {
357         *op_input_name = input_ptr->name() + "_optional_";
358         nlohmann::json input_desc_json;
359         input_desc_json[kJValid] = false;
360         input_desc_json[kJName] = *op_input_name + std::to_string(*real_input_index);
361         input_list->emplace_back(input_desc_json);
362         continue;
363       }
364       MS_LOG(ERROR) << "Input num: " << *real_input_index << " does not match op inputs."
365                     << "\n trace:" << trace::DumpSourceLines(anf_node);
366       return false;
367     }
368     if (op_name == "BatchNorm") {
369       if (input_ptr->name() == "mean" || input_ptr->name() == "variance") {
370         auto attr = primitive->GetAttr("is_training");
371         MS_EXCEPTION_IF_NULL(attr);
372         bool is_training = GetValue<bool>(attr);
373         MS_LOG(INFO) << "Op_name: " << op_name << ", tensor_name " << input_ptr->name() << ", is_training "
374                      << is_training;
375         if (is_training) {
376           (*real_input_index)++;
377           break;
378         }
379       }
380     }
381     bool ret = GenInputDescJson(anf_node, *real_input_index, value, input_ptr, *op_input_name, input_i, input_list);
382     (*real_input_index)++;
383     if (!ret) {
384       return false;
385     }
386   }
387   return true;
388 }
389 
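// Resolves how many real tensors correspond to one op-info input entry: dynamic inputs
// take their count from the kAttrDynInputSizes attribute, other inputs map to exactly
// one tensor. Also builds the json input name prefix.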
390 bool GetInputNameAndRealNum(const std::shared_ptr<AnfNode> &anf_node, const std::shared_ptr<OpIOInfo> &input_ptr,
391                             size_t *dyn_input_index, size_t *input_num, std::string *op_input_name) {
392   MS_EXCEPTION_IF_NULL(anf_node);
393   MS_EXCEPTION_IF_NULL(input_ptr);
394   MS_EXCEPTION_IF_NULL(dyn_input_index);
395   MS_EXCEPTION_IF_NULL(input_num);
396   MS_EXCEPTION_IF_NULL(op_input_name);
397   auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
398   // For a dynamic number of inputs, dyn_input_sizes records the dynamic input count for each input.
399   std::vector<int64_t> dyn_input_sizes;
400   if (primitive->GetAttr(kAttrDynInputSizes) != nullptr) {
401     dyn_input_sizes = GetValue<const std::vector<int64_t>>(primitive->GetAttr(kAttrDynInputSizes));
402   }
403 
404   if (input_ptr->param_type() == kParamDynamic) {
405     if (*dyn_input_index >= dyn_input_sizes.size()) {
406       MS_LOG(ERROR) << "Dyn input index " << *dyn_input_index << " is over dyn input num " << dyn_input_sizes.size();
407       return false;
408     }
409     *input_num = LongToSize(dyn_input_sizes[*dyn_input_index]);
410     *op_input_name = input_ptr->name() + "_dynamic_";
411     (*dyn_input_index)++;
412     // if an optional input exists
413   } else {
414     *input_num = 1;
415     *op_input_name = input_ptr->name() + "_";
416   }
417   return true;
418 }
419 
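// Builds the "inputs" section of the kernel json by walking the op-info input list,
// then lets TbeAdapter::InputOrderPass reorder the result to the order expected by the
// TBE implementation.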
420 bool TbeKernelJsonCreator::GenTbeInputsJson(const std::shared_ptr<AnfNode> &anf_node,
421                                             const std::shared_ptr<OpInfo> &op_info, nlohmann::json *inputs_json) {
422   MS_EXCEPTION_IF_NULL(anf_node);
423   MS_EXCEPTION_IF_NULL(op_info);
424   MS_EXCEPTION_IF_NULL(inputs_json);
425   std::string op_name = AnfAlgo::GetCNodeName(anf_node);
426   if (op_name == kAtomicAddrCleanOpName) {
427     return true;
428   }
429   std::vector<std::shared_ptr<OpIOInfo>> inputs_ptr = op_info->inputs_ptr();
430   if (inputs_ptr.empty()) {
431     MS_LOG(INFO) << "Apply kernel " << op_name << " registration info has no input info";
432     return true;
433   }
434   auto op_info_input_num = inputs_ptr.size();
435   size_t dyn_input_index = 0;
436   size_t real_input_index = 0;
437   std::vector<std::vector<nlohmann::json>> inputs_list;
438   for (size_t i = 0; i < op_info_input_num; i++) {
439     size_t input_tensor_num;
440     std::shared_ptr<OpIOInfo> input_ptr = inputs_ptr[i];
441     std::string op_input_name;
442     MS_EXCEPTION_IF_NULL(input_ptr);
443     if (!GetInputNameAndRealNum(anf_node, input_ptr, &dyn_input_index, &input_tensor_num, &op_input_name)) {
444       return false;
445     }
446     std::vector<nlohmann::json> input_list;
447     if (!GenInputList(anf_node, input_tensor_num, input_ptr, &real_input_index, &op_input_name, &input_list)) {
448       return false;
449     }
450     inputs_list.emplace_back(input_list);
451   }
452 
453   TbeAdapter::InputOrderPass(anf_node, inputs_list, inputs_json);
454   return true;
455 }
456 
457 bool TbeKernelJsonCreator::GenTbeOutputsJson(const std::shared_ptr<AnfNode> &anf_node,
458                                              const std::shared_ptr<OpInfo> &op_info, nlohmann::json *outputs_json) {
459   MS_EXCEPTION_IF_NULL(anf_node);
460   MS_EXCEPTION_IF_NULL(op_info);
461   MS_EXCEPTION_IF_NULL(outputs_json);
462   auto op_name = AnfAlgo::GetCNodeName(anf_node);
463   if (op_name == kAtomicAddrCleanOpName) {
464     return true;
465   }
466   auto outputs_ptr = op_info->outputs_ptr();
467   return GenOutputDescJson(anf_node, outputs_ptr, outputs_json);
468 }
469 
470 bool TbeKernelJsonCreator::GenOutputDescJson(
471   const std::shared_ptr<mindspore::AnfNode> &anf_node,
472   const std::vector<std::shared_ptr<mindspore::kernel::OpIOInfo>> &outputs_ptr, nlohmann::json *outputs_json) {
473   MS_EXCEPTION_IF_NULL(outputs_json);
474   size_t output_idx = 0;
475   auto op_name = AnfAlgo::GetCNodeName(anf_node);
476   size_t real_output_num = AnfAlgo::GetOutputTensorNum(anf_node);
477 
478   for (const auto &output_ptr : outputs_ptr) {
479     size_t output_obj_num = 0;
480     if (output_ptr->param_type() == kParamRequred) {
481       output_obj_num = 1;
482     } else if (output_ptr->param_type() == kParamDynamic) {
483       if (outputs_ptr.size() > 1) {
484         MS_LOG(ERROR) << "Dynamic output does not support multiple outputs!";
485         return false;
486       }
487       output_obj_num = real_output_num;
488     } else {
489       if (output_idx >= real_output_num) {
490         MS_LOG(INFO) << "Op:" << op_name << ", output " << output_ptr->name() << " is optional, output is none.";
491         std::vector<nlohmann::json> output_list;
492         nlohmann::json output_obj;
493         output_obj[kJName] = output_ptr->name();
494         output_obj[kJValid] = false;
495         output_list.emplace_back(output_obj);
496         (*outputs_json).push_back(output_list);
497         continue;
498       } else {
499         output_obj_num = 1;
500       }
501     }
502     std::vector<nlohmann::json> output_list;
503     GenOutputList(anf_node, output_obj_num, output_ptr, &output_idx, &output_list);
504     (*outputs_json).push_back(output_list);
505   }
506   return true;
507 }
508 
509 void TbeKernelJsonCreator::GenOutputList(const std::shared_ptr<AnfNode> &anf_node, const size_t &output_obj_num,
510                                          const std::shared_ptr<OpIOInfo> &output_ptr, size_t *output_idx,
511                                          std::vector<nlohmann::json> *output_list) {
512   MS_EXCEPTION_IF_NULL(output_idx);
513   MS_EXCEPTION_IF_NULL(output_list);
514   auto def_format = kOpFormat_NCHW;
515   if (anf_node->isa<CNode>() && IsNeedChangeDefaultFormat(anf_node->cast<CNodePtr>())) {
516     def_format = kOpFormat_NCDHW;
517   }
518   for (size_t i = 0; i < output_obj_num; i++) {
519     auto dtype = GetDeviceOutputType(anf_node, *output_idx);
520     auto format = GetDeviceOutputFormat(anf_node, *output_idx);
521 
522     std::vector<int64_t> shape = GetOutputDeviceShapeForTbeBuild(creater_type_, anf_node, *output_idx);
523     std::vector<int64_t> ori_shape = GetOutputShapeForTbeBuild(anf_node, *output_idx);
524 
525     if (def_format == kOpFormat_NCDHW && k3DFormatSet.find(format) == k3DFormatSet.end()) {
526       format = kOpFormat_NCDHW;
527     }
528     nlohmann::json output_obj;
529     output_obj[kJDtype] = dtype;
530     output_obj[kJShape] = shape;
531     output_obj[kJFormat] = format;
532     output_obj[kJOriShape] = ori_shape;
533     output_obj[kJOriFormat] = def_format;
534     output_obj[kJName] = output_ptr->name();
535     output_obj[kJValid] = true;
536     output_obj[kJAddrType] = 0;
537     output_obj[kJParamType] = output_ptr->param_type();
538     output_obj[kJRange] = tbe::TbeDynamicShapeUtil::GetOutputDynamicRange(anf_node, *output_idx, format);
539     output_list->emplace_back(output_obj);
540     (*output_idx)++;
541   }
542 }
543 
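// Builds the "attrs" section: each registered attr is taken from the primitive, falling
// back to its registered default value; attrs that cannot be resolved are marked invalid
// (or raise an exception for required attrs in SINGLE_BUILD).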
544 void TbeKernelJsonCreator::GenTbeAttrJson(const std::shared_ptr<AnfNode> &anf_node,
545                                           const std::shared_ptr<OpInfo> &op_info, nlohmann::json *attrs_json) {
546   MS_EXCEPTION_IF_NULL(anf_node);
547   MS_EXCEPTION_IF_NULL(op_info);
548   MS_EXCEPTION_IF_NULL(attrs_json);
549   auto attrs_ptr = op_info->attrs_ptr();
550   std::string op_name = AnfAlgo::GetCNodeName(anf_node);
551   if (TbeAdapter::RunAttrPass(anf_node, attrs_ptr, attrs_json)) {
552     return;
553   }
554   auto primitive = AnfAlgo::GetCNodePrimitive(anf_node);
555   MS_EXCEPTION_IF_NULL(primitive);
556   for (const auto &attr_ptr : attrs_ptr) {
557     std::string attr_name = attr_ptr->name();
558     nlohmann::json attr_obj;
559     attr_obj[kJName] = attr_name;
560     if (op_name == parallel::LAYER_NORM && attr_obj[kJName] == "epsilon" && creater_type_ == OP_SELECT_FORMAT) {
561       continue;
562     }
563     if (primitive->GetAttr(attr_name) != nullptr) {
564       auto value = primitive->GetAttr(attr_name);
565       std::string type = attr_ptr->type();
566       if (!ParseAttrValue(type, value, &attr_obj)) {
567         const int kRecursive_level = 2;
568         MS_LOG(EXCEPTION) << "Op name: " << op_info->op_name() << " attr: " << attr_name
569                           << ", node debug: " << anf_node->DebugString(kRecursive_level);
570       }
571       attr_obj[kJValid] = true;
572     } else {
573       auto default_value = attr_ptr->default_value();
574       if (!default_value.empty()) {
575         std::string type = attr_ptr->type();
576         ParseAttrDefaultValue(type, default_value, &attr_obj);
577         attr_obj[kJValid] = true;
578       } else {
579         MS_LOG(INFO) << "op " << op_name << "'s attr \"" << attr_name << "\" should have a default value.";
580         if (op_info->impl_path().empty()) {
581           attr_obj[kJValid] = false;
582         } else {
583           if (attr_ptr->param_type() == kParamRequred && creater_type_ == SINGLE_BUILD) {
584             MS_LOG(EXCEPTION) << "Op name: " << op_info->op_name() << " attr: " << attr_name
585                               << " is required, but not set.";
586           } else {
587             attr_obj[kJValid] = false;
588           }
589         }
590       }
591     }
592     (*attrs_json).push_back(attr_obj);
593   }
594 }
595 
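// Returns the soc version reported by the runtime, optionally overridden by the
// SOC_VERSION environment variable; the value is cached in a function-local static.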
596 string TbeKernelJsonCreator::GetSocVersion() {
597   // Get default soc version.
598   static std::string version = "";
599   if (version.empty()) {
600     const int kSocVersionLen = 50;
601     char soc_version[kSocVersionLen] = {0};
602     auto ret = rtGetSocVersion(soc_version, kSocVersionLen);
603     if (ret != RT_ERROR_NONE) {
604       MS_LOG(EXCEPTION) << "GetSocVersion failed.";
605     }
606     // Get soc version from env value.
607     const char *soc_version_env = nullptr;
608     std::string str_soc_version_env = common::GetEnv(kSOC_VERSION);
609     if (!str_soc_version_env.empty()) {
610       soc_version_env = common::SafeCStr(str_soc_version_env);
611     }
612     if (soc_version_env != nullptr) {
613       if (std::strcmp(soc_version, soc_version_env) != 0) {
614         MS_LOG(DEBUG) << "Detected the env SOC_VERSION, so the SocVersion will be changed to " << str_soc_version_env
615                       << ".";
616         ret = rtSetSocVersion(soc_version_env);
617         if (ret != RT_ERROR_NONE) {
618           MS_LOG(EXCEPTION) << "SetSocVersion failed, error code: " << ret;
619         }
620         return soc_version_env;
621       }
622     }
623     version = soc_version;
624   }
625   return version;
626 }
627 
628 bool ParseListIntAttrValue(const mindspore::ValuePtr &value, nlohmann::json *attr_obj) {
629   std::vector<int64_t> attr_value;
630   auto value_type = value->type();
631   if (!value_type) {
632     MS_LOG(ERROR) << "value_type is null.";
633     return false;
634   }
635   auto value_type_str = value_type->ToString();
636   if (value_type_str == kVTypeInt64) {
637     auto data = GetValue<int64_t>(value);
638     attr_value.push_back(data);
639   } else {
640     auto vec = value->isa<ValueTuple>() ? value->cast<ValueTuplePtr>()->value() : value->cast<ValueListPtr>()->value();
641     if (!vec.empty()) {
642       if (vec[0]->isa<Int32Imm>()) {
643         std::vector<int32_t> attr_value_me = GetValue<std::vector<int32_t>>(value);
644         (void)std::transform(attr_value_me.begin(), attr_value_me.end(), std::back_inserter(attr_value),
645                              [](const int &value) { return static_cast<int64_t>(value); });
646       } else {
647         attr_value = GetValue<std::vector<int64_t>>(value);
648       }
649     }
650   }
651   (*attr_obj)[kJValue] = attr_value;
652   return true;
653 }
654 
655 bool TbeKernelJsonCreator::ParseAttrValue(const std::string &type, const mindspore::ValuePtr &value,
656                                           nlohmann::json *attr_obj) {
657   if (!value) {
658     MS_LOG(ERROR) << "value ptr is null.";
659     return false;
660   }
661   if (!attr_obj) {
662     MS_LOG(ERROR) << "attr_obj ptr is null.";
663     return false;
664   }
665   if (type == kVTypeInt) {
666     if (value->isa<Int32Imm>()) {
667       (*attr_obj)[kJValue] = GetValue<int>(value);
668     } else {
669       (*attr_obj)[kJValue] = GetValue<int64_t>(value);
670     }
671   } else if (type == kVTypeInt64) {
672     (*attr_obj)[kJValue] = GetValue<int64_t>(value);
673   } else if (type == kVTypeStr) {
674     auto attr_str_value = GetValue<std::string>(value);
675     if (attr_str_value == kOpFormat_FRAC_Z) {
676       attr_str_value = kOpFormat_FRACTAL_Z;
677     }
678     (*attr_obj)[kJValue] = attr_str_value;
679   } else if (type == kVTypeBool) {
680     (*attr_obj)[kJValue] = GetValue<bool>(value);
681   } else if (type == kVTypeFloat) {
682     (*attr_obj)[kJValue] = GetValue<float>(value);
683   } else if (type == kVTypeListInt) {
684     if (!ParseListIntAttrValue(value, attr_obj)) {
685       return false;
686     }
687   } else if (type == kVTypeListFloat) {
688     std::vector<float> attr_value;
689     auto value_type = value->type();
690     auto value_type_str = value_type->ToString();
691     if (value_type_str == kVTypeFloat) {
692       auto data = GetValue<float>(value);
693       attr_value.push_back(data);
694     } else {
695       attr_value = GetValue<std::vector<float>>(value);
696     }
697     (*attr_obj)[kJValue] = attr_value;
698   } else if (type == kVTypeListUInt64) {
699     (*attr_obj)[kJValue] = GetValue<std::vector<size_t>>(value);
700   } else if (type == kVTypeListListInt) {
701     (*attr_obj)[kJValue] = GetValue<std::vector<std::vector<int64_t>>>(value);
702   } else {
703     MS_LOG(ERROR) << "Type: " << type << " is not supported";
704     return false;
705   }
706   return true;
707 }
708 
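// Parses an attr default value given as a string, e.g. type "listInt" with value "1,2,3"
// becomes the json array [1, 2, 3].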
709 void TbeKernelJsonCreator::ParseAttrDefaultValue(const std::string &type, const std::string &value,
710                                                  nlohmann::json *attr_obj) {
711   MS_EXCEPTION_IF_NULL(attr_obj);
712   if (type == kVTypeInt) {
713     (*attr_obj)[kJValue] = std::stoi(value);
714   } else if (type == kVTypeInt64) {
715     (*attr_obj)[kJValue] = std::stoll(value);
716   } else if (type == kVTypeStr) {
717     (*attr_obj)[kJValue] = value;
718   } else if (type == kVTypeBool) {
719     bool attr_value = false;
720     std::istringstream(value) >> std::boolalpha >> attr_value;
721     (*attr_obj)[kJValue] = attr_value;
722   } else if (type == kVTypeFloat) {
723     (*attr_obj)[kJValue] = std::stof(value);
724   } else if (type == kVTypeListInt) {
725     std::stringstream string_value(value);
726     std::string list_elem;
727     std::vector<int64_t> attr_value;
728     while (std::getline(string_value, list_elem, ',')) {
729       attr_value.push_back(std::stoi(list_elem));
730     }
731     (*attr_obj)[kJValue] = attr_value;
732   } else {
733     MS_LOG(EXCEPTION) << "Type: " << type << " is not supported";
734   }
735 }
736 
737 std::vector<size_t> TbeKernelJsonCreator::GetDeviceInputShape(const AnfNodePtr &anf_node, size_t real_index) const {
738   MS_EXCEPTION_IF_NULL(anf_node);
739   std::vector<size_t> shape;
740   if (creater_type_ == OP_SELECT_FORMAT || creater_type_ == CHECK_SUPPORTED) {
741     shape = AnfAlgo::GetPrevNodeOutputInferShape(anf_node, real_index);
742   } else {
743     shape = AnfAlgo::GetInputDeviceShape(anf_node, real_index);
744   }
745   if (shape.empty()) {
746     shape.emplace_back(1);
747   }
748   return shape;
749 }
750 
751 std::string TbeKernelJsonCreator::GetDeviceInputType(const AnfNodePtr &anf_node, size_t real_index) const {
752   MS_EXCEPTION_IF_NULL(anf_node);
753   TypeId type_id;
754   if (creater_type_ == OP_SELECT_FORMAT) {
755     type_id = AnfAlgo::GetPrevNodeOutputInferDataType(anf_node, real_index);
756   } else {
757     type_id = AnfAlgo::GetInputDeviceDataType(anf_node, real_index);
758   }
759   return tbe::TypeIdToString(type_id);
760 }
761 
762 std::string TbeKernelJsonCreator::GetDeviceInputFormat(const AnfNodePtr &anf_node, size_t real_index) const {
763   MS_EXCEPTION_IF_NULL(anf_node);
764   std::string format = kOpFormat_NCHW;
765   if (anf_node->isa<CNode>() && IsNeedChangeDefaultFormat(anf_node->cast<CNodePtr>())) {
766     format = kOpFormat_NCDHW;
767   }
768   if (creater_type_ != OP_SELECT_FORMAT && creater_type_ != CHECK_SUPPORTED) {
769     format = AnfAlgo::GetInputFormat(anf_node, real_index);
770     if (format == kOpFormat_FRAC_Z) {
771       format = kOpFormat_FRACTAL_Z;
772     } else if (format == kOpFormat_DEFAULT) {
773       format = kOpFormat_NCHW;
774     }
775   }
776   return format;
777 }
778 
779 std::vector<size_t> TbeKernelJsonCreator::GetDeviceOutputShape(const AnfNodePtr &anf_node, size_t real_index) const {
780   MS_EXCEPTION_IF_NULL(anf_node);
781   std::vector<size_t> shape;
782   if (creater_type_ == OP_SELECT_FORMAT || creater_type_ == CHECK_SUPPORTED) {
783     shape = AnfAlgo::GetOutputInferShape(anf_node, real_index);
784   } else {
785     shape = AnfAlgo::GetOutputDeviceShape(anf_node, real_index);
786   }
787   if (shape.empty()) {
788     shape.emplace_back(1);
789   }
790   return shape;
791 }
792 
793 std::string TbeKernelJsonCreator::GetDeviceOutputType(const AnfNodePtr &anf_node, size_t real_index) const {
794   MS_EXCEPTION_IF_NULL(anf_node);
795   TypeId type_id;
796   if (creater_type_ == OP_SELECT_FORMAT) {
797     type_id = AnfAlgo::GetOutputInferDataType(anf_node, real_index);
798   } else {
799     type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, real_index);
800   }
801   return tbe::TypeIdToString(type_id);
802 }
803 
804 std::string TbeKernelJsonCreator::GetDeviceOutputFormat(const AnfNodePtr &anf_node, size_t real_index) const {
805   MS_EXCEPTION_IF_NULL(anf_node);
806   std::string format = kOpFormat_NCHW;
807   if (anf_node->isa<CNode>() && IsNeedChangeDefaultFormat(anf_node->cast<CNodePtr>())) {
808     format = kOpFormat_NCDHW;
809   }
810   if (creater_type_ != OP_SELECT_FORMAT && creater_type_ != CHECK_SUPPORTED) {
811     format = AnfAlgo::GetOutputFormat(anf_node, real_index);
812     if (format == kOpFormat_FRAC_Z) {
813       format = kOpFormat_FRACTAL_Z;
814     } else if (format == kOpFormat_DEFAULT) {
815       format = kOpFormat_NCHW;
816     }
817   }
818   return format;
819 }
820 
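// Accumulates the byte size of every valid input tensor: the product of the shape dims
// (using the max of the range for -1 dims) times the dtype byte width.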
821 void GetInputSizeList(const nlohmann::json &input_json, std::vector<size_t> *input_size_list) {
822   MS_EXCEPTION_IF_NULL(input_size_list);
823   for (size_t i = 0; i < input_json.size(); i++) {
824     for (size_t m = 0; m < input_json[i].size(); m++) {
825       size_t size_i = 1;
826       if (input_json[i][m][kJValid] == false) {
827         continue;
828       }
829       for (size_t j = 0; j < input_json[i][m][kJShape].size(); ++j) {
830         if (input_json[i][m][kJShape][j] == -1) {
831           auto input_max_shape = input_json[i][m][kJRange];
832           if (j >= input_max_shape.size()) {
833             MS_LOG(EXCEPTION) << "Invalid Dynamic Shape Max Shape";
834           }
835           MS_LOG(INFO) << "Change -1 Shape to Max Shape:" << input_max_shape[j][1];
836           size_i *= LongToSize(input_max_shape[j][1]);
837           continue;
838         }
839         size_i *= static_cast<size_t>(input_json[i][m][kJShape][j]);
840       }
841       std::string dtype = input_json[i][m][kJDtype];
842       size_t nbyte = tbe::GetDtypeNbyte(dtype);
843       size_i *= nbyte;
844       input_size_list->push_back(size_i);
845     }
846   }
847 }
848 
849 void GetOutputSizeList(const nlohmann::json &output_json, std::vector<size_t> *output_size_list) {
850   MS_EXCEPTION_IF_NULL(output_size_list);
851   for (size_t i = 0; i < output_json.size(); i++) {
852     for (size_t m = 0; m < output_json[i].size(); m++) {
853       size_t size_i = 1;
854       if (output_json[i][m][kJValid] == false) {
855         std::string output_name = output_json[i][m][kJName];
856         MS_LOG(INFO) << "Output name:" << output_name << " is optional, valid is false.";
857         continue;
858       }
859       for (size_t j = 0; j < output_json[i][m][kJShape].size(); ++j) {
860         if (output_json[i][m][kJShape][j] == -1) {
861           auto output_max_shape = output_json[i][m][kJRange];
862           if (j >= output_max_shape.size()) {
863             MS_LOG(EXCEPTION) << "Invalid Dynamic Shape Max Shape";
864           }
865           MS_LOG(INFO) << "Change -1 Shape to Max Shape:" << output_max_shape[j][1];
866           size_i *= LongToSize(output_max_shape[j][1]);
867           continue;
868         }
869         size_i *= static_cast<size_t>(output_json[i][m][kJShape][j]);
870       }
871       std::string dtype = output_json[i][m][kJDtype];
872       size_t nbyte = tbe::GetDtypeNbyte(dtype);
873       size_i *= nbyte;
874       output_size_list->push_back(size_i);
875     }
876   }
877 }
878 
879 bool TbeKernelBuild::GetIOSize(const nlohmann::json &kernel_json, std::vector<size_t> *input_size_list,
880                                std::vector<size_t> *output_size_list) {
881   if (input_size_list == nullptr || output_size_list == nullptr) {
882     MS_LOG(ERROR) << "Input size or output size is nullptr";
883     return false;
884   }
885   input_size_list->clear();
886   output_size_list->clear();
887   GetInputSizeList(kernel_json[kJOpInfo][kJInputs], input_size_list);
888   GetOutputSizeList(kernel_json[kJOpInfo][kJOutputs], output_size_list);
889   return true;
890 }
891 
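// The *2 variants below handle fusion-op json, where an input/output desc may be either
// a flat object or an array of objects per tensor.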
892 void GetRealInputSize(const nlohmann::json &input_json, std::vector<size_t> *input_size_list, size_t *size_i) {
893   for (size_t j = 0; j < input_json[kJShape].size(); ++j) {
894     if (input_json[kJShape][j] == -1) {
895       auto input_max_shape = input_json[kJRange];
896       if (j >= input_max_shape.size()) {
897         MS_LOG(EXCEPTION) << "Invalid Dynamic Shape Max Shape";
898       }
899       MS_LOG(INFO) << "Change -1 Shape to Max Shape:" << input_max_shape[j][1];
900       (*size_i) *= LongToSize(input_max_shape[j][1]);
901       continue;
902     }
903     (*size_i) *= static_cast<size_t>(input_json[kJShape][j]);
904   }
905   std::string dtype = input_json[kJDtype];
906   size_t nbyte = tbe::GetDtypeNbyte(dtype);
907   (*size_i) *= nbyte;
908   input_size_list->push_back((*size_i));
909 }
910 
911 void GetInputSizeList2(const nlohmann::json &input_json, std::vector<size_t> *input_size_list) {
912   for (size_t i = 0; i < input_json.size(); i++) {
913     if (input_json[i].is_array()) {
914       for (size_t m = 0; m < input_json[i].size(); m++) {
915         size_t size_i = 1;
916         if (input_json[i][m][kJValid] == false) {
917           continue;
918         }
919         GetRealInputSize(input_json[i][m], input_size_list, &size_i);
920       }
921     } else {
922       size_t size_i = 1;
923       if (input_json[i][kJValid] == false) {
924         continue;
925       }
926       GetRealInputSize(input_json[i], input_size_list, &size_i);
927     }
928   }
929 }
930 
931 void GetRealOutputSize(const nlohmann::json &output_json, std::vector<size_t> *output_size_list, size_t *size_i) {
932   for (size_t j = 0; j < output_json[kJShape].size(); ++j) {
933     if (output_json[kJShape][j] == -1) {
934       auto output_max_shape = output_json[kJRange];
935       if (j >= output_max_shape.size()) {
936         MS_LOG(EXCEPTION) << "Invalid Dynamic Shape Max Shape";
937       }
938       MS_LOG(INFO) << "Change -1 Shape to Max Shape:" << output_max_shape[j][1];
939       (*size_i) *= LongToSize(output_max_shape[j][1]);
940       continue;
941     }
942     (*size_i) *= static_cast<size_t>(output_json[kJShape][j]);
943   }
944   std::string dtype = output_json[kJDtype];
945   size_t nbyte = tbe::GetDtypeNbyte(dtype);
946   (*size_i) *= nbyte;
947   output_size_list->push_back((*size_i));
948 }
949 
950 void GetOutputSizeList2(const nlohmann::json &output_json, std::vector<size_t> *output_size_list) {
951   for (size_t i = 0; i < output_json.size(); i++) {
952     if (output_json[i].is_array()) {
953       for (size_t m = 0; m < output_json[i].size(); m++) {
954         size_t size_i = 1;
955         if (output_json[i][m][kJValid] == false) {
956           std::string output_name = output_json[i][m][kJName];
957           MS_LOG(INFO) << "Output name:" << output_name << " is optional, valid is false.";
958           continue;
959         }
960         GetRealOutputSize(output_json[i][m], output_size_list, &size_i);
961       }
962     } else {
963       size_t size_i = 1;
964       if (output_json[i][kJValid] == false) {
965         std::string output_name = output_json[i][kJName];
966         MS_LOG(INFO) << "Output name:" << output_name << " is optional, valid is false.";
967         continue;
968       }
969       GetRealOutputSize(output_json[i], output_size_list, &size_i);
970     }
971   }
972 }
973 
974 bool TbeKernelBuild::GetIOSize2(const nlohmann::json &kernel_json, std::vector<size_t> *input_size_list,
975                                 std::vector<size_t> *output_size_list, const AnfNodePtr &anf_node) {
976   if (input_size_list == nullptr || output_size_list == nullptr) {
977     MS_LOG(ERROR) << "Input size or output size is nullptr";
978     return false;
979   }
980   input_size_list->clear();
981   output_size_list->clear();
982   auto op_list = kernel_json["op_list"];
983   for (size_t i = 0; i < op_list.size(); i++) {
984     auto op_info = op_list[i];
985     if (op_info["type"] != "Data") {
986       GetInputSizeList2(op_info["input_desc"], input_size_list);
987       GetOutputSizeList2(op_info["output_desc"], output_size_list);
988     }
989   }
990   return true;
991 }
992 
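// Builds the fusion-scope json: data-input descs for every input layer followed by one
// compute-op desc per fused cnode, all collected under "op_list".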
993 bool TbeKernelBuild::GenFusionScopeJson(const std::vector<mindspore::AnfNodePtr> &input_nodes,
994                                         const std::vector<mindspore::AnfNodePtr> &compute_nodes,
995                                         nlohmann::json *fusion_json, std::string *fusion_kernel_name) {
996   MS_EXCEPTION_IF_NULL(fusion_json);
997   MS_EXCEPTION_IF_NULL(fusion_kernel_name);
998   SetLicInfo(fusion_json);
999   // get input layer info
1000   std::vector<std::vector<mindspore::AnfNodePtr>> input_layers;
1001   std::map<const AnfNodePtr, FusionDataType> spec_data_input;
1002   if (!GetInputLayers(input_nodes, compute_nodes, &input_layers, &spec_data_input)) {
1003     return false;
1004   }
1005   // gen fusion scope_op json
1006   std::vector<nlohmann::json> compute_list;
1007   (*fusion_kernel_name) = kFusionKernelNamePrfix;
1008   // index: records the data input order for the fusion build, starting from 0
1009   static size_t index = 0;
1010   auto layer_iter = input_layers.begin();
1011   auto compute_op_iter = compute_nodes.begin();
1012   for (; compute_op_iter != compute_nodes.end(); ++compute_op_iter, ++layer_iter) {
1013     nlohmann::json compute_op_str;
1014     (void)GenFusionComputeJson(*compute_op_iter, &layer_iter, &compute_op_str, fusion_kernel_name, &index);
1015     compute_list.push_back(compute_op_str);
1016   }
1017   index = 0;
1018   // gen data input json
1019   std::vector<nlohmann::json> data_list;
1020   for (const auto &layer : input_layers) {
1021     for (const auto &data_input : layer) {
1022       nlohmann::json data_str;
1023       if (!GenFusionDataInputJson(data_input, spec_data_input, &data_str, &index)) {
1024         MS_LOG(INFO) << "Fusion error: gen fusion data input json failed.";
1025         return false;
1026       }
1027       data_list.push_back(data_str);
1028     }
1029   }
1030   index = 0;
1031   data_list.insert(data_list.end(), compute_list.begin(), compute_list.end());
1032   (*fusion_json)[kFusionOpList] = data_list;
1033   auto soc_version = TbeKernelJsonCreator::GetSocVersion();
1034   (*fusion_json)[kJSocVersion] = soc_version;
1035   return true;
1036 }
1037 
1038 void TbeKernelBuild::GenPreDescJson(nlohmann::json *output_desc) {
1039   MS_EXCEPTION_IF_NULL(output_desc);
1040   (*output_desc)[kJL1AddrOffset] = 0;
1041   (*output_desc)[kJL1FusionType] = -1;
1042   (*output_desc)[kJL1WorkspaceSize] = -1;
1043   (*output_desc)[kJAddrType] = 0;
1044 }
1045 
1046 void TbeKernelBuild::GenFusionComputeCommonJson(const mindspore::CNodePtr &cnode, nlohmann::json *compute_op_str,
1047                                                 std::string *fusion_kernel_name) {
1048   MS_EXCEPTION_IF_NULL(compute_op_str);
1049   MS_EXCEPTION_IF_NULL(fusion_kernel_name);
1050   MS_EXCEPTION_IF_NULL(cnode);
1051   // gen others
1052   auto origin_type = AnfAlgo::GetCNodeName(cnode);
1053   auto op_info_ptr = tbe::TbeDynamicShapeUtil::FindOp(origin_type, cnode);
1054   // replace special op type for buffer fusion op
1055   auto type = GetRealOpType(origin_type);
1056   (*compute_op_str)[kJtype] = type;
1057   (*compute_op_str)[kJDynamicCompileStatic] = op_info_ptr->dynamic_compile_static();
1058   auto func_name = op_info_ptr->kernel_name();
1059   (*compute_op_str)[kJFuncName] = func_name;
1060   (*compute_op_str)[kJInt64Mode] = false;
1061   (*compute_op_str)[kJModuleName] = std::string("impl.") + func_name;
1062   (*compute_op_str)[kJName] = cnode->fullname_with_scope();
1063   (*compute_op_str)[kJPattern] = GetNodeFusionType(cnode);
1064   (*compute_op_str)[kJPyModulePath] = kPyPath;
1065   (void)(*fusion_kernel_name).append("_");
1066   (void)(*fusion_kernel_name).append(func_name);
1067   // attr_desc
1068   TbeKernelJsonCreator json_creater(SINGLE_BUILD);
1069   nlohmann::json json_attr_args;
1070   json_creater.GenTbeAttrJson(cnode, op_info_ptr, &json_attr_args);
1071   nlohmann::json attr_desc;
1072   for (const auto &attr : json_attr_args) {
1073     if (attr[kJName] != "isRef" && attr[kJValid] == true) {
1074       attr_desc.push_back(attr[kJValue]);
1075     }
1076   }
1077   if (!attr_desc.empty()) {
1078     (*compute_op_str)[kJAttrDesc] = attr_desc;
1079   }
1080 }
1081 
1082 void TbeKernelBuild::GenSuffixDescJson(nlohmann::json *output_desc) {
1083   MS_EXCEPTION_IF_NULL(output_desc);
1084   (*output_desc)[kJSliceOffset] = nlohmann::json::array();
1085   (*output_desc)[kJSplitIndex] = 0;
1086   (*output_desc)[kJTotalShape] = nlohmann::json::array();
1087   (*output_desc)[kJValidShape] = nlohmann::json::array();
1088 }
1089 
1090 // anf_node: the node used to get the output desc (type/format/shape ...)
1091 // node_out_idx: node output index
1092 // desc_output_idx: index used when adding the desc to the json
1093 // nlohmann::json *output_desc: output parameter holding the generated desc
1094 // FusionDataType fusion_data_type: special processing of the json desc output shape [kFusionAddN, kFusionReLUGradV2]
1095 void TbeKernelBuild::GenDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx,
1096                                  size_t desc_output_idx, nlohmann::json *output_desc, FusionDataType fusion_data_type) {
1097   GenPreDescJson(output_desc);
1098   auto def_format = kOpFormat_NCHW;
1099   if (anf_node->isa<CNode>() && IsNeedChangeDefaultFormat(anf_node->cast<CNodePtr>())) {
1100     def_format = kOpFormat_NCDHW;
1101   }
1102   // data_type
1103   auto type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, node_out_idx);
1104   (*output_desc)[kJDataType] = tbe::TypeIdToString(type_id);
1105   // name
1106   std::string output_desc_name = anf_node->fullname_with_scope();
1107   if (node_out_idx > 0) {
1108     output_desc_name = output_desc_name + "_" + std::to_string(node_out_idx);
1109   }
1110   (*output_desc)[kJName] = output_desc_name;
1111   // ori_format
1112   (*output_desc)[kJOriFormat] = def_format;
1113   // ori_shape
1114   auto ori_shape = AnfAlgo::GetOutputInferShape(anf_node, node_out_idx);
1115   if (ori_shape.empty()) {
1116     ori_shape.emplace_back(1);
1117   }
1118   (*output_desc)[kJOriShape] = ori_shape;
1119   (*output_desc)[kJAddrType] = 0;
1120   // !! Note: output_index is only used by a node's outputs
1121   (*output_desc)[kJOutputIndex] = desc_output_idx;
1122   // shape
1123   auto shape = AnfAlgo::GetOutputDeviceShape(anf_node, node_out_idx);
1124   if (shape.empty()) {
1125     shape.emplace_back(1);
1126   }
1127   (*output_desc)[kJShape] = shape;
1128   // !! Note: format is only used by a data node's outputs
1129   auto format = AnfAlgo::GetOutputFormat(anf_node, node_out_idx);
1130   if (format == kOpFormat_DEFAULT) {
1131     format = ori_shape.size() == kNCHWShapeSize ? kOpFormat_NCHW : kOpFormat_ND;
1132   } else if (format == kOpFormat_FRAC_Z) {
1133     format = kOpFormat_FRACTAL_Z;
1134   }
1135   (*output_desc)[kJFormat] = format;
1136   // special node
1137   constexpr size_t DIM0 = 0;
1138   constexpr size_t DIM1 = 1;
1139   constexpr size_t DIM2 = 2;
1140   constexpr size_t DIM3 = 3;
1141   constexpr size_t DIM4 = 4;
1142   constexpr size_t C0 = 16;
1143   constexpr size_t kShapeSize5 = 5;
1144   if ((fusion_data_type == kFusionAddN || fusion_data_type == kFusionAdd) && shape.size() == kShapeSize5) {
1145     std::vector<size_t> spec_shape = {};
1146     (void)spec_shape.emplace_back(shape[DIM0]);
1147     (void)spec_shape.emplace_back(shape[DIM1]);
1148     (void)spec_shape.emplace_back(shape[DIM2] * shape[DIM3]);
1149     (void)spec_shape.emplace_back(shape[DIM4]);
1150     (*output_desc)[kJShape] = spec_shape;
1151   } else if (fusion_data_type == kFusionReLUGradV2) {
1152     std::vector<size_t> spec_shape = {};
1153     (void)spec_shape.emplace_back(shape[DIM0]);
1154     (void)spec_shape.emplace_back(shape[DIM1]);
1155     (void)spec_shape.emplace_back(shape[DIM2] * shape[DIM3]);
1156     (void)spec_shape.emplace_back(C0);
1157     (*output_desc)[kJShape] = spec_shape;
1158     (*output_desc)[kJDataType] = kVTypeBool;
1159   }
1160   GenSuffixDescJson(output_desc);
1161 }
1162 
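// output_data_desc is a copy of output_desc with "data_type" renamed to "dtype" and
// "name" removed, matching the layout expected under "output_data_desc".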
1163 void TbeKernelBuild::GenFusionOutputDescJson(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t node_out_idx,
1164                                              size_t desc_output_idx, nlohmann::json *output_desc,
1165                                              nlohmann::json *output_data_desc) {
1166   MS_EXCEPTION_IF_NULL(output_desc);
1167   MS_EXCEPTION_IF_NULL(output_data_desc);
1168   MS_EXCEPTION_IF_NULL(anf_node);
1169   GenDescJson(anf_node, node_out_idx, desc_output_idx, output_desc);
1170   *output_data_desc = *output_desc;
1171   (*output_data_desc)[kJDtype] = (*output_desc)[kJDataType];
1172   output_data_desc->erase(kJDataType);
1173   output_data_desc->erase(kJName);
1174 }
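// Clarifying note (added, with a hypothetical value): output_data_desc is a copy of output_desc
// in which "data_type" is renamed to "dtype" and "name" is dropped, e.g.
//   {"name": "op", "data_type": "float16", ...}  ->  {"dtype": "float16", ...}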
1175 
1176 void TbeKernelBuild::GenReusedOutputDesc(const std::shared_ptr<mindspore::AnfNode> &anf_node, size_t index,
1177                                          size_t output_index, nlohmann::json *output_desc, const size_t out_size) {
1178   (*output_desc)[kJAddrType] = 0;
1179   std::string output_desc_name = anf_node->fullname_with_scope() + "_" + std::to_string(index);
1180   (*output_desc)[kJName] = output_desc_name;
1181   (*output_desc)[kJOutputIndex] = output_index;
1182   std::vector<size_t> shape;
1183   (*output_desc)[kJShape] = shape;
1184   auto type_id = AnfAlgo::GetOutputDeviceDataType(anf_node, out_size - 1);
1185   (*output_desc)[kJDataType] = tbe::TypeIdToString(type_id);
1186   GenSuffixDescJson(output_desc);
1187 }
1188 
1189 bool TbeKernelBuild::GetSpecInputLayers(const std::string &op_name,
1190                                         const std::vector<mindspore::AnfNodePtr> &reorder_layer,
1191                                         std::map<const AnfNodePtr, FusionDataType> *spec_data_input) {
1192   MS_EXCEPTION_IF_NULL(spec_data_input);
1193   if ((op_name == kReluGradV2OpName || op_name == kAddNOpName || op_name == kTensorAddOpName) &&
1194       reorder_layer.empty()) {
1195     MS_LOG(INFO) << "Fusion error: node(" << op_name << ")'s input is null.";
1196     return false;
1197   }
1198   if (op_name == kReluGradV2OpName) {
1199     (*spec_data_input)[reorder_layer[0]] = kFusionReLUGradV2;
1200   } else if (op_name == kAddNOpName) {
1201     for (const auto &it : reorder_layer) {
1202       (*spec_data_input)[it] = kFusionAddN;
1203     }
1204   } else if (op_name == kTensorAddOpName) {
1205     (*spec_data_input)[reorder_layer[0]] = kFusionAdd;
1206   }
1207   return true;
1208 }
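// Illustrative note (assumption about typical usage): the map built here marks which fused data
// inputs need the special shape handling in GenDescJson, e.g. every reordered input of an AddN is
// tagged kFusionAddN, while ReluGradV2 and TensorAdd only tag their first reordered input.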
1209 
1210 // <input_nodes> : contains parameter/data nodes; their order may not match the tbe input order;
1211 // <compute_nodes> : contains cnodes; their input order may not match the tbe input order;
1212 // Specially processed node list: see tbe_adapter.cc [except: Conv2DBackpropInput]
1213 bool TbeKernelBuild::GetInputLayers(const std::vector<mindspore::AnfNodePtr> &input_nodes,
1214                                     const std::vector<mindspore::AnfNodePtr> &compute_nodes,
1215                                     std::vector<std::vector<mindspore::AnfNodePtr>> *input_layers,
1216                                     std::map<const AnfNodePtr, FusionDataType> *spec_data_input) {
1217   MS_EXCEPTION_IF_NULL(input_layers);
1218   MS_EXCEPTION_IF_NULL(spec_data_input);
1219   auto result = std::find_if(compute_nodes.begin(), compute_nodes.end(), [](const auto &it) {
1220     auto node_name = AnfAlgo::GetCNodeName(it);
1221     return (node_name == kConv2DBackpropInputOpName || node_name == kConv2DOpName);
1222   });
1223   bool need_spec = (result != compute_nodes.end());
1224   size_t input_size = 0;
1225   for (const auto &compute_node : compute_nodes) {
1226     std::vector<mindspore::AnfNodePtr> layer = {};
1227     std::vector<mindspore::AnfNodePtr> reorder_layer = {};
1228     MS_EXCEPTION_IF_NULL(compute_node);
1229     auto op_name = AnfAlgo::GetCNodeName(compute_node);
1230     auto ccompute_node = compute_node->cast<CNodePtr>();
1231     if (ccompute_node == nullptr) {
1232       MS_LOG(INFO) << "Fusion error: fusion compute node must be a cnode";
1233       return false;
1234     }
1235     for (size_t i = 1; i < ccompute_node->inputs().size(); ++i) {
1236       auto input = ccompute_node->input(i);
1237       auto find_iter = std::find(input_nodes.begin(), input_nodes.end(), input);
1238       if (find_iter != input_nodes.end()) {
1239         layer.emplace_back((*find_iter));
1240       }
1241     }
1242     TbeAdapter::FusionDataOrderPass(op_name, layer, &reorder_layer);
1243     if (need_spec) {
1244       if (!GetSpecInputLayers(op_name, reorder_layer, spec_data_input)) {
1245         return false;
1246       }
1247     }
1248     input_size += reorder_layer.size();
1249     input_layers->emplace_back(reorder_layer);
1250   }
1251   if (input_nodes.size() != input_size) {
1252     MS_LOG(INFO) << "Fusion error: fusion scope error, layer input:" << input_size
1253                  << ", input_node:" << input_nodes.size();
1254     return false;
1255   }
1256   return true;
1257 }
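// Worked example (hypothetical fusion scope): with compute_nodes = {Conv2D, TensorAdd}, where
// Conv2D consumes data inputs {x, w} and TensorAdd consumes {Conv2D, bias}, input_layers becomes
// {{x, w}, {bias}} (Conv2D itself is a compute node, not a data input) and input_size == 3, which
// must equal input_nodes.size() for the fusion scope to be accepted.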
1258 
1259 bool TbeKernelBuild::GenFusionDataInputJson(const std::shared_ptr<mindspore::AnfNode> &data_input,
1260                                             const std::map<const AnfNodePtr, FusionDataType> &spec_data_input,
1261                                             nlohmann::json *data_str, size_t *index) {
1262   MS_EXCEPTION_IF_NULL(data_str);
1263   MS_EXCEPTION_IF_NULL(index);
1264   std::vector<nlohmann::json> output_desc_list;
1265   // if data_input is null, this is an optional input.
1266   if (!data_input) {
1267     auto name = std::string(kOptional) + std::to_string(*index);
1268     (*data_str)[kJName] = name;
1269     nlohmann::json output_desc;
1270     output_desc[kJName] = name;
1271     output_desc[kJDataType] = 0;
1272     output_desc[kJShape] = "NULL";
1273     output_desc_list.push_back(output_desc);
1274     (*index)++;
1275   } else {
1276     FusionDataType fusion_data_type = kFusionNormal;
1277     if (spec_data_input.find(data_input) != spec_data_input.end()) {
1278       fusion_data_type = spec_data_input.at(data_input);
1279     }
1280     auto kernel_idx = AnfAlgo::VisitKernel(data_input, 0);
1281     auto real_node = kernel_idx.first;
1282     size_t real_idx = kernel_idx.second;
1283     // kJOutputDesc
1284     nlohmann::json output_desc;
1285     GenDescJson(real_node, real_idx, real_idx, &output_desc, fusion_data_type);
1286     output_desc_list.push_back(output_desc);
1287     auto full_name = real_node->fullname_with_scope();
1288     if (real_idx > 0) {
1289       full_name = full_name.append("_").append(std::to_string(real_idx));
1290     }
1291     (*data_str)[kJName] = full_name;
1292   }
1293   (*data_str)[kJOutputDesc] = output_desc_list;
1294   (*data_str)[kJtype] = "Data";
1295   return true;
1296 }
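// Illustrative sketch (assumed index value): a missing optional input produces a placeholder Data
// op roughly like
//   {"name": "optional_3", "output_desc": [{"name": "optional_3", "data_type": 0, "shape": "NULL"}],
//    "type": "Data"}
// whereas a real input reuses the descriptor that GenDescJson generates for the producing node.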
1297 
1298 bool TbeKernelBuild::IsDynamicInput(const mindspore::CNodePtr &cnode) {
1299   MS_EXCEPTION_IF_NULL(cnode);
1300   auto primitive = AnfAlgo::GetCNodePrimitive(cnode);
1301   MS_EXCEPTION_IF_NULL(primitive);
1302   // for a dynamic number of inputs, dyn_input_sizes records the dynamic input count for each dynamic input group.
1303   bool ret = false;
1304   std::vector<int64_t> dyn_input_sizes;
1305   auto dynamic_input_attr = primitive->GetAttr(kAttrDynInputSizes);
1306   if (dynamic_input_attr != nullptr) {
1307     dyn_input_sizes = GetValue<const std::vector<int64_t>>(dynamic_input_attr);
1308     auto real_input_size = cnode->inputs().size() - 1;
1309     auto dyn_input_size = dyn_input_sizes.size();
1310     if (dyn_input_size != 1) {
1311       MS_LOG(INFO) << "Fusion error: fusion build does not support dyn_input_sizes > 1";
1312       return ret;
1313     }
1314     if (LongToSize(dyn_input_sizes[0]) != real_input_size) {
1315       MS_LOG(INFO) << "Fusion error: dyn_input_size " << dyn_input_sizes[0] << " is not equal to real_input_size "
1316                    << real_input_size;
1317       return ret;
1318     }
1319     ret = true;
1320   }
1321   return ret;
1322 }
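// Example (hypothetical node): an AddN cnode with 4 real inputs and attribute dyn_input_sizes = {4}
// is treated as dynamic-input; dyn_input_sizes with more than one entry, or a first entry that does
// not equal the real input count, makes this check return false.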
1323 
1324 size_t TbeKernelBuild::GetOptionalInput(const mindspore::CNodePtr &cnode, bool is_dynamic_input) {
1325   MS_EXCEPTION_IF_NULL(cnode);
1326   if (is_dynamic_input) {
1327     // Node can not have optional & dynamic input.
1328     return 0;
1329   }
1330   MS_EXCEPTION_IF_NULL(cnode);
1331   auto node_name = AnfAlgo::GetCNodeName(cnode);
1332   auto op_info = tbe::TbeDynamicShapeUtil::FindOp(node_name, cnode);
1333   MS_EXCEPTION_IF_NULL(cnode);
1334   auto node_inputs_size = cnode->inputs().size();
1335   for (auto &input : cnode->inputs()) {
1336     if (HasAbstractMonad(input)) {
1337       node_inputs_size--;
1338     }
1339   }
1340   if (op_info->inputs_ptr().size() < (node_inputs_size - 1)) {
1341     MS_EXCEPTION(ArgumentError) << "op info error, node name:" << cnode->fullname_with_scope();
1342   }
1343   return (op_info->inputs_ptr().size() + 1 - node_inputs_size);
1344 }
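// Example (illustrative arithmetic only): if the op info registers 3 inputs while the cnode holds
// the primitive plus 2 non-monad inputs (node_inputs_size == 3), then optional_num = 3 + 1 - 3 = 1,
// i.e. one optional input placeholder has to be appended by the caller.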
1345 
1346 std::string TbeKernelBuild::GetRealOpType(const std::string &origin_type) {
1347   static std::map<std::string, std::string> buffer_fusion_op_map = {
1348     {parallel::DEPTHWISE_CONV2D_NATIVE, parallel::DEPTHWISE_CONV2D}};
1349   std::string result = origin_type;
1350   auto iter = buffer_fusion_op_map.find(origin_type);
1351   if (iter != buffer_fusion_op_map.end()) {
1352     result = iter->second;
1353   }
1354   return result;
1355 }
1356 
1357 std::string TbeKernelBuild::GetNodeFusionType(const mindspore::CNodePtr &cnode) {
1358   MS_EXCEPTION_IF_NULL(cnode);
1359   auto node_type = AnfAlgo::GetCNodeName(cnode);
1360   static std::map<std::string, std::string> fusion_type_map = {{kConv2DOpName, "Convolution"},
1361                                                                {kBNTrainingReduceOpName, "bn_reduce"},
1362                                                                {kBNTrainingUpdateOpName, "bn_update"},
1363                                                                {kReluV2OpName, "ElemWise"},
1364                                                                {kTensorAddOpName, "ElemWise"},
1365                                                                {kConv2DBackpropInputOpName, "Conv2d_backprop_input"},
1366                                                                {kConv2DBackpropFilterOpName, "Conv2d_backprop_filter"},
1367                                                                {kDepthwiseConv2dNativeOpName, "DepthwiseConvolution"},
1368                                                                {kAddNOpName, "ElemWise"},
1369                                                                {kReluGradV2OpName, "ElemWise"},
1370                                                                {kRealDivOpName, "ElemWise"},
1371                                                                {kBiasAddOpName, "BiasAdd"}};
1372   auto find = fusion_type_map.find(node_type);
1373   if (find == fusion_type_map.end()) {
1374     MS_LOG(INFO) << "Fusion warning: failed to get node fusion type from the list, origin node type: " << node_type;
1375     auto op_info = mindspore::kernel::tbe::TbeDynamicShapeUtil::FindOp(node_type, cnode);
1376     MS_EXCEPTION_IF_NULL(op_info);
1377     return op_info->fusion_type();
1378   } else {
1379     return find->second;
1380   }
1381 }
1382 
1383 bool TbeKernelBuild::GenFusionComputeInputJson(const mindspore::CNodePtr &cnode,
1384                                                std::vector<std::vector<mindspore::AnfNodePtr>>::iterator *layer_iter,
1385                                                std::vector<nlohmann::json> *input_desc_list, size_t *index) {
1386   MS_EXCEPTION_IF_NULL(cnode);
1387   MS_EXCEPTION_IF_NULL(input_desc_list);
1388   MS_EXCEPTION_IF_NULL(layer_iter);
1389   MS_EXCEPTION_IF_NULL(index);
1390   std::vector<nlohmann::json> input_desc_list_tmp = {};
1391   // 1. input json
1392   bool is_dynamic_input = IsDynamicInput(cnode);
1393   for (size_t i = 1; i < cnode->inputs().size(); ++i) {
1394     auto input = cnode->input(i);
1395     if (HasAbstractMonad(input)) {
1396       continue;
1397     }
1398     auto kernel_idx = AnfAlgo::VisitKernel(input, 0);
1399     auto real_node = kernel_idx.first;
1400     size_t real_idx = kernel_idx.second;
1401     nlohmann::json input_desc;
1402     GenDescJson(real_node, real_idx, real_idx, &input_desc);
1403     if (is_dynamic_input) {
1404       // 2. dynamic input json
1405       MS_LOG(INFO) << "Node has dynamic input.";
1406       input_desc[kJDynIndex] = (i - 1);
1407     }
1408     input_desc_list_tmp.emplace_back(input_desc);
1409   }
1410   size_t optional_num = GetOptionalInput(cnode, is_dynamic_input);
1411   if (optional_num > 0) {
1412     // 3. optional input
1413     for (size_t i = 0; i < optional_num; ++i) {
1414       nlohmann::json optional_input_desc;
1415       optional_input_desc[kJName] = std::string(kOptional) + std::to_string(*index);
1416       optional_input_desc[kJShape] = "NULL";
1417       (*index)++;
1418       (*layer_iter)->emplace_back(nullptr);
1419       input_desc_list_tmp.emplace_back(optional_input_desc);
1420     }
1421   }
1422   TbeAdapter::FusionInputOrderPass(cnode, input_desc_list_tmp, input_desc_list);
1423   return true;
1424 }
1425 
1426 std::vector<size_t> TbeKernelBuild::GetDescOutputIndex(const std::vector<int64_t> &output_used_nums) {
1427   std::vector<size_t> desc_output_index = {};
1428   for (size_t idx = 0; idx < output_used_nums.size(); ++idx) {
1429     auto output_use_num_item = output_used_nums[idx];
1430     desc_output_index.emplace_back(idx);
1431     if (output_use_num_item > 1) {
1432       desc_output_index.emplace_back(idx);
1433     }
1434   }
1435   return desc_output_index;
1436 }
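// Worked example: output_used_nums = {1, 2} yields desc_output_index = {0, 1, 1}; an output used
// more than once gets its index duplicated so that a reused output descriptor can be generated for
// it in GenFusionComputeOutputJson below.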
1437 
1438 bool TbeKernelBuild::GenFusionComputeOutputJson(const mindspore::CNodePtr &cnode,
1439                                                 std::vector<nlohmann::json> *output_desc_list,
1440                                                 std::vector<nlohmann::json> *output_data_desc_list) {
1441   MS_EXCEPTION_IF_NULL(output_desc_list);
1442   MS_EXCEPTION_IF_NULL(output_data_desc_list);
1443   auto output_size = AnfAlgo::GetOutputTensorNum(cnode);
1444   if (AnfAlgo::HasNodeAttr(kAttrOutputUsedNum, cnode)) {
1445     auto output_used_nums = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(cnode, kAttrOutputUsedNum);
1446     if (output_used_nums.size() != output_size) {
1447       MS_LOG(INFO) << "Fusion error: output tensor num(" << output_size << ")"
1448                    << " does not match output used num(" << output_used_nums.size() << ")";
1449       return false;
1450     }
1451     auto desc_output_index = GetDescOutputIndex(output_used_nums);
1452     for (size_t i = 0; i < output_size; ++i) {
1453       MS_LOG(INFO) << "Fusion index: " << i << ", desc_output_index: " << desc_output_index[i];
1454       nlohmann::json output_desc;
1455       nlohmann::json output_data_desc;
1456       GenFusionOutputDescJson(cnode, i, desc_output_index[i], &output_desc, &output_data_desc);
1457       output_data_desc_list->emplace_back(output_data_desc);
1458       output_desc_list->emplace_back(output_desc);
1459     }
1460     for (size_t j = output_size; j < desc_output_index.size(); ++j) {
1461       MS_LOG(INFO) << "Fusion index: " << j << ", desc_output_index: " << desc_output_index[j];
1462       nlohmann::json output_desc;
1463       GenReusedOutputDesc(cnode, j, desc_output_index[j], &output_desc, output_size);
1464       output_desc_list->emplace_back(output_desc);
1465     }
1466   } else {
1467     for (size_t i = 0; i < output_size; ++i) {
1468       nlohmann::json output_desc;
1469       nlohmann::json output_data_desc;
1470       GenFusionOutputDescJson(cnode, i, i, &output_desc, &output_data_desc);
1471       output_data_desc_list->emplace_back(output_data_desc);
1472       output_desc_list->emplace_back(output_desc);
1473     }
1474   }
1475   return true;
1476 }
1477 
1478 bool TbeKernelBuild::GenFusionComputeJson(const mindspore::AnfNodePtr &compute_node,
1479                                           std::vector<std::vector<mindspore::AnfNodePtr>>::iterator *layer_iter,
1480                                           nlohmann::json *compute_op_str, std::string *fusion_kernel_name,
1481                                           size_t *index) {
1482   MS_EXCEPTION_IF_NULL(compute_node);
1483   auto cnode = compute_node->cast<CNodePtr>();
1484   MS_EXCEPTION_IF_NULL(cnode);
1485   // gen input desc
1486   std::vector<nlohmann::json> input_desc_list;
1487   (void)GenFusionComputeInputJson(cnode, layer_iter, &input_desc_list, index);
1488   (*compute_op_str)[kJInputDesc] = input_desc_list;
1489   // gen output desc
1490   std::vector<nlohmann::json> output_desc_list;
1491   std::vector<nlohmann::json> output_data_desc_list;
1492   if (!GenFusionComputeOutputJson(cnode, &output_desc_list, &output_data_desc_list)) {
1493     MS_LOG(INFO) << "Fusion error: gen fusion output desc failed, node full name: " << cnode->fullname_with_scope();
1494     return false;
1495   }
1496   (*compute_op_str)[kJOutputDataDesc] = output_data_desc_list;
1497   (*compute_op_str)[kJOutputDesc] = output_desc_list;
1498   // gen common desc
1499   GenFusionComputeCommonJson(cnode, compute_op_str, fusion_kernel_name);
1500   return true;
1501 }
1502 
1503 size_t TbeKernelBuild::GetIOSizeImpl(const nlohmann::json &desc) {
1504   size_t ret = 1;
1505   for (const auto &shape_item : desc[kJShape]) {
1506     ret *= static_cast<size_t>(shape_item);
1507   }
1508   std::string data_type = desc[kJDataType];
1509   size_t nbyte = tbe::GetDtypeNbyte(data_type);
1510   ret *= nbyte;
1511   return ret;
1512 }
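// Worked example (assuming "float16" maps to 2 bytes): a descriptor with shape [2, 3, 16, 16] and
// data_type "float16" gives 2 * 3 * 16 * 16 = 1536 elements, i.e. 1536 * 2 = 3072 bytes.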
1513 
1514 void TbeKernelBuild::CalInputSize(const nlohmann::json &fusion_op_list, std::vector<size_t> *input_size_list) {
1515   MS_EXCEPTION_IF_NULL(input_size_list);
1516   // cal input size for malloc
1517   for (const auto &op : fusion_op_list) {
1518     if (op[kJtype] == "Data") {
1519       const auto &data_output_desc = op[kJOutputDesc];
1520       for (const auto &data_output : data_output_desc) {
1521         if (data_output[kJShape] == "NULL") {
1522           break;
1523         }
1524         input_size_list->push_back(GetIOSizeImpl(data_output));
1525       }
1526     }
1527   }
1528 }
1529 
1530 bool TbeKernelBuild::CalOutputSize(const nlohmann::json &fusion_op_list,
1531                                    const std::vector<mindspore::AnfNodePtr> &output_nodes,
1532                                    std::vector<size_t> *output_size_list) {
1533   MS_EXCEPTION_IF_NULL(output_size_list);
1534   // cal output size for malloc
1535   for (const auto &output_node : output_nodes) {
1536     auto kernel_idx = AnfAlgo::VisitKernel(output_node, 0);
1537     auto real_node = kernel_idx.first;
1538     size_t real_idx = kernel_idx.second;
1539     auto full_name = real_node->fullname_with_scope();
1540     for (const auto &op : fusion_op_list) {
1541       if (op[kJName] != full_name) {
1542         continue;
1543       }
1544       auto op_output_desces = op[kJOutputDesc];
1545       if (output_node != real_node) {
1546         // tuple_get item
1547         auto output_desc = op_output_desces[real_idx];
1548         if (output_desc[kJShape].empty()) {
1549           MS_LOG(INFO) << "Fusion error: output_desc's shape is empty. real_index " << real_idx;
1550           return false;
1551         }
1552         output_size_list->push_back(GetIOSizeImpl(output_desc));
1553       } else {
1554         for (const auto &output_desc : op_output_desces) {
1555           if (output_desc[kJShape].empty()) {
1556             continue;
1557           }
1558           output_size_list->push_back(GetIOSizeImpl(output_desc));
1559         }
1560       }
1561     }
1562   }
1563   return true;
1564 }
1565 
1566 bool TbeKernelBuild::GetIOSize(const nlohmann::json &fusion_op_list,
1567                                const std::vector<mindspore::AnfNodePtr> &output_nodes,
1568                                std::vector<size_t> *input_size_list, std::vector<size_t> *output_size_list) {
1569   MS_EXCEPTION_IF_NULL(input_size_list);
1570   MS_EXCEPTION_IF_NULL(output_size_list);
1571   input_size_list->clear();
1572   output_size_list->clear();
1573   // cal input size for malloc
1574   CalInputSize(fusion_op_list, input_size_list);
1575   // cal output size for malloc
1576   return CalOutputSize(fusion_op_list, output_nodes, output_size_list);
1577 }
1578 }  // namespace kernel
1579 }  // namespace mindspore
1580