• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include "runtime/device/ascend/dump/data_dumper.h"
17 
18 #include <map>
19 #include <memory>
20 #include <string>
21 #include <algorithm>
22 #include <limits>
23 #include "utility"
24 #include "backend/session/anf_runtime_algorithm.h"
25 #include "utils/convert_utils_base.h"
26 #include "runtime/mem.h"
27 #include "runtime/kernel.h"
28 #include "runtime/rt_model.h"
29 #include "runtime/device/ascend/ge_types_convert.h"
30 #include "proto/op_mapping_info.pb.h"
31 #include "utils/comm_manager.h"
32 #include "utils/ms_context.h"
33 #ifndef ENABLE_SECURITY
34 #include "debug/data_dump/dump_json_parser.h"
35 #endif
36 #ifdef ENABLE_DEBUGGER
37 #include "debug/debugger/debugger.h"
38 #endif
39 
// Flags written into OpMappingInfo telling the aicpu side to load / unload.
static constexpr uint32_t kAicpuLoadFlag = 1;
static constexpr uint32_t kAicpuUnloadFlag = 0;
// Element indices of the (task_id, stream_id, args) runtime-info tuple.
static constexpr uint32_t kTupleTaskId = 0;
static constexpr uint32_t kTupleStreamId = 1;
static constexpr uint32_t kTupleArgs = 2;
// Positions of the loop-sink control tensors inside input_ctrl_tensors.
static constexpr uint32_t kCurrentStepTensorIndex = 0;
static constexpr uint32_t kCurrentEpochTensorIndex = 2;
static constexpr uint32_t kStepsPerEpochTensorIndex = 3;
// Sizes of the op-debug (overflow detection) buffers.
static constexpr uint64_t kOpDebugShape = 2048;
static constexpr uint64_t kOpDebugHostMemSize = 2048;
static constexpr uint64_t kOpDebugDevMemSize = sizeof(void *);
// Op-debug overflow-detection modes (bit flags; kAllOverflow is both bits).
static constexpr uint8_t kNoOverflow = 0;
static constexpr uint8_t kAiCoreOverflow = 0x1;
static constexpr uint8_t kAtomicOverflow = (0x1 << 1);
static constexpr uint8_t kAllOverflow = (kAiCoreOverflow | kAtomicOverflow);
// Human-readable names for the overflow modes above.
static const std::map<uint32_t, std::string> kOverflowModeStr = {{kNoOverflow, "NoOverflow"},
                                                                 {kAiCoreOverflow, "AiCoreOverflow"},
                                                                 {kAtomicOverflow, "AtomicOverflow"},
                                                                 {kAllOverflow, "AllOverflow"}};
// Placeholder node name / op type used for the synthetic op-debug dump task.
constexpr const char *kNodeNameOpDebug = "Node_OpDebug";
constexpr const char *kOpTypeOpDebug = "Opdebug";
61 
62 namespace mindspore {
63 namespace device {
64 namespace ascend {
~DataDumper()65 DataDumper::~DataDumper() {
66   kernel_graph_ = nullptr;
67   ReleaseDevMem(&dev_load_mem_);
68   ReleaseDevMem(&dev_unload_mem_);
69   ReleaseDevMem(&op_debug_buffer_addr_);
70   ReleaseDevMem(&op_debug_dump_args_);
71 }
72 
73 #ifndef ENABLE_SECURITY
GetNeedDumpKernelList(NotNull<std::map<std::string,CNodePtr> * > kernel_map) const74 void DataDumper::GetNeedDumpKernelList(NotNull<std::map<std::string, CNodePtr> *> kernel_map) const {
75   MS_EXCEPTION_IF_NULL(kernel_graph_);
76   for (const auto &kernel : kernel_graph_->execution_order()) {
77     MS_EXCEPTION_IF_NULL(kernel);
78     if (AnfAlgo::GetKernelType(kernel) == HCCL_KERNEL &&
79         DumpJsonParser::GetInstance().NeedDump(kernel->fullname_with_scope())) {
80       auto input_size = AnfAlgo::GetInputTensorNum(kernel);
81       for (size_t i = 0; i < input_size; ++i) {
82         auto input_with_index = AnfAlgo::GetPrevNodeOutput(kernel, i);
83         auto input = input_with_index.first;
84         MS_EXCEPTION_IF_NULL(input);
85         if (input->isa<CNode>()) {
86           MS_LOG(INFO) << "[AsyncDump] Match Hccl Node:" << kernel->fullname_with_scope()
87                        << " Input:" << input->fullname_with_scope();
88           auto it = kernel_map->try_emplace(input->fullname_with_scope(), input->cast<CNodePtr>());
89           if (!it.second) {
90             MS_LOG(INFO) << "Node name already exist: " << input->fullname_with_scope();
91           }
92         }
93       }
94     } else if (KernelNeedDump(kernel)) {
95       MS_LOG(INFO) << "[AsyncDump] Match Node:" << kernel->fullname_with_scope();
96       auto it = kernel_map->try_emplace(kernel->fullname_with_scope(), kernel);
97       if (!it.second) {
98         MS_LOG(INFO) << "Node name already exist: " << kernel->fullname_with_scope();
99       }
100     }
101   }
102 }
103 
LoadDumpInfo()104 void DataDumper::LoadDumpInfo() {
105   MS_LOG(INFO) << "[DataDump] LoadDumpInfo start";
106   MS_EXCEPTION_IF_NULL(kernel_graph_);
107   aicpu::dump::OpMappingInfo dump_info;
108   SetOpDebugMappingInfo(NOT_NULL(&dump_info));
109   SetOpMappingInfo(NOT_NULL(&dump_info));
110 
111   auto kernels = kernel_graph_->execution_order();
112   for (const auto &kernel : kernels) {
113     MS_EXCEPTION_IF_NULL(kernel);
114     if (!KernelNeedDump(kernel)) {
115       continue;
116     }
117     MS_LOG(INFO) << "[DataDump] LoadDumpInfo kernel:" << kernel->UniqueName();
118     dump_kernel_names_.emplace_back(kernel->UniqueName());
119     DumpJsonParser::GetInstance().MatchKernel(kernel->fullname_with_scope());
120 
121     aicpu::dump::Task task;
122     ConstructDumpTask(NOT_NULL(kernel), NOT_NULL(&task));
123     MS_EXCEPTION_IF_NULL(dump_info.mutable_task());
124     dump_info.mutable_task()->Add(std::move(task));
125   }
126   RtLoadDumpData(dump_info, &dev_load_mem_);
127   load_flag_ = true;
128   // graph id may changed in Unload
129   graph_id_ = kernel_graph_->graph_id();
130   MS_LOG(INFO) << "[DataDump] LoadDumpInfo end";
131 }
132 
SetOpMappingInfo(NotNull<aicpu::dump::OpMappingInfo * > dump_info) const133 void DataDumper::SetOpMappingInfo(NotNull<aicpu::dump::OpMappingInfo *> dump_info) const {
134   MS_LOG(INFO) << "SetOpMappinglnfo Start.";
135   auto context_ptr = MsContext::GetInstance();
136   MS_EXCEPTION_IF_NULL(context_ptr);
137   MS_EXCEPTION_IF_NULL(kernel_graph_);
138   auto dump_path = DumpJsonParser::GetInstance().path();
139   const auto &input_ctrl_tensors = kernel_graph_->input_ctrl_tensors();
140   constexpr size_t kLoopSinkCtrlTensorNum = 3;  // cur step, cur epoch, steps per epoch
141   bool valid_ctrl_tensors = input_ctrl_tensors != nullptr && input_ctrl_tensors->size() >= kLoopSinkCtrlTensorNum;
142   std::string net_name = DumpJsonParser::GetInstance().net_name();
143   std::string iteration = DumpJsonParser::GetInstance().iteration_string();
144 
145   if (dump_path.empty()) {
146     MS_LOG(EXCEPTION) << "Dump path invalid";
147   }
148   uint32_t graph_id = kernel_graph_->graph_id();
149   uint32_t rank_id = 0;
150 
151   auto ms_context = MsContext::GetInstance();
152   MS_EXCEPTION_IF_NULL(ms_context);
153   auto env_rank_id = common::GetEnv("RANK_ID");
154   if (ms_context->get_param<bool>(MS_CTX_ENABLE_HCCL) && !env_rank_id.empty()) {
155     // get actual rank id if it's distribution training case.
156     if (!CommManager::GetInstance().GetRankID(kHcclWorldGroup, &rank_id)) {
157       MS_LOG(INFO) << "Failed to get rank id.";
158     }
159   }
160   dump_info->set_dump_path("/" + dump_path + "/rank_" + std::to_string(rank_id) + "/");
161   MS_LOG(INFO) << "[DataDump] dump_path: " << dump_path;
162 
163   dump_info->set_model_name(net_name);
164   MS_LOG(INFO) << "[DataDump] model_name: " << net_name;
165 
166   MS_LOG(INFO) << "[DataDump] iteration_pre: " << iteration;
167   if (iteration == "all") {
168     iteration = "0-" + std::to_string(ULONG_MAX);
169   }
170   MS_LOG(INFO) << "[DataDump] iteration_post: " << iteration;
171   dump_info->set_dump_step(iteration);
172 
173   dump_info->set_model_id(graph_id);
174   dump_info->set_flag(kAicpuLoadFlag);
175 
176   if (!valid_ctrl_tensors) {
177     MS_LOG(INFO) << "[DataDump] input_ctrl_tensors not valid.";
178     return;
179   }
180   const auto &current_step_tensor = input_ctrl_tensors->at(kCurrentStepTensorIndex);
181   const auto &currnet_epoch_tensor = input_ctrl_tensors->at(kCurrentEpochTensorIndex);
182   const auto &steps_per_epoch_tensor = input_ctrl_tensors->at(kStepsPerEpochTensorIndex);
183 
184   MS_EXCEPTION_IF_NULL(current_step_tensor);
185   MS_EXCEPTION_IF_NULL(currnet_epoch_tensor);
186   MS_EXCEPTION_IF_NULL(steps_per_epoch_tensor);
187   MS_EXCEPTION_IF_NULL(current_step_tensor->device_address());
188   MS_EXCEPTION_IF_NULL(currnet_epoch_tensor->device_address());
189   MS_EXCEPTION_IF_NULL(steps_per_epoch_tensor->device_address());
190 
191   void *current_step = current_step_tensor->device_address()->GetMutablePtr();
192   void *current_epoch = currnet_epoch_tensor->device_address()->GetMutablePtr();
193   void *steps_per_epoch = steps_per_epoch_tensor->device_address()->GetMutablePtr();
194 
195   if (current_epoch != nullptr && current_step != nullptr && steps_per_epoch != nullptr) {
196     dump_info->set_step_id_addr(reinterpret_cast<uint64_t>(current_epoch));
197     dump_info->set_loop_cond_addr(reinterpret_cast<uint64_t>(current_step));
198     dump_info->set_iterations_per_loop_addr(reinterpret_cast<uint64_t>(steps_per_epoch));
199   } else {
200     MS_LOG(INFO) << "Invalid ctrl tensor device address";
201   }
202   MS_LOG(INFO) << "SetOpMappinglnfo End.";
203 }
204 
KernelNeedDump(const CNodePtr & kernel) const205 bool DataDumper::KernelNeedDump(const CNodePtr &kernel) const {
206   if (AnfAlgo::GetKernelType(kernel) != TBE_KERNEL && AnfAlgo::GetKernelType(kernel) != AICPU_KERNEL &&
207       AnfAlgo::GetKernelType(kernel) != AKG_KERNEL) {
208     return false;
209   }
210   MS_EXCEPTION_IF_NULL(kernel);
211   // dump all kernel if mode is set 0 in data_dump.json
212   return DumpJsonParser::GetInstance().NeedDump(kernel->fullname_with_scope());
213 }
214 #endif
215 
UnloadDumpInfo()216 void DataDumper::UnloadDumpInfo() {
217   if (!load_flag_) {
218     MS_LOG(WARNING) << "[DataDump] Load not success, no need to unload";
219     return;
220   }
221   MS_LOG(INFO) << "[DataDump] UnloadDumpInfo start. graphId:" << graph_id_;
222 
223   aicpu::dump::OpMappingInfo op_mapping_info;
224   op_mapping_info.set_model_id(graph_id_);
225   op_mapping_info.set_flag(kAicpuUnloadFlag);
226 
227   for (const auto &kernel_name : dump_kernel_names_) {
228     aicpu::dump::Task task;
229     auto iter = runtime_info_map_.find(kernel_name);
230     if (iter == runtime_info_map_.end()) {
231       MS_LOG(EXCEPTION) << "[DataDump] kernel name not found in runtime_info_map";
232     }
233     MS_EXCEPTION_IF_NULL(iter->second);
234     auto task_id = std::get<kTupleTaskId>(*iter->second);
235     task.set_task_id(task_id);
236     MS_EXCEPTION_IF_NULL(op_mapping_info.mutable_task());
237     op_mapping_info.mutable_task()->Add(std::move(task));
238   }
239 
240   RtLoadDumpData(op_mapping_info, &dev_unload_mem_);
241 }
242 
ReleaseDevMem(void ** ptr) const243 void DataDumper::ReleaseDevMem(void **ptr) const noexcept {
244   if (ptr == nullptr) {
245     return;
246   }
247   if (*ptr != nullptr) {
248     rtError_t rt_error = rtFree(*ptr);
249     if (rt_error != RT_ERROR_NONE) {
250       MS_LOG(ERROR) << "[DataDump] Call rtFree failed, ret:" << rt_error;
251     }
252     *ptr = nullptr;
253   }
254 }
255 
ConstructDumpTask(NotNull<const CNodePtr &> kernel,NotNull<aicpu::dump::Task * > dump_task) const256 void DataDumper::ConstructDumpTask(NotNull<const CNodePtr &> kernel, NotNull<aicpu::dump::Task *> dump_task) const {
257   dump_task->set_end_graph(false);
258   auto iter = runtime_info_map_.find(kernel->UniqueName());
259   if (iter == runtime_info_map_.end()) {
260     MS_LOG(EXCEPTION) << "[DataDump] kernel name not found in runtime_info_map";
261   }
262   MS_EXCEPTION_IF_NULL(iter->second);
263   auto task_id = std::get<kTupleTaskId>(*iter->second);
264   auto stream_id = std::get<kTupleStreamId>(*iter->second);
265 #ifndef ENABLE_SECURITY
266   auto args = std::get<kTupleArgs>(*iter->second);
267 #endif
268   MS_LOG(INFO) << "[DataDump] Get runtime info task_id:" << task_id << " stream_id:" << stream_id;
269 
270   dump_task->set_task_id(task_id);
271   dump_task->set_stream_id(stream_id);
272   MS_EXCEPTION_IF_NULL(dump_task->mutable_op());
273   dump_task->mutable_op()->set_op_name(kernel->fullname_with_scope());
274   dump_task->mutable_op()->set_op_type(AnfAlgo::GetCNodeName(kernel.get()));
275 
276 #ifndef ENABLE_SECURITY
277   DumpKernelOutput(kernel, args, dump_task);
278   DumpKernelInput(kernel, args, dump_task);
279 #endif
280 }
281 
SetOpDebugMappingInfo(const NotNull<aicpu::dump::OpMappingInfo * > dump_info) const282 void DataDumper::SetOpDebugMappingInfo(const NotNull<aicpu::dump::OpMappingInfo *> dump_info) const {
283   MS_LOG(INFO) << "[DataDump] Add op debug info to OpMappingInfo, task id = " << debug_task_id_
284                << ", stream id = " << debug_stream_id_;
285   aicpu::dump::Task task;
286   task.set_end_graph(false);
287   task.set_task_id(debug_task_id_);
288   task.set_stream_id(debug_stream_id_);
289   MS_EXCEPTION_IF_NULL(task.mutable_op());
290   task.mutable_op()->set_op_name(kNodeNameOpDebug);
291   task.mutable_op()->set_op_type(kOpTypeOpDebug);
292 
293   aicpu::dump::Output output;
294   output.set_data_type(ge::proto::DataType::DT_UINT8);
295   output.set_format(ge::Format::FORMAT_ND);
296 
297   MS_EXCEPTION_IF_NULL(output.mutable_shape());
298   output.mutable_shape()->add_dim(kOpDebugShape);
299 
300   output.set_original_name(kNodeNameOpDebug);
301   output.set_original_output_index(0);
302   output.set_original_output_format(ge::Format::FORMAT_ND);
303   output.set_original_output_data_type(ge::proto::DataType::DT_UINT8);
304   // due to lhisi virtual addr bug, cannot use args now
305   output.set_address(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(op_debug_dump_args_)));
306   output.set_size(kOpDebugHostMemSize);
307 
308   MS_EXCEPTION_IF_NULL(task.mutable_output());
309   task.mutable_output()->Add(std::move(output));
310   MS_EXCEPTION_IF_NULL(dump_info->mutable_task());
311   dump_info->mutable_task()->Add(std::move(task));
312 }
313 
314 #ifndef ENABLE_SECURITY
OpDebugRegister()315 void DataDumper::OpDebugRegister() {
316   uint32_t op_debug_mode = DumpJsonParser::GetInstance().op_debug_mode();
317   auto iter = kOverflowModeStr.find(op_debug_mode);
318   if (iter == kOverflowModeStr.end()) {
319     MS_LOG(EXCEPTION) << "Invalid op debug mode " << op_debug_mode;
320   }
321   MS_LOG(INFO) << "[DataDump] Op debug mode is " << iter->second;
322   if (op_debug_mode == kNoOverflow) {
323     return;
324   }
325 
326   rtError_t rt_ret = rtMalloc(&op_debug_buffer_addr_, kOpDebugHostMemSize, RT_MEMORY_DDR);
327   if (rt_ret != RT_ERROR_NONE) {
328     MS_LOG(EXCEPTION) << "[DataDump] Call rtMalloc failed, ret = " << rt_ret;
329   }
330 
331   rt_ret = rtMalloc(&op_debug_dump_args_, kOpDebugDevMemSize, RT_MEMORY_HBM);
332   if (rt_ret != RT_ERROR_NONE) {
333     MS_LOG(EXCEPTION) << "[DataDump] Call rtMalloc failed, ret = " << rt_ret;
334   }
335 
336   rt_ret =
337     rtMemcpy(op_debug_dump_args_, sizeof(void *), &op_debug_buffer_addr_, sizeof(void *), RT_MEMCPY_HOST_TO_DEVICE);
338   if (rt_ret != RT_ERROR_NONE) {
339     MS_LOG(EXCEPTION) << "[DataDump] Call rtMemcpy failed, ret = " << rt_ret;
340   }
341 
342   rt_ret = rtDebugRegister(model_handle_(), op_debug_mode, op_debug_buffer_addr_, &debug_stream_id_, &debug_task_id_);
343   if (rt_ret != RT_ERROR_NONE) {
344     MS_LOG(EXCEPTION) << "[DataDump] Call rtDebugRegister failed, ret = " << rt_ret;
345   }
346 
347   MS_LOG(INFO) << "[DataDump] Distribute op debug task, task id = " << debug_task_id_
348                << ", stream id = " << debug_stream_id_;
349 }
350 
OpDebugUnregister()351 void DataDumper::OpDebugUnregister() {
352   uint32_t op_debug_mode = DumpJsonParser::GetInstance().op_debug_mode();
353   if (op_debug_mode == kNoOverflow) {
354     MS_LOG(INFO) << "[DataDump] Op debug mode is no overflow, no need to unregister.";
355     return;
356   }
357 
358   MS_LOG(INFO) << "[DataDump] Start.";
359   rtError_t rt_ret = rtDebugUnRegister(model_handle_());
360   if (rt_ret != RT_ERROR_NONE) {
361     MS_LOG(EXCEPTION) << "[DataDump] Call rtDebugUnRegister failed, ret = " << rt_ret;
362   }
363 }
364 #endif
365 
RtLoadDumpData(const aicpu::dump::OpMappingInfo & dump_info,void ** ptr)366 void DataDumper::RtLoadDumpData(const aicpu::dump::OpMappingInfo &dump_info, void **ptr) {
367   std::string proto_str;
368   size_t proto_size = dump_info.ByteSizeLong();
369   bool ret = dump_info.SerializeToString(&proto_str);
370   if (!ret || proto_size == 0) {
371     MS_LOG(EXCEPTION) << "[DataDump] Protobuf SerializeToString failed, proto size %zu.";
372   }
373 
374   if (ptr == nullptr) {
375     MS_LOG(ERROR) << "[DataDump] rtMalloc failed, ptr is nullptr";
376     return;
377   }
378 
379   rtError_t rt_ret = rtMalloc(ptr, proto_size, RT_MEMORY_HBM);
380   if (rt_ret != RT_ERROR_NONE) {
381     MS_LOG(EXCEPTION) << "[DataDump] Call rtMalloc failed";
382   }
383   rt_ret = rtMemcpy(*ptr, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE);
384   if (rt_ret != RT_ERROR_NONE) {
385     MS_LOG(EXCEPTION) << "[DataDump] Call rtMemcpy failed";
386   }
387 
388   MS_LOG(INFO) << "[DataDump] rtDatadumpInfoLoad start";
389   rt_ret = rtDatadumpInfoLoad(*ptr, SizeToUint(proto_size));
390   if (rt_ret != RT_ERROR_NONE) {
391     MS_LOG(EXCEPTION) << "[DataDump] Call rtDatadumpInfoLoad failed";
392   }
393 }
394 
SetDumpShape(const std::vector<size_t> & ms_shape,NotNull<aicpu::dump::Shape * > dump_shape)395 void SetDumpShape(const std::vector<size_t> &ms_shape, NotNull<aicpu::dump::Shape *> dump_shape) {
396   for (auto &dim : ms_shape) {
397     dump_shape->add_dim(dim);
398   }
399 }
400 
401 #ifndef ENABLE_SECURITY
DumpKernelOutput(const CNodePtr & kernel,void * args,NotNull<aicpu::dump::Task * > task)402 void DataDumper::DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task) {
403   if (!DumpJsonParser::GetInstance().OutputNeedDump()) {
404     MS_LOG(INFO) << "Skip dump output";
405     return;
406   }
407   if (HasAbstractMonad(kernel)) {
408     MS_LOG(WARNING) << "Skip Monad node output:" << kernel->fullname_with_scope();
409     return;
410   }
411   MS_LOG(INFO) << "[DataDump] DumpKernelOutput start. Kernel:" << kernel->fullname_with_scope();
412   auto input_size = AnfAlgo::GetInputTensorNum(kernel);
413   auto output_size = AnfAlgo::GetOutputTensorNum(kernel);
414   uint64_t offset = sizeof(void *) * input_size;
415   for (size_t i = 0; i < output_size; ++i) {
416     auto data_type = AnfAlgo::GetOutputDeviceDataType(kernel, i);
417     auto output_format = AnfAlgo::GetOutputFormat(kernel, i);
418     auto output_shape = AnfAlgo::GetOutputDeviceShape(kernel, i);
419     auto output_origin_shape = AnfAlgo::GetOutputInferShape(kernel, i);
420 
421     aicpu::dump::Output output;
422     output.set_data_type(GeTypesConvert::GetGeDataType(data_type));
423     output.set_format(GeTypesConvert::GetGeFormat(output_format, output_shape.size()));
424     SetDumpShape(output_shape, NOT_NULL(output.mutable_shape()));
425     SetDumpShape(output_origin_shape, NOT_NULL(output.mutable_origin_shape()));
426 
427     output.set_original_output_format(GeTypesConvert::GetGeFormat(output_format, output_shape.size()));
428     output.set_address(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + offset);
429     // device address data size
430     auto address = AnfAlgo::GetOutputAddr(kernel, i);
431     MS_EXCEPTION_IF_NULL(address);
432     output.set_size(address->GetSize());
433     MS_LOG(INFO) << "[DataDump] output " << i << " address size:" << output.size();
434     MS_EXCEPTION_IF_NULL(task->mutable_output());
435     task->mutable_output()->Add(std::move(output));
436     offset = SizetAddWithOverflowCheck(offset, sizeof(void *));
437   }
438 }
439 
DumpKernelInput(const CNodePtr & kernel,void * args,NotNull<aicpu::dump::Task * > task)440 void DataDumper::DumpKernelInput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task) {
441   if (!DumpJsonParser::GetInstance().InputNeedDump()) {
442     MS_LOG(INFO) << "Skip dump input";
443     return;
444   }
445   MS_EXCEPTION_IF_NULL(kernel);
446   if (AnfAlgo::IsNodeInputContainMonad(kernel)) {
447     MS_LOG(WARNING) << "Skip Monad node:" << kernel->fullname_with_scope();
448     return;
449   }
450   MS_LOG(INFO) << "[DataDump] DumpKernelInput start. Kernel:" << kernel->fullname_with_scope();
451   auto input_size = AnfAlgo::GetInputTensorNum(kernel);
452   uint64_t offset = 0;
453   for (size_t i = 0; i < input_size; ++i) {
454     aicpu::dump::Input input;
455     auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(kernel, i);
456     auto input_node = input_node_with_index.first;
457     auto input_index = input_node_with_index.second;
458     std::string output_format = AnfAlgo::GetOutputFormat(input_node, input_index);
459     auto output_type = AnfAlgo::GetOutputDeviceDataType(input_node, input_index);
460     if (output_type == kTypeUnknown) {
461       MS_LOG(WARNING) << "[DataDump] It is not suggested to use a lonely weight parameter as the output of graph";
462       output_type = AnfAlgo::GetOutputInferDataType(input_node, input_index);
463     }
464     auto output_shape = AnfAlgo::GetOutputDeviceShape(input_node, input_index);
465     auto output_origin_shape = AnfAlgo::GetOutputInferShape(input_node, input_index);
466 
467     input.set_data_type(GeTypesConvert::GetGeDataType(output_type));
468     input.set_format(GeTypesConvert::GetGeFormat(output_format, output_shape.size()));
469     SetDumpShape(output_shape, NOT_NULL(input.mutable_shape()));
470     SetDumpShape(output_origin_shape, NOT_NULL(input.mutable_origin_shape()));
471 
472     input.set_address(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + offset);
473     // device  address data size
474     auto address = AnfAlgo::GetPrevNodeOutputAddr(kernel, i);
475     MS_EXCEPTION_IF_NULL(address);
476     input.set_size(address->GetSize());
477     MS_LOG(INFO) << "[DataDump] input " << i << " address size:" << input.size();
478     MS_EXCEPTION_IF_NULL(task->mutable_input());
479     task->mutable_input()->Add(std::move(input));
480     offset = SizetAddWithOverflowCheck(offset, sizeof(void *));
481   }
482 }
483 #endif
484 
StripUniqueId(const std::string node_name)485 std::string DataDumper::StripUniqueId(const std::string node_name) {
486   size_t last_underscore = node_name.find_last_of('_');
487   std::string stripped_node_name;
488   if (last_underscore == string::npos) {
489     MS_LOG(ERROR) << "Could not strip unique ID from " << node_name;
490     stripped_node_name = node_name;
491   } else {
492     stripped_node_name = node_name.substr(0, last_underscore);
493   }
494   return stripped_node_name;
495 }
496 }  // namespace ascend
497 }  // namespace device
498 }  // namespace mindspore
499