/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "runtime/device/ascend/dump/data_dumper.h"

#include <map>
#include <memory>
#include <string>
#include <algorithm>
#include <limits>
#include <utility>
#include "backend/session/anf_runtime_algorithm.h"
#include "utils/convert_utils_base.h"
#include "runtime/mem.h"
#include "runtime/kernel.h"
#include "runtime/rt_model.h"
#include "runtime/device/ascend/ge_types_convert.h"
#include "proto/op_mapping_info.pb.h"
#include "utils/comm_manager.h"
#include "utils/ms_context.h"
#ifndef ENABLE_SECURITY
#include "debug/data_dump/dump_json_parser.h"
#endif
#ifdef ENABLE_DEBUGGER
#include "debug/debugger/debugger.h"
#endif

static constexpr uint32_t kAicpuLoadFlag = 1;
static constexpr uint32_t kAicpuUnloadFlag = 0;
static constexpr uint32_t kTupleTaskId = 0;
static constexpr uint32_t kTupleStreamId = 1;
static constexpr uint32_t kTupleArgs = 2;
static constexpr uint32_t kCurrentStepTensorIndex = 0;
static constexpr uint32_t kCurrentEpochTensorIndex = 2;
static constexpr uint32_t kStepsPerEpochTensorIndex = 3;
static constexpr uint64_t kOpDebugShape = 2048;
static constexpr uint64_t kOpDebugHostMemSize = 2048;
static constexpr uint64_t kOpDebugDevMemSize = sizeof(void *);
static constexpr uint8_t kNoOverflow = 0;
static constexpr uint8_t kAiCoreOverflow = 0x1;
static constexpr uint8_t kAtomicOverflow = (0x1 << 1);
static constexpr uint8_t kAllOverflow = (kAiCoreOverflow | kAtomicOverflow);
static const std::map<uint32_t, std::string> kOverflowModeStr = {{kNoOverflow, "NoOverflow"},
                                                                 {kAiCoreOverflow, "AiCoreOverflow"},
                                                                 {kAtomicOverflow, "AtomicOverflow"},
                                                                 {kAllOverflow, "AllOverflow"}};
constexpr const char *kNodeNameOpDebug = "Node_OpDebug";
constexpr const char *kOpTypeOpDebug = "Opdebug";

namespace mindspore {
namespace device {
namespace ascend {
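// Release every device-side buffer the dumper owns (load/unload protos and op debug buffers).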
DataDumper::~DataDumper() {
  kernel_graph_ = nullptr;
  ReleaseDevMem(&dev_load_mem_);
  ReleaseDevMem(&dev_unload_mem_);
  ReleaseDevMem(&op_debug_buffer_addr_);
  ReleaseDevMem(&op_debug_dump_args_);
}

#ifndef ENABLE_SECURITY
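// Collect the kernels that need to be dumped, keyed by full scope name. For HCCL kernels that
// match the dump list, their CNode inputs are registered instead of the HCCL kernel itself.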
void DataDumper::GetNeedDumpKernelList(NotNull<std::map<std::string, CNodePtr> *> kernel_map) const {
  MS_EXCEPTION_IF_NULL(kernel_graph_);
  for (const auto &kernel : kernel_graph_->execution_order()) {
    MS_EXCEPTION_IF_NULL(kernel);
    if (AnfAlgo::GetKernelType(kernel) == HCCL_KERNEL &&
        DumpJsonParser::GetInstance().NeedDump(kernel->fullname_with_scope())) {
      auto input_size = AnfAlgo::GetInputTensorNum(kernel);
      for (size_t i = 0; i < input_size; ++i) {
        auto input_with_index = AnfAlgo::GetPrevNodeOutput(kernel, i);
        auto input = input_with_index.first;
        MS_EXCEPTION_IF_NULL(input);
        if (input->isa<CNode>()) {
          MS_LOG(INFO) << "[AsyncDump] Match Hccl Node:" << kernel->fullname_with_scope()
                       << " Input:" << input->fullname_with_scope();
          auto it = kernel_map->try_emplace(input->fullname_with_scope(), input->cast<CNodePtr>());
          if (!it.second) {
            MS_LOG(INFO) << "Node name already exists: " << input->fullname_with_scope();
          }
        }
      }
    } else if (KernelNeedDump(kernel)) {
      MS_LOG(INFO) << "[AsyncDump] Match Node:" << kernel->fullname_with_scope();
      auto it = kernel_map->try_emplace(kernel->fullname_with_scope(), kernel);
      if (!it.second) {
        MS_LOG(INFO) << "Node name already exists: " << kernel->fullname_with_scope();
      }
    }
  }
}

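// Build an aicpu::dump::OpMappingInfo proto that covers the op debug task and every kernel that
// needs dumping, then load it onto the device through rtDatadumpInfoLoad.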
void DataDumper::LoadDumpInfo() {
  MS_LOG(INFO) << "[DataDump] LoadDumpInfo start";
  MS_EXCEPTION_IF_NULL(kernel_graph_);
  aicpu::dump::OpMappingInfo dump_info;
  SetOpDebugMappingInfo(NOT_NULL(&dump_info));
  SetOpMappingInfo(NOT_NULL(&dump_info));

  auto kernels = kernel_graph_->execution_order();
  for (const auto &kernel : kernels) {
    MS_EXCEPTION_IF_NULL(kernel);
    if (!KernelNeedDump(kernel)) {
      continue;
    }
    MS_LOG(INFO) << "[DataDump] LoadDumpInfo kernel:" << kernel->UniqueName();
    dump_kernel_names_.emplace_back(kernel->UniqueName());
    DumpJsonParser::GetInstance().MatchKernel(kernel->fullname_with_scope());

    aicpu::dump::Task task;
    ConstructDumpTask(NOT_NULL(kernel), NOT_NULL(&task));
    MS_EXCEPTION_IF_NULL(dump_info.mutable_task());
    dump_info.mutable_task()->Add(std::move(task));
  }
  RtLoadDumpData(dump_info, &dev_load_mem_);
  load_flag_ = true;
  // The graph id may be changed by the time Unload runs, so record it now.
  graph_id_ = kernel_graph_->graph_id();
  MS_LOG(INFO) << "[DataDump] LoadDumpInfo end";
}

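// Fill the graph-level fields of OpMappingInfo: dump path (suffixed with the rank id), model
// name, dump step range, model id, and, when loop sink control tensors are present, the device
// addresses of the current step, current epoch and steps-per-epoch tensors.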
void DataDumper::SetOpMappingInfo(NotNull<aicpu::dump::OpMappingInfo *> dump_info) const {
  MS_LOG(INFO) << "SetOpMappingInfo Start.";
  auto context_ptr = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(context_ptr);
  MS_EXCEPTION_IF_NULL(kernel_graph_);
  auto dump_path = DumpJsonParser::GetInstance().path();
  const auto &input_ctrl_tensors = kernel_graph_->input_ctrl_tensors();
  // Loop sink control tensors: current step (index 0), current epoch (index 2), steps per epoch (index 3).
  bool valid_ctrl_tensors = input_ctrl_tensors != nullptr && input_ctrl_tensors->size() > kStepsPerEpochTensorIndex;
  std::string net_name = DumpJsonParser::GetInstance().net_name();
  std::string iteration = DumpJsonParser::GetInstance().iteration_string();

  if (dump_path.empty()) {
    MS_LOG(EXCEPTION) << "Dump path invalid";
  }
  uint32_t graph_id = kernel_graph_->graph_id();
  uint32_t rank_id = 0;

  auto env_rank_id = common::GetEnv("RANK_ID");
  if (context_ptr->get_param<bool>(MS_CTX_ENABLE_HCCL) && !env_rank_id.empty()) {
    // Get the actual rank id in the distributed training case.
    if (!CommManager::GetInstance().GetRankID(kHcclWorldGroup, &rank_id)) {
      MS_LOG(INFO) << "Failed to get rank id.";
    }
  }
  dump_info->set_dump_path("/" + dump_path + "/rank_" + std::to_string(rank_id) + "/");
  MS_LOG(INFO) << "[DataDump] dump_path: " << dump_path;

  dump_info->set_model_name(net_name);
  MS_LOG(INFO) << "[DataDump] model_name: " << net_name;

  MS_LOG(INFO) << "[DataDump] iteration_pre: " << iteration;
  if (iteration == "all") {
    iteration = "0-" + std::to_string(std::numeric_limits<uint64_t>::max());
  }
  MS_LOG(INFO) << "[DataDump] iteration_post: " << iteration;
  dump_info->set_dump_step(iteration);

  dump_info->set_model_id(graph_id);
  dump_info->set_flag(kAicpuLoadFlag);

  if (!valid_ctrl_tensors) {
    MS_LOG(INFO) << "[DataDump] input_ctrl_tensors not valid.";
    return;
  }
  const auto &current_step_tensor = input_ctrl_tensors->at(kCurrentStepTensorIndex);
  const auto &current_epoch_tensor = input_ctrl_tensors->at(kCurrentEpochTensorIndex);
  const auto &steps_per_epoch_tensor = input_ctrl_tensors->at(kStepsPerEpochTensorIndex);

  MS_EXCEPTION_IF_NULL(current_step_tensor);
  MS_EXCEPTION_IF_NULL(current_epoch_tensor);
  MS_EXCEPTION_IF_NULL(steps_per_epoch_tensor);
  MS_EXCEPTION_IF_NULL(current_step_tensor->device_address());
  MS_EXCEPTION_IF_NULL(current_epoch_tensor->device_address());
  MS_EXCEPTION_IF_NULL(steps_per_epoch_tensor->device_address());

  void *current_step = current_step_tensor->device_address()->GetMutablePtr();
  void *current_epoch = current_epoch_tensor->device_address()->GetMutablePtr();
  void *steps_per_epoch = steps_per_epoch_tensor->device_address()->GetMutablePtr();

  if (current_epoch != nullptr && current_step != nullptr && steps_per_epoch != nullptr) {
    dump_info->set_step_id_addr(reinterpret_cast<uint64_t>(current_epoch));
    dump_info->set_loop_cond_addr(reinterpret_cast<uint64_t>(current_step));
    dump_info->set_iterations_per_loop_addr(reinterpret_cast<uint64_t>(steps_per_epoch));
  } else {
    MS_LOG(INFO) << "Invalid ctrl tensor device address";
  }
  MS_LOG(INFO) << "SetOpMappingInfo End.";
}

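// Only TBE, AICPU and AKG kernels can be dumped; among those, data_dump.json decides which ones.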
bool DataDumper::KernelNeedDump(const CNodePtr &kernel) const {
  MS_EXCEPTION_IF_NULL(kernel);
  if (AnfAlgo::GetKernelType(kernel) != TBE_KERNEL && AnfAlgo::GetKernelType(kernel) != AICPU_KERNEL &&
      AnfAlgo::GetKernelType(kernel) != AKG_KERNEL) {
    return false;
  }
  // Dump all kernels if mode is set to 0 in data_dump.json.
  return DumpJsonParser::GetInstance().NeedDump(kernel->fullname_with_scope());
}
#endif

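// Send an OpMappingInfo carrying the unload flag so the device releases the dump tasks that
// LoadDumpInfo registered for this graph.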
void DataDumper::UnloadDumpInfo() {
  if (!load_flag_) {
    MS_LOG(WARNING) << "[DataDump] Load did not succeed, no need to unload";
    return;
  }
  MS_LOG(INFO) << "[DataDump] UnloadDumpInfo start. graphId:" << graph_id_;

  aicpu::dump::OpMappingInfo op_mapping_info;
  op_mapping_info.set_model_id(graph_id_);
  op_mapping_info.set_flag(kAicpuUnloadFlag);

  for (const auto &kernel_name : dump_kernel_names_) {
    aicpu::dump::Task task;
    auto iter = runtime_info_map_.find(kernel_name);
    if (iter == runtime_info_map_.end()) {
      MS_LOG(EXCEPTION) << "[DataDump] Kernel name not found in runtime_info_map: " << kernel_name;
    }
    MS_EXCEPTION_IF_NULL(iter->second);
    auto task_id = std::get<kTupleTaskId>(*iter->second);
    task.set_task_id(task_id);
    MS_EXCEPTION_IF_NULL(op_mapping_info.mutable_task());
    op_mapping_info.mutable_task()->Add(std::move(task));
  }

  RtLoadDumpData(op_mapping_info, &dev_unload_mem_);
}

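// Free a buffer allocated by rtMalloc and reset the pointer; safe to call with nullptr.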
void DataDumper::ReleaseDevMem(void **ptr) const noexcept {
  if (ptr == nullptr) {
    return;
  }
  if (*ptr != nullptr) {
    rtError_t rt_error = rtFree(*ptr);
    if (rt_error != RT_ERROR_NONE) {
      MS_LOG(ERROR) << "[DataDump] Call rtFree failed, ret:" << rt_error;
    }
    *ptr = nullptr;
  }
}

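// Translate one kernel into an aicpu::dump::Task: task id and stream id come from
// runtime_info_map_, and the kernel inputs/outputs are appended unless ENABLE_SECURITY is set.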
void DataDumper::ConstructDumpTask(NotNull<const CNodePtr &> kernel, NotNull<aicpu::dump::Task *> dump_task) const {
  dump_task->set_end_graph(false);
  auto iter = runtime_info_map_.find(kernel->UniqueName());
  if (iter == runtime_info_map_.end()) {
    MS_LOG(EXCEPTION) << "[DataDump] Kernel name not found in runtime_info_map: " << kernel->UniqueName();
  }
  MS_EXCEPTION_IF_NULL(iter->second);
  auto task_id = std::get<kTupleTaskId>(*iter->second);
  auto stream_id = std::get<kTupleStreamId>(*iter->second);
#ifndef ENABLE_SECURITY
  auto args = std::get<kTupleArgs>(*iter->second);
#endif
  MS_LOG(INFO) << "[DataDump] Get runtime info task_id:" << task_id << " stream_id:" << stream_id;

  dump_task->set_task_id(task_id);
  dump_task->set_stream_id(stream_id);
  MS_EXCEPTION_IF_NULL(dump_task->mutable_op());
  dump_task->mutable_op()->set_op_name(kernel->fullname_with_scope());
  dump_task->mutable_op()->set_op_type(AnfAlgo::GetCNodeName(kernel.get()));

#ifndef ENABLE_SECURITY
  DumpKernelOutput(kernel, args, dump_task);
  DumpKernelInput(kernel, args, dump_task);
#endif
}

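// Register the op debug (overflow detection) task in OpMappingInfo. Its single UINT8 output of
// shape [kOpDebugShape] points at op_debug_dump_args_ instead of the real task args (see the
// comment on set_address below).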
void DataDumper::SetOpDebugMappingInfo(const NotNull<aicpu::dump::OpMappingInfo *> dump_info) const {
  MS_LOG(INFO) << "[DataDump] Add op debug info to OpMappingInfo, task id = " << debug_task_id_
               << ", stream id = " << debug_stream_id_;
  aicpu::dump::Task task;
  task.set_end_graph(false);
  task.set_task_id(debug_task_id_);
  task.set_stream_id(debug_stream_id_);
  MS_EXCEPTION_IF_NULL(task.mutable_op());
  task.mutable_op()->set_op_name(kNodeNameOpDebug);
  task.mutable_op()->set_op_type(kOpTypeOpDebug);

  aicpu::dump::Output output;
  output.set_data_type(ge::proto::DataType::DT_UINT8);
  output.set_format(ge::Format::FORMAT_ND);

  MS_EXCEPTION_IF_NULL(output.mutable_shape());
  output.mutable_shape()->add_dim(kOpDebugShape);

  output.set_original_name(kNodeNameOpDebug);
  output.set_original_output_index(0);
  output.set_original_output_format(ge::Format::FORMAT_ND);
  output.set_original_output_data_type(ge::proto::DataType::DT_UINT8);
  // Due to a lhisi virtual address bug, the args pointer cannot be used here for now.
  output.set_address(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(op_debug_dump_args_)));
  output.set_size(kOpDebugHostMemSize);

  MS_EXCEPTION_IF_NULL(task.mutable_output());
  task.mutable_output()->Add(std::move(output));
  MS_EXCEPTION_IF_NULL(dump_info->mutable_task());
  dump_info->mutable_task()->Add(std::move(task));
}

#ifndef ENABLE_SECURITY
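// Allocate the overflow detection buffers and register the op debug task with the runtime:
// op_debug_buffer_addr_ is a kOpDebugHostMemSize buffer in DDR memory, and op_debug_dump_args_
// is a pointer-sized HBM cell that holds that buffer's address.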
void DataDumper::OpDebugRegister() {
  uint32_t op_debug_mode = DumpJsonParser::GetInstance().op_debug_mode();
  auto iter = kOverflowModeStr.find(op_debug_mode);
  if (iter == kOverflowModeStr.end()) {
    MS_LOG(EXCEPTION) << "Invalid op debug mode " << op_debug_mode;
  }
  MS_LOG(INFO) << "[DataDump] Op debug mode is " << iter->second;
  if (op_debug_mode == kNoOverflow) {
    return;
  }

  rtError_t rt_ret = rtMalloc(&op_debug_buffer_addr_, kOpDebugHostMemSize, RT_MEMORY_DDR);
  if (rt_ret != RT_ERROR_NONE) {
    MS_LOG(EXCEPTION) << "[DataDump] Call rtMalloc failed, ret = " << rt_ret;
  }

  rt_ret = rtMalloc(&op_debug_dump_args_, kOpDebugDevMemSize, RT_MEMORY_HBM);
  if (rt_ret != RT_ERROR_NONE) {
    MS_LOG(EXCEPTION) << "[DataDump] Call rtMalloc failed, ret = " << rt_ret;
  }

  rt_ret =
    rtMemcpy(op_debug_dump_args_, sizeof(void *), &op_debug_buffer_addr_, sizeof(void *), RT_MEMCPY_HOST_TO_DEVICE);
  if (rt_ret != RT_ERROR_NONE) {
    MS_LOG(EXCEPTION) << "[DataDump] Call rtMemcpy failed, ret = " << rt_ret;
  }

  rt_ret = rtDebugRegister(model_handle_(), op_debug_mode, op_debug_buffer_addr_, &debug_stream_id_, &debug_task_id_);
  if (rt_ret != RT_ERROR_NONE) {
    MS_LOG(EXCEPTION) << "[DataDump] Call rtDebugRegister failed, ret = " << rt_ret;
  }

  MS_LOG(INFO) << "[DataDump] Distribute op debug task, task id = " << debug_task_id_
               << ", stream id = " << debug_stream_id_;
}

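// Undo OpDebugRegister; a no-op when overflow detection is disabled. The buffers themselves are
// released by the destructor.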
void DataDumper::OpDebugUnregister() {
  uint32_t op_debug_mode = DumpJsonParser::GetInstance().op_debug_mode();
  if (op_debug_mode == kNoOverflow) {
    MS_LOG(INFO) << "[DataDump] Op debug mode is no overflow, no need to unregister.";
    return;
  }

  MS_LOG(INFO) << "[DataDump] Start.";
  rtError_t rt_ret = rtDebugUnRegister(model_handle_());
  if (rt_ret != RT_ERROR_NONE) {
    MS_LOG(EXCEPTION) << "[DataDump] Call rtDebugUnRegister failed, ret = " << rt_ret;
  }
}
#endif

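// Serialize OpMappingInfo, copy it into a freshly allocated device buffer (*ptr), and pass it to
// rtDatadumpInfoLoad. The buffer must stay alive until ReleaseDevMem frees it.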
void DataDumper::RtLoadDumpData(const aicpu::dump::OpMappingInfo &dump_info, void **ptr) {
  std::string proto_str;
  size_t proto_size = dump_info.ByteSizeLong();
  bool ret = dump_info.SerializeToString(&proto_str);
  if (!ret || proto_size == 0) {
    MS_LOG(EXCEPTION) << "[DataDump] Protobuf SerializeToString failed, proto size " << proto_size << ".";
  }

  if (ptr == nullptr) {
    MS_LOG(ERROR) << "[DataDump] Output pointer is nullptr, skip loading dump data";
    return;
  }

  rtError_t rt_ret = rtMalloc(ptr, proto_size, RT_MEMORY_HBM);
  if (rt_ret != RT_ERROR_NONE) {
    MS_LOG(EXCEPTION) << "[DataDump] Call rtMalloc failed, ret = " << rt_ret;
  }
  rt_ret = rtMemcpy(*ptr, proto_size, proto_str.c_str(), proto_size, RT_MEMCPY_HOST_TO_DEVICE);
  if (rt_ret != RT_ERROR_NONE) {
    MS_LOG(EXCEPTION) << "[DataDump] Call rtMemcpy failed, ret = " << rt_ret;
  }

  MS_LOG(INFO) << "[DataDump] rtDatadumpInfoLoad start";
  rt_ret = rtDatadumpInfoLoad(*ptr, SizeToUint(proto_size));
  if (rt_ret != RT_ERROR_NONE) {
    MS_LOG(EXCEPTION) << "[DataDump] Call rtDatadumpInfoLoad failed, ret = " << rt_ret;
  }
}

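// Copy a MindSpore shape into the proto's repeated dim field.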
void SetDumpShape(const std::vector<size_t> &ms_shape, NotNull<aicpu::dump::Shape *> dump_shape) {
  for (const auto &dim : ms_shape) {
    dump_shape->add_dim(dim);
  }
}

#ifndef ENABLE_SECURITY
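// Append one aicpu::dump::Output per kernel output. The args block stores the input pointers
// first and the output pointers after them, which is why the offset starts at
// sizeof(void *) * input_size.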
void DataDumper::DumpKernelOutput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task) {
  if (!DumpJsonParser::GetInstance().OutputNeedDump()) {
    MS_LOG(INFO) << "Skip dump output";
    return;
  }
  if (HasAbstractMonad(kernel)) {
    MS_LOG(WARNING) << "Skip Monad node output:" << kernel->fullname_with_scope();
    return;
  }
  MS_LOG(INFO) << "[DataDump] DumpKernelOutput start. Kernel:" << kernel->fullname_with_scope();
  auto input_size = AnfAlgo::GetInputTensorNum(kernel);
  auto output_size = AnfAlgo::GetOutputTensorNum(kernel);
  uint64_t offset = sizeof(void *) * input_size;
  for (size_t i = 0; i < output_size; ++i) {
    auto data_type = AnfAlgo::GetOutputDeviceDataType(kernel, i);
    auto output_format = AnfAlgo::GetOutputFormat(kernel, i);
    auto output_shape = AnfAlgo::GetOutputDeviceShape(kernel, i);
    auto output_origin_shape = AnfAlgo::GetOutputInferShape(kernel, i);

    aicpu::dump::Output output;
    output.set_data_type(GeTypesConvert::GetGeDataType(data_type));
    output.set_format(GeTypesConvert::GetGeFormat(output_format, output_shape.size()));
    SetDumpShape(output_shape, NOT_NULL(output.mutable_shape()));
    SetDumpShape(output_origin_shape, NOT_NULL(output.mutable_origin_shape()));

    output.set_original_output_format(GeTypesConvert::GetGeFormat(output_format, output_shape.size()));
    output.set_address(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + offset);
    // device address data size
    auto address = AnfAlgo::GetOutputAddr(kernel, i);
    MS_EXCEPTION_IF_NULL(address);
    output.set_size(address->GetSize());
    MS_LOG(INFO) << "[DataDump] output " << i << " address size:" << output.size();
    MS_EXCEPTION_IF_NULL(task->mutable_output());
    task->mutable_output()->Add(std::move(output));
    offset = SizetAddWithOverflowCheck(offset, sizeof(void *));
  }
}

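// Append one aicpu::dump::Input per kernel input. Type, format and shape are taken from the
// producing node's output; addresses are read from the start of the args block (offset 0).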
void DataDumper::DumpKernelInput(const CNodePtr &kernel, void *args, NotNull<aicpu::dump::Task *> task) {
  if (!DumpJsonParser::GetInstance().InputNeedDump()) {
    MS_LOG(INFO) << "Skip dump input";
    return;
  }
  MS_EXCEPTION_IF_NULL(kernel);
  if (AnfAlgo::IsNodeInputContainMonad(kernel)) {
    MS_LOG(WARNING) << "Skip Monad node:" << kernel->fullname_with_scope();
    return;
  }
  MS_LOG(INFO) << "[DataDump] DumpKernelInput start. Kernel:" << kernel->fullname_with_scope();
  auto input_size = AnfAlgo::GetInputTensorNum(kernel);
  uint64_t offset = 0;
  for (size_t i = 0; i < input_size; ++i) {
    aicpu::dump::Input input;
    auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(kernel, i);
    auto input_node = input_node_with_index.first;
    auto input_index = input_node_with_index.second;
    std::string output_format = AnfAlgo::GetOutputFormat(input_node, input_index);
    auto output_type = AnfAlgo::GetOutputDeviceDataType(input_node, input_index);
    if (output_type == kTypeUnknown) {
      MS_LOG(WARNING) << "[DataDump] Using a standalone weight parameter as a graph output is not recommended";
      output_type = AnfAlgo::GetOutputInferDataType(input_node, input_index);
    }
    auto output_shape = AnfAlgo::GetOutputDeviceShape(input_node, input_index);
    auto output_origin_shape = AnfAlgo::GetOutputInferShape(input_node, input_index);

    input.set_data_type(GeTypesConvert::GetGeDataType(output_type));
    input.set_format(GeTypesConvert::GetGeFormat(output_format, output_shape.size()));
    SetDumpShape(output_shape, NOT_NULL(input.mutable_shape()));
    SetDumpShape(output_origin_shape, NOT_NULL(input.mutable_origin_shape()));

    input.set_address(static_cast<uint64_t>(reinterpret_cast<uintptr_t>(args)) + offset);
    // device address data size
    auto address = AnfAlgo::GetPrevNodeOutputAddr(kernel, i);
    MS_EXCEPTION_IF_NULL(address);
    input.set_size(address->GetSize());
    MS_LOG(INFO) << "[DataDump] input " << i << " address size:" << input.size();
    MS_EXCEPTION_IF_NULL(task->mutable_input());
    task->mutable_input()->Add(std::move(input));
    offset = SizetAddWithOverflowCheck(offset, sizeof(void *));
  }
}
#endif

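// Strip the trailing "_<unique id>" suffix from a kernel's UniqueName; if there is no
// underscore, log an error and return the name unchanged.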
std::string DataDumper::StripUniqueId(const std::string node_name) {
  size_t last_underscore = node_name.find_last_of('_');
  std::string stripped_node_name;
  if (last_underscore == std::string::npos) {
    MS_LOG(ERROR) << "Could not strip unique ID from " << node_name;
    stripped_node_name = node_name;
  } else {
    stripped_node_name = node_name.substr(0, last_underscore);
  }
  return stripped_node_name;
}
}  // namespace ascend
}  // namespace device
}  // namespace mindspore