/**
 * Copyright 2020-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "include/backend/debug/data_dump/e2e_dump.h"

#include <unistd.h>
#include <sstream>
#include <algorithm>
#include <map>
#include <memory>
#include <set>
#include <utility>
#include <vector>
#include "include/backend/debug/data_dump/dump_json_parser.h"
#include "runtime/device/ms_device_shape_transfer.h"
#include "include/common/debug/anf_dump_utils.h"
#include "include/common/debug/common.h"
#include "include/backend/anf_runtime_algorithm.h"
#include "include/common/utils/anfalgo.h"
#include "utils/ms_context.h"
#include "runtime/device/kernel_runtime_manager.h"
#include "include/common/utils/config_manager.h"
#include "utils/file_utils.h"
#include "include/backend/debug/data_dump/tensor_stat_dump.h"
#include "include/backend/debug/common/csv_writer.h"
#include "abstract/utils.h"
#include "runtime/hardware/device_context_manager.h"
#ifdef ENABLE_DEBUGGER
#include "ops/op_def.h"
#include "debug/debug_services.h"
#include "debug/tensor_load.h"
#include "include/backend/debug/debugger/debugger.h"
#endif

namespace mindspore {
std::string GenDataFilePath(const CNodePtr &node, const std::string &kernel_name, const std::string &dump_path,
                            size_t slot, bool is_input) {
  std::string op_type = common::AnfAlgo::GetCNodeName(node);
  std::string op_name = kernel_name;
  uint64_t timestamp = Common::GetTimeStamp();
  uint32_t task_id = 0;
  uint32_t stream_id = 0;
  if (E2eDump::IsDeviceTargetAscend()) {
    stream_id = AnfAlgo::GetStreamId(node);
  }
  std::string tensor_type = is_input ? ".input." : ".output.";
  std::string file_path = dump_path + '/' + op_type + '.' + op_name + '.' + std::to_string(task_id) + '.' +
                          std::to_string(stream_id) + '.' + std::to_string(timestamp) + tensor_type +
                          std::to_string(slot);
  return file_path;
}
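
// Naming sketch (editor's illustration; the node and kernel names are hypothetical placeholders):
// for a Conv2D node with kernel_name "Default_network_Conv2D-op1", slot 0 and is_input == false,
// the generated path has the form
//   <dump_path>/Conv2D.Default_network_Conv2D-op1.0.<stream_id>.<timestamp>.output.0
// where task_id is always 0 here and stream_id is non-zero only on Ascend.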

TypeId ConvertStringToTypeId(const std::string &dtype) {
  const std::map<std::string, TypeId> kDbgDataTypeToStringMap = {
    {"bool", TypeId::kNumberTypeBool},        {"int8", TypeId::kNumberTypeInt8},
    {"int16", TypeId::kNumberTypeInt16},      {"int32", TypeId::kNumberTypeInt32},
    {"int64", TypeId::kNumberTypeInt64},      {"uint8", TypeId::kNumberTypeUInt8},
    {"uint16", TypeId::kNumberTypeUInt16},    {"uint32", TypeId::kNumberTypeUInt32},
    {"uint64", TypeId::kNumberTypeUInt64},    {"float16", TypeId::kNumberTypeFloat16},
    {"float32", TypeId::kNumberTypeFloat32},  {"float64", TypeId::kNumberTypeFloat64},
    {"bfloat16", TypeId::kNumberTypeBFloat16}};
  auto iter_type = kDbgDataTypeToStringMap.find(dtype);
  if (iter_type == kDbgDataTypeToStringMap.end()) {
    return TypeId::kTypeUnknown;
  }
  return iter_type->second;
}
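
// Minimal usage sketch (editor's illustration): unknown type strings map to kTypeUnknown rather
// than throwing, so callers must handle that value themselves, e.g.
//   TypeId t = ConvertStringToTypeId("float32");   // kNumberTypeFloat32
//   TypeId u = ConvertStringToTypeId("complex64"); // kTypeUnknown: not in the map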

bool E2eDump::IsDeviceTargetGPU() {
  auto context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(context);
  return context->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kGPUDevice;
}

bool E2eDump::IsDeviceTargetAscend() {
  auto context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(context);
  return context->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kAscendDevice;
}

bool E2eDump::IsMindRTKernelByKernel() {
  auto debugger = Debugger::GetInstance();
  MS_EXCEPTION_IF_NULL(debugger);
  return IsDeviceTargetGPU() || debugger->GetAscendKernelByKernelFlag();
}

/*
 * Feature group: Dump.
 * Target device group: GPU, Ascend.
 * Runtime category: Old runtime, MindRT.
 * Description: Dumps a tensor that has been loaded into the in-memory tensor_loader to disk, on both GPU and Ascend
 * machines.
 */
void E2eDump::DumpMemFromTensorLoaderToFile(const Debugger *debugger, const std::string &file_path,
                                            const std::string &original_kernel_name, size_t slot) {
#ifdef ENABLE_DEBUGGER
  MS_EXCEPTION_IF_NULL(debugger);
  auto ret = debugger->DumpTensorToFile(file_path, original_kernel_name, slot);
  if (!ret) {
    MS_LOG(INFO) << "DumpTensorToFile failed, path: " << file_path;
  }
#endif
}

void E2eDump::DumpOutput(const session::KernelGraph *graph, const std::string &dump_path, const Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(graph);
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  if (!dump_json_parser.OutputNeedDump()) {
    return;
  }
  MS_LOG(INFO) << "Start e2e dump output";
  bool trans_flag = dump_json_parser.trans_flag();
  const auto &apply_kernels = graph->execution_order();
  for (const auto &node : apply_kernels) {
    MS_EXCEPTION_IF_NULL(node);
    std::string kernel_name = GetKernelNodeName(node);
    if (!dump_json_parser.NeedDump(kernel_name)) {
      continue;
    }
    DumpJsonParser::GetInstance().MatchKernel(kernel_name);
    DumpOutputImpl(node, trans_flag, dump_path, &kernel_name, debugger);
  }
}

void E2eDump::DumpOutputSingleNode(const CNodePtr &node, const std::string &dump_path, const Debugger *debugger) {
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  if (!dump_json_parser.OutputNeedDump()) {
    return;
  }
  bool trans_flag = dump_json_parser.trans_flag();
  MS_EXCEPTION_IF_NULL(node);
  std::string kernel_name = GetKernelNodeName(node);
  if (!dump_json_parser.NeedDump(kernel_name)) {
    return;
  }
  DumpJsonParser::GetInstance().MatchKernel(kernel_name);
  DumpOutputImpl(node, trans_flag, dump_path, &kernel_name, debugger);
}

void E2eDump::DumpOutputImpl(const CNodePtr &node, bool trans_flag, const std::string &dump_path,
                             std::string *kernel_name, const Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(node);
  GetFileKernelName(NOT_NULL(kernel_name));
  auto output_size = AnfAlgo::GetOutputTensorNum(node);
  for (size_t j = 0; j < output_size; ++j) {
    if (!AnfAlgo::OutputAddrExist(node, j)) {
      continue;
    }
    auto addr = AnfAlgo::GetOutputAddr(node, j);
    std::string node_name = GetKernelNodeName(node);
    MS_EXCEPTION_IF_NULL(addr);
    auto type = common::AnfAlgo::GetOutputInferDataType(node, j);
    std::string op_type = common::AnfAlgo::GetCNodeName(node);
    std::string op_name = *kernel_name;
    uint32_t task_id = 0;
    uint32_t stream_id = 0;
    if (IsDeviceTargetAscend()) {
      stream_id = AnfAlgo::GetStreamId(node);
    }
    uint64_t timestamp = Common::GetTimeStamp();
    std::string file_path = dump_path + '/' + op_type + '.' + op_name + '.' + std::to_string(task_id) + '.' +
                            std::to_string(stream_id) + '.' + std::to_string(timestamp) + ".output." +
                            std::to_string(j);
    if (DumpJsonParser::GetInstance().IsStatisticDump() && IsMindRTKernelByKernel()) {
      TensorStatDump stat_dump(op_type, op_name, task_id, stream_id, timestamp, false, j, j);
      (void)stat_dump.DumpTensorStatsToFile(node_name, dump_path, debugger);
    }
    if (DumpJsonParser::GetInstance().IsTensorDump()) {
      if (IsMindRTKernelByKernel()) {
        DumpMemFromTensorLoaderToFile(debugger, file_path, node_name, j);
      } else {
        ShapeVector int_shapes;
        GetDumpIntShape(node, j, NOT_NULL(&int_shapes), trans_flag);
        DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
      }
    }
  }
}

void E2eDump::DumpOutputData(const CNodePtr &node, bool trans_flag, const std::string &dump_path,
                             std::string *kernel_name) {
  if (IsMindRTKernelByKernel()) {
    MS_LOG(INFO) << "DumpOutputData is only for graph mode on Ascend";
    return;
  }
  MS_EXCEPTION_IF_NULL(node);
  GetFileKernelName(NOT_NULL(kernel_name));
  auto output_size = AnfAlgo::GetOutputTensorNum(node);
  for (size_t j = 0; j < output_size; ++j) {
    if (!AnfAlgo::OutputAddrExist(node, j)) {
      continue;
    }
    auto addr = AnfAlgo::GetOutputAddr(node, j);
    MS_EXCEPTION_IF_NULL(addr);
    ShapeVector int_shapes;
    GetDumpIntShape(node, j, NOT_NULL(&int_shapes), trans_flag);
    auto type = common::AnfAlgo::GetOutputInferDataType(node, j);
    std::string file_path = GenDataFilePath(node, *kernel_name, dump_path, j, false);
    DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
  }
}

void E2eDump::DumpInput(const session::KernelGraph *graph, const std::string &dump_path, const Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(graph);
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  if (!dump_json_parser.InputNeedDump()) {
    return;
  }
  MS_LOG(INFO) << "Start e2e dump input";
  bool trans_flag = dump_json_parser.trans_flag();
  const auto &apply_kernels = graph->execution_order();
  for (const auto &node : apply_kernels) {
    MS_EXCEPTION_IF_NULL(node);
    std::string kernel_name = GetKernelNodeName(node);
    if (!dump_json_parser.NeedDump(kernel_name)) {
      continue;
    }
    DumpJsonParser::GetInstance().MatchKernel(kernel_name);
    DumpInputImpl(node, trans_flag, dump_path, &kernel_name, debugger);
  }
}

void E2eDump::DumpInputSingleNode(const CNodePtr &node, const std::string &dump_path, const Debugger *debugger) {
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  if (!dump_json_parser.InputNeedDump()) {
    return;
  }
  bool trans_flag = dump_json_parser.trans_flag();
  MS_EXCEPTION_IF_NULL(node);
  std::string kernel_name = GetKernelNodeName(node);
  if (!dump_json_parser.NeedDump(kernel_name)) {
    return;
  }
  DumpJsonParser::GetInstance().MatchKernel(kernel_name);
  DumpInputImpl(node, trans_flag, dump_path, &kernel_name, debugger);
}

void E2eDump::DumpArgsSingleNode(const CNodePtr &node, const std::string &dump_path, const Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(node);
  MS_EXCEPTION_IF_NULL(debugger);
  auto op_name = GetKernelNodeName(node);
  // Extract the op type from the full node name, e.g. "Default/network/ReLU-op1" -> "ReLU".
  int start_index = static_cast<int>(op_name.rfind('/')) + 1;
  int end_index = static_cast<int>(op_name.rfind('-'));
  if (end_index == -1) {
    end_index = static_cast<int>(op_name.length());
  }
  std::string op_t = op_name.substr(start_index, end_index - start_index);
  auto op_def = mindspore::ops::GetOpDef(op_t);
  nlohmann::json json;
  if (!op_def) {
    // Without an op definition, fall back to dumping the primitive's attributes.
    auto prim_node = GetCNodePrimitive(node);
    if (prim_node != nullptr) {
      auto prim_attrs = prim_node->attrs();
      for (const auto &entry : prim_attrs) {
        json[entry.first] = entry.second->ToString();
      }
    }
  } else {
    // Dump every init arg as the string form of its input tensor.
    int idx = 0;
    for (const auto &op_arg : op_def->args_) {
      ++idx;
      if (op_arg.as_init_arg_) {
        auto input_kernel = node->input(idx);
        std::string input_kernel_name = GetKernelNodeName(input_kernel);
        std::string input_tensor_name = input_kernel_name + ':' + "0";
        auto arg_name = op_arg.arg_name_;
        auto t_data = debugger->GetTensor(input_tensor_name);
        if (t_data == nullptr) {
          MS_LOG(WARNING) << "Failed to find tensor " << input_tensor_name << ", skip dumping arg " << arg_name << ".";
          continue;
        }
        std::string type = t_data->GetTypeString();
        auto converted_tensor = std::make_shared<tensor::Tensor>(
          ConvertStringToTypeId(type), t_data->GetShape(),
          static_cast<void *>(const_cast<char *>(t_data->GetDataPtr())), t_data->GetByteSize());
        json[arg_name] =
          converted_tensor->data().ToString(converted_tensor->data_type(), converted_tensor->shape(), false);
      }
    }
  }

  std::string scope_name = node->fullname_with_scope();
  std::replace(scope_name.begin(), scope_name.end(), '.', '_');
  std::replace(scope_name.begin(), scope_name.end(), '/', '_');

  constexpr int kJsonIndent = 4;
  std::string file_path = dump_path + '/' + op_t + "." + scope_name + ".json";
  auto realpath = Common::CreatePrefixPath(file_path);
  if (!realpath.has_value()) {
    MS_LOG(ERROR) << "Get realpath failed, path=" << file_path;
    return;
  }
  std::ofstream outFile(realpath.value());
  if (!outFile.is_open()) {
    MS_LOG(ERROR) << "Open file " << realpath.value() << " failed.";
    return;
  }
  outFile << json.dump(kJsonIndent);
  outFile.close();
}
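
// Output sketch (editor's illustration with hypothetical names and values): for a ReLU node
// whose scope is "Default/network/ReLU-op1", the args are written to
//   <dump_path>/ReLU.Default_network_ReLU-op1.json
// containing either the primitive's attributes, e.g. {"output_type": "Int32"}, or, when the op
// has an op_def, the string form of each init-arg tensor keyed by its argument name.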

void E2eDump::DumpInputImpl(const CNodePtr &node, bool trans_flag, const std::string &dump_path,
                            std::string *kernel_name, const Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(node);
  GetFileKernelName(NOT_NULL(kernel_name));
  auto input_size = common::AnfAlgo::GetInputTensorNum(node);
  for (size_t j = 0; j < input_size; ++j) {
    auto kernel_with_index = common::AnfAlgo::GetPrevNodeOutput(node, j);
    auto input = kernel_with_index.first;
    auto index = kernel_with_index.second;
    if (!AnfAlgo::OutputAddrExist(input, index)) {
      continue;
    }
    std::string node_name = GetKernelNodeName(node);
    size_t slot = j;
    if (IsMindRTKernelByKernel()) {
      auto input_kernel = node->input(j + 1);
      std::string input_kernel_name = GetKernelNodeName(input_kernel);
      node_name = input_kernel_name;
      slot = 0;
    }
    auto type = common::AnfAlgo::GetOutputInferDataType(input, index);
    std::string op_type = common::AnfAlgo::GetCNodeName(node);
    std::string op_name = *kernel_name;
    uint64_t timestamp = Common::GetTimeStamp();
    uint32_t task_id = 0;
    uint32_t stream_id = 0;
    if (IsDeviceTargetAscend()) {
      stream_id = AnfAlgo::GetStreamId(node);
    }
    std::string file_path = dump_path + '/' + op_type + '.' + op_name + '.' + std::to_string(task_id) + '.' +
                            std::to_string(stream_id) + '.' + std::to_string(timestamp) + ".input." + std::to_string(j);
    auto addr = AnfAlgo::GetOutputAddr(input, index);
    MS_EXCEPTION_IF_NULL(addr);
    if (DumpJsonParser::GetInstance().IsStatisticDump() && IsMindRTKernelByKernel()) {
      TensorStatDump stat_dump(op_type, op_name, task_id, stream_id, timestamp, true, j, slot);
      (void)stat_dump.DumpTensorStatsToFile(node_name, dump_path, debugger);
    }
    if (DumpJsonParser::GetInstance().IsTensorDump()) {
      if (IsMindRTKernelByKernel()) {
        DumpMemFromTensorLoaderToFile(debugger, file_path, node_name, slot);
      } else {
        ShapeVector int_shapes;
        GetDumpIntShape(input, index, NOT_NULL(&int_shapes), trans_flag);
        DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
      }
    }
  }
}

void E2eDump::DumpInputData(const CNodePtr &node, bool trans_flag, const std::string &dump_path,
                            std::string *kernel_name) {
  if (IsMindRTKernelByKernel()) {
    MS_LOG(INFO) << "DumpInputData is only for graph mode on Ascend";
    return;
  }
  MS_EXCEPTION_IF_NULL(node);
  GetFileKernelName(NOT_NULL(kernel_name));
  auto input_size = common::AnfAlgo::GetInputTensorNum(node);
  for (size_t j = 0; j < input_size; ++j) {
    auto kernel_with_index = common::AnfAlgo::GetPrevNodeOutput(node, j);
    auto input = kernel_with_index.first;
    auto index = kernel_with_index.second;
    if (!AnfAlgo::OutputAddrExist(input, index)) {
      continue;
    }
    auto addr = AnfAlgo::GetOutputAddr(input, index);
    MS_EXCEPTION_IF_NULL(addr);
    ShapeVector int_shapes;
    GetDumpIntShape(input, index, NOT_NULL(&int_shapes), trans_flag);
    auto type = common::AnfAlgo::GetOutputInferDataType(input, index);
    std::string file_path = GenDataFilePath(node, *kernel_name, dump_path, j, true);
    DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
  }
}

void E2eDump::DumpSingleAnfNode(const AnfNodePtr &anf_node, const size_t output_index, const std::string &dump_path,
                                bool trans_flag, const Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(anf_node);
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  if ((!anf_node->isa<Parameter>() && !anf_node->isa<ValueNode>()) || IsValueNode<StringImm>(anf_node)) {
    return;
  }
  std::string node_name = GetKernelNodeName(anf_node);
  if (!dump_json_parser.NeedDump(node_name)) {
    return;
  }
  DumpJsonParser::GetInstance().MatchKernel(node_name);
  GetFileKernelName(NOT_NULL(&node_name));

  std::string dump_name = node_name;
  const std::string cst_prefix = "Default_";
  if (anf_node->isa<ValueNode>()) {
    if (dump_name.find(cst_prefix) == std::string::npos) {
      MS_LOG(INFO) << "Incorrect constant format: " << dump_name;
      return;
    }
    dump_name = node_name.substr(cst_prefix.length());
    trans_flag = false;
  }
  // Check if the output address exists; if not, return.
  if (!AnfAlgo::OutputAddrExist(anf_node, output_index)) {
    return;
  }
  auto addr = AnfAlgo::GetOutputAddr(anf_node, output_index);
  MS_EXCEPTION_IF_NULL(addr);
  ShapeVector int_shapes;
  GetDumpIntShape(anf_node, output_index, NOT_NULL(&int_shapes), trans_flag);
  auto type = common::AnfAlgo::GetOutputInferDataType(anf_node, output_index);
  uint64_t timestamp = Common::GetTimeStamp();
  uint32_t task_id = 0;
  uint32_t stream_id = 0;
  std::string file_path = dump_path + "/Parameter." + dump_name + '.' + std::to_string(task_id) + '.' +
                          std::to_string(stream_id) + '.' + std::to_string(timestamp) + ".output.0";
  if (IsDeviceTargetGPU()) {
    if (dump_json_parser.IsStatisticDump()) {
      TensorStatDump stat_dump("Parameter", dump_name, task_id, stream_id, timestamp, false, 0, 0);
      (void)stat_dump.DumpTensorStatsToFile(node_name, dump_path, debugger);
    }
    if (dump_json_parser.IsTensorDump()) {
      DumpMemFromTensorLoaderToFile(debugger, file_path, node_name, 0);
    }
  } else {
    // On Ascend, saving statistic data is only supported in npy format.
    if (dump_json_parser.IsStatisticDump() && dump_json_parser.IsNpyFormat()) {
      // In Ascend kernel-by-kernel mode, load the tensor data into the debugger first.
      auto format = kOpFormat_DEFAULT;
      std::string tensor_name = node_name + ":0";
      MS_EXCEPTION_IF_NULL(debugger);
      uint32_t root_graph_id = debugger->GetCurrentRootGraphId();
      bool ret = addr->LoadMemToHost(tensor_name, 0, format, int_shapes, type, 0, true, root_graph_id, false, true);
      if (!ret) {
        MS_LOG(ERROR) << "LoadMemToHost failed, tensor_name: " << tensor_name;
      } else {
        TensorStatDump stat_dump("Parameter", dump_name, task_id, stream_id, timestamp, false, 0, 0);
        (void)stat_dump.DumpTensorStatsToFile(node_name, dump_path, debugger);
      }
    }
    if (dump_json_parser.IsTensorDump()) {
      DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
    }
  }
}
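
// Naming sketch (editor's illustration; the node name is a hypothetical placeholder): a
// ValueNode named "Default_network-Net_value_1" has the "Default_" prefix stripped and is saved as
//   <dump_path>/Parameter.network-Net_value_1.0.0.<timestamp>.output.0
// so constants and parameters share the same "Parameter." file prefix, with task_id and
// stream_id both fixed at 0.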

/*
 * Feature group: Dump.
 * Target device group: Ascend, GPU.
 * Runtime category: MindRT.
 * Description: This function is similar to DumpSingleAnfNode, but is only used for dumping parameters in MindRT.
 * It uses GetParameterInfo to get the dump info for the parameter node.
 */
void E2eDump::DumpSingleParameterNode(const AnfNodePtr &anf_node, const std::string &dump_path, bool trans_flag,
                                      const Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(anf_node);
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  std::string node_name = GetKernelNodeName(anf_node);
  if (!anf_node->isa<Parameter>() || !dump_json_parser.NeedDump(node_name) || !dump_json_parser.OutputNeedDump()) {
    return;
  }
  DumpJsonParser::GetInstance().MatchKernel(node_name);
  GetFileKernelName(NOT_NULL(&node_name));
  ShapeVector int_shapes;
  TypeId type;
  TypeId device_type;
  auto addr = GetParameterInfo(anf_node, NOT_NULL(&int_shapes), NOT_NULL(&type), NOT_NULL(&device_type));
  if (addr == nullptr || addr->GetPtr() == nullptr) {
    MS_LOG(DEBUG) << "Skip node: " << node_name << ". Parameter data is not available for MindRT.";
    return;
  }
  uint64_t timestamp = Common::GetTimeStamp();
  uint32_t task_id = 0;
  uint32_t stream_id = 0;
  std::string file_path = dump_path + "/Parameter." + node_name + '.' + std::to_string(task_id) + '.' +
                          std::to_string(stream_id) + '.' + std::to_string(timestamp) + ".output.0";
  if (IsDeviceTargetGPU()) {
    if (dump_json_parser.IsStatisticDump()) {
      TensorStatDump stat_dump("Parameter", node_name, task_id, stream_id, timestamp, false, 0, 0);
      (void)stat_dump.DumpTensorStatsToFile(node_name, dump_path, debugger);
    }
    if (dump_json_parser.IsTensorDump()) {
      DumpMemFromTensorLoaderToFile(debugger, file_path, node_name, 0);
    }
  } else {
    // On Ascend, saving statistic data is only supported in npy format.
    if (dump_json_parser.IsStatisticDump() && dump_json_parser.IsNpyFormat()) {
      // In Ascend kernel-by-kernel mode, load the tensor data into the debugger first.
      auto format = kOpFormat_DEFAULT;
      std::string tensor_name = node_name + ":0";
      MS_EXCEPTION_IF_NULL(debugger);
      uint32_t root_graph_id = debugger->GetCurrentRootGraphId();
      bool ret = addr->LoadMemToHost(tensor_name, 0, format, int_shapes, type, 0, true, root_graph_id, false, true);
      if (!ret) {
        MS_LOG(ERROR) << "LoadMemToHost failed, tensor_name: " << tensor_name;
      } else {
        TensorStatDump stat_dump("Parameter", node_name, task_id, stream_id, timestamp, false, 0, 0);
        (void)stat_dump.DumpTensorStatsToFile(node_name, dump_path, debugger);
      }
    }
    if (dump_json_parser.IsTensorDump()) {
      DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
    }
  }
}

void E2eDump::DumpParameters(const session::KernelGraph *graph, const std::string &dump_path,
                             const Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(graph);
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  if (!dump_json_parser.OutputNeedDump()) {
    return;
  }
  MS_LOG(INFO) << "Start e2e dump parameters";
  bool trans_flag = dump_json_parser.trans_flag();

  // dump parameters
  const auto &parameters = graph->inputs();
  for (auto &item : parameters) {
    DumpSingleAnfNode(item, kParameterOutputIndex, dump_path, trans_flag, debugger);
  }
}

void E2eDump::DumpConstantData(const session::KernelGraph *graph, uint32_t rank_id, const Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(graph);
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  if (!IsDeviceTargetGPU() || !dump_json_parser.e2e_dump_enabled()) {
    return;
  }
  uint32_t graph_id = graph->graph_id();
  std::string cst_path = GenerateDumpPath(graph_id, rank_id, true);
  if (!Common::FileExists(cst_path)) {
    DumpConstantData(graph, cst_path, debugger);
  }
}

void E2eDump::DumpConstantData(const session::KernelGraph *graph, const std::string &cst_dump_path,
                               const Debugger *debugger) {
  // Dump constants to npy files.
  MS_EXCEPTION_IF_NULL(graph);
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  MS_LOG(INFO) << "DumpConstants. Current iteration is " << dump_json_parser.cur_dump_iter();
  MS_LOG(INFO) << "Current graph id is " << graph->graph_id();
  if (!dump_json_parser.OutputNeedDump()) {
    return;
  }
  const auto value_nodes = graph->graph_value_nodes();
  for (auto &item : value_nodes) {
    DumpSingleAnfNode(item, kValueNodeOutputIndex, cst_dump_path, false, debugger);
  }
}

/*
 * Feature group: Dump.
 * Target device group: Ascend, GPU.
 * Runtime category: Old runtime.
 * Description: This function is for updating the dump iteration for the GPU and Ascend old runtime.
 */
void E2eDump::UpdateIterOldRTDump(const session::KernelGraph *graph) {
  MS_EXCEPTION_IF_NULL(graph);
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  uint32_t graph_id = graph->graph_id();
  if (IsDeviceTargetGPU()) {
    if (starting_graph_id == INT32_MAX) {
      starting_graph_id = graph_id;
    } else if (starting_graph_id == graph_id && !MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
      // Updating the dump iter for the MindRT runtime is done in UpdateIterMindRTDump().
      // Update the dump iter for the GPU old runtime.
      dump_json_parser.UpdateDumpIter();
    }
    return;
  }
  // If device target is Ascend.
  if (graph->IsDatasetGraph()) {
    MS_LOG(INFO) << "No need to update iteration for dataset graph.";
    return;
  }

  // In multi network scripts, dump iter is equal to the number of networks that have been executed so far.
  dump_json_parser.UpdateDumpIter();
}

/*
 * Feature group: Dump.
 * Target device group: Ascend, GPU.
 * Runtime category: MindRT.
 * Description: This function is for updating the dump iteration for GPU and Ascend MindRT dump. Please note that dump
 * with dataset_sink_mode = True is not supported for GPU.
 */
void E2eDump::UpdateIterMindRTDump() {
  auto debugger = Debugger::GetInstance();
  MS_EXCEPTION_IF_NULL(debugger);
  // The dataset graph is always the first graph in the list when dataset_sink_mode is true.
  auto graph_list = debugger->GetStepGraphPtrList();
  if (graph_list.empty()) {
    MS_LOG(INFO) << "The graph list is empty.";
    return;
  }
  auto graph = graph_list[0];
  auto context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(context);
  if (context->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kAscendDevice && graph->IsDatasetGraph()) {
    MS_LOG(INFO) << "No need to update iteration for dataset graph.";
    return;
  }
  // Update the dump iter for GPU and kernel-by-kernel Ascend dump.
  DumpJsonParser::GetInstance().UpdateDumpIter();
}

/*
 * Feature group: Dump.
 * Target device group: Ascend, GPU.
 * Runtime category: Old runtime, MindRT.
 * Description: Generates graph history files (dumping all the iteration numbers in which the graph was executed) for
 * the given graph and rank_id. If dataset_sink_mode is true for async dump on Ascend, this function is called once per
 * epoch and dumps all the iterations in the epoch to the graph history file.
 */
void E2eDump::DumpRunIter(const KernelGraphPtr &graph, uint32_t rank_id) {
  auto &json_parser = DumpJsonParser::GetInstance();
  if (!(json_parser.async_dump_enabled() || json_parser.e2e_dump_enabled())) {
    return;
  }
  auto context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(context);
  std::string backend = context->backend_policy();
  if (backend == "ge") {
    MS_LOG(INFO) << "On the 910B or 910C platform, dumping execution_order is not supported.";
    return;
  }
  bool sink_mode =
    (ConfigManager::GetInstance().dataset_mode() == DatasetMode::DS_SINK_MODE || graph->IsDatasetGraph());
  auto iter_num = SizeToInt(LongToSize(ConfigManager::GetInstance().iter_num()));
  if (graph->IsDatasetGraph()) {
    MS_LOG(INFO) << "Graph " << graph->graph_id() << " is a dataset graph, not creating graph history file.";
    return;
  }
  auto debugger = Debugger::GetInstance();
  MS_EXCEPTION_IF_NULL(debugger);
  if (!debugger->GetAscendKernelByKernelFlag() && !IsDeviceTargetGPU() &&
      (graph->graph_id() != graph->root_graph_id())) {
    // When the device target is Ascend, only dump the graph run iter for the root graph.
    return;
  }
  std::string execution_order_path = json_parser.path() + "/rank_" + std::to_string(rank_id) + "/execution_order/";
  std::string graph_str =
    IsDeviceTargetGPU() ? std::to_string(graph->graph_id()) : std::to_string(graph->root_graph_id());
  std::string file_name_to_check = execution_order_path + "/ms_global_execution_order_graph_" + graph_str + ".csv";
  auto real_path = Common::CreatePrefixPath(file_name_to_check);
  if (!real_path.has_value()) {
    MS_LOG(WARNING) << "Check file path: " << file_name_to_check << " failed.";
    return;
  }
  std::string file_name = real_path.value();
  ChangeFileMode(file_name, S_IWUSR);
  std::ofstream fout(file_name, std::ofstream::app);
  if (!fout.is_open()) {
    MS_LOG(WARNING) << "Open file for saving graph global execution order failed.";
    return;
  }
  if (sink_mode && json_parser.async_dump_enabled() && !debugger->GetAscendKernelByKernelFlag()) {
    // For async dump with sink_mode = true, cur_dump_iter() is the current epoch;
    // dump the history for all iterations in the epoch.
    debugger->UpdateGraphIterMap(graph->graph_id(), iter_num);
    auto graph_iter_map = debugger->GetGraphIterMap();
    auto step_per_epoch = IntToSize(graph_iter_map[graph->graph_id()]);
    for (size_t i = 0; i < step_per_epoch; i++) {
      auto step = (json_parser.cur_dump_iter() * step_per_epoch) + i;
      fout << (std::to_string(step) + "\n");
    }
  } else {
    fout << std::to_string(json_parser.cur_dump_iter()) + "\n";
  }
  fout.close();
  ChangeFileMode(file_name, S_IRUSR);
}
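
// Output sketch (editor's illustration): for rank 0 and root graph 1, each executed iteration
// number is appended as one line to
//   <json_parser.path()>/rank_0/execution_order/ms_global_execution_order_graph_1.csv
// e.g. a file containing "0\n1\n2\n" after three dumped steps. On GPU the file is keyed by
// graph_id instead of root_graph_id.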

/*
 * Feature group: Dump.
 * Target device group: Ascend, GPU.
 * Runtime category: Old runtime, MindRT.
 * Description: This function is for dumping the whole graph. It is used for the old runtime on GPU and Ascend and for
 * super-kernel MindRT on Ascend.
 */
void E2eDump::DumpData(const session::KernelGraph *graph, uint32_t rank_id, const Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(graph);
  bool success = false;
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  uint32_t graph_id = graph->graph_id();
  if (!dump_json_parser.e2e_dump_enabled()) {
    return;
  }

  if (dump_json_parser.GetIterDumpFlag()) {
    MS_LOG(INFO) << "Start e2e dump. Current iteration is " << dump_json_parser.cur_dump_iter();
    MS_LOG(INFO) << "Current graph id is " << graph_id;
    std::string dump_path = GenerateDumpPath(graph_id, rank_id);
    if (dump_json_parser.IsStatisticDump()) {
      (void)TensorStatDump::OpenStatisticsFile(dump_path);
    }
    DumpInput(graph, dump_path, debugger);
    DumpOutput(graph, dump_path, debugger);
    if (!MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
      // Dump parameters for the old runtime. For MindRT it is done in PostExecuteGraphDebugger.
      DumpParameters(graph, dump_path, debugger);
      // DumpConstantData for the GPU old runtime.
      DumpConstantData(graph, rank_id, debugger);
    }
    if (dump_json_parser.IsStatisticDump()) {
      CsvWriter::GetInstance().CloseFile();
    }
    success = true;
  }

  if (success) {
    MS_LOG(DEBUG) << "E2eDump Dump Data completed!";
  } else {
    MS_LOG(DEBUG) << "E2eDump Dump has not occurred!";
  }
}

/*
 * Feature group: Dump.
 * Target device group: Ascend, GPU.
 * Runtime category: MindRT.
 * Description: This function is for dumping a single node. It is used for MindRT on GPU and for Ascend
 * kernel-by-kernel dump.
 */
bool E2eDump::DumpSingleNodeData(const CNodePtr &node, uint32_t graph_id, uint32_t rank_id, const Debugger *debugger) {
  bool success = false;
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  if (dump_json_parser.DumpEnabledForIter()) {
    std::string dump_path = GenerateDumpPath(graph_id, rank_id);
    DumpInputSingleNode(node, dump_path, debugger);
    DumpOutputSingleNode(node, dump_path, debugger);
    if (dump_json_parser.save_args_flag()) {
      DumpArgsSingleNode(node, dump_path, debugger);
    }
    success = true;
  }
  return success;
}
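
// Call-pattern sketch (editor's illustration; the surrounding call site is an assumption, only
// DumpSingleNodeData itself comes from this file): in kernel-by-kernel mode the runtime can
// invoke this after each kernel launch, e.g.
//   if (E2eDump::DumpSingleNodeData(kernel, graph_id, rank_id, debugger)) {
//     MS_LOG(DEBUG) << "Dumped data for " << kernel->fullname_with_scope();
//   }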

/*
 * Feature group: Dump.
 * Target device group: Ascend, GPU.
 * Runtime category: MindRT.
 * Description: This function is for dumping all the parameters in the current root graph for GPU, Ascend superkernel
 * (e2e dump) and Ascend kernel-by-kernel (e2e and async dump).
 */
void E2eDump::DumpParametersData(uint32_t rank_id, const Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(debugger);
  uint32_t root_graph_id = debugger->GetCurrentRootGraphId();
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  if ((dump_json_parser.async_dump_enabled() && !debugger->GetAscendKernelByKernelFlag()) ||
      (dump_json_parser.async_dump_enabled() && dump_json_parser.op_debug_mode() > 0)) {
    // For MindRT async dump, parameters are dumped only in kernel-by-kernel mode.
    return;
  }
  if (dump_json_parser.DumpEnabledForIter()) {
    MS_LOG(INFO) << "DumpParameters. Current iteration is " << dump_json_parser.cur_dump_iter();
    MS_LOG(INFO) << "Current root graph id is " << root_graph_id;
    std::string dump_path = GenerateDumpPath(root_graph_id, rank_id);
    bool trans_flag = dump_json_parser.trans_flag();
    for (auto &item : debugger->GetParametersMindRT()) {
      DumpSingleParameterNode(item, dump_path, trans_flag, debugger);
    }
  }
}
}  // namespace mindspore