/**
 * Copyright 2020-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "include/backend/debug/data_dump/e2e_dump.h"

#include <unistd.h>
#include <sstream>
#include <algorithm>
#include <map>
#include <memory>
#include <set>
#include <utility>
#include <vector>
#include "include/backend/debug/data_dump/dump_json_parser.h"
#include "runtime/device/ms_device_shape_transfer.h"
#include "include/common/debug/anf_dump_utils.h"
#include "include/common/debug/common.h"
#include "include/backend/anf_runtime_algorithm.h"
#include "include/common/utils/anfalgo.h"
#include "utils/ms_context.h"
#include "runtime/device/kernel_runtime_manager.h"
#include "include/common/utils/config_manager.h"
#include "utils/file_utils.h"
#include "include/backend/debug/data_dump/tensor_stat_dump.h"
#include "include/backend/debug/common/csv_writer.h"
#include "abstract/utils.h"
#include "runtime/hardware/device_context_manager.h"
#ifdef ENABLE_DEBUGGER
#include "ops/op_def.h"
#include "debug/debug_services.h"
#include "debug/tensor_load.h"
#include "include/backend/debug/debugger/debugger.h"
#endif

namespace mindspore {
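// Builds the dump file path for one input/output slot of a kernel, following the naming pattern
// <dump_path>/<op_type>.<op_name>.<task_id>.<stream_id>.<timestamp>.<input|output>.<slot>.
// The task id is always 0 here; the stream id is only filled in on Ascend.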
std::string GenDataFilePath(const CNodePtr &node, const std::string &kernel_name, const std::string &dump_path,
                            size_t slot, bool is_input) {
  std::string op_type = common::AnfAlgo::GetCNodeName(node);
  std::string op_name = kernel_name;
  uint64_t timestamp = Common::GetTimeStamp();
  uint32_t task_id = 0;
  uint32_t stream_id = 0;
  if (E2eDump::IsDeviceTargetAscend()) {
    stream_id = AnfAlgo::GetStreamId(node);
  }
  std::string tensor_type = is_input ? ".input." : ".output.";
  std::string file_path = dump_path + '/' + op_type + '.' + op_name + '.' + std::to_string(task_id) + '.' +
                          std::to_string(stream_id) + '.' + std::to_string(timestamp) + tensor_type +
                          std::to_string(slot);
  return file_path;
}

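// Converts a dtype string (e.g. "float32") coming from the debugger tensor data into the corresponding TypeId.
// Returns kTypeUnknown for unrecognized strings.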
TypeId ConvertStringToTypeId(const std::string &dtype) {
  const std::map<std::string, TypeId> kDbgDataTypeToStringMap = {
    {"bool", TypeId::kNumberTypeBool},       {"int8", TypeId::kNumberTypeInt8},
    {"int16", TypeId::kNumberTypeInt16},     {"int32", TypeId::kNumberTypeInt32},
    {"int64", TypeId::kNumberTypeInt64},     {"uint8", TypeId::kNumberTypeUInt8},
    {"uint16", TypeId::kNumberTypeUInt16},   {"uint32", TypeId::kNumberTypeUInt32},
    {"uint64", TypeId::kNumberTypeUInt64},   {"float16", TypeId::kNumberTypeFloat16},
    {"float32", TypeId::kNumberTypeFloat32}, {"float64", TypeId::kNumberTypeFloat64},
    {"bfloat16", TypeId::kNumberTypeBFloat16}};
  auto iter_type = kDbgDataTypeToStringMap.find(dtype);
  if (iter_type == kDbgDataTypeToStringMap.end()) {
    return TypeId::kTypeUnknown;
  }
  return iter_type->second;
}

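// Returns true if the configured device target is GPU.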
bool E2eDump::IsDeviceTargetGPU() {
  auto context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(context);
  return context->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kGPUDevice;
}

bool E2eDump::IsDeviceTargetAscend() {
  auto context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(context);
  return context->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kAscendDevice;
}

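// Kernel-by-kernel (MindRT) dump applies on GPU, and on Ascend when the kernel-by-kernel flag is set in the debugger.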
bool E2eDump::IsMindRTKernelByKernel() {
  auto debugger = Debugger::GetInstance();
  MS_EXCEPTION_IF_NULL(debugger);
  return IsDeviceTargetGPU() || debugger->GetAscendKernelByKernelFlag();
}

/*
 * Feature group: Dump.
 * Target device group: GPU, Ascend.
 * Runtime category: Old runtime, MindRT.
 * Description: Dumps a tensor that has already been loaded into the tensor_loader in host memory to a file on disk,
 * on both GPU and Ascend machines.
 */
void E2eDump::DumpMemFromTensorLoaderToFile(const Debugger *debugger, const std::string &file_path,
                                            const std::string &original_kernel_name, size_t slot) {
#ifdef ENABLE_DEBUGGER
  MS_EXCEPTION_IF_NULL(debugger);
  auto ret = debugger->DumpTensorToFile(file_path, original_kernel_name, slot);
  if (!ret) {
    MS_LOG(INFO) << "DumpTensorToFile Failed: path:" << file_path;
  }
#endif
}

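// Dumps the outputs of every kernel in the graph's execution order that matches the dump configuration.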
void E2eDump::DumpOutput(const session::KernelGraph *graph, const std::string &dump_path, const Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(graph);
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  if (!dump_json_parser.OutputNeedDump()) {
    return;
  }
  MS_LOG(INFO) << "Start e2e dump output";
  bool trans_flag = dump_json_parser.trans_flag();
  const auto &apply_kernels = graph->execution_order();
  for (const auto &node : apply_kernels) {
    MS_EXCEPTION_IF_NULL(node);
    std::string kernel_name = GetKernelNodeName(node);
    if (!dump_json_parser.NeedDump(kernel_name)) {
      continue;
    }
    DumpJsonParser::GetInstance().MatchKernel(kernel_name);
    DumpOutputImpl(node, trans_flag, dump_path, &kernel_name, debugger);
  }
}

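// Dumps the outputs of a single kernel, provided output dumping is enabled and the kernel matches the dump
// configuration.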
void E2eDump::DumpOutputSingleNode(const CNodePtr &node, const std::string &dump_path, const Debugger *debugger) {
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  if (!dump_json_parser.OutputNeedDump()) {
    return;
  }
  bool trans_flag = dump_json_parser.trans_flag();
  MS_EXCEPTION_IF_NULL(node);
  std::string kernel_name = GetKernelNodeName(node);
  if (!dump_json_parser.NeedDump(kernel_name)) {
    return;
  }
  DumpJsonParser::GetInstance().MatchKernel(kernel_name);
  DumpOutputImpl(node, trans_flag, dump_path, &kernel_name, debugger);
}

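// Shared implementation for output dumping: for every output slot with a device address, writes tensor statistics
// and/or the tensor data, depending on the dump configuration and runtime mode.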
void E2eDump::DumpOutputImpl(const CNodePtr &node, bool trans_flag, const std::string &dump_path,
                             std::string *kernel_name, const Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(node);
  GetFileKernelName(NOT_NULL(kernel_name));
  auto output_size = AnfAlgo::GetOutputTensorNum(node);
  for (size_t j = 0; j < output_size; ++j) {
    if (!AnfAlgo::OutputAddrExist(node, j)) {
      continue;
    }
    auto addr = AnfAlgo::GetOutputAddr(node, j);
    std::string node_name = GetKernelNodeName(node);
    MS_EXCEPTION_IF_NULL(addr);
    auto type = common::AnfAlgo::GetOutputInferDataType(node, j);
    std::string op_type = common::AnfAlgo::GetCNodeName(node);
    std::string op_name = *kernel_name;
    uint32_t task_id = 0;
    uint32_t stream_id = 0;
    if (IsDeviceTargetAscend()) {
      stream_id = AnfAlgo::GetStreamId(node);
    }
    uint64_t timestamp = Common::GetTimeStamp();
    std::string file_path = dump_path + '/' + op_type + '.' + op_name + '.' + std::to_string(task_id) + '.' +
                            std::to_string(stream_id) + '.' + std::to_string(timestamp) + ".output." +
                            std::to_string(j);
    if (DumpJsonParser::GetInstance().IsStatisticDump() && IsMindRTKernelByKernel()) {
      TensorStatDump stat_dump(op_type, op_name, task_id, stream_id, timestamp, false, j, j);
      (void)stat_dump.DumpTensorStatsToFile(node_name, dump_path, debugger);
    }
    if (DumpJsonParser::GetInstance().IsTensorDump()) {
      if (IsMindRTKernelByKernel()) {
        DumpMemFromTensorLoaderToFile(debugger, file_path, node_name, j);
      } else {
        ShapeVector int_shapes;
        GetDumpIntShape(node, j, NOT_NULL(&int_shapes), trans_flag);
        DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
      }
    }
  }
}

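// Dumps output tensors directly from their device addresses; skipped in kernel-by-kernel (MindRT) mode.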
void E2eDump::DumpOutputData(const CNodePtr &node, bool trans_flag, const std::string &dump_path,
                             std::string *kernel_name) {
  if (IsMindRTKernelByKernel()) {
    MS_LOG(INFO) << "DumpOutputData is only for graph mode on Ascend";
    return;
  }
  MS_EXCEPTION_IF_NULL(node);
  GetFileKernelName(NOT_NULL(kernel_name));
  auto output_size = AnfAlgo::GetOutputTensorNum(node);
  for (size_t j = 0; j < output_size; ++j) {
    if (!AnfAlgo::OutputAddrExist(node, j)) {
      continue;
    }
    auto addr = AnfAlgo::GetOutputAddr(node, j);
    MS_EXCEPTION_IF_NULL(addr);
    ShapeVector int_shapes;
    GetDumpIntShape(node, j, NOT_NULL(&int_shapes), trans_flag);
    auto type = common::AnfAlgo::GetOutputInferDataType(node, j);
    std::string file_path = GenDataFilePath(node, *kernel_name, dump_path, j, false);
    DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
  }
}

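// Dumps the inputs of every kernel in the graph's execution order that matches the dump configuration.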
void E2eDump::DumpInput(const session::KernelGraph *graph, const std::string &dump_path, const Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(graph);
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  if (!dump_json_parser.InputNeedDump()) {
    return;
  }
  MS_LOG(INFO) << "Start e2e dump input";
  bool trans_flag = dump_json_parser.trans_flag();
  const auto &apply_kernels = graph->execution_order();
  for (const auto &node : apply_kernels) {
    MS_EXCEPTION_IF_NULL(node);
    std::string kernel_name = GetKernelNodeName(node);
    if (!dump_json_parser.NeedDump(kernel_name)) {
      continue;
    }
    DumpJsonParser::GetInstance().MatchKernel(kernel_name);
    DumpInputImpl(node, trans_flag, dump_path, &kernel_name, debugger);
  }
}

void E2eDump::DumpInputSingleNode(const CNodePtr &node, const std::string &dump_path, const Debugger *debugger) {
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  if (!dump_json_parser.InputNeedDump()) {
    return;
  }
  bool trans_flag = dump_json_parser.trans_flag();
  MS_EXCEPTION_IF_NULL(node);
  std::string kernel_name = GetKernelNodeName(node);
  if (!dump_json_parser.NeedDump(kernel_name)) {
    return;
  }
  DumpJsonParser::GetInstance().MatchKernel(kernel_name);
  DumpInputImpl(node, trans_flag, dump_path, &kernel_name, debugger);
}

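// Dumps the operator arguments of a single kernel to a JSON file. For operators that have an op definition, init
// args are read back from the debugger's tensor data; otherwise the primitive's attributes are serialized.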
void E2eDump::DumpArgsSingleNode(const CNodePtr &node, const std::string &dump_path, const Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(node);
  MS_EXCEPTION_IF_NULL(debugger);
  auto op_name = GetKernelNodeName(node);
  int start_index = static_cast<int>(op_name.rfind('/')) + 1;
  int end_index = static_cast<int>(op_name.rfind('-'));
  if (end_index == -1) {
    end_index = static_cast<int>(op_name.length());
  }
  std::string op_t = op_name.substr(start_index, end_index - start_index);
  auto op_def = mindspore::ops::GetOpDef(op_t);
  nlohmann::json json;
  if (!op_def) {
    auto prim_node = GetCNodePrimitive(node);
    if (prim_node != nullptr) {
      auto prim_attrs = prim_node->attrs();
      for (const auto &entry : prim_attrs) {
        json[entry.first] = entry.second->ToString();
      }
    }
  } else {
    int idx = 0;
    for (const auto &op_arg : op_def->args_) {
      ++idx;
      if (op_arg.as_init_arg_) {
        auto input_kernel = node->input(idx);
        std::string input_kernel_name = GetKernelNodeName(input_kernel);
        std::string input_tensor_name = input_kernel_name + ':' + "0";
        auto arg_name = op_arg.arg_name_;
        auto t_data = debugger->GetTensor(input_tensor_name);
        if (t_data == nullptr) {
          MS_LOG(WARNING) << "Failed to get tensor data for " << input_tensor_name << ", skip dumping arg " << arg_name;
          continue;
        }
        std::string type = t_data->GetTypeString();
        std::shared_ptr<tensor::Tensor> converted_tensor = nullptr;
        converted_tensor = std::make_shared<tensor::Tensor>(
          ConvertStringToTypeId(type), t_data->GetShape(),
          static_cast<void *>(const_cast<char *>(t_data->GetDataPtr())), t_data->GetByteSize());
        json[arg_name] =
          converted_tensor->data().ToString(converted_tensor->data_type(), converted_tensor->shape(), false);
      }
    }
  }

  std::string scope_name = node->fullname_with_scope();
  std::replace(scope_name.begin(), scope_name.end(), '.', '_');
  std::replace(scope_name.begin(), scope_name.end(), '/', '_');

  constexpr int kJsonIndent = 4;
  std::string file_path = dump_path + "/" + op_t + "." + scope_name + ".json";
  auto realpath = Common::CreatePrefixPath(file_path);
  if (!realpath.has_value()) {
    MS_LOG(ERROR) << "Get realpath failed, path=" << file_path;
    return;
  }
  std::ofstream outFile(realpath.value());
  if (!outFile.is_open()) {
    MS_LOG(ERROR) << "Open file failed, path=" << realpath.value();
    return;
  }
  outFile << json.dump(kJsonIndent);
  outFile.close();
}

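// Shared implementation for input dumping: resolves each input to the producing node's output and writes tensor
// statistics and/or the tensor data, depending on the dump configuration and runtime mode.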
void E2eDump::DumpInputImpl(const CNodePtr &node, bool trans_flag, const std::string &dump_path,
                            std::string *kernel_name, const Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(node);
  GetFileKernelName(NOT_NULL(kernel_name));
  auto input_size = common::AnfAlgo::GetInputTensorNum(node);
  for (size_t j = 0; j < input_size; ++j) {
    auto kernel_with_index = common::AnfAlgo::GetPrevNodeOutput(node, j);
    auto input = kernel_with_index.first;
    auto index = kernel_with_index.second;
    if (!AnfAlgo::OutputAddrExist(input, index)) {
      continue;
    }
    std::string node_name = GetKernelNodeName(node);
    size_t slot = j;
    if (IsMindRTKernelByKernel()) {
      auto input_kernel = node->input(j + 1);
      std::string input_kernel_name = GetKernelNodeName(input_kernel);
      node_name = input_kernel_name;
      slot = 0;
    }
    auto type = common::AnfAlgo::GetOutputInferDataType(input, index);
    std::string op_type = common::AnfAlgo::GetCNodeName(node);
    std::string op_name = *kernel_name;
    uint64_t timestamp = Common::GetTimeStamp();
    uint32_t task_id = 0;
    uint32_t stream_id = 0;
    if (IsDeviceTargetAscend()) {
      stream_id = AnfAlgo::GetStreamId(node);
    }
    std::string file_path = dump_path + '/' + op_type + '.' + op_name + '.' + std::to_string(task_id) + '.' +
                            std::to_string(stream_id) + '.' + std::to_string(timestamp) + ".input." +
                            std::to_string(j);
    auto addr = AnfAlgo::GetOutputAddr(input, index);
    MS_EXCEPTION_IF_NULL(addr);
    if (DumpJsonParser::GetInstance().IsStatisticDump() && IsMindRTKernelByKernel()) {
      TensorStatDump stat_dump(op_type, op_name, task_id, stream_id, timestamp, true, j, slot);
      (void)stat_dump.DumpTensorStatsToFile(node_name, dump_path, debugger);
    }
    if (DumpJsonParser::GetInstance().IsTensorDump()) {
      if (IsMindRTKernelByKernel()) {
        DumpMemFromTensorLoaderToFile(debugger, file_path, node_name, slot);
      } else {
        ShapeVector int_shapes;
        GetDumpIntShape(input, index, NOT_NULL(&int_shapes), trans_flag);
        DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
      }
    }
  }
}

void E2eDump::DumpInputData(const CNodePtr &node, bool trans_flag, const std::string &dump_path,
                            std::string *kernel_name) {
  if (IsMindRTKernelByKernel()) {
    MS_LOG(INFO) << "DumpInputData is only for graph mode on Ascend";
    return;
  }
  MS_EXCEPTION_IF_NULL(node);
  GetFileKernelName(NOT_NULL(kernel_name));
  auto input_size = common::AnfAlgo::GetInputTensorNum(node);
  for (size_t j = 0; j < input_size; ++j) {
    auto kernel_with_index = common::AnfAlgo::GetPrevNodeOutput(node, j);
    auto input = kernel_with_index.first;
    auto index = kernel_with_index.second;
    if (!AnfAlgo::OutputAddrExist(input, index)) {
      continue;
    }
    auto addr = AnfAlgo::GetOutputAddr(input, index);
    MS_EXCEPTION_IF_NULL(addr);
    ShapeVector int_shapes;
    GetDumpIntShape(input, index, NOT_NULL(&int_shapes), trans_flag);
    auto type = common::AnfAlgo::GetOutputInferDataType(input, index);
    std::string file_path = GenDataFilePath(node, *kernel_name, dump_path, j, true);
    DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
  }
}

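// Dumps a single Parameter or ValueNode (constant) output that matches the dump configuration.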
void E2eDump::DumpSingleAnfNode(const AnfNodePtr &anf_node, const size_t output_index, const std::string &dump_path,
                                bool trans_flag, const Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(anf_node);
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  if ((!anf_node->isa<Parameter>() && !anf_node->isa<ValueNode>()) || IsValueNode<StringImm>(anf_node)) {
    return;
  }
  std::string node_name = GetKernelNodeName(anf_node);
  if (!dump_json_parser.NeedDump(node_name)) {
    return;
  }
  DumpJsonParser::GetInstance().MatchKernel(node_name);
  GetFileKernelName(NOT_NULL(&node_name));

  std::string dump_name = node_name;
  const std::string cst_prefix = "Default_";
  if (anf_node->isa<ValueNode>()) {
    if (dump_name.find(cst_prefix) == std::string::npos) {
      MS_LOG(INFO) << "Incorrect constant format: " << dump_name;
      return;
    }
    dump_name = node_name.substr(cst_prefix.length());
    trans_flag = false;
  }
  // Check whether the output address exists; if not, there is nothing to dump.
  if (!AnfAlgo::OutputAddrExist(anf_node, output_index)) {
    return;
  }
  auto addr = AnfAlgo::GetOutputAddr(anf_node, output_index);
  MS_EXCEPTION_IF_NULL(addr);
  ShapeVector int_shapes;
  GetDumpIntShape(anf_node, output_index, NOT_NULL(&int_shapes), trans_flag);
  auto type = common::AnfAlgo::GetOutputInferDataType(anf_node, output_index);
  uint64_t timestamp = Common::GetTimeStamp();
  uint32_t task_id = 0;
  uint32_t stream_id = 0;
  std::string file_path = dump_path + "/Parameter." + dump_name + '.' + std::to_string(task_id) + '.' +
                          std::to_string(stream_id) + '.' + std::to_string(timestamp) + ".output.0";
  if (IsDeviceTargetGPU()) {
    if (dump_json_parser.IsStatisticDump()) {
      TensorStatDump stat_dump("Parameter", dump_name, task_id, stream_id, timestamp, false, 0, 0);
      (void)stat_dump.DumpTensorStatsToFile(node_name, dump_path, debugger);
    }
    if (dump_json_parser.IsTensorDump()) {
      DumpMemFromTensorLoaderToFile(debugger, file_path, node_name, 0);
    }
  } else {
    // On Ascend, saving statistic data is only supported in npy format.
    if (dump_json_parser.IsStatisticDump() && dump_json_parser.IsNpyFormat()) {
      // In Ascend kernel-by-kernel mode, load the tensor data into the debugger first.
      auto format = kOpFormat_DEFAULT;
      std::string tensor_name = node_name + ":0";
      MS_EXCEPTION_IF_NULL(debugger);
      uint32_t root_graph_id = debugger->GetCurrentRootGraphId();
      bool ret = addr->LoadMemToHost(tensor_name, 0, format, int_shapes, type, 0, true, root_graph_id, false, true);
      if (!ret) {
        MS_LOG(ERROR) << "LoadMemToHost failed, tensor_name: " << tensor_name;
      } else {
        TensorStatDump stat_dump("Parameter", dump_name, task_id, stream_id, timestamp, false, 0, 0);
        (void)stat_dump.DumpTensorStatsToFile(node_name, dump_path, debugger);
      }
    }
    if (dump_json_parser.IsTensorDump()) {
      DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
    }
  }
}

/*
 * Feature group: Dump.
 * Target device group: Ascend, GPU.
 * Runtime category: MindRT.
 * Description: This function is similar to DumpSingleAnfNode, but it only dumps parameter nodes in mindRT.
 * It uses GetParameterInfo to get the dump info for the parameter node.
 */
void E2eDump::DumpSingleParameterNode(const AnfNodePtr &anf_node, const std::string &dump_path, bool trans_flag,
                                      const Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(anf_node);
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  std::string node_name = GetKernelNodeName(anf_node);
  if (!anf_node->isa<Parameter>() || !dump_json_parser.NeedDump(node_name) || !dump_json_parser.OutputNeedDump()) {
    return;
  }
  DumpJsonParser::GetInstance().MatchKernel(node_name);
  GetFileKernelName(NOT_NULL(&node_name));
  ShapeVector int_shapes;
  TypeId type;
  TypeId device_type;
  auto addr = GetParameterInfo(anf_node, NOT_NULL(&int_shapes), NOT_NULL(&type), NOT_NULL(&device_type));
  if (addr == nullptr || addr->GetPtr() == nullptr) {
    MS_LOG(DEBUG) << "Skip node: " << node_name << ". Parameter data is not available for mindRT.";
    return;
  }
  uint64_t timestamp = Common::GetTimeStamp();
  uint32_t task_id = 0;
  uint32_t stream_id = 0;
  std::string file_path = dump_path + "/Parameter." + node_name + '.' + std::to_string(task_id) + '.' +
                          std::to_string(stream_id) + '.' + std::to_string(timestamp) + ".output.0";
  if (IsDeviceTargetGPU()) {
    if (dump_json_parser.IsStatisticDump()) {
      TensorStatDump stat_dump("Parameter", node_name, task_id, stream_id, timestamp, false, 0, 0);
      (void)stat_dump.DumpTensorStatsToFile(node_name, dump_path, debugger);
    }
    if (dump_json_parser.IsTensorDump()) {
      DumpMemFromTensorLoaderToFile(debugger, file_path, node_name, 0);
    }
  } else {
    // On Ascend, saving statistic data is only supported in npy format.
    if (dump_json_parser.IsStatisticDump() && dump_json_parser.IsNpyFormat()) {
      // In Ascend kernel-by-kernel mode, load the tensor data into the debugger first.
      auto format = kOpFormat_DEFAULT;
      std::string tensor_name = node_name + ":0";
      MS_EXCEPTION_IF_NULL(debugger);
      uint32_t root_graph_id = debugger->GetCurrentRootGraphId();
      bool ret = addr->LoadMemToHost(tensor_name, 0, format, int_shapes, type, 0, true, root_graph_id, false, true);
      if (!ret) {
        MS_LOG(ERROR) << "LoadMemToHost failed, tensor_name: " << tensor_name;
      }
      TensorStatDump stat_dump("Parameter", node_name, task_id, stream_id, timestamp, false, 0, 0);
      (void)stat_dump.DumpTensorStatsToFile(node_name, dump_path, debugger);
    }
    if (dump_json_parser.IsTensorDump()) {
      DumpMemToFile(file_path, *addr, int_shapes, type, trans_flag);
    }
  }
}

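// Dumps all graph inputs (parameters) when output dumping is enabled.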
void E2eDump::DumpParameters(const session::KernelGraph *graph, const std::string &dump_path,
                             const Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(graph);
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  if (!dump_json_parser.OutputNeedDump()) {
    return;
  }
  MS_LOG(INFO) << "Start e2e dump parameters";
  bool trans_flag = dump_json_parser.trans_flag();

  // dump parameters
  const auto &parameters = graph->inputs();
  for (auto &item : parameters) {
    DumpSingleAnfNode(item, kParameterOutputIndex, dump_path, trans_flag, debugger);
  }
}

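// Dumps graph constants for GPU e2e dump; the constant directory is only written once per graph.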
void E2eDump::DumpConstantData(const session::KernelGraph *graph, uint32_t rank_id, const Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(graph);
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  if (!IsDeviceTargetGPU() || !dump_json_parser.e2e_dump_enabled()) {
    return;
  }
  uint32_t graph_id = graph->graph_id();
  std::string cst_path = GenerateDumpPath(graph_id, rank_id, true);
  if (!Common::FileExists(cst_path)) {
    DumpConstantData(graph, cst_path, debugger);
  }
}

void E2eDump::DumpConstantData(const session::KernelGraph *graph, const std::string &cst_dump_path,
                               const Debugger *debugger) {
  // Dump constants to npy files.
  MS_EXCEPTION_IF_NULL(graph);
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  MS_LOG(INFO) << "DumpConstants. Current iteration is " << dump_json_parser.cur_dump_iter();
  MS_LOG(INFO) << "Current graph id is " << graph->graph_id();
  if (!dump_json_parser.OutputNeedDump()) {
    return;
  }
  const auto value_nodes = graph->graph_value_nodes();
  for (auto &item : value_nodes) {
    DumpSingleAnfNode(item, kValueNodeOutputIndex, cst_dump_path, false, debugger);
  }
}

/*
 * Feature group: Dump.
 * Target device group: Ascend, GPU.
 * Runtime category: Old runtime.
 * Description: This function updates the dump iteration for GPU and the Ascend old runtime.
 */
void E2eDump::UpdateIterOldRTDump(const session::KernelGraph *graph) {
  MS_EXCEPTION_IF_NULL(graph);
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  uint32_t graph_id = graph->graph_id();
  if (IsDeviceTargetGPU()) {
    if (starting_graph_id == INT32_MAX) {
      starting_graph_id = graph_id;
    } else if (starting_graph_id == graph_id && !MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
      // Updating the dump iter for the mindrt runtime is done in UpdateIterGPUDump().
      // Update the dump iter for the GPU old runtime.
      dump_json_parser.UpdateDumpIter();
    }
    return;
  }
  // If the device target is Ascend:
  if (graph->IsDatasetGraph()) {
    MS_LOG(INFO) << "No need to update iteration for dataset graph.";
    return;
  }

  // In multi-network scripts, the dump iter equals the number of networks that have been executed so far.
  dump_json_parser.UpdateDumpIter();
}

/*
 * Feature group: Dump.
 * Target device group: Ascend, GPU.
 * Runtime category: MindRT.
 * Description: This function updates the dump iteration for GPU and Ascend MindRT dump. Please note that dump with
 * dataset_sink_mode = True is not supported for GPU.
 */
void E2eDump::UpdateIterMindRTDump() {
  auto debugger = Debugger::GetInstance();
  MS_EXCEPTION_IF_NULL(debugger);
  // The dataset graph is always the first graph in the list when dataset_sink_mode is true.
  auto graph_list = debugger->GetStepGraphPtrList();
  if (graph_list.empty()) {
    MS_LOG(INFO) << "The graph list is empty.";
    return;
  }
  auto graph = graph_list[0];
  auto context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(context);
  if (context->get_param<std::string>(MS_CTX_DEVICE_TARGET) == kAscendDevice && graph->IsDatasetGraph()) {
    MS_LOG(INFO) << "No need to update iteration for dataset graph.";
    return;
  }
  // Update the dump iter for GPU and kernel-by-kernel Ascend dump.
  DumpJsonParser::GetInstance().UpdateDumpIter();
}

/*
 * Feature group: Dump.
 * Target device group: Ascend, GPU.
 * Runtime category: Old runtime, MindRT.
 * Description: Generates the graph history file (recording all the iteration numbers in which the graph was executed)
 * for the given graph and rank_id. If dataset_sink_mode is true for async dump on Ascend, this function is called
 * once per epoch and dumps all the iterations of that epoch to the graph history file.
 */
void E2eDump::DumpRunIter(const KernelGraphPtr &graph, uint32_t rank_id) {
  auto &json_parser = DumpJsonParser::GetInstance();
  if (!(json_parser.async_dump_enabled() || json_parser.e2e_dump_enabled())) {
    return;
  }
  auto context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(context);
  std::string backend = context->backend_policy();
  if (backend == "ge") {
    MS_LOG(INFO) << "On 910B and 910C platforms, dumping the execution_order is not supported.";
    return;
  }
  bool sink_mode =
    (ConfigManager::GetInstance().dataset_mode() == DatasetMode::DS_SINK_MODE || graph->IsDatasetGraph());
  auto iter_num = SizeToInt(LongToSize(ConfigManager::GetInstance().iter_num()));
  if (graph->IsDatasetGraph()) {
    MS_LOG(INFO) << "graph: " << graph->graph_id() << " is dataset graph, not creating graph history file.";
    return;
  }
  auto debugger = Debugger::GetInstance();
  MS_EXCEPTION_IF_NULL(debugger);
  if (!debugger->GetAscendKernelByKernelFlag() && !IsDeviceTargetGPU() &&
      (graph->graph_id() != graph->root_graph_id())) {
    // When the device target is Ascend, we only dump the graph run iter for the root graph.
    return;
  }
  std::string execution_order_path = json_parser.path() + "/rank_" + std::to_string(rank_id) + "/execution_order/";
  std::string graph_str =
    IsDeviceTargetGPU() ? std::to_string(graph->graph_id()) : std::to_string(graph->root_graph_id());
  std::string file_name_to_check = execution_order_path + "/ms_global_execution_order_graph_" + graph_str + ".csv";
  auto real_path = Common::CreatePrefixPath(file_name_to_check);
  if (!real_path.has_value()) {
    MS_LOG(WARNING) << "Check file path: " << file_name_to_check << " failed.";
    return;
  }
  std::string file_name = real_path.value();
  ChangeFileMode(file_name, S_IWUSR);
  std::ofstream fout(file_name, std::ofstream::app);
  if (!fout.is_open()) {
    MS_LOG(WARNING) << "Open file for saving graph global execution order failed.";
    return;
  }
  if (sink_mode && json_parser.async_dump_enabled() && !debugger->GetAscendKernelByKernelFlag()) {
    // For async dump with sink_mode = true, cur_dump_iter() is the current epoch, so dump the history
    // for all iterations in the epoch.
    debugger->UpdateGraphIterMap(graph->graph_id(), iter_num);
    auto graph_iter_map = debugger->GetGraphIterMap();
    auto step_per_epoch = IntToSize(graph_iter_map[graph->graph_id()]);
    for (size_t i = 0; i < step_per_epoch; i++) {
      auto step = (json_parser.cur_dump_iter() * step_per_epoch) + i;
      fout << (std::to_string(step) + "\n");
    }
  } else {
    fout << std::to_string(json_parser.cur_dump_iter()) + "\n";
  }
  fout.close();
  ChangeFileMode(file_name, S_IRUSR);
}

/*
 * Feature group: Dump.
 * Target device group: Ascend, GPU.
 * Runtime category: Old runtime, MindRT.
 * Description: This function is for dumping the whole graph. It is used for the old runtime in GPU and Ascend and
 * super-kernel mindRT in Ascend.
 */
void E2eDump::DumpData(const session::KernelGraph *graph, uint32_t rank_id, const Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(graph);
  bool success = false;
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  uint32_t graph_id = graph->graph_id();
  if (!dump_json_parser.e2e_dump_enabled()) {
    return;
  }

  if (dump_json_parser.GetIterDumpFlag()) {
    MS_LOG(INFO) << "Start e2e dump. Current iteration is " << dump_json_parser.cur_dump_iter();
    MS_LOG(INFO) << "Current graph id is " << graph_id;
    std::string dump_path = GenerateDumpPath(graph_id, rank_id);
    if (dump_json_parser.IsStatisticDump()) {
      (void)TensorStatDump::OpenStatisticsFile(dump_path);
    }
    DumpInput(graph, dump_path, debugger);
    DumpOutput(graph, dump_path, debugger);
    if (!MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_MINDRT)) {
      // Dump parameters for old runtime. For mindRT it is done in PostExecuteGraphDebugger.
      DumpParameters(graph, dump_path, debugger);
      // DumpConstantData for GPU old runtime.
      DumpConstantData(graph, rank_id, debugger);
    }
    if (dump_json_parser.IsStatisticDump()) {
      CsvWriter::GetInstance().CloseFile();
    }
    success = true;
  }

  if (success) {
    MS_LOG(DEBUG) << "E2eDump Dump Data completed!";
  } else {
    MS_LOG(DEBUG) << "E2eDump Dump has not occurred!";
  }
}

/*
 * Feature group: Dump.
 * Target device group: Ascend, GPU.
 * Runtime category: MindRT.
 * Description: This function is for dumping a single node. It is used for mindrt in GPU and Ascend kernel-by-kernel.
 */
bool E2eDump::DumpSingleNodeData(const CNodePtr &node, uint32_t graph_id, uint32_t rank_id, const Debugger *debugger) {
  bool success = false;
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  if (dump_json_parser.DumpEnabledForIter()) {
    std::string dump_path = GenerateDumpPath(graph_id, rank_id);
    DumpInputSingleNode(node, dump_path, debugger);
    DumpOutputSingleNode(node, dump_path, debugger);
    if (dump_json_parser.save_args_flag()) {
      DumpArgsSingleNode(node, dump_path, debugger);
    }
    success = true;
  }
  return success;
}

/*
 * Feature group: Dump.
 * Target device group: Ascend, GPU.
 * Runtime category: MindRT.
 * Description: This function is for dumping all the parameters in the current root graph for GPU, Ascend superkernel
 * (e2e dump) and Ascend kernel-by-kernel (e2e and async dump).
 */
void E2eDump::DumpParametersData(uint32_t rank_id, const Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(debugger);
  uint32_t root_graph_id = debugger->GetCurrentRootGraphId();
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  if ((dump_json_parser.async_dump_enabled() && !debugger->GetAscendKernelByKernelFlag()) ||
      (dump_json_parser.async_dump_enabled() && dump_json_parser.op_debug_mode() > 0)) {
    // In async dump, parameters are only dumped for mindRT in kernel-by-kernel mode.
    return;
  }
  if (dump_json_parser.DumpEnabledForIter()) {
    MS_LOG(INFO) << "DumpParameters. Current iteration is " << dump_json_parser.cur_dump_iter();
    MS_LOG(INFO) << "Current root graph id is " << root_graph_id;
    std::string dump_path = GenerateDumpPath(root_graph_id, rank_id);
    bool trans_flag = dump_json_parser.trans_flag();
    for (auto &item : debugger->GetParametersMindRT()) {
      DumpSingleParameterNode(item, dump_path, trans_flag, debugger);
    }
  }
}
}  // namespace mindspore