1 /**
2 * Copyright 2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "runtime/framework/actor/debug_actor.h"
18 #include <vector>
19 #include <memory>
20 #include <string>
21 #include "runtime/framework/actor/debug_aware_actor.h"
22 #include "mindrt/include/async/async.h"
23 #include "utils/log_adapter.h"
24 #ifndef ENABLE_SECURITY
25 #include "debug/data_dump/cpu_e2e_dump.h"
26 #endif
27 #ifdef ENABLE_DEBUGGER
28 #include "debug/debugger/debugger.h"
29 #include "debug/debugger/debugger_utils.h"
30 #endif
31
32 namespace mindspore {
33 namespace runtime {
Debug(const AnfNodePtr & node,const KernelLaunchInfo * launch_info_,const DeviceContext * device_context,OpContext<DeviceTensor> * const op_context,const AID * from_aid)34 void DebugActor::Debug(const AnfNodePtr &node, const KernelLaunchInfo *launch_info_,
35 const DeviceContext *device_context, OpContext<DeviceTensor> *const op_context,
36 const AID *from_aid) {
37 MS_EXCEPTION_IF_NULL(node);
38 MS_EXCEPTION_IF_NULL(device_context);
39 MS_EXCEPTION_IF_NULL(op_context);
40 MS_EXCEPTION_IF_NULL(from_aid);
41
42 if (!node->isa<CNode>()) {
43 // Call back to the from actor to process after debug finished.
44 Async(*from_aid, &DebugAwareActor::OnDebugFinish, op_context);
45 return;
46 }
47
48 const auto &cnode = node->cast<CNodePtr>();
49 if (device_context->GetDeviceAddressType() == device::DeviceAddressType::kCPU) {
50 #ifndef ENABLE_SECURITY
51 if (DumpJsonParser::GetInstance().GetIterDumpFlag()) {
52 auto kernel_graph = std::dynamic_pointer_cast<session::KernelGraph>(cnode->func_graph());
53 MS_EXCEPTION_IF_NULL(kernel_graph);
54 CPUE2eDump::DumpCNodeData(cnode, kernel_graph->graph_id());
55 }
56 #endif
57 } else if (device_context->GetDeviceAddressType() == device::DeviceAddressType::kGPU) {
58 #ifdef ENABLE_DEBUGGER
59 auto debugger = Debugger::GetInstance();
60 if (debugger != nullptr) {
61 std::string kernel_name = cnode->fullname_with_scope();
62 debugger->SetCurNode(kernel_name);
63 bool read_data = CheckReadData(cnode);
64 if (read_data) {
65 ReadDataAndDump(cnode, launch_info_, exec_order_);
66 }
67 }
68 exec_order_ += 1;
69 #endif
70 }
71
72 // Call back to the from actor to process after debug finished.
73 Async(*from_aid, &DebugAwareActor::OnDebugFinish, op_context);
74 }
75
DebugOnStepBegin(std::vector<KernelGraphPtr> graphs,std::vector<DeviceContext * > device_contexts,OpContext<DeviceTensor> * const op_context,const AID * from_aid)76 void DebugActor::DebugOnStepBegin(std::vector<KernelGraphPtr> graphs, std::vector<DeviceContext *> device_contexts,
77 OpContext<DeviceTensor> *const op_context, const AID *from_aid) {
78 MS_EXCEPTION_IF_NULL(op_context);
79 MS_EXCEPTION_IF_NULL(from_aid);
80 #ifdef ENABLE_DEBUGGER
81 auto debugger = Debugger::GetInstance();
82 if (debugger != nullptr && debugger->DebuggerBackendEnabled()) {
83 debugger->PreExecuteGraphDebugger(graphs);
84 }
85 #endif
86
87 #ifndef ENABLE_SECURITY
88 if (DumpJsonParser::GetInstance().e2e_dump_enabled()) {
89 DumpJsonParser::GetInstance().ClearGraph();
90 for (size_t i = 0; i < graphs.size(); ++i) {
91 MS_EXCEPTION_IF_NULL(device_contexts[i]);
92 if (device_contexts[i]->GetDeviceAddressType() == device::DeviceAddressType::kCPU) {
93 DumpJsonParser::GetInstance().SaveGraph(graphs[i].get());
94 }
95 }
96 }
97 #endif
98 // Call back to the from actor to process after debug finished.
99 Async(*from_aid, &DebugAwareActor::OnDebugFinish, op_context);
100 }
101
DebugOnStepEnd(OpContext<DeviceTensor> * const op_context,const AID * from_aid)102 void DebugActor::DebugOnStepEnd(OpContext<DeviceTensor> *const op_context, const AID *from_aid) {
103 MS_EXCEPTION_IF_NULL(op_context);
104 MS_EXCEPTION_IF_NULL(from_aid);
105
106 #ifndef ENABLE_SECURITY
107 if (DumpJsonParser::GetInstance().GetIterDumpFlag()) {
108 CPUE2eDump::DumpParametersAndConst();
109 }
110 #endif
111
112 #ifdef ENABLE_DEBUGGER
113 auto debugger = Debugger::GetInstance();
114 if (debugger != nullptr) {
115 debugger->Debugger::UpdateStepNumGPU();
116 // Reset exec_order for the next step
117 exec_order_ = 0;
118 debugger->Debugger::PostExecuteGraphDebugger();
119 }
120 #else
121 #ifndef ENABLE_SECURITY
122 DumpJsonParser::GetInstance().UpdateDumpIter();
123 #endif
124 #endif
125
126 // Call back to the from actor to process after debug finished.
127 Async(*from_aid, &DebugAwareActor::OnDebugFinish, op_context);
128 }
129 } // namespace runtime
130 } // namespace mindspore
131