/**
 * Copyright 2021-2024 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "runtime/graph_scheduler/actor/debug_actor.h"
#include <vector>
#include <memory>
#include <string>
#include <algorithm>
#include "runtime/graph_scheduler/actor/debug_aware_actor.h"
#include "mindrt/include/async/async.h"
#include "utils/log_adapter.h"
#ifndef ENABLE_SECURITY
#include "debug/data_dump/cpu_e2e_dump.h"
#include "include/backend/debug/data_dump/e2e_dump.h"
#include "utils/ms_context.h"
#endif
#ifdef ENABLE_DEBUGGER
#include "include/backend/debug/debugger/debugger.h"
#include "debug/debugger/debugger_utils.h"
#endif
#include "debug/data_dump/data_dumper.h"
#include "include/common/debug/common.h"
#include "utils/file_utils.h"
#include "include/backend/debug/profiler/profiling.h"
#include "ops/nn_op_name.h"

namespace mindspore {
namespace runtime {
void DebugActor::ACLDump(uint32_t device_id, const std::vector<KernelGraphPtr> &graphs, bool is_kbyk) {
  std::string env_enable_str = common::GetEnv("MS_ACL_DUMP_CFG_PATH");
  std::string dump_enable_str = common::GetEnv("MINDSPORE_DUMP_CONFIG");

  std::vector<std::string> all_kernel_names;
  for (const auto &graph : graphs) {
    auto all_kernels = graph->execution_order();
    std::for_each(all_kernels.begin(), all_kernels.end(),
                  [&](const auto &k) { all_kernel_names.push_back(k->fullname_with_scope()); });
  }

  auto step_count_num = step_count;
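  // NOTE: is_dataset_sink still holds the value recorded on the previous call; when the previous step
  // ran a dataset-sink graph, step 1 is remapped to 0 (presumably to keep the dump iteration aligned
  // with the data step). The flag is refreshed from the current graphs below.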
  if (step_count == 1 && is_dataset_sink == 1) {
    step_count_num = 0;
  }
  if (!graphs.empty()) {
    auto graph = graphs[0];
    is_dataset_sink = graph->IsDatasetGraph();
  }
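  // Trigger ACL dump either for a dump-enabled iteration in kernel-by-kernel mode, or in whole-graph
  // mode when MS_ACL_DUMP_CFG_PATH and MINDSPORE_DUMP_CONFIG point to the same config file.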
  if (DumpJsonParser::GetInstance().async_dump_enabled() &&
      ((DumpJsonParser::GetInstance().IsDumpIter(step_count_num) && is_kbyk) ||
       (env_enable_str == dump_enable_str && !is_kbyk))) {
    bool is_init = false;
    if ((env_enable_str == dump_enable_str) && !(DumpJsonParser::GetInstance().IsDumpIter(step_count_num))) {
      is_init = true;
    } else {
      std::string dump_path = DumpJsonParser::GetInstance().path();
      std::string dump_path_step = dump_path + "/" + std::to_string(step_count_num);
      auto real_path = FileUtils::CreateNotExistDirs(dump_path_step, false);
      if (!real_path.has_value()) {
        MS_LOG(WARNING) << "Failed to create acl dump dir " << dump_path_step;
        return;
      }
    }
    dump_flag = true;
    auto registered_dumper = datadump::DataDumperRegister::Instance().GetDumperForBackend(device::DeviceType::kAscend);
    if (registered_dumper != nullptr) {
      registered_dumper->Initialize();
      registered_dumper->EnableDump(device_id, step_count_num, is_init, all_kernel_names);
    }
  }
}

void DebugActor::DebugPreLaunch(const AnfNodePtr &node, const std::vector<DeviceTensor *> &input_device_tensors,
                                const std::vector<DeviceTensor *> &output_device_tensors,
                                const DeviceContext *device_context, OpContext<DeviceTensor> *const op_context,
                                const AID *) {
  MS_EXCEPTION_IF_NULL(node);
  MS_EXCEPTION_IF_NULL(device_context);
  MS_EXCEPTION_IF_NULL(op_context);
}

/*
 * Feature group: Dump, Online debugger.
 * Target device group: GPU.
 * Runtime category: MindRT.
 * Description: Load and read data for the given node if needed. Dump the node if dump is enabled and free the loaded
 * memory after the dump (for GPU and Ascend kernel-by-kernel).
 */
void DebugActor::DebugPostLaunch(const AnfNodePtr &node, const std::vector<DeviceTensor *> &input_device_tensors,
                                 const std::vector<DeviceTensor *> &output_device_tensors,
                                 const DeviceContext *device_context, OpContext<DeviceTensor> *const op_context,
                                 const AID *) {
  MS_EXCEPTION_IF_NULL(node);
  MS_EXCEPTION_IF_NULL(device_context);
  MS_EXCEPTION_IF_NULL(op_context);
  std::lock_guard<std::mutex> locker(debug_mutex_);

  if (!node->isa<CNode>()) {
    return;
  }
  const auto &cnode = node->cast<CNodePtr>();
  MS_EXCEPTION_IF_NULL(cnode);
  MS_LOG(DEBUG) << "kernel by kernel debug for node: " << cnode->fullname_with_scope() << ".";
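  // Dispatch by device type: Ascend uses the kernel-by-kernel dump path, CPU uses e2e dump, and GPU
  // goes through the online debugger's read-and-dump path.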
  if (device_context->GetDeviceType() == device::DeviceType::kAscend) {
#ifdef ENABLE_DEBUGGER
    AscendKbkDump(cnode, input_device_tensors, output_device_tensors, device_context);
#endif
  } else if (device_context->GetDeviceType() == device::DeviceType::kCPU) {
#ifndef ENABLE_SECURITY
    if (DumpJsonParser::GetInstance().op_debug_mode() == DumpJsonParser::DUMP_LITE_EXCEPTION) {
      MS_LOG(WARNING) << "Abnormal dump is not supported on CPU backend.";
      return;
    }
    if (DumpJsonParser::GetInstance().GetIterDumpFlag()) {
      auto kernel_graph = std::dynamic_pointer_cast<session::KernelGraph>(cnode->func_graph());
      MS_EXCEPTION_IF_NULL(kernel_graph);
      CPUE2eDump::DumpCNodeData(cnode, kernel_graph->graph_id());
      CPUE2eDump::DumpRunIter(kernel_graph);
    }
#endif
  } else if (device_context->GetDeviceType() == device::DeviceType::kGPU) {
#ifdef ENABLE_DEBUGGER
    if (DumpJsonParser::GetInstance().op_debug_mode() == DumpJsonParser::DUMP_LITE_EXCEPTION) {
      MS_LOG(WARNING) << "Abnormal dump is not supported on GPU backend.";
      return;
    }
    auto debugger = Debugger::GetInstance();
    if (debugger != nullptr) {
      auto kernel_graph = std::dynamic_pointer_cast<session::KernelGraph>(cnode->func_graph());
      debugger->InsertExecutedGraph(kernel_graph);
      std::string kernel_name = cnode->fullname_with_scope();
      debugger->SetCurNode(kernel_name);
      bool read_data = CheckReadData(cnode);
      if (read_data) {
        ReadDataAndDump(cnode, input_device_tensors, output_device_tensors, exec_order_, device_context);
      }
    }
    exec_order_ += 1;
#endif
  }
}

/*
 * Feature group: Dump, Ascend.
 * Target device group: Ascend.
 * Runtime category: MindRT.
 * Description: Dump data for the given node if needed. This can be a normal dump, an overflow dump, or an exception
 * dump (Ascend kernel-by-kernel e2e dump).
 */
#ifdef ENABLE_DEBUGGER
void DebugActor::AscendKbkDump(const CNodePtr &cnode, const std::vector<DeviceTensor *> &input_device_tensors,
                               const std::vector<DeviceTensor *> &output_device_tensors,
                               const DeviceContext *device_context) {
  auto debugger = Debugger::GetInstance();
  if (debugger != nullptr) {
    auto kernel_graph = std::dynamic_pointer_cast<session::KernelGraph>(cnode->func_graph());
    MS_EXCEPTION_IF_NULL(kernel_graph);
    debugger->InsertExecutedGraph(kernel_graph);
    debugger->SetAscendKernelByKernelFlag(true);
    auto &dump_json_parser = DumpJsonParser::GetInstance();
    bool e2e_dump_enabled = dump_json_parser.e2e_dump_enabled();
    uint32_t op_debug_mode = dump_json_parser.op_debug_mode();
    bool abnormal_dump = false;
    bool sync_ok = true;
    bool read_data = false;
    if (!e2e_dump_enabled) {
      exec_order_ += 1;
      return;
    }
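    // Exception (lite) dump only dumps a node when stream synchronization fails; overflow dump reads
    // data only when an overflow is detected in the outputs; otherwise the regular per-iteration dump
    // decision applies.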
    if (op_debug_mode == DumpJsonParser::DUMP_LITE_EXCEPTION) {
      abnormal_dump = true;
      sync_ok = device_ctx_->device_res_manager_->SyncAllStreams();
      if (!sync_ok) {
        MS_LOG(ERROR) << "Sync stream error! The node inputs will be dumped.";
      }
    } else if (op_debug_mode == DumpJsonParser::DUMP_BOTH_OVERFLOW && dump_json_parser.DumpEnabledForIter()) {
      auto is_overflow = CheckOverflow(device_context, output_device_tensors);
      if (is_overflow) {
        read_data = CheckReadData(cnode);
      }
    } else {
      read_data = CheckReadData(cnode);
    }
    if ((read_data && e2e_dump_enabled) || !sync_ok) {
      ReadDataAndDump(cnode, input_device_tensors, output_device_tensors, exec_order_, device_context, abnormal_dump);
      if (!sync_ok) {
        MS_LOG(EXCEPTION) << "Sync stream error!";
      }
    }
  }
  exec_order_ += 1;
}
#endif
/*
 * Feature group: Dump, Online debugger.
 * Target device group: Ascend, GPU.
 * Runtime category: MindRT.
 * Description: Checks dataset_sink_mode, generates the related error if one exists, and calls
 * PreExecuteGraphDebugger.
 */
void DebugActor::DebugOnStepBegin(const std::vector<KernelGraphPtr> &graphs,
                                  const std::vector<AnfNodePtr> &origin_parameters_order,
                                  std::vector<DeviceContext *> device_contexts,
                                  OpContext<DeviceTensor> *const op_context, const AID *) {
  MS_LOG(INFO) << "Debug on step begin.";
  auto context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(context);
  auto is_kbyk = context->IsKByKExecutorMode();
  std::string backend = context->backend_policy();
  device_ctx_ = device_contexts[0];
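  // ACL dump is only started on Ascend when profiling is not initialized and both dump config
  // environment variables resolve to the same path.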
  auto profiler = profiler::Profiler::GetInstance(kAscendDevice);
  if ((profiler == nullptr || !profiler->IsInitialized()) &&
      device_ctx_->GetDeviceType() == device::DeviceType::kAscend) {
    auto device_id = context->get_param<uint32_t>(MS_CTX_DEVICE_ID);
    if (common::GetEnv("MS_ACL_DUMP_CFG_PATH") == common::GetEnv("MINDSPORE_DUMP_CONFIG")) {
      ACLDump(device_id, graphs, is_kbyk);
    }
  }
#ifndef ENABLE_SECURITY
  if (DumpJsonParser::GetInstance().e2e_dump_enabled() && !graphs.empty()) {
    // First graph is the dataset graph when dataset_sink_mode = True
    auto graph = graphs[0];
    bool is_dataset_sink = graph->IsDatasetGraph();
    uint32_t cur_step = DumpJsonParser::GetInstance().cur_dump_iter();
    if (cur_step == 1 && DumpJsonParser::GetInstance().GetDatasetSink()) {
      uint32_t init_step = 0;
      DumpJsonParser::GetInstance().UpdateDumpIter(init_step);
      MS_LOG(INFO) << "In dataset sink mode, reset step to init_step: " << init_step;
    }
    DumpJsonParser::GetInstance().SetDatasetSink(is_dataset_sink);
  }
#endif
  if (backend == "ge") {
    return;
  }
  MS_EXCEPTION_IF_NULL(op_context);
  std::lock_guard<std::mutex> locker(debug_mutex_);
#ifdef ENABLE_DEBUGGER
  if (!graphs.empty()) {
    // First graph is the dataset graph when dataset_sink_mode = True
    auto graph = graphs[0];
    std::string error_info = CheckDatasetSinkMode(graph);
    if (!error_info.empty()) {
      SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*op_context), error_info);
    }
  }
  auto debugger = Debugger::GetInstance();
  if (debugger != nullptr && debugger->DebuggerBackendEnabled()) {
    debugger->PreExecuteGraphDebugger(graphs, origin_parameters_order);
  }
#endif
#ifndef ENABLE_SECURITY
  if (DumpJsonParser::GetInstance().e2e_dump_enabled()) {
    DumpJsonParser::GetInstance().ClearGraph();
    if (graphs.size() != device_contexts.size()) {
      SET_OPCONTEXT_FAIL_RET_WITH_ERROR((*op_context), "Graph num:" + std::to_string(graphs.size()) +
                                                         " is not equal to device context size:" +
                                                         std::to_string(device_contexts.size()) + " for debug actor.");
    }
    for (size_t i = 0; i < graphs.size(); ++i) {
      MS_EXCEPTION_IF_NULL(graphs[i]);
      MS_EXCEPTION_IF_NULL(device_contexts[i]);
      if (device_contexts[i]->GetDeviceType() == device::DeviceType::kCPU) {
        DumpJsonParser::GetInstance().SaveGraph(graphs[i].get());
      }
    }
  }
#endif
}

/*
 * Feature group: Dump, Online debugger.
 * Target device group: Ascend, GPU and CPU.
 * Runtime category: MindRT.
 * Description: Dump parameters and constants and update the dump iteration for CPU. Call PostExecuteGraphDebugger for
 * GPU and Ascend, and update the step number of the online debugger on GPU.
 */
void DebugActor::DebugOnStepEnd(OpContext<DeviceTensor> *const, const AID *, int total_running_count_) {
  MS_LOG(INFO) << "Debug on step end. total_running_count is: " << total_running_count_;
  auto context = MsContext::GetInstance();
  MS_EXCEPTION_IF_NULL(context);
  std::string backend = context->backend_policy();
  step_count = total_running_count_;
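  // If ACL dump was active for this step, wait for outstanding device work and finalize the dumper.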
  if (dump_flag) {
    auto registered_dumper = datadump::DataDumperRegister::Instance().GetDumperForBackend(device::DeviceType::kAscend);
    if (registered_dumper != nullptr) {
      device_ctx_->device_res_manager_->SyncAllStreams();
      registered_dumper->Finalize();
    }
    dump_flag = false;
  }
  device_ctx_->device_res_manager_->SyncAllStreams();
  std::lock_guard<std::mutex> locker(debug_mutex_);

#ifndef ENABLE_SECURITY
  if (DumpJsonParser::GetInstance().GetIterDumpFlag()) {
    CPUE2eDump::DumpParametersData();
    CPUE2eDump::DumpConstantsData();
  }
#endif

#ifdef ENABLE_DEBUGGER
  auto debugger = Debugger::GetInstance();
  if (debugger != nullptr) {
    if (backend == "ge" && !debugger->GetAscendKernelByKernelFlag()) {
      MS_LOG(INFO) << "Not kernel mode, skip post actions.";
      return;
    }
    // Reset exec_order for the next step
    exec_order_ = 0;
    debugger->Debugger::PostExecuteGraphDebugger();
    debugger->Debugger::UpdateStepNumGPU();
  }
#ifndef ENABLE_SECURITY
  DumpJsonParser::GetInstance().UpdateDumpIter(step_count);
  MS_LOG(INFO) << "UpdateDumpIter: " << step_count;
#endif
#endif
}

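/*
 * Launches the AllFinite kernel over the float16/float32/bfloat16 inputs on their stream and returns
 * its single boolean output, which callers treat as the overflow flag. The kernel mod and its 1-byte
 * output buffer are cached per device context and per stream for reuse across checks.
 */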
bool DebugActor::CheckOverflow(const DeviceContext *device_context, const std::vector<DeviceTensor *> &inputs) {
  std::vector<KernelTensor *> check_kernel_tensors;
  for (size_t i = 0; i < inputs.size(); i++) {
    auto input = inputs[i]->kernel_tensor().get();
    auto type = input->dtype_id();
    if (type == mindspore::kNumberTypeFloat16 || type == mindspore::kNumberTypeFloat32 ||
        type == mindspore::kNumberTypeBFloat16) {
      check_kernel_tensors.emplace_back(input);
    }
  }
  if (check_kernel_tensors.empty()) {
    return false;
  }
  MS_EXCEPTION_IF_NULL(device_context);
  MS_EXCEPTION_IF_NULL(device_context->device_res_manager_);

  // 1. Get AllFinite kernel mod.
  const auto &kernel_mod_iter = finite_kernel_mods_.find(device_context);
  kernel::KernelModPtr finite_kernel_mod = nullptr;
  if (kernel_mod_iter == finite_kernel_mods_.end()) {
    const auto &new_finite_kernel_mod = device_context->GetKernelExecutor(false)->CreateKernelMod(kAllFiniteOpName);
    MS_EXCEPTION_IF_NULL(new_finite_kernel_mod);
    finite_kernel_mods_.emplace(device_context, new_finite_kernel_mod);
    finite_kernel_mod = new_finite_kernel_mod;
  } else {
    finite_kernel_mod = kernel_mod_iter->second;
  }
  MS_EXCEPTION_IF_NULL(finite_kernel_mod);

  // 2. Get output kernel tensor for AllFinite kernel.
  MS_EXCEPTION_IF_NULL(check_kernel_tensors[0]);
  const auto &stream_id =
    check_kernel_tensors[0]->managed_by_somas() ? kDefaultStreamIndex : check_kernel_tensors[0]->stream_id();
  auto &stream_id_to_output_device_address = finite_output_device_addresses_[device_context];
  if (stream_id_to_output_device_address.find(stream_id) == stream_id_to_output_device_address.end()) {
    auto finite_output_addr = device_context->device_res_manager_->AllocateMemory(1, stream_id);
    MS_EXCEPTION_IF_NULL(finite_output_addr);

    ShapeVector shape_vec = {1};
    auto kernel_tensor = std::make_shared<kernel::KernelTensor>(
      finite_output_addr, 1, Format::DEFAULT_FORMAT, kNumberTypeBool, shape_vec,
      device_context->device_context_key().device_name_, device_context->device_context_key().device_id_);
    kernel_tensor->set_stream_id(stream_id);
    kernel_tensor->SetType(std::make_shared<TensorType>(kBool));
    kernel_tensor->SetShape(std::make_shared<abstract::TensorShape>(shape_vec));
    auto device_address = device_context->device_res_manager_->CreateDeviceAddress(kernel_tensor);
    MS_EXCEPTION_IF_NULL(device_address);
    stream_id_to_output_device_address.emplace(stream_id, device_address);
  }
  auto &output_device_address = stream_id_to_output_device_address[stream_id];
  MS_EXCEPTION_IF_NULL(output_device_address);
  const auto &output_kernel_tensor = output_device_address->kernel_tensor();
  MS_EXCEPTION_IF_NULL(output_kernel_tensor);

  void *stream_ptr = device_context->device_res_manager_->GetStream(stream_id);
  MS_EXCEPTION_IF_NULL(stream_ptr);
  bool ret = finite_kernel_mod->Launch(check_kernel_tensors, {}, {output_kernel_tensor.get()}, stream_ptr);
  if (!ret) {
    MS_LOG(EXCEPTION) << "Launch AllFinite kernel failed.";
  }
  return output_kernel_tensor->GetValueWithCheck<bool>();
}

void DebugActor::Finalize() {
  DumpJsonParser::GetInstance().PrintUnusedKernel();
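  // Release the per-stream AllFinite output buffers allocated in CheckOverflow.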
  for (const auto &item : finite_output_device_addresses_) {
    auto &stream_id_to_output_device_address_map = item.second;
    auto *device_context = item.first;
    for (const auto &device_address_item : stream_id_to_output_device_address_map) {
      const auto &device_address = device_address_item.second;
      if (device_address && device_context) {
        device_context->device_res_manager_->FreeMemory(device_address->GetMutablePtr());
      }
    }
  }
}
}  // namespace runtime
}  // namespace mindspore