• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2021-2024 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_DEBUG_ACTOR_H_
18 #define MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_DEBUG_ACTOR_H_
19 
20 #include <map>
21 #include <memory>
22 #include <mutex>
23 #include <set>
24 #include <string>
25 #include <vector>
26 #include "runtime/graph_scheduler/actor/actor_common.h"
27 #include "runtime/graph_scheduler/device_tensor_store.h"
28 #include "runtime/hardware/device_context.h"
29 #ifdef ENABLE_DEBUGGER
30 #include "include/backend/debug/data_dump/dump_utils.h"
31 #endif
32 #include "ir/dtype/tensor_type.h"
33 
34 namespace mindspore {
35 namespace runtime {
36 using device::DeviceAddressPtr;
37 using kernel::KernelTensor;
38 using kernel::KernelTensorPtr;
39 using mindspore::device::DeviceContext;
40 using mindspore::kernel::KernelLaunchAddr;
41 
42 // The debug actor is used to debug and dump kernel info, it gets the kernel real time execution info in the device, so
43 // it is synchronous and blocked.
44 class DebugActor : public ActorBase {
45  public:
DebugActor()46   DebugActor() : ActorBase("DebugActor") {}
47   ~DebugActor() override = default;
48 
49   void ACLDump(uint32_t device_id, const std::vector<KernelGraphPtr> &graphs, bool is_kbyk);
50 
51   // The debug of each node.
52   void DebugPreLaunch(const AnfNodePtr &node, const std::vector<DeviceTensor *> &op_input_kernel_tensors,
53                       const std::vector<DeviceTensor *> &op_output_kernel_tensors, const DeviceContext *device_context,
54                       OpContext<DeviceTensor> *const op_context, const AID *from_aid);
55   void DebugPostLaunch(const AnfNodePtr &node, const std::vector<DeviceTensor *> &op_input_kernel_tensors,
56                        const std::vector<DeviceTensor *> &op_output_kernel_tensors, const DeviceContext *device_context,
57                        OpContext<DeviceTensor> *const op_context, const AID *from_aid);
58 #ifdef ENABLE_DEBUGGER
59   void AscendKbkDump(const CNodePtr &cnode, const std::vector<DeviceTensor *> &input_kernel_tensors,
60                      const std::vector<DeviceTensor *> &output_kernel_tensors, const DeviceContext *device_context);
61 #endif
62   void AscendStepStart(const std::vector<KernelGraphPtr> &graphs, std::vector<DeviceContext *> device_contexts);
63 
64   void AscendStepEnd();
65 
66   // The debug on step begin.
67   void DebugOnStepBegin(const std::vector<KernelGraphPtr> &graphs,
68                         const std::vector<AnfNodePtr> &origin_parameters_order,
69                         std::vector<DeviceContext *> device_contexts, OpContext<DeviceTensor> *const op_context,
70                         const AID *from_aid);
71 
72   // The debug on step end.
73   void DebugOnStepEnd(OpContext<DeviceTensor> *const op_context, const AID *from_aid, int total_running_count_);
74   static inline uint64_t current_step{1};
75 
76  private:
77   // Check kernel output is finite or not synchronously.
78   bool CheckOverflow(const DeviceContext *device_context, const std::vector<DeviceTensor *> &inputs);
79   // Release device memory for AllFinite kernel.
80   void Finalize() override;
81 
82   std::map<const DeviceContext *, kernel::KernelModPtr> finite_kernel_mods_;
83   std::map<const DeviceContext *, std::map<uint32_t, DeviceAddressPtr>> finite_output_device_addresses_;
84 
85   // class members
86   uint32_t exec_order_ = 0;
87   int step_count = 0;
88   bool dump_flag = false;
89   int is_dataset_sink = 0;
90 
91   bool profile_started_ = false;
92   DeviceContext *device_ctx_ = nullptr;
93 
94   // Support multi-thread.
95   std::mutex debug_mutex_;
96 };
97 
98 }  // namespace runtime
99 }  // namespace mindspore
100 
101 #endif  // MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_DEBUG_ACTOR_H_
102