1 /** 2 * Copyright 2021-2024 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_DEBUG_ACTOR_H_ 18 #define MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_DEBUG_ACTOR_H_ 19 20 #include <map> 21 #include <memory> 22 #include <mutex> 23 #include <set> 24 #include <string> 25 #include <vector> 26 #include "runtime/graph_scheduler/actor/actor_common.h" 27 #include "runtime/graph_scheduler/device_tensor_store.h" 28 #include "runtime/hardware/device_context.h" 29 #ifdef ENABLE_DEBUGGER 30 #include "include/backend/debug/data_dump/dump_utils.h" 31 #endif 32 #include "ir/dtype/tensor_type.h" 33 34 namespace mindspore { 35 namespace runtime { 36 using device::DeviceAddressPtr; 37 using kernel::KernelTensor; 38 using kernel::KernelTensorPtr; 39 using mindspore::device::DeviceContext; 40 using mindspore::kernel::KernelLaunchAddr; 41 42 // The debug actor is used to debug and dump kernel info, it gets the kernel real time execution info in the device, so 43 // it is synchronous and blocked. 44 class DebugActor : public ActorBase { 45 public: DebugActor()46 DebugActor() : ActorBase("DebugActor") {} 47 ~DebugActor() override = default; 48 49 void ACLDump(uint32_t device_id, const std::vector<KernelGraphPtr> &graphs, bool is_kbyk); 50 51 // The debug of each node. 52 void DebugPreLaunch(const AnfNodePtr &node, const std::vector<DeviceTensor *> &op_input_kernel_tensors, 53 const std::vector<DeviceTensor *> &op_output_kernel_tensors, const DeviceContext *device_context, 54 OpContext<DeviceTensor> *const op_context, const AID *from_aid); 55 void DebugPostLaunch(const AnfNodePtr &node, const std::vector<DeviceTensor *> &op_input_kernel_tensors, 56 const std::vector<DeviceTensor *> &op_output_kernel_tensors, const DeviceContext *device_context, 57 OpContext<DeviceTensor> *const op_context, const AID *from_aid); 58 #ifdef ENABLE_DEBUGGER 59 void AscendKbkDump(const CNodePtr &cnode, const std::vector<DeviceTensor *> &input_kernel_tensors, 60 const std::vector<DeviceTensor *> &output_kernel_tensors, const DeviceContext *device_context); 61 #endif 62 void AscendStepStart(const std::vector<KernelGraphPtr> &graphs, std::vector<DeviceContext *> device_contexts); 63 64 void AscendStepEnd(); 65 66 // The debug on step begin. 67 void DebugOnStepBegin(const std::vector<KernelGraphPtr> &graphs, 68 const std::vector<AnfNodePtr> &origin_parameters_order, 69 std::vector<DeviceContext *> device_contexts, OpContext<DeviceTensor> *const op_context, 70 const AID *from_aid); 71 72 // The debug on step end. 73 void DebugOnStepEnd(OpContext<DeviceTensor> *const op_context, const AID *from_aid, int total_running_count_); 74 static inline uint64_t current_step{1}; 75 76 private: 77 // Check kernel output is finite or not synchronously. 78 bool CheckOverflow(const DeviceContext *device_context, const std::vector<DeviceTensor *> &inputs); 79 // Release device memory for AllFinite kernel. 80 void Finalize() override; 81 82 std::map<const DeviceContext *, kernel::KernelModPtr> finite_kernel_mods_; 83 std::map<const DeviceContext *, std::map<uint32_t, DeviceAddressPtr>> finite_output_device_addresses_; 84 85 // class members 86 uint32_t exec_order_ = 0; 87 int step_count = 0; 88 bool dump_flag = false; 89 int is_dataset_sink = 0; 90 91 bool profile_started_ = false; 92 DeviceContext *device_ctx_ = nullptr; 93 94 // Support multi-thread. 95 std::mutex debug_mutex_; 96 }; 97 98 } // namespace runtime 99 } // namespace mindspore 100 101 #endif // MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_DEBUG_ACTOR_H_ 102