/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "debug/debugger/debugger_utils.h"
#include <iostream>
#include <vector>
#include <memory>
#include <string>
#include "debug/anf_ir_utils.h"
#include "debug/debugger/debugger.h"
#include "runtime/device/gpu/gpu_device_address.h"
#include "debug/data_dump/dump_json_parser.h"
#include "backend/session/anf_runtime_algorithm.h"
#include "backend/kernel_compiler/kernel.h"

using mindspore::kernel::AddressPtr;
using mindspore::kernel::KernelLaunchInfo;
using AddressPtrList = std::vector<mindspore::kernel::AddressPtr>;
using KernelGraph = mindspore::session::KernelGraph;
using AnfAlgo = mindspore::session::AnfRuntimeAlgorithm;

namespace mindspore {
static const size_t PARAMETER_OUTPUT_INDEX = 0;

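// Returns the indices of the outputs that should actually be loaded for the given node.
// By default every output is loaded; for operators in the filter list below (currently only
// BatchNorm) only a fixed subset of output indices is considered real.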
std::vector<size_t> CheckRealOutput(const std::string &node_name, const size_t &output_size) {
  // Define a vector containing the real output indices.
  std::vector<size_t> real_outputs;
  // P.BatchNorm is used for both training and inference.
  // More operators can be added to this filter list as needed.
  if (node_name == "BatchNorm") {
    MS_LOG(INFO) << "Loading node named " << node_name;
    (void)real_outputs.insert(real_outputs.end(), {0, 3, 4});
  } else {
    // By default, TensorLoader loads all outputs.
    for (size_t j = 0; j < output_size; ++j) {
      real_outputs.push_back(j);
    }
  }
  return real_outputs;
}

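// Loads the input tensors of the given kernel from device memory to host (GPU builds only)
// so that the debugger or dump can read them. launch_info_ provides the device addresses of
// the kernel's inputs; exec_order_ is the kernel's execution order in the graph.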
void LoadInputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_) {
  // Get the inputs.
  auto kernel_inputs = launch_info_->inputs_;
  auto input_size = AnfAlgo::GetInputTensorNum(cnode);
  for (size_t j = 0; j < input_size; ++j) {
    auto input_kernel = cnode->input(j + 1);
    std::string input_kernel_name = GetKernelNodeName(input_kernel);
    auto addr = kernel_inputs[j];
    auto type = AnfAlgo::GetOutputInferDataType(input_kernel, PARAMETER_OUTPUT_INDEX);
    // Skip nodes without an inferred data type; this happens, for example, with the Depend op.
    if (type == kMetaTypeNone) {
      continue;
    }
#ifdef ENABLE_GPU
    auto format = kOpFormat_DEFAULT;
    auto gpu_addr = std::make_unique<device::gpu::GPUDeviceAddress>(addr->addr, addr->size, format, type);
    std::string input_tensor_name = input_kernel_name + ":0";
    ShapeVector int_shapes = trans::GetRuntimePaddingShape(input_kernel, PARAMETER_OUTPUT_INDEX);
    auto ret = gpu_addr->LoadMemToHost(input_tensor_name, exec_order_, format, int_shapes, type, 0, true);
    if (!ret) {
      MS_LOG(ERROR) << "LoadMemToHost failed, tensor_name:" << input_tensor_name << ", host_format:" << format
                    << ".";
    }
#endif
  }
}

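// Loads the output tensors of the given kernel from device memory to host (GPU builds only),
// restricted to the real output indices reported by CheckRealOutput.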
void LoadOutputs(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_) {
  // Get the outputs.
  auto kernel_outputs = launch_info_->outputs_;
  auto output_size = AnfAlgo::GetOutputTensorNum(cnode);
  auto node_name = AnfAlgo::GetCNodeName(cnode);
  std::string kernel_name = GetKernelNodeName(cnode);
  std::vector<size_t> real_outputs = CheckRealOutput(node_name, output_size);

  for (size_t j : real_outputs) {
    auto addr = kernel_outputs[j];
    auto type = AnfAlgo::GetOutputInferDataType(cnode, j);
    // Skip nodes without an inferred data type; this happens, for example, with the Depend op.
    if (type == kMetaTypeNone) {
      continue;
    }
#ifdef ENABLE_GPU
    auto format = kOpFormat_DEFAULT;
    auto gpu_addr = std::make_unique<device::gpu::GPUDeviceAddress>(addr->addr, addr->size, format, type);
    std::string tensor_name = kernel_name + ':' + std::to_string(j);
    ShapeVector int_shapes = trans::GetRuntimePaddingShape(cnode, j);
    auto ret = gpu_addr->LoadMemToHost(tensor_name, exec_order_, format, int_shapes, type, j, false);
    if (!ret) {
      MS_LOG(ERROR) << "LoadMemToHost failed, tensor_name:" << tensor_name << ", host_format:" << format << ".";
    }
#endif
  }
}

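// Returns true when this kernel's data should be read: either dumping is enabled for the
// current iteration and the kernel matches the dump configuration, or the online debugger
// requires the node's data.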
bool CheckReadData(const CNodePtr &cnode) {
  auto debugger = Debugger::GetInstance();
  if (!debugger) {
    return false;
  }
  bool read_data = false;
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  bool dump_enabled = debugger->DumpDataEnabledIteration();
  std::string kernel_name = GetKernelNodeName(cnode);
  if (dump_enabled) {
    auto dump_mode = dump_json_parser.dump_mode();
    // Dump the node if dump_mode is 0 (dump all kernels), or if dump_mode is 1 and this kernel is in the kernels list.
    if ((dump_mode == 0) || ((dump_mode == 1) && dump_json_parser.NeedDump(kernel_name))) {
      read_data = true;
    }
  } else if (debugger->debugger_enabled()) {
    read_data = debugger->ReadNodeDataRequired(cnode);
  }
  return read_data;
}

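// Loads the kernel's inputs and outputs to host as required, dumps the node when dumping is
// enabled for the current iteration, and notifies the debugger once the node has executed.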
void ReadDataAndDump(const CNodePtr &cnode, const KernelLaunchInfo *launch_info_, uint32_t exec_order_) {
  auto debugger = Debugger::GetInstance();
  if (!debugger) {
    return;
  }
  auto &dump_json_parser = DumpJsonParser::GetInstance();
  bool dump_enabled = debugger->DumpDataEnabledIteration();
  if (debugger->debugger_enabled() || dump_json_parser.InputNeedDump()) {
    LoadInputs(cnode, launch_info_, exec_order_);
  }
  if (debugger->debugger_enabled() || dump_json_parser.OutputNeedDump()) {
    LoadOutputs(cnode, launch_info_, exec_order_);
  }
  // Dump the kernel.
  if (dump_enabled) {
    auto kernel_graph = std::dynamic_pointer_cast<KernelGraph>(cnode->func_graph());
    MS_EXCEPTION_IF_NULL(kernel_graph);
    auto graph_id = kernel_graph->graph_id();
    debugger->DumpSingleNode(cnode, graph_id);
    // Clear the dumped data when the online debugger is not enabled.
    if (!debugger->debugger_enabled()) {
      debugger->ClearCurrentData();
    }
  }
  // Check whether this node is the last kernel (inplace "skip" nodes are not).
  bool last_kernel = !AnfAlgo::IsInplaceNode(cnode, "skip");
  debugger->PostExecuteNode(cnode, last_kernel);
}
}  // namespace mindspore