• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/**
 * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 #include "profiler/device/gpu/gpu_profiling_utils.h"
18 #include "backend/kernel_compiler/kernel.h"
19 #include "backend/session/anf_runtime_algorithm.h"
20 #include "utils/ms_utils.h"
21 #include "utils/ms_context.h"
22 #include "utils/utils.h"
23 
24 namespace mindspore {
25 namespace profiler {
26 namespace gpu {
27 constexpr char kFpStartNode[] = "PROFILING_FP_START";
28 constexpr char kBpEndNode[] = "PROFILING_BP_END";
29 constexpr char kIterEndNode[] = "PROFILING_ITER_END";
30 constexpr auto kInitDatasetQueueOpName = "InitDataSetQueue";
31 
32 bool ProfilingUtils::have_communication_op = false;
33 ProfilingTraceInfo ProfilingUtils::profiling_trace = {"", "", ""};
34 std::unordered_map<uint32_t, bool> ProfilingUtils::is_first_step_map_ = {};
35 
GetProfilingTraceFromEnv(NotNull<const session::KernelGraph * > graph_ptr)36 ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(NotNull<const session::KernelGraph *> graph_ptr) {
37   MS_LOG(INFO) << "get current subgraph op name start.";
38   auto &cnode_exec_order = graph_ptr->execution_order();
39   if (cnode_exec_order.empty()) {
40     return profiling_trace;
41   }
42 
43   ProfilingTraceInfo empty_info;
44   ProfilingTraceInfo last_graph_profiling_trace = profiling_trace;
45   profiling_trace = empty_info;
46   SetTraceIterEnd(cnode_exec_order);
47   SetTraceFpStart(cnode_exec_order);
48   SetTraceBpEnd(cnode_exec_order);
49   GetTraceHccl(cnode_exec_order);
50 
51   OutputStepTraceOpNameStatus();
52   is_first_step_map_[graph_ptr->graph_id()] = false;
53 
54   // If current graph has only one node, the bp_end will be empty, so select the last graph node.
55   if (profiling_trace.trace_bp_end != "") {
56     return profiling_trace;
57   } else {
58     return last_graph_profiling_trace;
59   }
60 }
61 
OutputStepTraceOpNameStatus()62 void ProfilingUtils::OutputStepTraceOpNameStatus() {
63   if (profiling_trace.IsValid()) {
64     MS_LOG(INFO) << "Get all the step_trace op name.";
65   }
66   MS_LOG(INFO) << "[profiling]trace_fp_start: " << profiling_trace.trace_fp_start
67                << "trace_bp_end: " << profiling_trace.trace_bp_end
68                << "trace_iter_end: " << profiling_trace.trace_iter_end;
69   MS_LOG(INFO) << "get step_trace op name end.";
70 }
71 
GetTraceHccl(const std::vector<CNodePtr> & cnode_exec_order)72 void ProfilingUtils::GetTraceHccl(const std::vector<CNodePtr> &cnode_exec_order) {
73   for (const auto &node : cnode_exec_order) {
74     if (AnfAlgo::IsCommunicationOp(node)) {
75       MS_EXCEPTION_IF_NULL(node);
76       if (std::find(profiling_trace.trace_custom_node.begin(), profiling_trace.trace_custom_node.end(),
77                     node->fullname_with_scope()) == profiling_trace.trace_custom_node.end()) {
78         profiling_trace.trace_custom_node.push_back(node->fullname_with_scope());
79       }
80       MS_LOG(INFO) << "[profiling]Get hccl node:" << node->fullname_with_scope();
81     }
82   }
83 }
84 
SetTraceFpStart(const std::vector<CNodePtr> & cnode_exec_order)85 void ProfilingUtils::SetTraceFpStart(const std::vector<CNodePtr> &cnode_exec_order) {
86   const char *trace_fp_start = std::getenv(kFpStartNode);
87   if (trace_fp_start != nullptr) {
88     profiling_trace.trace_fp_start = std::string(trace_fp_start);
89     MS_LOG(INFO) << "Set the Fp Start Op Name from Environment Variable:" << profiling_trace.trace_fp_start;
90     return;
91   }
92 
93   auto first_node = cnode_exec_order.front();
94   MS_EXCEPTION_IF_NULL(first_node);
95   auto node_name = AnfAlgo::GetCNodeName(first_node);
96   if (node_name == kInitDatasetQueueOpName) {
97     return;
98   }
99 
100   if (node_name == kGetNextOpName) {
101     if (cnode_exec_order.size() > 1) {
102       profiling_trace.trace_fp_start = cnode_exec_order.at(1)->fullname_with_scope();
103     } else {
104       MS_LOG(WARNING) << "No Op Behind the GetNext Op" << std::endl;
105     }
106   } else {
107     profiling_trace.trace_fp_start = first_node->fullname_with_scope();
108   }
109 }
110 
SetTraceBpEnd(const std::vector<CNodePtr> & cnode_exec_order)111 void ProfilingUtils::SetTraceBpEnd(const std::vector<CNodePtr> &cnode_exec_order) {
112   const char *trace_bp_end = std::getenv(kBpEndNode);
113   if (trace_bp_end != nullptr) {
114     profiling_trace.trace_bp_end = std::string(trace_bp_end);
115     MS_LOG(INFO) << "Set the Bp End Op Name from Environment Variable:" << profiling_trace.trace_bp_end;
116     return;
117   }
118 
119   std::string bp_end_str;
120   // Contain hccl kernel (try to find the last communication op)
121   auto iter = cnode_exec_order.rbegin();
122   while (iter != cnode_exec_order.rend()) {
123     if (AnfAlgo::IsCommunicationOp(*iter)) {
124       break;
125     }
126     ++iter;
127   }
128   // If find the communication op
129   if (iter != cnode_exec_order.rend()) {
130     // store communication op input nodes' name
131     std::set<std::string> ar_input_node_names;
132     size_t input_num = AnfAlgo::GetInputTensorNum(*iter);
133     for (size_t i = 0; i < input_num; ++i) {
134       auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(*iter, i);
135       auto input_node = input_node_with_index.first;
136       ar_input_node_names.insert(input_node->fullname_with_scope());
137     }
138     // start from previous node
139     ++iter;
140     // find input names in previous node
141     while (iter != cnode_exec_order.rend()) {
142       if (ar_input_node_names.find((*iter)->fullname_with_scope()) != ar_input_node_names.end()) {
143         bp_end_str = (*iter)->fullname_with_scope();
144         break;
145       }
146       ++iter;
147     }
148   }
149 
150   if (bp_end_str.empty() && !have_communication_op) {
151     bp_end_str = GetGraphSecondLastKernelName(cnode_exec_order);
152   }
153 
154   if (!bp_end_str.empty()) {
155     profiling_trace.trace_bp_end = bp_end_str;
156   }
157 }
158 
SetTraceIterEnd(const std::vector<CNodePtr> & cnode_exec_order)159 void ProfilingUtils::SetTraceIterEnd(const std::vector<CNodePtr> &cnode_exec_order) {
160   const char *trace_iter_end = std::getenv(kIterEndNode);
161   if (trace_iter_end != nullptr) {
162     profiling_trace.trace_iter_end = std::string(trace_iter_end);
163     MS_LOG(INFO) << "Set the Iter End Op Name from Environment Variable:" << profiling_trace.trace_iter_end;
164     return;
165   }
166 
167   auto iter_end = cnode_exec_order.rbegin();
168   profiling_trace.trace_iter_end = (*iter_end)->fullname_with_scope();
169 }
170 
GetGraphSecondLastKernelName(const std::vector<CNodePtr> & cnode_exec_order)171 std::string ProfilingUtils::GetGraphSecondLastKernelName(const std::vector<CNodePtr> &cnode_exec_order) {
172   std::string second_last_kernel_name;
173   auto iter = cnode_exec_order.rbegin();
174   ++iter;
175   if (iter != cnode_exec_order.rend()) {
176     second_last_kernel_name = (*iter)->fullname_with_scope();
177   }
178 
179   return second_last_kernel_name;
180 }
181 
IsFirstStep(const uint32_t graph_id)182 bool ProfilingUtils::IsFirstStep(const uint32_t graph_id) {
183   auto iter = is_first_step_map_.find(graph_id);
184   if (iter == is_first_step_map_.end()) {
185     is_first_step_map_[graph_id] = false;
186     return true;
187   }
188   return is_first_step_map_[graph_id];
189 }
190 }  // namespace gpu
191 }  // namespace profiler
192 }  // namespace mindspore
193