1 /**
2 * Copyright 2020-2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "profiler/device/gpu/gpu_profiling_utils.h"
18 #include "backend/kernel_compiler/kernel.h"
19 #include "backend/session/anf_runtime_algorithm.h"
20 #include "utils/ms_utils.h"
21 #include "utils/ms_context.h"
22 #include "utils/utils.h"
23
24 namespace mindspore {
25 namespace profiler {
26 namespace gpu {
27 constexpr char kFpStartNode[] = "PROFILING_FP_START";
28 constexpr char kBpEndNode[] = "PROFILING_BP_END";
29 constexpr char kIterEndNode[] = "PROFILING_ITER_END";
30 constexpr auto kInitDatasetQueueOpName = "InitDataSetQueue";
31
32 bool ProfilingUtils::have_communication_op = false;
33 ProfilingTraceInfo ProfilingUtils::profiling_trace = {"", "", ""};
34 std::unordered_map<uint32_t, bool> ProfilingUtils::is_first_step_map_ = {};
35
GetProfilingTraceFromEnv(NotNull<const session::KernelGraph * > graph_ptr)36 ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(NotNull<const session::KernelGraph *> graph_ptr) {
37 MS_LOG(INFO) << "get current subgraph op name start.";
38 auto &cnode_exec_order = graph_ptr->execution_order();
39 if (cnode_exec_order.empty()) {
40 return profiling_trace;
41 }
42
43 ProfilingTraceInfo empty_info;
44 ProfilingTraceInfo last_graph_profiling_trace = profiling_trace;
45 profiling_trace = empty_info;
46 SetTraceIterEnd(cnode_exec_order);
47 SetTraceFpStart(cnode_exec_order);
48 SetTraceBpEnd(cnode_exec_order);
49 GetTraceHccl(cnode_exec_order);
50
51 OutputStepTraceOpNameStatus();
52 is_first_step_map_[graph_ptr->graph_id()] = false;
53
54 // If current graph has only one node, the bp_end will be empty, so select the last graph node.
55 if (profiling_trace.trace_bp_end != "") {
56 return profiling_trace;
57 } else {
58 return last_graph_profiling_trace;
59 }
60 }
61
OutputStepTraceOpNameStatus()62 void ProfilingUtils::OutputStepTraceOpNameStatus() {
63 if (profiling_trace.IsValid()) {
64 MS_LOG(INFO) << "Get all the step_trace op name.";
65 }
66 MS_LOG(INFO) << "[profiling]trace_fp_start: " << profiling_trace.trace_fp_start
67 << "trace_bp_end: " << profiling_trace.trace_bp_end
68 << "trace_iter_end: " << profiling_trace.trace_iter_end;
69 MS_LOG(INFO) << "get step_trace op name end.";
70 }
71
GetTraceHccl(const std::vector<CNodePtr> & cnode_exec_order)72 void ProfilingUtils::GetTraceHccl(const std::vector<CNodePtr> &cnode_exec_order) {
73 for (const auto &node : cnode_exec_order) {
74 if (AnfAlgo::IsCommunicationOp(node)) {
75 MS_EXCEPTION_IF_NULL(node);
76 if (std::find(profiling_trace.trace_custom_node.begin(), profiling_trace.trace_custom_node.end(),
77 node->fullname_with_scope()) == profiling_trace.trace_custom_node.end()) {
78 profiling_trace.trace_custom_node.push_back(node->fullname_with_scope());
79 }
80 MS_LOG(INFO) << "[profiling]Get hccl node:" << node->fullname_with_scope();
81 }
82 }
83 }
84
SetTraceFpStart(const std::vector<CNodePtr> & cnode_exec_order)85 void ProfilingUtils::SetTraceFpStart(const std::vector<CNodePtr> &cnode_exec_order) {
86 const char *trace_fp_start = std::getenv(kFpStartNode);
87 if (trace_fp_start != nullptr) {
88 profiling_trace.trace_fp_start = std::string(trace_fp_start);
89 MS_LOG(INFO) << "Set the Fp Start Op Name from Environment Variable:" << profiling_trace.trace_fp_start;
90 return;
91 }
92
93 auto first_node = cnode_exec_order.front();
94 MS_EXCEPTION_IF_NULL(first_node);
95 auto node_name = AnfAlgo::GetCNodeName(first_node);
96 if (node_name == kInitDatasetQueueOpName) {
97 return;
98 }
99
100 if (node_name == kGetNextOpName) {
101 if (cnode_exec_order.size() > 1) {
102 profiling_trace.trace_fp_start = cnode_exec_order.at(1)->fullname_with_scope();
103 } else {
104 MS_LOG(WARNING) << "No Op Behind the GetNext Op" << std::endl;
105 }
106 } else {
107 profiling_trace.trace_fp_start = first_node->fullname_with_scope();
108 }
109 }
110
SetTraceBpEnd(const std::vector<CNodePtr> & cnode_exec_order)111 void ProfilingUtils::SetTraceBpEnd(const std::vector<CNodePtr> &cnode_exec_order) {
112 const char *trace_bp_end = std::getenv(kBpEndNode);
113 if (trace_bp_end != nullptr) {
114 profiling_trace.trace_bp_end = std::string(trace_bp_end);
115 MS_LOG(INFO) << "Set the Bp End Op Name from Environment Variable:" << profiling_trace.trace_bp_end;
116 return;
117 }
118
119 std::string bp_end_str;
120 // Contain hccl kernel (try to find the last communication op)
121 auto iter = cnode_exec_order.rbegin();
122 while (iter != cnode_exec_order.rend()) {
123 if (AnfAlgo::IsCommunicationOp(*iter)) {
124 break;
125 }
126 ++iter;
127 }
128 // If find the communication op
129 if (iter != cnode_exec_order.rend()) {
130 // store communication op input nodes' name
131 std::set<std::string> ar_input_node_names;
132 size_t input_num = AnfAlgo::GetInputTensorNum(*iter);
133 for (size_t i = 0; i < input_num; ++i) {
134 auto input_node_with_index = AnfAlgo::GetPrevNodeOutput(*iter, i);
135 auto input_node = input_node_with_index.first;
136 ar_input_node_names.insert(input_node->fullname_with_scope());
137 }
138 // start from previous node
139 ++iter;
140 // find input names in previous node
141 while (iter != cnode_exec_order.rend()) {
142 if (ar_input_node_names.find((*iter)->fullname_with_scope()) != ar_input_node_names.end()) {
143 bp_end_str = (*iter)->fullname_with_scope();
144 break;
145 }
146 ++iter;
147 }
148 }
149
150 if (bp_end_str.empty() && !have_communication_op) {
151 bp_end_str = GetGraphSecondLastKernelName(cnode_exec_order);
152 }
153
154 if (!bp_end_str.empty()) {
155 profiling_trace.trace_bp_end = bp_end_str;
156 }
157 }
158
SetTraceIterEnd(const std::vector<CNodePtr> & cnode_exec_order)159 void ProfilingUtils::SetTraceIterEnd(const std::vector<CNodePtr> &cnode_exec_order) {
160 const char *trace_iter_end = std::getenv(kIterEndNode);
161 if (trace_iter_end != nullptr) {
162 profiling_trace.trace_iter_end = std::string(trace_iter_end);
163 MS_LOG(INFO) << "Set the Iter End Op Name from Environment Variable:" << profiling_trace.trace_iter_end;
164 return;
165 }
166
167 auto iter_end = cnode_exec_order.rbegin();
168 profiling_trace.trace_iter_end = (*iter_end)->fullname_with_scope();
169 }
170
GetGraphSecondLastKernelName(const std::vector<CNodePtr> & cnode_exec_order)171 std::string ProfilingUtils::GetGraphSecondLastKernelName(const std::vector<CNodePtr> &cnode_exec_order) {
172 std::string second_last_kernel_name;
173 auto iter = cnode_exec_order.rbegin();
174 ++iter;
175 if (iter != cnode_exec_order.rend()) {
176 second_last_kernel_name = (*iter)->fullname_with_scope();
177 }
178
179 return second_last_kernel_name;
180 }
181
IsFirstStep(const uint32_t graph_id)182 bool ProfilingUtils::IsFirstStep(const uint32_t graph_id) {
183 auto iter = is_first_step_map_.find(graph_id);
184 if (iter == is_first_step_map_.end()) {
185 is_first_step_map_[graph_id] = false;
186 return true;
187 }
188 return is_first_step_map_[graph_id];
189 }
190 } // namespace gpu
191 } // namespace profiler
192 } // namespace mindspore
193