1 /**
2 * Copyright 2019-2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "debug/trace.h"
18
19 #include <iostream>
20 #include <fstream>
21 #include <map>
22 #include <unordered_map>
23 #include <unordered_set>
24 #include <vector>
25 #include <string>
26 #include <sstream>
27 #include <utility>
28 #include <stack>
29 #include <algorithm>
30
31 #include "ir/meta_func_graph.h"
32 #include "ir/graph_utils.h"
33 #include "frontend/operator/composite/composite.h"
34 #include "ir/tensor.h"
35 #include "debug/anf_ir_utils.h"
36 #include "debug/common.h"
37 #include "pipeline/jit/static_analysis/evaluator.h"
38 #include "pipeline/jit/static_analysis/async_eval_result.h"
39 #include "pipeline/jit/base.h"
40 #include "utils/log_adapter.h"
41 #include "utils/comm_manager.h"
42 #include "abstract/abstract_value.h"
43 #include "utils/file_utils.h"
44
45 namespace mindspore {
46 // namespace to support debug trace information
47 namespace trace {
48 using abstract::AbstractBasePtr;
49 using abstract::AnalysisContextPtr;
50 using abstract::AnalysisEnginePtr;
51 using abstract::AnfNodeConfigPtr;
52
GetAbstractStr(const abstract::AbstractBasePtr & abs)53 std::string GetAbstractStr(const abstract::AbstractBasePtr &abs) {
54 if (abs == nullptr) {
55 return "NullAbstract";
56 }
57 auto shape = abs->BuildShape()->cast<abstract::ShapePtr>();
58 TypePtr type = abs->BuildType();
59 std::ostringstream oss;
60 if ((shape != nullptr) && (type != nullptr)) {
61 oss << type->DumpText() << shape->DumpText();
62 } else if (type != nullptr) {
63 oss << type->DumpText();
64 } else {
65 oss << "Undefined";
66 }
67 return oss.str();
68 }
69
GetGraphParamString(const FuncGraphPtr & graph,const abstract::AbstractBasePtrList & args_spec_list)70 std::string GetGraphParamString(const FuncGraphPtr &graph, const abstract::AbstractBasePtrList &args_spec_list) {
71 MS_EXCEPTION_IF_NULL(graph);
72 std::ostringstream oss;
73 oss << "graph:" << graph->ToString() << " with args[";
74 auto params = graph->parameters();
75 if (params.size() < args_spec_list.size()) {
76 MS_EXCEPTION(TypeError) << "The size of parameters less than args_spec_list's size.";
77 }
78 for (size_t i = 0; i < args_spec_list.size(); i++) {
79 auto parameter = params[i];
80 MS_EXCEPTION_IF_NULL(parameter);
81 oss << parameter->ToString() << ":<" << GetAbstractStr(args_spec_list[i]) << ">,";
82 }
83 oss << "]";
84 oss << GetDebugInfo(graph->debug_info(), kSourceLineTipDiscard);
85 return oss.str();
86 }
87
DumpInferStack(std::ostringstream & oss)88 void DumpInferStack(std::ostringstream &oss) {
89 auto &graph_stack = GetCurrentGraphEvalStack();
90 if (graph_stack.empty()) {
91 return;
92 }
93 std::vector<std::pair<abstract::AnalysisContextPtr, abstract::AnfNodeConfigPtr>> infer_vec;
94 while (!graph_stack.empty()) {
95 auto top = graph_stack.back();
96 infer_vec.push_back(top);
97 graph_stack.pop_back();
98 }
99 std::reverse(infer_vec.begin(), infer_vec.end());
100 int index = 0;
101 for (const auto &item : infer_vec) {
102 auto context = item.first;
103 if (context == nullptr) {
104 MS_LOG(EXCEPTION) << "DumpInferStack failed, got null graph context";
105 }
106 auto graph = context->func_graph();
107 if (graph == nullptr) { // Top context.
108 continue;
109 }
110 auto args_spec_list = context->args_spec_list();
111 if (graph->parameters().size() < args_spec_list.size()) {
112 continue;
113 }
114 oss << " #" << index++ << " " << GetGraphParamString(graph, args_spec_list) << "\n";
115 }
116 }
117
TraceGraphEval()118 void TraceGraphEval() {
119 auto &graph_stack = GetCurrentGraphEvalStack();
120 if (graph_stack.empty()) {
121 MS_LOG(INFO) << "Length of analysis graph stack is empty.";
122 return;
123 }
124 std::ostringstream oss;
125 oss << "\n*******************************graph evaluate stack**********************************";
126 oss << std::endl;
127 DumpInferStack(oss);
128 oss << "\n*************************************************************************************";
129 MS_LOG(INFO) << oss.str();
130 }
131
132 class AnalyzeFailExporter : public AnfExporter {
133 public:
AnalyzeFailExporter()134 AnalyzeFailExporter() : AnfExporter(true, false) {}
135 ~AnalyzeFailExporter() override = default;
136
137 bool ExportFuncGraph(const std::string &filename, const TraceCNodeEvalStack &node_config_stack);
138
139 protected:
140 void OutputCNode(std::ofstream &ofs, const CNodePtr &cnode, const FuncGraphPtr &func_graph, int *idx,
141 std::map<AnfNodePtr, int> *const apply_map) override;
142 std::string GetNodeType(const AnfNodePtr &nd) override;
143 AbstractBasePtr GetNodeAbstract(const AnfNodePtr &nd);
144 AnfNodeConfigPtr GetForwardConfig(const AnfNodeConfigPtr &cfg);
145 void ProcessFuncGraphCall(const CNodePtr &node, std::string *const op_comment);
146 std::unordered_map<FuncGraphPtr, TaggedNodeMap> CreateTaggedNodeMap(
147 const std::vector<abstract::AnfNodeConfigPtr> &node_config_stack);
148
149 private:
150 AnalysisContextPtr current_context_ = nullptr;
151 AnalysisEnginePtr engine_ = nullptr;
152 };
153
CreateTaggedNodeMap(const std::vector<abstract::AnfNodeConfigPtr> & node_config_stack)154 std::unordered_map<FuncGraphPtr, TaggedNodeMap> AnalyzeFailExporter::CreateTaggedNodeMap(
155 const std::vector<abstract::AnfNodeConfigPtr> &node_config_stack) {
156 std::unordered_set<abstract::AnfNodeConfigPtr> forwarded_configs; // Check if config. is forwarded.
157 std::unordered_map<FuncGraphPtr, TaggedNodeMap> tagged_func_graphs;
158 size_t index = 0;
159 for (auto &node_config : node_config_stack) {
160 MS_EXCEPTION_IF_NULL(node_config);
161
162 // Record new config in set.
163 auto new_config = GetForwardConfig(node_config);
164 if (new_config != node_config) {
165 MS_LOG(DEBUG) << "The node_config is forwarded, old config: " << node_config->ToString()
166 << ", new_config: " << new_config->ToString();
167 forwarded_configs.emplace(new_config);
168 }
169
170 // Ignore the new config.
171 if (forwarded_configs.find(node_config) != forwarded_configs.end()) {
172 continue;
173 }
174
175 auto fg = node_config->func_graph();
176 MS_EXCEPTION_IF_NULL(fg);
177 auto node = node_config->node();
178 tagged_func_graphs[fg][node] = index;
179 index++;
180 }
181 return tagged_func_graphs;
182 }
183
OutputAnalyzedGraphWithType(const string & file_path)184 bool OutputAnalyzedGraphWithType(const string &file_path) {
185 AnalyzeFailExporter exporter;
186 return exporter.ExportFuncGraph(file_path, GetCNodeDebugStack());
187 }
188
GetNodeType(const AnfNodePtr & node)189 std::string AnalyzeFailExporter::GetNodeType(const AnfNodePtr &node) {
190 if (current_context_ == nullptr) {
191 return AnfExporter::GetNodeType(node);
192 }
193
194 MS_EXCEPTION_IF_NULL(engine_);
195 try {
196 FuncGraphPtr dummy_call_func_graph = nullptr;
197 auto cfg = engine_->MakeConfig(node, current_context_, dummy_call_func_graph);
198 auto res = abstract::AnalysisResultCacheMgr::GetInstance().GetValue(cfg);
199 if (res != nullptr) {
200 return GetAbstractStr(res->abstract());
201 }
202 } catch (const std::exception &e) {
203 MS_LOG(INFO) << "Exception: " << e.what();
204 }
205 return "Undefined";
206 }
207
GetNodeAbstract(const AnfNodePtr & node)208 AbstractBasePtr AnalyzeFailExporter::GetNodeAbstract(const AnfNodePtr &node) {
209 if (current_context_ == nullptr) {
210 return nullptr;
211 }
212 MS_EXCEPTION_IF_NULL(engine_);
213 try {
214 FuncGraphPtr dummy_call_func_graph = nullptr;
215 auto cfg = engine_->MakeConfig(node, current_context_, dummy_call_func_graph);
216 auto res = abstract::AnalysisResultCacheMgr::GetInstance().GetValue(cfg);
217 return res == nullptr ? nullptr : res->abstract();
218 } catch (const std::exception &e) {
219 MS_LOG(INFO) << "Exception: " << e.what();
220 }
221 return nullptr;
222 }
223
GetForwardConfig(const AnfNodeConfigPtr & cfg)224 AnfNodeConfigPtr AnalyzeFailExporter::GetForwardConfig(const AnfNodeConfigPtr &cfg) {
225 MS_EXCEPTION_IF_NULL(cfg);
226 MS_EXCEPTION_IF_NULL(engine_);
227 AnfNodeConfigPtr cur_cfg = cfg;
228 auto iter = engine_->anfnode_config_map().find(cur_cfg);
229 while (iter != engine_->anfnode_config_map().end()) {
230 auto node = cur_cfg->node();
231 cur_cfg = iter->second;
232 MS_LOG(DEBUG) << "Get forward node: " << node << "[" << node->DebugString() << "] --> " << cur_cfg->node() << "["
233 << cur_cfg->node()->DebugString() << "]";
234 iter = engine_->anfnode_config_map().find(cur_cfg);
235 }
236 return cur_cfg;
237 }
238
ProcessFuncGraphCall(const CNodePtr & node,std::string * const op_comment)239 void AnalyzeFailExporter::ProcessFuncGraphCall(const CNodePtr &node, std::string *const op_comment) {
240 if (node == nullptr) {
241 MS_LOG(ERROR) << "Node is nullptr";
242 return;
243 }
244 CNodePtr cnode = nullptr;
245 try {
246 FuncGraphPtr dummy_call_func_graph = nullptr;
247 auto cfg = engine_->MakeConfig(node, current_context_, dummy_call_func_graph);
248 cfg = GetForwardConfig(cfg);
249 MS_EXCEPTION_IF_NULL(cfg);
250 cnode = dyn_cast<CNode>(cfg->node());
251 } catch (const std::exception &e) {
252 MS_LOG(INFO) << "Exception: " << e.what();
253 }
254 if (cnode == nullptr) {
255 MS_LOG(INFO) << "CNode is nullptr";
256 return;
257 }
258
259 const auto &inputs = cnode->inputs();
260 for (size_t i = 0; i < inputs.size(); ++i) {
261 auto op_abs = GetNodeAbstract(inputs[i]);
262 if (op_abs == nullptr) {
263 MS_LOG(DEBUG) << "Abstract of inputs[" << i << "] of cnode " << cnode->ToString() << " is nullptr";
264 continue;
265 }
266
267 if (!op_abs->isa<abstract::FuncGraphAbstractClosure>() && !op_abs->isa<abstract::MetaFuncGraphAbstractClosure>()) {
268 MS_LOG(DEBUG) << "Inputs[" << i << "] of cnode " << cnode->ToString() << " is of type " << op_abs->type_name()
269 << ", not function, ignore it";
270 // Get prototype of VirtualEvaluator for printing
271 if (i == 0 && op_abs->isa<abstract::VirtualAbstractClosure>()) {
272 auto func = dyn_cast<abstract::VirtualAbstractClosure>(op_abs);
273 std::ostringstream oss;
274 oss << "(";
275 bool first_flag = false;
276 for (const auto &arg : func->args_spec_list()) {
277 if (!first_flag) {
278 first_flag = true;
279 } else {
280 oss << ", ";
281 }
282 oss << GetAbstractStr(arg);
283 }
284 oss << ") -> " << GetAbstractStr(func->output()) << " ";
285 *op_comment = oss.str();
286 }
287 }
288 }
289 }
290
OutputCNode(std::ofstream & ofs,const CNodePtr & cnode,const FuncGraphPtr & func_graph,int * idx,std::map<AnfNodePtr,int> * const apply_map)291 void AnalyzeFailExporter::OutputCNode(std::ofstream &ofs, const CNodePtr &cnode, const FuncGraphPtr &func_graph,
292 int *idx, std::map<AnfNodePtr, int> *const apply_map) {
293 OutputCNodeText(ofs, cnode, func_graph, idx, apply_map);
294 // Process function graph call
295 std::string op_comment;
296 ProcessFuncGraphCall(cnode, &op_comment);
297 if (!op_comment.empty()) {
298 auto &inputs = cnode->inputs();
299 ofs << " #" << GetAnfNodeText(func_graph, inputs[0], *apply_map) << ".prototype = " << op_comment;
300 }
301 // Output comment
302 OutputStatementComment(ofs, cnode);
303 ofs << "\n";
304 }
305
ExportFuncGraph(const std::string & filename,const TraceCNodeEvalStack & node_config_stack)306 bool AnalyzeFailExporter::ExportFuncGraph(const std::string &filename, const TraceCNodeEvalStack &node_config_stack) {
307 if (node_config_stack.empty()) {
308 MS_LOG(DEBUG) << "Node configs is empty";
309 return false;
310 }
311 auto real_filepath = Common::CreatePrefixPath(filename);
312 if (!real_filepath.has_value()) {
313 MS_LOG(ERROR) << "The export ir path: " << filename << " is not illegal.";
314 return false;
315 }
316 ChangeFileMode(real_filepath.value(), S_IWUSR);
317 std::ofstream ofs(real_filepath.value());
318 if (!ofs.is_open()) {
319 MS_LOG(ERROR) << "Open file '" << real_filepath.value() << "' failed!" << ErrnoToString(errno);
320 return false;
321 }
322
323 if (engine_ == nullptr) {
324 engine_ = node_config_stack.front()->engine();
325 }
326
327 auto tagged_func_graphs = CreateTaggedNodeMap(node_config_stack);
328 std::unordered_set<FuncGraphPtr> printed_func_graphs; // Check if func graph has been printed.
329 // Output graph on the analysis stack
330 for (const auto &node_config : node_config_stack) {
331 MS_EXCEPTION_IF_NULL(node_config);
332 auto fg = node_config->func_graph();
333 MS_LOG(INFO) << "Node: " << node_config->node()->DebugString()
334 << ", FV: " << (node_config->func_graph() != node_config->context()->func_graph())
335 << ", calling func graph: " << node_config->func_graph()->ToString()
336 << ", context func graph: " << node_config->context()->func_graph()->ToString();
337 if (fg == nullptr) {
338 MS_LOG(ERROR) << "FuncGraph is null, context: " << node_config->ToString();
339 continue;
340 }
341 if (printed_func_graphs.find(fg) != printed_func_graphs.end()) {
342 continue;
343 }
344 (void)printed_func_graphs.emplace(fg);
345
346 current_context_ = node_config->context(); // Set current context.
347 ExportOneFuncGraph(ofs, fg, tagged_func_graphs[fg]);
348 ofs << "\n\n";
349 }
350
351 ofs << "#===============================================================================\n";
352 ofs << "# num of function graphs in stack: ";
353 auto ignored_num = (node_config_stack.size() - printed_func_graphs.size());
354 if (ignored_num == 0) {
355 ofs << node_config_stack.size() << "\n";
356 } else {
357 ofs << printed_func_graphs.size() << "/" << node_config_stack.size() << " (Ignored " << ignored_num
358 << " internal frames).\n";
359 }
360 ofs.close();
361 ChangeFileMode(real_filepath.value(), S_IRUSR);
362 return true;
363 }
364
GetEvalFailDatPath()365 std::string GetEvalFailDatPath() {
366 std::string path;
367 auto ms_om_path = common::GetEnv("MS_OM_PATH");
368 if (!ms_om_path.empty()) {
369 path = ms_om_path;
370 } else {
371 path = ".";
372 }
373 path += "/rank_" + std::to_string(GetRank()) + "/om/analyze_fail.dat";
374 // Support "../" in path.
375 auto realpath = Common::CreatePrefixPath(path, true);
376 if (!realpath.has_value()) {
377 MS_EXCEPTION(ValueError) << "Get real path failed. path=" << path;
378 }
379 return realpath.value();
380 }
381
GetEvalStackInfo(std::ostringstream & oss)382 void GetEvalStackInfo(std::ostringstream &oss) {
383 MS_LOG(INFO) << "Get graph analysis information begin";
384 auto stack = GetCNodeDebugStack();
385 if (stack.empty()) {
386 MS_LOG(INFO) << "Length of analysis information stack is empty.";
387 return;
388 }
389 oss << "\nThe function call stack";
390 #ifndef ENABLE_SECURITY
391 std::string file_name = GetEvalFailDatPath();
392 auto ret = OutputAnalyzedGraphWithType(file_name);
393 if (ret) {
394 oss << " (See file '" << file_name << "' for more details)";
395 }
396 #endif
397 oss << ":\n";
398
399 int index = 0;
400 std::string last_location_info = "";
401 for (size_t i = 0; i < stack.size(); ++i) {
402 auto node_config = stack[i];
403 MS_EXCEPTION_IF_NULL(node_config);
404 auto cnode = dyn_cast<CNode>(node_config->node());
405 if (cnode == nullptr) {
406 MS_LOG(DEBUG) << "CNode of elements[" << i << "] is nullptr.";
407 continue;
408 }
409
410 auto debug_info = cnode->debug_info();
411 auto this_location_info = trace::GetDebugInfo(debug_info, std::string(""));
412 if (this_location_info.empty() || this_location_info == last_location_info) {
413 continue;
414 }
415
416 last_location_info = this_location_info;
417 oss << "# " << index++ << " " << this_location_info;
418 }
419
420 stack.clear();
421 MS_LOG(INFO) << "Get graph analysis information *end*";
422 }
423
424 // Trace the graph evaluator stack
425 thread_local TraceGraphEvalStack graph_infer_stack;
426 // Trace the cnode infer debug info
427 thread_local TraceCNodeEvalStack cnode_debug_stack{};
428
TraceGraphEvalEnter(const abstract::AnalysisContextPtr & context,const abstract::AnfNodeConfigPtr & node)429 void TraceGraphEvalEnter(const abstract::AnalysisContextPtr &context, const abstract::AnfNodeConfigPtr &node) {
430 if (context == nullptr) {
431 MS_LOG(EXCEPTION) << "GraphInferEnter got null context";
432 }
433 (void)graph_infer_stack.push_back(std::pair<abstract::AnalysisContextPtr, abstract::AnfNodeConfigPtr>(context, node));
434 }
435
TraceGraphEvalLeave(const abstract::AnalysisContextPtr & context)436 void TraceGraphEvalLeave(const abstract::AnalysisContextPtr &context) {
437 if (context == nullptr || graph_infer_stack.empty()) {
438 MS_LOG(EXCEPTION) << "The context is null, or call stack is empty.";
439 }
440 if (context != graph_infer_stack.back().first) {
441 MS_LOG(EXCEPTION) << "Different context: " << context->func_graph()->ToString() << ", "
442 << graph_infer_stack.back().first->func_graph()->ToString();
443 }
444 graph_infer_stack.pop_back();
445 }
446
TraceGraphEvalStackPrepare(const TraceGraphEvalStack & graphEvals)447 void TraceGraphEvalStackPrepare(const TraceGraphEvalStack &graphEvals) {
448 graph_infer_stack.insert(graph_infer_stack.end(), graphEvals.begin(), graphEvals.end());
449 }
450
TraceEvalCNodeStackPrepare(const TraceCNodeEvalStack & cnodeEvals)451 void TraceEvalCNodeStackPrepare(const TraceCNodeEvalStack &cnodeEvals) {
452 cnode_debug_stack.insert(cnode_debug_stack.end(), cnodeEvals.begin(), cnodeEvals.end());
453 }
454
TraceEvalCNodeEnter(const abstract::AnfNodeConfigPtr & node_config)455 void TraceEvalCNodeEnter(const abstract::AnfNodeConfigPtr &node_config) { cnode_debug_stack.push_back(node_config); }
456
TraceEvalCNodeLeave()457 void TraceEvalCNodeLeave() { cnode_debug_stack.pop_back(); }
458
GetCNodeDebugStack()459 TraceCNodeEvalStack &GetCNodeDebugStack() { return cnode_debug_stack; }
460
GetCurrentGraphEvalStack()461 TraceGraphEvalStack &GetCurrentGraphEvalStack() { return graph_infer_stack; }
462
ClearTraceStack()463 void ClearTraceStack() {
464 while (!graph_infer_stack.empty()) {
465 graph_infer_stack.pop_back();
466 }
467 cnode_debug_stack.clear();
468 }
469
GetTraceStackInfo(std::ostringstream & oss)470 void GetTraceStackInfo(std::ostringstream &oss) {
471 TraceGraphEval();
472 std::ostringstream trace_info;
473 GetEvalStackInfo(trace_info);
474 if (trace_info.str().empty()) {
475 DebugInfoPtr debug_info = TraceManager::GetParseOrResolveDebugInfo();
476 if (debug_info != nullptr) {
477 oss << "\n\n# " << trace::GetDebugInfo(debug_info);
478 }
479 } else {
480 oss << trace_info.str();
481 }
482 }
483
484 // Register trace provider to LogWriter.
485 struct TraceProviderRegister {
TraceProviderRegistermindspore::trace::TraceProviderRegister486 TraceProviderRegister() { LogWriter::set_trace_provider(GetTraceStackInfo); }
487 ~TraceProviderRegister() = default;
488 } trace_provider_register;
489
490 // Register trace cnode provider to AbstractBase.
491 struct TraceNodeProviderRegister {
TraceNodeProviderRegistermindspore::trace::TraceNodeProviderRegister492 TraceNodeProviderRegister() {
493 abstract::AbstractBase::set_trace_node_provider([](AnfNodePtr *node) {
494 auto stack = GetCNodeDebugStack();
495 if (!stack.empty()) {
496 auto conf = stack.back();
497 *node = conf->node();
498 }
499 });
500 }
501 ~TraceNodeProviderRegister() = default;
502 } trace_node_provider_register;
503 } // namespace trace
504 } // namespace mindspore
505