1 /**
2 * Copyright 2023 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "mindspore/ccsrc/debug/summary/summary.h"
18 #include "include/backend/anf_runtime_algorithm.h"
19 #include "include/common/utils/anfalgo.h"
20 #include "mindspore/core/ops/structure_ops.h"
21 #include "runtime/device/ms_device_shape_transfer.h"
22 #include "utils/ms_context.h"
23 #include "utils/trace_base.h"
24
25 namespace mindspore::debug {
// Index of the summary CNode input whose producing kernel output is recorded by
// Summary::SetSummaryNodes. Input 1 is read as the tag string in this file; a summary
// node whose size() is not greater than this index is rejected with an exception.
constexpr int kSummaryGetItem = 2;
27
28 namespace {
GetSummaryNameWithTag(CNodePtr cnode)29 string GetSummaryNameWithTag(CNodePtr cnode) {
30 std::string tag = GetValue<std::string>(GetValueNode(cnode->input(1)));
31 std::string name;
32 if (cnode->IsApply(prim::kPrimScalarSummary)) {
33 name = tag + "[:Scalar]";
34 } else if (cnode->IsApply(prim::kPrimImageSummary)) {
35 name = tag + "[:Image]";
36 } else if (cnode->IsApply(prim::kPrimHistogramSummary)) {
37 name = tag + "[:Histogram]";
38 } else {
39 name = tag + "[:Tensor]";
40 }
41 return name;
42 }
43 } // namespace
44
GetInstance()45 Summary &Summary::GetInstance() {
46 static Summary instance;
47 return instance;
48 }
49
RecurseSetSummaryNodesForAllGraphs(KernelGraph * graph)50 void Summary::RecurseSetSummaryNodesForAllGraphs(KernelGraph *graph) {
51 MS_EXCEPTION_IF_NULL(graph);
52 MS_LOG(INFO) << "Recurse set summary nodes for all graphs in graph: " << graph->graph_id() << " start";
53 auto ms_context = MsContext::GetInstance();
54 MS_EXCEPTION_IF_NULL(ms_context);
55 std::string backend = ms_context->backend_policy();
56 if (backend == "ge") {
57 MS_LOG(INFO) << "This function should be skipped on GE backend.";
58 return;
59 }
60 SetSummaryNodes(graph);
61 auto &summary_nodes = graph->summary_nodes();
62 std::map<std::string, std::pair<AnfNodePtr, int>> summary;
63 summary.insert(summary_nodes.cbegin(), summary_nodes.cend());
64 auto &child_graphs = graph->child_graph_order();
65 for (auto &child_graph : child_graphs) {
66 SetSummaryNodes(child_graph.lock().get());
67 auto &child_graph_summary = child_graph.lock()->summary_nodes();
68 summary.insert(child_graph_summary.cbegin(), child_graph_summary.cend());
69 RecurseSetSummaryNodesForAllGraphs(child_graph.lock().get());
70 }
71 graph->set_summary_nodes(summary);
72 MS_LOG(INFO) << "The total summary nodes is: " << summary.size() << " for graph: " << graph->graph_id();
73 }
74
SummaryTensor(KernelGraph * graph)75 void Summary::SummaryTensor(KernelGraph *graph) {
76 MS_EXCEPTION_IF_NULL(graph);
77 auto ms_context = MsContext::GetInstance();
78 MS_EXCEPTION_IF_NULL(ms_context);
79 std::string backend = ms_context->backend_policy();
80 if (backend == "ge") {
81 MS_LOG(INFO) << "This function should be skipped on GE backend.";
82 return;
83 }
84
85 if (summary_callback_ == nullptr) {
86 return;
87 }
88 MS_EXCEPTION_IF_NULL(graph);
89 bool exist_summary = graph->summary_node_exist();
90 if (!exist_summary) {
91 return;
92 }
93
94 auto summary_outputs = graph->summary_nodes();
95 std::map<std::string, tensor::TensorPtr> params_list;
96 // fetch outputs apply kernel in session & run callback functions
97 for (const auto &output_item : summary_outputs) {
98 auto node = output_item.second.first;
99 size_t index = IntToSize(output_item.second.second);
100 auto address = AnfAlgo::GetOutputAddr(node, index, false);
101 auto kt = AnfAlgo::GetOutputKernelTensor(node, index);
102 auto shape = kt->GetShapeVector();
103 TypeId type_id = kt->dtype_id();
104 tensor::TensorPtr tensor = std::make_shared<tensor::Tensor>(type_id, shape);
105 MS_EXCEPTION_IF_NULL(address);
106 if (!address->GetPtr()) {
107 continue;
108 }
109 if (!address->SyncDeviceToHost(trans::GetRuntimePaddingShape(node, index), LongToSize(tensor->data().nbytes()),
110 tensor->data_type(), tensor->data_c())) {
111 MS_LOG(ERROR) << "Failed to sync output from device to host.";
112 }
113 tensor->set_sync_status(kNoNeedSync);
114 params_list[output_item.first] = tensor;
115 }
116 // call callback function here
117 summary_callback_(0, params_list);
118 }
119
RegisterSummaryCallBackFunc(const CallBackFunc & callback)120 void Summary::RegisterSummaryCallBackFunc(const CallBackFunc &callback) { summary_callback_ = callback; }
121
SetSummaryNodes(KernelGraph * graph)122 void Summary::SetSummaryNodes(KernelGraph *graph) {
123 MS_LOG(DEBUG) << "Update summary Start";
124 MS_EXCEPTION_IF_NULL(graph);
125 auto ms_context = MsContext::GetInstance();
126 MS_EXCEPTION_IF_NULL(ms_context);
127 std::string backend = ms_context->backend_policy();
128 if (backend == "ge") {
129 MS_LOG(INFO) << "This function should be skipped on GE backend.";
130 return;
131 }
132 if (!graph->summary_node_exist()) {
133 return;
134 }
135 auto summary = graph->summary_nodes();
136 auto apply_list = TopoSort(graph->get_return());
137 for (auto &n : apply_list) {
138 MS_EXCEPTION_IF_NULL(n);
139 if (AnfAlgo::IsSummaryNode(n)) {
140 auto cnode = n->cast<CNodePtr>();
141 MS_EXCEPTION_IF_NULL(cnode);
142 if (cnode->size() <= kSummaryGetItem) {
143 MS_LOG(EXCEPTION) << "The node Summary should have 2 inputs at least, but got " << (cnode->size() - 1) << "."
144 << trace::DumpSourceLines(cnode);
145 }
146 auto node = cnode->input(kSummaryGetItem);
147 MS_EXCEPTION_IF_NULL(node);
148 auto item_with_index = common::AnfAlgo::VisitKernelWithReturnType(node, 0, false);
149 MS_EXCEPTION_IF_NULL(item_with_index.first);
150 if (!AnfUtils::IsRealKernel(item_with_index.first)) {
151 MS_LOG(EXCEPTION) << "Unexpected node:" << item_with_index.first->DebugString();
152 }
153 summary[GetSummaryNameWithTag(cnode)] = item_with_index;
154 }
155 }
156 graph->set_summary_nodes(summary);
157 MS_LOG(DEBUG) << "Update summary end size: " << summary.size();
158 }
159
160 } // namespace mindspore::debug
161