1 /**
2 * Copyright 2023 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
#include "plugin/device/ascend/hal/hardware/ge_graph_optimization.h"
#include <memory>
#include <set>
#include <string>
#include "ops/framework_ops.h"
#include "include/common/utils/anfalgo.h"
#include "backend/common/optimizer/common_backend_optimization.h"
#include "backend/common/graph_kernel/graph_kernel_flags.h"
#include "backend/common/graph_kernel/adapter/graph_kernel_optimization.h"
#include "plugin/device/ascend/optimizer/ge_backend_optimization.h"
#include "plugin/device/ascend/optimizer/backend_common_unify_mindir.h"
#include "include/backend/anf_runtime_algorithm.h"
#include "include/backend/debug/profiler/profiling.h"
#ifndef ENABLE_SECURITY
#include "include/common/debug/dump_proto.h"
#endif
32
33 namespace mindspore {
34 namespace device {
35 namespace ascend {
36 namespace {
MarkRefGraph(const KernelGraphPtr & kernel_graph)37 void MarkRefGraph(const KernelGraphPtr &kernel_graph) {
38 MS_EXCEPTION_IF_NULL(kernel_graph);
39 MS_LOG(INFO) << "Mark graph is ref graph: " << kernel_graph->graph_id();
40 auto ms_context = MsContext::GetInstance();
41 MS_EXCEPTION_IF_NULL(ms_context);
42 auto is_kbk = ms_context->IsKByKExecutorMode();
43 auto manager = kernel_graph->manager();
44 if (manager == nullptr || kernel_graph->has_attr(kIsRefGraph)) {
45 return;
46 }
47 for (const auto &node : TopoSort(kernel_graph->get_return(), SuccDeeperSimple, AlwaysInclude)) {
48 if (!node->isa<CNode>()) {
49 continue;
50 }
51 auto cnode = node->cast<CNodePtr>();
52 if (cnode == nullptr) {
53 continue;
54 }
55 auto is_side_effect = common::AnfAlgo::HasNodeAttr(GRAPH_FLAG_SIDE_EFFECT_MEM, cnode) &&
56 common::AnfAlgo::GetNodeAttr<bool>(cnode, GRAPH_FLAG_SIDE_EFFECT_MEM);
57 if (!(is_side_effect && cnode->fullname_with_scope().find("optimizer") != std::string::npos)) {
58 continue;
59 }
60 for (const auto &node_pair : manager->node_users()[cnode]) {
61 if (IsPrimitiveCNode(node_pair.first, prim::kPrimUpdateState)) {
62 kernel_graph->set_attr(kIsRefGraph, MakeValue(true));
63 MS_LOG(INFO) << "graph is ref graph: " << kernel_graph->graph_id();
64 if (!is_kbk) {
65 return;
66 }
67 common::AnfAlgo::SetNodeAttr(kFromRefGraph, MakeValue(true), cnode);
68 break;
69 }
70 }
71 }
72 }
73 } // namespace
74
OptimizeGEGraph(const KernelGraphPtr & graph,std::set<KernelGraphPtr> * const memo)75 void GEGraphOptimization::OptimizeGEGraph(const KernelGraphPtr &graph, std::set<KernelGraphPtr> *const memo) {
76 MS_EXCEPTION_IF_NULL(graph);
77 MS_EXCEPTION_IF_NULL(memo);
78 if (memo->find(graph) != memo->end()) {
79 return;
80 }
81 memo->insert(graph);
82 MS_LOG(DEBUG) << "Status record: start optimize ge graph. graph id: " << graph->graph_id();
83 // empty graph dont entry to backend
84 if (graph->execution_order().empty()) {
85 MS_LOG(DEBUG) << graph->ToString() << " is empty graph.";
86 AnfAlgo::InsertMakeTupleForOutput(NOT_NULL(graph));
87 graph->set_executable(false);
88 MS_LOG(DEBUG) << "Status record: end optimize ge graph. graph id: " << graph->graph_id();
89 }
90 MarkRefGraph(graph);
91 opt::GEBackendOptimizeACL(graph);
92 opt::GEBackendOptimization(graph);
93 if (const auto &gk = graphkernel::GraphKernelFlags::GetInstance(); gk.IsEnableGraphKernel()) {
94 if (gk.kernel_generator != "DVM") {
95 graphkernel::GraphKernelOptimize(graph);
96 graph->SetExecOrderByDefault();
97 } else {
98 MS_LOG(WARNING) << "In ge graph, GraphKernel fusion is not supported for the DVM kernel_generator.";
99 }
100 }
101 for (auto &child_graph : graph->child_graph_order()) {
102 OptimizeGEGraph(child_graph.lock(), memo);
103 }
104 MS_LOG(DEBUG) << "Status record: end optimize ge graph. graph id: " << graph->graph_id();
105 }
106
OptimizeACLGraph(const KernelGraphPtr & graph,std::set<KernelGraphPtr> * const memo)107 void GEGraphOptimization::OptimizeACLGraph(const KernelGraphPtr &graph, std::set<KernelGraphPtr> *const memo) {
108 MS_EXCEPTION_IF_NULL(graph);
109 MS_EXCEPTION_IF_NULL(memo);
110 if (memo->find(graph) != memo->end()) {
111 return;
112 }
113 memo->insert(graph);
114 MS_LOG(DEBUG) << "Status record: start optimize acl graph. graph id: " << graph->graph_id();
115 // empty graph dont entry to backend
116 if (graph->execution_order().empty()) {
117 MS_LOG(DEBUG) << graph->ToString() << " is empty graph.";
118 AnfAlgo::InsertMakeTupleForOutput(NOT_NULL(graph));
119 graph->set_executable(false);
120 MS_LOG(DEBUG) << "Status record: end optimize acl graph. graph id: " << graph->graph_id();
121 }
122 MarkRefGraph(graph);
123 opt::AscendUnfoldInputsForSpecialNodes(graph);
124 opt::GEBackendOptimizeACL(graph);
125 for (auto &child_graph : graph->child_graph_order()) {
126 OptimizeACLGraph(child_graph.lock(), memo);
127 }
128 MS_LOG(DEBUG) << "Status record: end optimize acl graph. graph id: " << graph->graph_id();
129 }
130
OptimizeACLGraphAfterKernelSelect(const KernelGraphPtr & graph,std::set<KernelGraphPtr> * const memo)131 void GEGraphOptimization::OptimizeACLGraphAfterKernelSelect(const KernelGraphPtr &graph,
132 std::set<KernelGraphPtr> *const memo) {
133 MS_EXCEPTION_IF_NULL(graph);
134 MS_EXCEPTION_IF_NULL(memo);
135 if (memo->find(graph) != memo->end()) {
136 return;
137 }
138 memo->insert(graph);
139 MS_LOG(DEBUG) << "Status record: start optimize acl graph after kernel select. graph id: " << graph->graph_id();
140 // empty graph dont entry to backend
141 if (graph->execution_order().empty()) {
142 MS_LOG(DEBUG) << graph->ToString() << " is empty graph.";
143 AnfAlgo::InsertMakeTupleForOutput(NOT_NULL(graph));
144 graph->set_executable(false);
145 MS_LOG(DEBUG) << "Status record: end optimize acl graph after kernel select. graph id: " << graph->graph_id();
146 }
147 if (!graph->is_from_single_op() && graphkernel::GraphKernelFlags::GetInstance().IsEnableGraphKernel()) {
148 graphkernel::GraphKernelOptimize(graph);
149 graph->SetExecOrderByDefault();
150 }
151 opt::GEBackendOptimizeACLAfterKernelSelect(graph);
152 for (auto &child_graph : graph->child_graph_order()) {
153 OptimizeACLGraphAfterKernelSelect(child_graph.lock(), memo);
154 }
155 if (!graph->is_from_single_op() && graphkernel::GraphKernelFlags::GetInstance().IsEnableKernelPacket()) {
156 graphkernel::KernelPacketOptimize(graph);
157 graph->SetExecOrderByDefault();
158 }
159 MS_LOG(DEBUG) << "Status record: end optimize acl graph after kernel select. graph id: " << graph->graph_id();
160 }
161
OptimizeACLGraphAfterInline(const KernelGraphPtr & graph)162 void GEGraphOptimization::OptimizeACLGraphAfterInline(const KernelGraphPtr &graph) {
163 MS_EXCEPTION_IF_NULL(graph);
164 MS_LOG(DEBUG) << "Status record: start optimize acl graph after inline. graph id: " << graph->graph_id();
165 // empty graph dont entry to backend
166 if (graph->execution_order().empty()) {
167 MS_LOG(DEBUG) << graph->ToString() << " is empty graph.";
168 AnfAlgo::InsertMakeTupleForOutput(NOT_NULL(graph));
169 graph->set_executable(false);
170 MS_LOG(DEBUG) << "Status record: end optimize acl graph after inline. graph id: " << graph->graph_id();
171 }
172 opt::GEAfterInlineOptimize(graph);
173 MS_LOG(DEBUG) << "Status record: end optimize acl graph after inline. graph id: " << graph->graph_id();
174 }
175
// Unify the graph's MindIR: run the common unification pass followed by the
// GE-specific one, bracketed by host-profiling records and PROF_* timing
// markers (the paired CollectHostInfo calls differ only in the final 0/1
// begin/end flag).
void GEGraphOptimization::UnifyMindIR(const KernelGraphPtr &graph) {
  MS_EXCEPTION_IF_NULL(graph);
  MS_LOG(INFO) << "Status record: start unify mindir. graph id: " << graph->graph_id();
  profiler::CollectHostInfo("Ascend", "Graph Optimization", "UnifyMindIR", 0, 0, 0);
  PROF_START(unify_mindir);
  opt::CommonUnifyMindIR(graph);
  opt::GEUnifyMindIR(graph);
  PROF_END(unify_mindir);
  profiler::CollectHostInfo("Ascend", "Graph Optimization", "UnifyMindIR", 0, 0, 1);
  MS_LOG(INFO) << "Status record: end unify mindir. graph id: " << graph->graph_id();
}
187
GEMindIRPass(const KernelGraphPtr & graph) const188 void GEGraphOptimization::GEMindIRPass(const KernelGraphPtr &graph) const { opt::GEUnifyMindIR(graph); }
189 } // namespace ascend
190 } // namespace device
191 } // namespace mindspore
192