/**
 * Copyright 2024 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "frontend/parallel/pass/begin_end_overlap_inline.h"
#include <memory>
#include <list>
#include "mindspore/core/ops/framework_ops.h"
#include "frontend/parallel/ops_info/ops_utils.h"
#include "frontend/parallel/step_parallel_utils.h"
#include "abstract/abstract_function.h"
#include "ir/func_graph_cloner.h"

namespace mindspore {
namespace parallel {
namespace {
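// Return true if `bg` is used as the callee of a Partial node whose enclosing graph carries the
// FUNC_GRAPH_FLAG_NO_INLINE attribute, i.e. `bg` is a lazy-inline backward graph.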
bool IsLazyInlineBackward(const FuncGraphPtr &bg) {
  for (auto &entry : bg->func_graph_cnodes_index()) {
    auto cnode = entry.first->first->cast<CNodePtr>();
    auto index = entry.first->second;
    if (index == 1 && IsPrimitive(cnode->inputs().at(0), prim::kPrimPartial)) {
      // To find real calling.
      auto fg = cnode->func_graph();
      MS_EXCEPTION_IF_NULL(fg);
      return fg->has_attr(FUNC_GRAPH_FLAG_NO_INLINE);
    }
  }
  return false;
}

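// Resolve the FuncGraph called by `node` from the abstract of its first input, handling both a direct
// FuncGraphAbstractClosure and a PartialAbstractClosure that wraps one; return nullptr otherwise.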
FuncGraphPtr GetAbstractFunc(const CNodePtr &node) {
  if (node->input(0)->isa<CNode>() && node->input(0)->abstract() != nullptr) {
    auto abs = node->input(0)->abstract();
    if (abs->isa<abstract::FuncGraphAbstractClosure>()) {
      const auto &abstract_func_graph = abs->cast<abstract::FuncGraphAbstractClosurePtr>();
      return abstract_func_graph->func_graph();
    } else if (abs->isa<abstract::PartialAbstractClosure>()) {
      const auto &abstract_partial_func = abs->cast<abstract::PartialAbstractClosurePtr>();
      const auto &abstract_fn = abstract_partial_func->fn();
      if (abstract_fn->isa<abstract::FuncGraphAbstractClosure>()) {
        const auto &abstract_func_graph = abstract_fn->cast<abstract::FuncGraphAbstractClosurePtr>();
        return abstract_func_graph->func_graph();
      }
    }
  }
  return nullptr;
}

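// Inline `expanded_graph` at the call site `expanding_node` (a direct call whose first input is the
// sub graph value node) and replace the call node with the cloned output.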
void InlineExpandFuncGraph(const CNodePtr &expanding_node, const FuncGraphPtr &expanded_graph) {
  MS_EXCEPTION_IF_NULL(expanding_node);
  auto main_graph = expanding_node->func_graph();
  MS_EXCEPTION_IF_NULL(main_graph);
  auto mng = main_graph->manager();
  if (mng == nullptr) {
    mng = Manage(main_graph, true);
    main_graph->set_manager(mng);
  }
  AnfNodePtrList inp(expanding_node->inputs().begin() + 1, expanding_node->inputs().end());
  // Clone the sub graph into the main graph with the call's arguments bound to its parameters.
  auto out = InlineClone(expanded_graph, main_graph, inp, expanding_node);
  (void)mng->Replace(expanding_node, out);
}

// Expand the bg node from a Partial: the arguments bound by the Partial are prepended to the call's
// arguments before inlining.
void InlineExpandPartialFuncGraph(const CNodePtr &expanding_node, const FuncGraphPtr &expanded_graph,
                                  const AnfNodePtrList &partial_params) {
  MS_EXCEPTION_IF_NULL(expanding_node);
  auto main_graph = expanding_node->func_graph();
  MS_EXCEPTION_IF_NULL(main_graph);
  auto mng = main_graph->manager();
  if (mng == nullptr) {
    mng = Manage(main_graph, true);
    main_graph->set_manager(mng);
  }
  AnfNodePtrList inp(expanding_node->inputs().begin() + 1, expanding_node->inputs().end());
  (void)inp.insert(inp.begin(), partial_params.begin(), partial_params.end());
  auto out = InlineClone(expanded_graph, main_graph, inp, expanding_node);
  (void)mng->Replace(expanding_node, out);
}

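// Scan the root graph to collect the no-inline forward sub graph and its calls (*fg / *fg_call) and the
// lazy-inline backward graph and its calls (*bg / *bg_call). Return true (skip the pass) when fewer than
// two micro calls are found on either side.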
bool SkipBeginEndOverlapInline(const FuncGraphPtr &graph, FuncGraphPtr *fg, FuncGraphPtr *bg, CNodePtrList *fg_call,
                               CNodePtrList *bg_call) {
  std::list<CNodePtr> graph_orders = graph->GetOrderedCnodes();
  for (auto &node : graph_orders) {
    MS_EXCEPTION_IF_NULL(node);
    if (IsValueNode<FuncGraph>(node->input(0))) {
      FuncGraphPtr sub_graph = node->input(0)->cast<ValueNodePtr>()->value()->cast<FuncGraphPtr>();
      MS_EXCEPTION_IF_NULL(sub_graph);
      if (sub_graph->has_attr(FUNC_GRAPH_FLAG_NO_INLINE)) {
        (void)fg_call->emplace_back(node);
        *fg = sub_graph;
      }
    } else {
      auto func = GetAbstractFunc(node);
      if (func != nullptr && IsLazyInlineBackward(func)) {
        *bg = func;
        (void)bg_call->emplace_back(node);
      }
    }
  }
  constexpr size_t mini_micro_size = 2;
  return fg_call->size() < mini_micro_size || bg_call->size() < mini_micro_size;
}

}  // namespace

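// Inline the first and the last micro forward (fg) and backward (bg) sub graph calls into the root graph;
// the remaining micro calls keep their lazy-inline (no-inline) form.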
void BeginEndOverlapInlineOpt(const FuncGraphPtr &graph) {
  if (parallel::g_device_manager == nullptr) {
    return;
  }
  MS_LOG(INFO) << "Begin end overlap inline start.";
  // Find the micro fg and bg calls; skip the pass if there are fewer than two of either.
  FuncGraphPtr bg;
  FuncGraphPtr fg;
  CNodePtrList fg_call;
  CNodePtrList bg_call;
  if (SkipBeginEndOverlapInline(graph, &fg, &bg, &fg_call, &bg_call)) {
    return;
  }

  // Inline the last micro fg
  InlineExpandFuncGraph(fg_call.back(), fg);
  // Inline the last micro bg
  AnfNodePtrList last_micro_bg_partial_params;
  CNodePtr last_micro_bg_partial_call;
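  // Locate the Partial node holding bg that was exposed in the root graph by inlining the last micro fg,
  // and collect the arguments it binds.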
  for (auto &entry : bg->func_graph_cnodes_index()) {
    auto cnode = entry.first->first->cast<CNodePtr>();
    auto index = entry.first->second;
    if (index == 1 && IsPrimitive(cnode->inputs().at(0), prim::kPrimPartial)) {
      // The partial node is in the root graph after last micro forward inline
      if (graph == cnode->func_graph()) {
        last_micro_bg_partial_call = cnode;
        (void)last_micro_bg_partial_params.insert(last_micro_bg_partial_params.begin(),
                                                  cnode->inputs().begin() + kIndex2, cnode->inputs().end());
        break;
      }
    }
  }
  InlineExpandPartialFuncGraph(bg_call.back(), bg, last_micro_bg_partial_params);

  // Inline the first micro fg
  InlineExpandFuncGraph(fg_call[0], fg);
  AnfNodePtrList first_micro_bg_partial_params;
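  // Locate the Partial node exposed by inlining the first micro fg (skipping the one already matched for
  // the last micro) and collect the arguments it binds.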
  for (auto &entry : bg->func_graph_cnodes_index()) {
    auto cnode = entry.first->first->cast<CNodePtr>();
    auto index = entry.first->second;
    if (index == 1 && IsPrimitive(cnode->inputs().at(0), prim::kPrimPartial)) {
      // The partial node is in the root graph after first micro forward inline.
      MS_EXCEPTION_IF_NULL(fg);
      if (graph == cnode->func_graph() && cnode != last_micro_bg_partial_call) {
        (void)first_micro_bg_partial_params.insert(first_micro_bg_partial_params.begin(),
                                                   cnode->inputs().begin() + kIndex2, cnode->inputs().end());
        break;
      }
    }
  }
  // Inline the first micro bg
  InlineExpandPartialFuncGraph(bg_call[0], bg, first_micro_bg_partial_params);
}
}  // namespace parallel
}  // namespace mindspore