• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2019-2021 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #ifndef MINDSPORE_CCSRC_BACKEND_SESSION_KERNEL_GRAPH_H
17 #define MINDSPORE_CCSRC_BACKEND_SESSION_KERNEL_GRAPH_H
18 
19 #include <vector>
20 #include <memory>
21 #include <utility>
22 #include <string>
23 #include <queue>
24 #include <map>
25 #include <unordered_map>
26 #include <set>
27 #include <unordered_set>
28 #include <stack>
29 #include <atomic>
30 #include "ir/func_graph.h"
31 #include "ir/anf.h"
32 #include "ir/graph_utils.h"
33 #include "utils/contract.h"
34 #include "runtime/device/kernel_info.h"
35 
36 namespace mindspore {
37 namespace session {
38 using AnfWithOutIndex = std::pair<AnfNodePtr, size_t>;
39 using KernelWithIndex = std::pair<AnfNodePtr, size_t>;
40 struct KernelWithIndexCmp {
operatorKernelWithIndexCmp41   bool operator()(const KernelWithIndex &key1, const KernelWithIndex &key2) const {
42     if (key1.first != key2.first) {
43       return key1.first < key2.first;
44     }
45     if (key1.second != key2.second) {
46       return key1.second < key2.second;
47     }
48     return false;
49   }
50 };
51 
52 using KernelMapTensor = std::map<session::KernelWithIndex, BaseRef, session::KernelWithIndexCmp>;
53 
54 class KernelGraph : public FuncGraph {
55  public:
KernelGraph()56   KernelGraph() : graph_id_(0), start_label_(nullptr), end_goto_(nullptr), current_epoch_(0), is_dynamic_shape_(false) {
57     inputs_ = std::make_shared<std::vector<AnfNodePtr>>();
58     execution_order_ = {};
59     mem_reuse_exec_order_ = {};
60     executable_ = true;
61     summary_node_exist_ = false;
62     stream_distinction_label_ = kInvalidDistincLabel;
63   }
64 
KernelGraph(const KernelGraph & graph)65   KernelGraph(const KernelGraph &graph) : FuncGraph(graph) {
66     inputs_ = graph.inputs_;
67     child_graph_result_ = graph.child_graph_result_;
68     execution_order_ = graph.execution_order_;
69     mem_reuse_exec_order_ = graph.mem_reuse_exec_order_;
70     graph_id_ = graph.graph_id_;
71     stream_distinction_label_ = graph.stream_distinction_label_;
72     front_backend_anf_map_ = graph.front_backend_anf_map_;
73     backend_front_anf_map_ = graph.backend_front_anf_map_;
74     tensor_to_value_node_map_ = graph.tensor_to_value_node_map_;
75     graph_value_nodes_ = graph.graph_value_nodes_;
76     node_input_num_ = graph.node_input_num_;
77     node_input_edges_ = graph.node_input_edges_;
78     ref_out_in_map_ = graph.ref_out_in_map_;
79     node_output_edges_ = graph.node_output_edges_;
80     summary_nodes_ = graph.summary_nodes_;
81     updated_parameters_ = graph.updated_parameters_;
82     executable_ = graph.executable_;
83     summary_node_exist_ = graph.summary_node_exist_;
84     valid_inputs_ = graph.valid_inputs_;
85     child_graph_order_ = graph.child_graph_order_;
86     input_ctrl_tensors_ = graph.input_ctrl_tensors_;
87     parent_graph_ = graph.parent_graph_;
88     start_label_ = graph.start_label_;
89     end_goto_ = graph.end_goto_;
90     internal_parameter_to_front_node_map_ = graph.internal_parameter_to_front_node_map_;
91     graph_output_to_front_node_map_ = graph.graph_output_to_front_node_map_;
92     front_to_internal_outputs_map_ = graph.front_to_internal_outputs_map_;
93     internal_outputs_to_front_map_ = graph.internal_outputs_to_front_map_;
94     internal_outputs_tensor_map_ = graph.internal_outputs_tensor_map_;
95     current_epoch_ = graph.current_epoch_;
96     tuple_parameter_to_make_tuple_map_ = graph.tuple_parameter_to_make_tuple_map_;
97     visited_nodes_ = graph.visited_nodes_;
98     edge_to_ = graph.edge_to_;
99     loop_nodes_ = graph.loop_nodes_;
100     input_nodes_ = graph.input_nodes_;
101     pre_graphs_ = graph.pre_graphs_;
102     post_graphs_ = graph.post_graphs_;
103     allreduce_from_send_recv_pairs_ = graph.allreduce_from_send_recv_pairs_;
104     allreduce_to_send_recv_pairs_ = graph.allreduce_to_send_recv_pairs_;
105     size_t pre_graph_finished_count = graph.pre_graph_finished_count_;
106     pre_graph_finished_count_ = pre_graph_finished_count;
107     size_t post_graph_finished_count = graph.post_graph_finished_count_;
108     post_graph_finished_count_ = post_graph_finished_count;
109     first_step_ = graph.first_step_;
110     has_optimizer_ = graph.has_optimizer_;
111     is_dynamic_shape_ = graph.is_dynamic_shape_;
112   }
113 
114   ~KernelGraph() override;
115 
116   MS_DECLARE_PARENT(KernelGraph, FuncGraph);
117 
118   const std::vector<AnfNodePtr> &inputs() const;
MutableInputs()119   std::vector<AnfNodePtr> *MutableInputs() const { return inputs_.get(); }
SetGraphInputs(const std::vector<AnfNodePtr> & inputs)120   void SetGraphInputs(const std::vector<AnfNodePtr> &inputs) {
121     inputs_ = std::make_shared<std::vector<AnfNodePtr>>(inputs);
122   }
123   void ReplaceGraphInput(const AnfNodePtr &old_parameter, const AnfNodePtr &new_parameter);
124   std::vector<AnfNodePtr> outputs() const;
125   CNodePtr NewCNode(const std::vector<AnfNodePtr> &inputs) override;
126   CNodePtr NewCNodeWithInfos(const std::vector<AnfNodePtr> &inputs, const CNodePtr &ori_cnode = nullptr);
127   void CreateKernelInfoFromNewParameter(const CNodePtr &cnode);
128   CNodePtr NewCNode(const CNodePtr &cnode);
129   void ResetAssignInputFeatureMapFlag(const CNodePtr &cnode) const;
130   ParameterPtr NewParameter(const ParameterPtr &parameter = nullptr);
131   ParameterPtr NewParameter(const abstract::AbstractBasePtr &abstract);
132   ValueNodePtr NewValueNode(const AbstractBasePtr &abstract, const ValuePtr &value);
133   ValueNodePtr NewValueNode(const ValueNodePtr &value_node = nullptr);
134   ValueNodePtr NewValueNode(const tensor::TensorPtr &input_tensor);
135   // trans tuple output to maketuple + no_tuple out
136   AnfNodePtr TransTupleToMakeTuple(const AnfNodePtr &node);
set_execution_order(const std::vector<CNodePtr> & order)137   void set_execution_order(const std::vector<CNodePtr> &order) { execution_order_ = order; }
set_execution_order(std::vector<CNodePtr> && order)138   void set_execution_order(std::vector<CNodePtr> &&order) { execution_order_ = std::move(order); }
execution_order()139   const std::vector<CNodePtr> &execution_order() const { return execution_order_; }
140   // Set new exec_order for mem_reuse
set_mem_reuse_exec_order(const std::vector<CNodePtr> & order)141   void set_mem_reuse_exec_order(const std::vector<CNodePtr> &order) { mem_reuse_exec_order_ = order; }
mem_reuse_exec_order()142   const std::vector<CNodePtr> &mem_reuse_exec_order() const { return mem_reuse_exec_order_; }
143   void SetExecOrderByDefault();
graph_id()144   uint32_t graph_id() const { return graph_id_; }
set_graph_id(uint32_t graph_id)145   void set_graph_id(uint32_t graph_id) { graph_id_ = graph_id; }
root_graph_id()146   uint32_t root_graph_id() const { return root_graph_id_; }
set_root_graph_id(uint32_t root_graph_id)147   void set_root_graph_id(uint32_t root_graph_id) { root_graph_id_ = root_graph_id; }
148 
149   // add a new front to backend anf relation to map
150   void FrontBackendlMapAdd(const AnfNodePtr &front_anf, const AnfNodePtr &backend_anf);
151   // replace old backend anf with new backend anf
152   void FrontBackendlMapUpdate(const AnfNodePtr &old_backend_anf, const AnfNodePtr &new_backend_anf);
153   // get backend anf by front anf
154   AnfNodePtr GetBackendAnfByFrontAnf(const AnfNodePtr &front_anf);
155   // get front anf by backend anf
156   AnfNodePtr GetFrontAnfByBackendAnf(const AnfNodePtr &backend_anf);
157   // check backend node whether exist in map
158   bool BackendNodeExistInFrontBackendMap(const AnfNodePtr &backend_anf);
159   // get value node by tensor
160   ValueNodePtr GetValueNodeByTensor(const tensor::TensorPtr &tensor);
161   // add value node tensor relation map
162   void TensorValueNodeMapAdd(const tensor::TensorPtr &tensor, const ValueNodePtr &value_node);
163   // get all value nodes of graph
graph_value_nodes()164   const std::unordered_set<ValueNodePtr> graph_value_nodes() const { return graph_value_nodes_; }
165   // add value node to graph
166   void AddValueNodeToGraph(const ValueNodePtr &value_node);
167   // ref output is in map
168   bool IsInRefOutputMap(const AnfWithOutIndex &pair) const;
169   // get ref correspond pairs
170   AnfWithOutIndex GetRefCorrespondOutput(const AnfWithOutIndex &out_pair) const;
171   // add ref correspond pairs
172   void AddRefCorrespondPairs(const AnfWithOutIndex &final_pair, const AnfWithOutIndex &origin_pair);
173   // get map
GetRefMap()174   std::map<AnfWithOutIndex, AnfWithOutIndex> GetRefMap() const { return ref_out_in_map_; }
175   // check whether graph is executable
executable()176   bool executable() const { return executable_; }
177   // set executable of graph
set_executable(bool executable)178   void set_executable(bool executable) { executable_ = executable; }
179 #ifndef ENABLE_SECURITY
180   // set summary_node of graph
set_summary_node_exist(bool summary_node_exist)181   void set_summary_node_exist(bool summary_node_exist) { summary_node_exist_ = summary_node_exist; }
182 #endif
183   // check whether exist summary node in graph
summary_node_exist()184   bool summary_node_exist() const { return summary_node_exist_; }
185   // set invalid inputs for control sink
MutableValidInputs()186   std::vector<bool> *MutableValidInputs() { return &valid_inputs_; }
valid_inputs()187   std::vector<bool> valid_inputs() const { return valid_inputs_; }
188   // replace node in graph
189   void ReplaceNode(const AnfNodePtr &old_anf_node, const AnfNodePtr &new_anf_node);
190   // set stream label of graph
set_stream_distinction_label(uint32_t stream_label)191   void set_stream_distinction_label(uint32_t stream_label) { stream_distinction_label_ = stream_label; }
192   // get stream label of graph
stream_distinction_label()193   uint32_t stream_distinction_label() { return stream_distinction_label_; }
194   // refresh execute kernel stream label
195   void UpdateExecuteKernelStreamLabel();
196   // calculate the leaf graph order of root graph
197   std::vector<std::shared_ptr<KernelGraph>> GetLeafGraphOrder();
198   // the child graph of current graph
child_graph_order()199   const std::vector<std::weak_ptr<KernelGraph>> &child_graph_order() const { return child_graph_order_; }
set_child_graph_order(const std::vector<std::weak_ptr<KernelGraph>> & order)200   void set_child_graph_order(const std::vector<std::weak_ptr<KernelGraph>> &order) { child_graph_order_ = order; }
201   // check whether current graph is leaf graph
202   bool IsLeafGraph() const;
203 
204   // set input_tensors pointer of control parameter
set_input_ctrl_tensors(const std::shared_ptr<std::vector<tensor::TensorPtr>> & input_tensors_ptr)205   void set_input_ctrl_tensors(const std::shared_ptr<std::vector<tensor::TensorPtr>> &input_tensors_ptr) {
206     input_ctrl_tensors_ = input_tensors_ptr;
207   }
208   // get input_tensors pointer of control parameter
input_ctrl_tensors()209   std::shared_ptr<std::vector<tensor::TensorPtr>> input_ctrl_tensors() const { return input_ctrl_tensors_; }
210   // get parent kernel graph
parent_graph()211   std::weak_ptr<KernelGraph> parent_graph() const { return parent_graph_; }
212   // set parent kernel graph
set_parent_graph(const std::weak_ptr<KernelGraph> & parent_graph)213   void set_parent_graph(const std::weak_ptr<KernelGraph> &parent_graph) { parent_graph_ = parent_graph; }
214   // find anf node in graph
215   std::vector<CNodePtr> FindNodeByPrimitive(const PrimitivePtr &primitive) const;
216   std::vector<CNodePtr> FindNodeByPrimitive(const std::vector<PrimitivePtr> &primitive_list) const;
217   // used to dump ir
218   std::string ToString() const override;
219 
set_start_label(const CNodePtr & start_label)220   void set_start_label(const CNodePtr &start_label) { start_label_ = start_label; }
get_start_label()221   CNodePtr get_start_label() { return start_label_; }
set_end_goto(const CNodePtr & end_goto)222   void set_end_goto(const CNodePtr &end_goto) { end_goto_ = end_goto; }
get_end_goto()223   CNodePtr get_end_goto() { return end_goto_; }
224   void PrintGraphExecuteOrder() const;
summary_nodes()225   const std::map<std::string, std::pair<AnfNodePtr, int>> &summary_nodes() const { return summary_nodes_; }
set_summary_nodes(const std::map<std::string,std::pair<AnfNodePtr,int>> & nodes)226   void set_summary_nodes(const std::map<std::string, std::pair<AnfNodePtr, int>> &nodes) { summary_nodes_ = nodes; }
227   void AddInternalOutput(const AnfNodePtr &front_node, const AnfNodePtr &node, size_t output_idx, bool unique_target);
228   void ReplaceInternalOutput(const AnfNodePtr &node, const AnfNodePtr &new_node, size_t src_output_idx,
229                              size_t dst_output_idx);
230   void ReplaceInternalOutput(const AnfNodePtr &node, const AnfNodePtr &new_node);
231   AnfNodePtr GetInternalOutputByFrontNode(const AnfNodePtr &front_node) const;
232   bool IsInternalOutput(const AnfNodePtr &node, size_t output_idx) const;
233   bool IsInternalOutput(const AnfNodePtr &node) const;
234   bool IsUniqueTargetInternalOutput(const AnfNodePtr &node, size_t output_idx) const;
235   void AddInternalOutputTensor(const AnfNodePtr &node, size_t output_idx, const tensor::TensorPtr &tensor);
236   tensor::TensorPtr GetInternalOutputTensor(const AnfNodePtr &node, size_t output_idx);
237 
238   // Cache the internal parameter and corresponding to front node into internal_parameter_to_front_node_map_.
239   void CacheInternalParameterToFrontNode(const AnfNodePtr &parameter, const AnfWithOutIndex &front_node_with_index);
240   AnfWithOutIndex GetFrontNodeByInternalParameter(const AnfNodePtr &parameter) const;
241 
242   // Get the funcgraph to which the kernel graph belongs.
243   FuncGraphPtr GetFuncGraph();
244   // Cache the backend graph output nodes and corresponding to front nodes with output index into
245   // graph_output_to_front_node_map_.
246   void CacheGraphOutputToFrontNodeWithIndex(const AnfNodePtr &backend_graph_output, const AnfNodePtr &front_node);
247   AnfWithOutIndex GetFrontNodeWithIndexByGraphOutput(const AnfWithOutIndex &backend_graph_output_with_index) const;
248   // Update the related map of backend graph output nodes by modified backend output nodes.
249   void UpdateGraphOutputMap(const std::vector<AnfWithOutIndex> &old_outputs,
250                             const std::vector<AnfWithOutIndex> &new_outputs);
251 
current_epoch()252   uint32_t current_epoch() const { return current_epoch_; }
set_current_epoch(uint32_t epoch)253   void set_current_epoch(uint32_t epoch) { current_epoch_ = epoch; }
254   void UpdateChildGraphOrder();
child_graph_result()255   const std::vector<AnfNodePtr> &child_graph_result() const { return child_graph_result_; }
AddChildGraphResult(const AnfNodePtr & parameter)256   void AddChildGraphResult(const AnfNodePtr &parameter) { child_graph_result_.push_back(parameter); }
set_child_graph_result(const std::vector<AnfNodePtr> & child_graph_result)257   void set_child_graph_result(const std::vector<AnfNodePtr> &child_graph_result) {
258     child_graph_result_ = child_graph_result;
259   }
260 
InsertTupleParameterToMakeTupleMap(const AnfNodePtr & param,const AnfNodePtr & make_tuple)261   void InsertTupleParameterToMakeTupleMap(const AnfNodePtr &param, const AnfNodePtr &make_tuple) {
262     if (tuple_parameter_to_make_tuple_map_.find(param) != tuple_parameter_to_make_tuple_map_.end()) {
263       return;
264     }
265     tuple_parameter_to_make_tuple_map_[param] = make_tuple;
266   }
FindTupleParameterToMakeTupleMap(const AnfNodePtr & param)267   AnfNodePtr FindTupleParameterToMakeTupleMap(const AnfNodePtr &param) {
268     if (tuple_parameter_to_make_tuple_map_.find(param) != tuple_parameter_to_make_tuple_map_.end()) {
269       return tuple_parameter_to_make_tuple_map_[param];
270     } else {
271       return nullptr;
272     }
273   }
274   void RemoveNodeFromGraph(const AnfNodePtr &node);
275   void UpdateGraphDynamicAttr();
is_dynamic_shape()276   bool is_dynamic_shape() const { return is_dynamic_shape_; }
277   void SetOptimizerFlag();
278   void SetInputNodes();
input_nodes()279   const std::vector<AnfNodePtr> &input_nodes() const { return input_nodes_; }
SetInputTensors(const std::vector<tensor::TensorPtr> & input_tensors)280   void SetInputTensors(const std::vector<tensor::TensorPtr> &input_tensors) { input_tensors_ = input_tensors; }
input_tensors()281   const std::vector<tensor::TensorPtr> &input_tensors() const { return input_tensors_; }
282 
SetOutputNodeToTensor(const KernelMapTensor & node_to_tensor)283   void SetOutputNodeToTensor(const KernelMapTensor &node_to_tensor) { output_node_to_tensor_ = node_to_tensor; }
284 
GetNodeOutputTensor(const session::KernelWithIndex & output_index)285   tensor::TensorPtr GetNodeOutputTensor(const session::KernelWithIndex &output_index) const {
286     auto iter = output_node_to_tensor_.find(output_index);
287     if (iter != output_node_to_tensor_.end()) {
288       return utils::cast<tensor::TensorPtr>(iter->second);
289     }
290     return nullptr;
291   }
292 
has_optimizer()293   bool has_optimizer() const { return has_optimizer_; }
IsUpdatedParameter(const ParameterPtr & param)294   bool IsUpdatedParameter(const ParameterPtr &param) const {
295     if (updated_parameters_.find(param) != updated_parameters_.end()) {
296       return true;
297     }
298     return false;
299   }
300   // handle graph dependency
AddPreGraph(const std::shared_ptr<session::KernelGraph> & graph)301   void AddPreGraph(const std::shared_ptr<session::KernelGraph> &graph) {
302     if (graph != nullptr) {
303       pre_graphs_[graph->graph_id()] = graph;
304     }
305   }
AddPostGraph(const std::shared_ptr<session::KernelGraph> & graph)306   void AddPostGraph(const std::shared_ptr<session::KernelGraph> &graph) {
307     if (graph != nullptr) {
308       post_graphs_[graph->graph_id()] = graph;
309     }
310   }
311 
IsPreGraphFinished()312   bool IsPreGraphFinished() const { return pre_graphs_.size() == pre_graph_finished_count_; }
IsPostGraphFinished()313   bool IsPostGraphFinished() const {
314     if (first_step_) {
315       return true;
316     }
317     return post_graphs_.size() == post_graph_finished_count_;
318   }
319 
HasPostGraph()320   bool HasPostGraph() const { return !post_graphs_.empty(); }
321 
IncPreGraphFinishedCount()322   void IncPreGraphFinishedCount() { pre_graph_finished_count_++; }
IncPostGraphFinishedCount()323   void IncPostGraphFinishedCount() { post_graph_finished_count_++; }
ResetGraphRunningStatus()324   void ResetGraphRunningStatus() {
325     first_step_ = false;
326     post_graph_finished_count_ = 0;
327     pre_graph_finished_count_ = 0;
328   }
OnRunGraphFinished()329   void OnRunGraphFinished() {
330     for (auto post_graph : post_graphs_) {
331       auto post_graph_ptr = post_graph.second.lock();
332       if (post_graph_ptr != nullptr) {
333         post_graph_ptr->IncPreGraphFinishedCount();
334       }
335     }
336     for (auto pre_graph : pre_graphs_) {
337       auto pre_graph_ptr = pre_graph.second.lock();
338       if (pre_graph_ptr != nullptr) {
339         pre_graph_ptr->IncPostGraphFinishedCount();
340       }
341     }
342   }
343   // end of handle graph dependency
344 
345   // The interface of allreduce send/recv pairs map.
InsertFromSendRecvPair(const CNodePtr & allreduce,const std::pair<CNodePtr,CNodePtr> & send_recv_pair)346   void InsertFromSendRecvPair(const CNodePtr &allreduce, const std::pair<CNodePtr, CNodePtr> &send_recv_pair) {
347     allreduce_from_send_recv_pairs_[allreduce] = send_recv_pair;
348   }
InsertToSendRecvPair(const CNodePtr & allreduce,const std::pair<CNodePtr,CNodePtr> & send_recv_pair)349   void InsertToSendRecvPair(const CNodePtr &allreduce, const std::pair<CNodePtr, CNodePtr> &send_recv_pair) {
350     allreduce_to_send_recv_pairs_[allreduce] = send_recv_pair;
351   }
allreduce_from_send_recv_pairs()352   const std::unordered_map<CNodePtr, std::pair<CNodePtr, CNodePtr>> &allreduce_from_send_recv_pairs() const {
353     return allreduce_from_send_recv_pairs_;
354   }
allreduce_to_send_recv_pairs()355   const std::unordered_map<CNodePtr, std::pair<CNodePtr, CNodePtr>> &allreduce_to_send_recv_pairs() const {
356     return allreduce_to_send_recv_pairs_;
357   }
358 
label_num()359   uint32_t label_num() const { return label_num_; }
set_label_num(uint32_t num)360   void set_label_num(uint32_t num) { label_num_ = num; }
361   // The graphs has recursion.
recursive_call()362   bool recursive_call() const { return has_recursive_call_; }
363   // The graphs has subgraph multi-call.
subgraph_multi_call()364   bool subgraph_multi_call() const { return has_subgraph_multicall_; }
365   // set flag to indicate whether has recursion.
set_recursive_call(bool flag)366   void set_recursive_call(bool flag) { has_recursive_call_ = flag; }
367   // set flag to indicate whether has multi-call.
set_subgraph_multi_call(bool flag)368   void set_subgraph_multi_call(bool flag) { has_subgraph_multicall_ = flag; }
369 
is_all_nop_node()370   bool is_all_nop_node() const { return is_all_nop_node_; }
set_is_all_nop_node(bool is_all_nop_node)371   void set_is_all_nop_node(bool is_all_nop_node) { is_all_nop_node_ = is_all_nop_node; }
graph_output_map()372   std::map<AnfWithOutIndex, AnfWithOutIndex> graph_output_map() { return graph_output_to_front_node_map_; }
373 
374   // The interface to set/get the graph GIL flag.
set_is_need_gil(bool flag)375   void set_is_need_gil(bool flag) { is_need_gil_ = flag; }
is_need_gil()376   bool is_need_gil() { return is_need_gil_; }
377 
378   bool IsDatasetGraph() const;
379 
380  private:
381   // remove value node from graph
382   bool RemoveValueNodeFromGraph(const ValueNodePtr &value_node);
383   void SetKernelInfoForNode(const AnfNodePtr &node) const;
384   AnfNodePtr MakeValueNode(const AnfNodePtr &node) const;
385   void EnqueueActiveNodes(const AnfNodePtr &node, std::queue<AnfNodePtr> *visit_queue,
386                           std::unordered_set<AnfNodePtr> *visited_nodes, bool comm_first = true);
387   // update node edge list
388   void UpdateNodeEdgeList(std::queue<AnfNodePtr> *seed_nodes);
389   // add node depend edge by data edge
390   void AddDependEdge(const AnfNodePtr &node, const AnfNodePtr &input, size_t depend_edge_num);
391   std::vector<AnfNodePtr> GetOutputNodes(const AnfNodePtr &node);
392   AnfNodePtr TransValueNodeTuple(const AbstractBasePtr &abstract, const ValuePtr &value);
393   AnfNodePtr TransParameterTuple(const AbstractBasePtr &abstract);
394   AnfNodePtr TransCNodeTuple(const CNodePtr &node);
395   AnfNodePtr CreatTupleGetItemNode(const AnfNodePtr &node, size_t output_idx);
396   std::vector<CNodePtr> SortStartLabelAndEndGoto();
397   // check whether a loop exists in graph
398   void CheckLoop();
399   uint32_t GetLoopNum(const std::map<AnfNodePtr, size_t> &none_zero_nodes);
400   void GetLoopNodesByDFS(const AnfNodePtr &node, uint32_t *loop_num);
401 
402   // members
403   std::shared_ptr<std::vector<AnfNodePtr>> inputs_;
404   std::vector<AnfNodePtr> child_graph_result_;
405   std::vector<CNodePtr> execution_order_;
406   std::vector<CNodePtr> mem_reuse_exec_order_;
407   uint32_t graph_id_;
408   uint32_t stream_distinction_label_;
409   uint32_t root_graph_id_{0};
410 
411   // record the map between front anf and backend anf; two maps are used to implement a bidirectional map
412   std::unordered_map<AnfNodePtr, AnfNodePtr> front_backend_anf_map_;
413   std::unordered_map<AnfNodePtr, AnfNodePtr> backend_front_anf_map_;
414   // there may be a tensor from ME backend; a value node will be created according to the tensor, this map records it
415   std::unordered_map<tensor::TensorPtr, ValueNodePtr> tensor_to_value_node_map_;
416   // include all value nodes
417   std::unordered_set<ValueNodePtr> graph_value_nodes_;
418   std::unordered_map<AnfNodePtr, size_t> node_input_num_;
419   std::unordered_map<AnfNodePtr, std::vector<std::pair<AnfNodePtr, size_t>>> node_input_edges_;
420   // record map between ref final output anf with index and ref origin input with index
421   std::map<AnfWithOutIndex, AnfWithOutIndex> ref_out_in_map_;
422   std::unordered_map<AnfNodePtr, std::vector<std::pair<AnfNodePtr, size_t>>> node_output_edges_;
423   std::map<std::string, std::pair<AnfNodePtr, int>> summary_nodes_;
424   // parameters that will be updated when graph is executed
425   std::unordered_set<ParameterPtr> updated_parameters_;
426   // whether graph is executable (when false, the graph needn't execute)
427   bool executable_{false};
428   // exist summary node in graph
429   bool summary_node_exist_{false};
430   // valid inputs
431   std::vector<bool> valid_inputs_;
432 
433   // child graph execute order in parent graph
434   std::vector<std::weak_ptr<KernelGraph>> child_graph_order_;
435 
436   // input_tensors of control parameter
437   std::shared_ptr<std::vector<tensor::TensorPtr>> input_ctrl_tensors_;
438 
439   // parent graph
440   std::weak_ptr<KernelGraph> parent_graph_;
441 
442   CNodePtr start_label_;
443   CNodePtr end_goto_;
444 
445   // Internal parameter is not the origin parameter of func graph, it is the output of previous kernel graph which is
446   // related to the input of this kernel graph. The first of unordered map is the input of this kernel graph, the second
447   // of unordered map is front node corresponding to the output of previous kernel graph.
448   std::unordered_map<AnfNodePtr, AnfWithOutIndex> internal_parameter_to_front_node_map_;
449   // The first of map is the backend graph output of this kernel graph, the second of map is front node corresponding to
450   // the backend node with index.
451   std::map<AnfWithOutIndex, AnfWithOutIndex> graph_output_to_front_node_map_;
452 
453   std::unordered_map<AnfNodePtr, AnfNodePtr> front_to_internal_outputs_map_;
454   std::unordered_map<AnfNodePtr, std::unordered_map<size_t, std::pair<AnfNodePtr, bool>>>
455     internal_outputs_to_front_map_;
456   std::unordered_map<AnfNodePtr, std::unordered_map<size_t, tensor::TensorPtr>> internal_outputs_tensor_map_;
457   uint32_t current_epoch_;
458   std::unordered_map<AnfNodePtr, AnfNodePtr> tuple_parameter_to_make_tuple_map_;
459   std::set<AnfNodePtr> visited_nodes_;
460   std::map<AnfNodePtr, AnfNodePtr> edge_to_;
461   std::stack<AnfNodePtr> loop_nodes_;
462   std::vector<AnfNodePtr> input_nodes_;
463   std::vector<tensor::TensorPtr> input_tensors_;
464   KernelMapTensor output_node_to_tensor_;
465   std::unordered_map<uint32_t, std::weak_ptr<session::KernelGraph>> pre_graphs_;
466   std::unordered_map<uint32_t, std::weak_ptr<session::KernelGraph>> post_graphs_;
467   // The send/recv pairs inserted for allreduce, the key is allreduce kernel, the first of pair is send node, the second
468   // of pair is recv node.
469   std::unordered_map<CNodePtr, std::pair<CNodePtr, CNodePtr>> allreduce_from_send_recv_pairs_;
470   std::unordered_map<CNodePtr, std::pair<CNodePtr, CNodePtr>> allreduce_to_send_recv_pairs_;
471   std::atomic<size_t> pre_graph_finished_count_{0};
472   std::atomic<size_t> post_graph_finished_count_{0};
473   bool first_step_{true};
474   bool has_optimizer_{false};
475   bool is_dynamic_shape_{false};
476 
477   // Indicate the graphs has recursion or multi-call or not as the root graph.
478   bool has_recursive_call_{false};
479   bool has_subgraph_multicall_{false};
480 
481   // Number of labels. This is also the 'batch_num' for DavinciModel,
482   // It should be 1 if no labels used for control flow.
483   uint32_t label_num_ = 1;
484 
485   // If all the nodes of graph is the nop node.
486   bool is_all_nop_node_{false};
487 
488   // Indicate whether the kernels in the graphs acquire Python GIL.
489   bool is_need_gil_{false};
490 };
491 }  // namespace session
492 using KernelGraphPtr = std::shared_ptr<session::KernelGraph>;
493 }  // namespace mindspore
494 #endif  // MINDSPORE_CCSRC_BACKEND_SESSION_KERNEL_GRAPH_H
495