/**
 * Copyright 2019-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_BACKEND_SESSION_KERNEL_GRAPH_H
#define MINDSPORE_CCSRC_BACKEND_SESSION_KERNEL_GRAPH_H

#include <vector>
#include <memory>
#include <utility>
#include <string>
#include <queue>
#include <map>
#include <unordered_map>
#include <set>
#include <unordered_set>
#include <stack>
#include <atomic>
#include "ir/func_graph.h"
#include "ir/anf.h"
#include "ir/graph_utils.h"
#include "utils/contract.h"
#include "runtime/device/kernel_info.h"

namespace mindspore {
namespace session {
using AnfWithOutIndex = std::pair<AnfNodePtr, size_t>;
using KernelWithIndex = std::pair<AnfNodePtr, size_t>;
struct KernelWithIndexCmp {
  bool operator()(const KernelWithIndex &key1, const KernelWithIndex &key2) const {
    if (key1.first != key2.first) {
      return key1.first < key2.first;
    }
    if (key1.second != key2.second) {
      return key1.second < key2.second;
    }
    return false;
  }
};

using KernelMapTensor = std::map<session::KernelWithIndex, BaseRef, session::KernelWithIndexCmp>;
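
// Illustrative sketch (not part of the interface): KernelWithIndexCmp orders keys by the node
// pointer first and the output index second, so a KernelMapTensor can hold one entry per kernel
// output. Assuming `node` is a valid AnfNodePtr and `tensor` a tensor::TensorPtr, usage could
// look roughly like:
//   KernelMapTensor output_map;
//   output_map[{node, 0}] = tensor;          // cache output 0 of `node`
//   auto iter = output_map.find({node, 0});  // iter->second holds the cached BaseRef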

class KernelGraph : public FuncGraph {
 public:
  KernelGraph() : graph_id_(0), start_label_(nullptr), end_goto_(nullptr), current_epoch_(0), is_dynamic_shape_(false) {
    inputs_ = std::make_shared<std::vector<AnfNodePtr>>();
    execution_order_ = {};
    mem_reuse_exec_order_ = {};
    executable_ = true;
    summary_node_exist_ = false;
    stream_distinction_label_ = kInvalidDistincLabel;
  }

  KernelGraph(const KernelGraph &graph) : FuncGraph(graph) {
    inputs_ = graph.inputs_;
    child_graph_result_ = graph.child_graph_result_;
    execution_order_ = graph.execution_order_;
    mem_reuse_exec_order_ = graph.mem_reuse_exec_order_;
    graph_id_ = graph.graph_id_;
    stream_distinction_label_ = graph.stream_distinction_label_;
    front_backend_anf_map_ = graph.front_backend_anf_map_;
    backend_front_anf_map_ = graph.backend_front_anf_map_;
    tensor_to_value_node_map_ = graph.tensor_to_value_node_map_;
    graph_value_nodes_ = graph.graph_value_nodes_;
    node_input_num_ = graph.node_input_num_;
    node_input_edges_ = graph.node_input_edges_;
    ref_out_in_map_ = graph.ref_out_in_map_;
    node_output_edges_ = graph.node_output_edges_;
    summary_nodes_ = graph.summary_nodes_;
    updated_parameters_ = graph.updated_parameters_;
    executable_ = graph.executable_;
    summary_node_exist_ = graph.summary_node_exist_;
    valid_inputs_ = graph.valid_inputs_;
    child_graph_order_ = graph.child_graph_order_;
    input_ctrl_tensors_ = graph.input_ctrl_tensors_;
    parent_graph_ = graph.parent_graph_;
    start_label_ = graph.start_label_;
    end_goto_ = graph.end_goto_;
    internal_parameter_to_front_node_map_ = graph.internal_parameter_to_front_node_map_;
    graph_output_to_front_node_map_ = graph.graph_output_to_front_node_map_;
    front_to_internal_outputs_map_ = graph.front_to_internal_outputs_map_;
    internal_outputs_to_front_map_ = graph.internal_outputs_to_front_map_;
    internal_outputs_tensor_map_ = graph.internal_outputs_tensor_map_;
    current_epoch_ = graph.current_epoch_;
    tuple_parameter_to_make_tuple_map_ = graph.tuple_parameter_to_make_tuple_map_;
    visited_nodes_ = graph.visited_nodes_;
    edge_to_ = graph.edge_to_;
    loop_nodes_ = graph.loop_nodes_;
    input_nodes_ = graph.input_nodes_;
    pre_graphs_ = graph.pre_graphs_;
    post_graphs_ = graph.post_graphs_;
    allreduce_from_send_recv_pairs_ = graph.allreduce_from_send_recv_pairs_;
    allreduce_to_send_recv_pairs_ = graph.allreduce_to_send_recv_pairs_;
    size_t pre_graph_finished_count = graph.pre_graph_finished_count_;
    pre_graph_finished_count_ = pre_graph_finished_count;
    size_t post_graph_finished_count = graph.post_graph_finished_count_;
    post_graph_finished_count_ = post_graph_finished_count;
    first_step_ = graph.first_step_;
    has_optimizer_ = graph.has_optimizer_;
    is_dynamic_shape_ = graph.is_dynamic_shape_;
  }

  ~KernelGraph() override;

  MS_DECLARE_PARENT(KernelGraph, FuncGraph);

  const std::vector<AnfNodePtr> &inputs() const;
  std::vector<AnfNodePtr> *MutableInputs() const { return inputs_.get(); }
  void SetGraphInputs(const std::vector<AnfNodePtr> &inputs) {
    inputs_ = std::make_shared<std::vector<AnfNodePtr>>(inputs);
  }
  void ReplaceGraphInput(const AnfNodePtr &old_parameter, const AnfNodePtr &new_parameter);
  std::vector<AnfNodePtr> outputs() const;
  CNodePtr NewCNode(const std::vector<AnfNodePtr> &inputs) override;
  CNodePtr NewCNodeWithInfos(const std::vector<AnfNodePtr> &inputs, const CNodePtr &ori_cnode = nullptr);
  void CreateKernelInfoFromNewParameter(const CNodePtr &cnode);
  CNodePtr NewCNode(const CNodePtr &cnode);
  void ResetAssignInputFeatureMapFlag(const CNodePtr &cnode) const;
  ParameterPtr NewParameter(const ParameterPtr &parameter = nullptr);
  ParameterPtr NewParameter(const abstract::AbstractBasePtr &abstract);
  ValueNodePtr NewValueNode(const AbstractBasePtr &abstract, const ValuePtr &value);
  ValueNodePtr NewValueNode(const ValueNodePtr &value_node = nullptr);
  ValueNodePtr NewValueNode(const tensor::TensorPtr &input_tensor);
  // trans tuple output to maketuple + no_tuple out
  AnfNodePtr TransTupleToMakeTuple(const AnfNodePtr &node);
  void set_execution_order(const std::vector<CNodePtr> &order) { execution_order_ = order; }
  void set_execution_order(std::vector<CNodePtr> &&order) { execution_order_ = std::move(order); }
  const std::vector<CNodePtr> &execution_order() const { return execution_order_; }
  // Set new exec_order for mem_reuse
  void set_mem_reuse_exec_order(const std::vector<CNodePtr> &order) { mem_reuse_exec_order_ = order; }
  const std::vector<CNodePtr> &mem_reuse_exec_order() const { return mem_reuse_exec_order_; }
  void SetExecOrderByDefault();
  uint32_t graph_id() const { return graph_id_; }
  void set_graph_id(uint32_t graph_id) { graph_id_ = graph_id; }
  uint32_t root_graph_id() const { return root_graph_id_; }
  void set_root_graph_id(uint32_t root_graph_id) { root_graph_id_ = root_graph_id; }
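
  // Illustrative sketch (assumption, not part of the interface): the front/backend ANF maps below
  // form a bidirectional relation between front-end nodes and the backend nodes built for them.
  // Assuming `front` and `backend` are valid AnfNodePtr, a typical round trip looks roughly like:
  //   graph->FrontBackendlMapAdd(front, backend);
  //   auto b = graph->GetBackendAnfByFrontAnf(front);    // expected: b == backend
  //   auto f = graph->GetFrontAnfByBackendAnf(backend);  // expected: f == front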
  // add a new front-to-backend anf relation to the map
  void FrontBackendlMapAdd(const AnfNodePtr &front_anf, const AnfNodePtr &backend_anf);
  // replace old backend anf with new backend anf
  void FrontBackendlMapUpdate(const AnfNodePtr &old_backend_anf, const AnfNodePtr &new_backend_anf);
  // get backend anf by front anf
  AnfNodePtr GetBackendAnfByFrontAnf(const AnfNodePtr &front_anf);
  // get front anf by backend anf
  AnfNodePtr GetFrontAnfByBackendAnf(const AnfNodePtr &backend_anf);
  // check whether backend node exists in the front-backend map
  bool BackendNodeExistInFrontBackendMap(const AnfNodePtr &backend_anf);
  // get value node by tensor
  ValueNodePtr GetValueNodeByTensor(const tensor::TensorPtr &tensor);
  // add tensor to value node relation to the map
  void TensorValueNodeMapAdd(const tensor::TensorPtr &tensor, const ValueNodePtr &value_node);
  // get all value nodes of graph
  const std::unordered_set<ValueNodePtr> graph_value_nodes() const { return graph_value_nodes_; }
  // add value node to graph
  void AddValueNodeToGraph(const ValueNodePtr &value_node);
  // check whether ref output is in the map
  bool IsInRefOutputMap(const AnfWithOutIndex &pair) const;
  // get the corresponding ref pair
  AnfWithOutIndex GetRefCorrespondOutput(const AnfWithOutIndex &out_pair) const;
  // add ref correspond pairs
  void AddRefCorrespondPairs(const AnfWithOutIndex &final_pair, const AnfWithOutIndex &origin_pair);
  // get ref map
  std::map<AnfWithOutIndex, AnfWithOutIndex> GetRefMap() const { return ref_out_in_map_; }
  // check whether graph is executable
  bool executable() const { return executable_; }
  // set executable of graph
  void set_executable(bool executable) { executable_ = executable; }
#ifndef ENABLE_SECURITY
  // set summary_node of graph
  void set_summary_node_exist(bool summary_node_exist) { summary_node_exist_ = summary_node_exist; }
#endif
  // check whether summary node exists in graph
  bool summary_node_exist() const { return summary_node_exist_; }
  // set invalid inputs for control sink
  std::vector<bool> *MutableValidInputs() { return &valid_inputs_; }
  std::vector<bool> valid_inputs() const { return valid_inputs_; }
  // replace node in graph
  void ReplaceNode(const AnfNodePtr &old_anf_node, const AnfNodePtr &new_anf_node);
  // set stream label of graph
  void set_stream_distinction_label(uint32_t stream_label) { stream_distinction_label_ = stream_label; }
  // get stream label of graph
  uint32_t stream_distinction_label() { return stream_distinction_label_; }
  // refresh execute kernel stream label
  void UpdateExecuteKernelStreamLabel();
  // calculate the leaf graph order of root graph
  std::vector<std::shared_ptr<KernelGraph>> GetLeafGraphOrder();
  // the child graphs of current graph
  const std::vector<std::weak_ptr<KernelGraph>> &child_graph_order() const { return child_graph_order_; }
  void set_child_graph_order(const std::vector<std::weak_ptr<KernelGraph>> &order) { child_graph_order_ = order; }
  // check whether current graph is a leaf graph
  bool IsLeafGraph() const;

  // set input_tensors pointer of control parameter
  void set_input_ctrl_tensors(const std::shared_ptr<std::vector<tensor::TensorPtr>> &input_tensors_ptr) {
    input_ctrl_tensors_ = input_tensors_ptr;
  }
  // get input_tensors pointer of control parameter
  std::shared_ptr<std::vector<tensor::TensorPtr>> input_ctrl_tensors() const { return input_ctrl_tensors_; }
  // get parent kernel graph
  std::weak_ptr<KernelGraph> parent_graph() const { return parent_graph_; }
  // set parent kernel graph
  void set_parent_graph(const std::weak_ptr<KernelGraph> &parent_graph) { parent_graph_ = parent_graph; }
  // find anf node in graph
  std::vector<CNodePtr> FindNodeByPrimitive(const PrimitivePtr &primitive) const;
  std::vector<CNodePtr> FindNodeByPrimitive(const std::vector<PrimitivePtr> &primitive_list) const;
  // used to dump ir
  std::string ToString() const override;

  void set_start_label(const CNodePtr &start_label) { start_label_ = start_label; }
  CNodePtr get_start_label() { return start_label_; }
  void set_end_goto(const CNodePtr &end_goto) { end_goto_ = end_goto; }
  CNodePtr get_end_goto() { return end_goto_; }
  void PrintGraphExecuteOrder() const;
  const std::map<std::string, std::pair<AnfNodePtr, int>> &summary_nodes() const { return summary_nodes_; }
  void set_summary_nodes(const std::map<std::string, std::pair<AnfNodePtr, int>> &nodes) { summary_nodes_ = nodes; }
  void AddInternalOutput(const AnfNodePtr &front_node, const AnfNodePtr &node, size_t output_idx, bool unique_target);
  void ReplaceInternalOutput(const AnfNodePtr &node, const AnfNodePtr &new_node, size_t src_output_idx,
                             size_t dst_output_idx);
  void ReplaceInternalOutput(const AnfNodePtr &node, const AnfNodePtr &new_node);
  AnfNodePtr GetInternalOutputByFrontNode(const AnfNodePtr &front_node) const;
  bool IsInternalOutput(const AnfNodePtr &node, size_t output_idx) const;
  bool IsInternalOutput(const AnfNodePtr &node) const;
  bool IsUniqueTargetInternalOutput(const AnfNodePtr &node, size_t output_idx) const;
  void AddInternalOutputTensor(const AnfNodePtr &node, size_t output_idx, const tensor::TensorPtr &tensor);
  tensor::TensorPtr GetInternalOutputTensor(const AnfNodePtr &node, size_t output_idx);
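
  // Illustrative note (an assumption based on the signatures above, not a definitive description):
  // an internal output is a backend kernel output registered against a front node, so later passes
  // can query it or attach a cached tensor to a specific (node, output_idx). A rough sketch,
  // assuming `front_node`, `node` and `tensor` are valid:
  //   graph->AddInternalOutput(front_node, node, 0, /*unique_target=*/true);
  //   if (graph->IsInternalOutput(node, 0)) {
  //     graph->AddInternalOutputTensor(node, 0, tensor);
  //     auto cached = graph->GetInternalOutputTensor(node, 0);  // expected: cached == tensor
  //   }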

  // Cache the internal parameter and the corresponding front node into internal_parameter_to_front_node_map_.
  void CacheInternalParameterToFrontNode(const AnfNodePtr &parameter, const AnfWithOutIndex &front_node_with_index);
  AnfWithOutIndex GetFrontNodeByInternalParameter(const AnfNodePtr &parameter) const;

  // Get the funcgraph to which the kernel graph belongs.
  FuncGraphPtr GetFuncGraph();
  // Cache the backend graph output nodes and the corresponding front nodes with output index into
  // graph_output_to_front_node_map_.
  void CacheGraphOutputToFrontNodeWithIndex(const AnfNodePtr &backend_graph_output, const AnfNodePtr &front_node);
  AnfWithOutIndex GetFrontNodeWithIndexByGraphOutput(const AnfWithOutIndex &backend_graph_output_with_index) const;
  // Update the related map of backend graph output nodes by modified backend output nodes.
  void UpdateGraphOutputMap(const std::vector<AnfWithOutIndex> &old_outputs,
                            const std::vector<AnfWithOutIndex> &new_outputs);

  uint32_t current_epoch() const { return current_epoch_; }
  void set_current_epoch(uint32_t epoch) { current_epoch_ = epoch; }
  void UpdateChildGraphOrder();
  const std::vector<AnfNodePtr> &child_graph_result() const { return child_graph_result_; }
  void AddChildGraphResult(const AnfNodePtr &parameter) { child_graph_result_.push_back(parameter); }
  void set_child_graph_result(const std::vector<AnfNodePtr> &child_graph_result) {
    child_graph_result_ = child_graph_result;
  }

  void InsertTupleParameterToMakeTupleMap(const AnfNodePtr &param, const AnfNodePtr &make_tuple) {
    if (tuple_parameter_to_make_tuple_map_.find(param) != tuple_parameter_to_make_tuple_map_.end()) {
      return;
    }
    tuple_parameter_to_make_tuple_map_[param] = make_tuple;
  }
  AnfNodePtr FindTupleParameterToMakeTupleMap(const AnfNodePtr &param) {
    if (tuple_parameter_to_make_tuple_map_.find(param) != tuple_parameter_to_make_tuple_map_.end()) {
      return tuple_parameter_to_make_tuple_map_[param];
    } else {
      return nullptr;
    }
  }
  void RemoveNodeFromGraph(const AnfNodePtr &node);
  void UpdateGraphDynamicAttr();
  bool is_dynamic_shape() const { return is_dynamic_shape_; }
  void SetOptimizerFlag();
  void SetInputNodes();
  const std::vector<AnfNodePtr> &input_nodes() const { return input_nodes_; }
  void SetInputTensors(const std::vector<tensor::TensorPtr> &input_tensors) { input_tensors_ = input_tensors; }
  const std::vector<tensor::TensorPtr> &input_tensors() const { return input_tensors_; }

  void SetOutputNodeToTensor(const KernelMapTensor &node_to_tensor) { output_node_to_tensor_ = node_to_tensor; }

  tensor::TensorPtr GetNodeOutputTensor(const session::KernelWithIndex &output_index) const {
    auto iter = output_node_to_tensor_.find(output_index);
    if (iter != output_node_to_tensor_.end()) {
      return utils::cast<tensor::TensorPtr>(iter->second);
    }
    return nullptr;
  }

  bool has_optimizer() const { return has_optimizer_; }
  bool IsUpdatedParameter(const ParameterPtr &param) const {
    if (updated_parameters_.find(param) != updated_parameters_.end()) {
      return true;
    }
    return false;
  }
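
  // Illustrative sketch of the dependency handshake implemented below (assuming every
  // predecessor/successor graph has been registered via AddPreGraph/AddPostGraph): when a graph
  // finishes one run, OnRunGraphFinished() bumps the pre-counter of each successor and the
  // post-counter of each predecessor, and a graph is considered ready again once
  // IsPreGraphFinished()/IsPostGraphFinished() report that the counters have caught up.
  //   pre_graph->AddPostGraph(this_graph);  // register the dependency edge in both directions
  //   this_graph->AddPreGraph(pre_graph);
  //   pre_graph->OnRunGraphFinished();      // this_graph->IsPreGraphFinished() may now be true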
  // handle graph dependency
  void AddPreGraph(const std::shared_ptr<session::KernelGraph> &graph) {
    if (graph != nullptr) {
      pre_graphs_[graph->graph_id()] = graph;
    }
  }
  void AddPostGraph(const std::shared_ptr<session::KernelGraph> &graph) {
    if (graph != nullptr) {
      post_graphs_[graph->graph_id()] = graph;
    }
  }

  bool IsPreGraphFinished() const { return pre_graphs_.size() == pre_graph_finished_count_; }
  bool IsPostGraphFinished() const {
    if (first_step_) {
      return true;
    }
    return post_graphs_.size() == post_graph_finished_count_;
  }

  bool HasPostGraph() const { return !post_graphs_.empty(); }

  void IncPreGraphFinishedCount() { pre_graph_finished_count_++; }
  void IncPostGraphFinishedCount() { post_graph_finished_count_++; }
  void ResetGraphRunningStatus() {
    first_step_ = false;
    post_graph_finished_count_ = 0;
    pre_graph_finished_count_ = 0;
  }
  void OnRunGraphFinished() {
    for (auto post_graph : post_graphs_) {
      auto post_graph_ptr = post_graph.second.lock();
      if (post_graph_ptr != nullptr) {
        post_graph_ptr->IncPreGraphFinishedCount();
      }
    }
    for (auto pre_graph : pre_graphs_) {
      auto pre_graph_ptr = pre_graph.second.lock();
      if (pre_graph_ptr != nullptr) {
        pre_graph_ptr->IncPostGraphFinishedCount();
      }
    }
  }
  // end of handle graph dependency

  // The interface of allreduce send/recv pairs map.
  void InsertFromSendRecvPair(const CNodePtr &allreduce, const std::pair<CNodePtr, CNodePtr> &send_recv_pair) {
    allreduce_from_send_recv_pairs_[allreduce] = send_recv_pair;
  }
  void InsertToSendRecvPair(const CNodePtr &allreduce, const std::pair<CNodePtr, CNodePtr> &send_recv_pair) {
    allreduce_to_send_recv_pairs_[allreduce] = send_recv_pair;
  }
  const std::unordered_map<CNodePtr, std::pair<CNodePtr, CNodePtr>> &allreduce_from_send_recv_pairs() const {
    return allreduce_from_send_recv_pairs_;
  }
  const std::unordered_map<CNodePtr, std::pair<CNodePtr, CNodePtr>> &allreduce_to_send_recv_pairs() const {
    return allreduce_to_send_recv_pairs_;
  }
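
  // Illustrative sketch (an assumption mirroring the member comment on the maps declared near the
  // bottom of this class): the key is the allreduce kernel, the pair holds the inserted send node
  // first and the recv node second. Assuming `allreduce`, `send` and `recv` are valid CNodePtr:
  //   graph->InsertFromSendRecvPair(allreduce, {send, recv});
  //   const auto &pairs = graph->allreduce_from_send_recv_pairs();
  //   // expected: pairs.at(allreduce).first == send, pairs.at(allreduce).second == recv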

  uint32_t label_num() const { return label_num_; }
  void set_label_num(uint32_t num) { label_num_ = num; }
  // Whether the graph has recursion.
  bool recursive_call() const { return has_recursive_call_; }
  // Whether the graph has subgraph multi-call.
  bool subgraph_multi_call() const { return has_subgraph_multicall_; }
  // set flag to indicate whether the graph has recursion.
  void set_recursive_call(bool flag) { has_recursive_call_ = flag; }
  // set flag to indicate whether the graph has multi-call.
  void set_subgraph_multi_call(bool flag) { has_subgraph_multicall_ = flag; }

  bool is_all_nop_node() const { return is_all_nop_node_; }
  void set_is_all_nop_node(bool is_all_nop_node) { is_all_nop_node_ = is_all_nop_node; }
  std::map<AnfWithOutIndex, AnfWithOutIndex> graph_output_map() { return graph_output_to_front_node_map_; }

  // The interface to set/get the graph GIL flag.
  void set_is_need_gil(bool flag) { is_need_gil_ = flag; }
  bool is_need_gil() { return is_need_gil_; }

  bool IsDatasetGraph() const;

 private:
  // remove value node from graph
  bool RemoveValueNodeFromGraph(const ValueNodePtr &value_node);
  void SetKernelInfoForNode(const AnfNodePtr &node) const;
  AnfNodePtr MakeValueNode(const AnfNodePtr &node) const;
  void EnqueueActiveNodes(const AnfNodePtr &node, std::queue<AnfNodePtr> *visit_queue,
                          std::unordered_set<AnfNodePtr> *visited_nodes, bool comm_first = true);
  // update node edge list
  void UpdateNodeEdgeList(std::queue<AnfNodePtr> *seed_nodes);
  // add node depend edge by data edge
  void AddDependEdge(const AnfNodePtr &node, const AnfNodePtr &input, size_t depend_edge_num);
  std::vector<AnfNodePtr> GetOutputNodes(const AnfNodePtr &node);
  AnfNodePtr TransValueNodeTuple(const AbstractBasePtr &abstract, const ValuePtr &value);
  AnfNodePtr TransParameterTuple(const AbstractBasePtr &abstract);
  AnfNodePtr TransCNodeTuple(const CNodePtr &node);
  AnfNodePtr CreatTupleGetItemNode(const AnfNodePtr &node, size_t output_idx);
  std::vector<CNodePtr> SortStartLabelAndEndGoto();
  // check whether a loop exists in the graph
  void CheckLoop();
  uint32_t GetLoopNum(const std::map<AnfNodePtr, size_t> &none_zero_nodes);
  void GetLoopNodesByDFS(const AnfNodePtr &node, uint32_t *loop_num);

  // members
  std::shared_ptr<std::vector<AnfNodePtr>> inputs_;
  std::vector<AnfNodePtr> child_graph_result_;
  std::vector<CNodePtr> execution_order_;
  std::vector<CNodePtr> mem_reuse_exec_order_;
  uint32_t graph_id_;
  uint32_t stream_distinction_label_;
  uint32_t root_graph_id_{0};

  // record the map between front anf and backend anf; use two maps to implement a bidirectional map
  std::unordered_map<AnfNodePtr, AnfNodePtr> front_backend_anf_map_;
  std::unordered_map<AnfNodePtr, AnfNodePtr> backend_front_anf_map_;
  // there may be a tensor from the ME backend; a value node will be created according to the tensor, and this map
  // records the relation
  std::unordered_map<tensor::TensorPtr, ValueNodePtr> tensor_to_value_node_map_;
  // include all value nodes
  std::unordered_set<ValueNodePtr> graph_value_nodes_;
  std::unordered_map<AnfNodePtr, size_t> node_input_num_;
  std::unordered_map<AnfNodePtr, std::vector<std::pair<AnfNodePtr, size_t>>> node_input_edges_;
  // record the map between ref final output anf with index and ref origin input with index
  std::map<AnfWithOutIndex, AnfWithOutIndex> ref_out_in_map_;
  std::unordered_map<AnfNodePtr, std::vector<std::pair<AnfNodePtr, size_t>>> node_output_edges_;
  std::map<std::string, std::pair<AnfNodePtr, int>> summary_nodes_;
  // parameters that will be updated when the graph is executed
  std::unordered_set<ParameterPtr> updated_parameters_;
  // whether the graph is executable
  bool executable_{false};
  // whether a summary node exists in the graph
  bool summary_node_exist_{false};
  // valid inputs
  std::vector<bool> valid_inputs_;

  // child graph execution order in the parent graph
  std::vector<std::weak_ptr<KernelGraph>> child_graph_order_;

  // input_tensors of control parameter
  std::shared_ptr<std::vector<tensor::TensorPtr>> input_ctrl_tensors_;

  // parent graph
  std::weak_ptr<KernelGraph> parent_graph_;

  CNodePtr start_label_;
  CNodePtr end_goto_;

  // An internal parameter is not an original parameter of the func graph; it is the output of the previous kernel
  // graph that is related to an input of this kernel graph. The key of the unordered map is the input of this kernel
  // graph; the value is the front node corresponding to the output of the previous kernel graph.
  std::unordered_map<AnfNodePtr, AnfWithOutIndex> internal_parameter_to_front_node_map_;
  // The key of the map is the backend graph output of this kernel graph; the value is the front node corresponding
  // to the backend node with index.
  std::map<AnfWithOutIndex, AnfWithOutIndex> graph_output_to_front_node_map_;

  std::unordered_map<AnfNodePtr, AnfNodePtr> front_to_internal_outputs_map_;
  std::unordered_map<AnfNodePtr, std::unordered_map<size_t, std::pair<AnfNodePtr, bool>>>
    internal_outputs_to_front_map_;
  std::unordered_map<AnfNodePtr, std::unordered_map<size_t, tensor::TensorPtr>> internal_outputs_tensor_map_;
  uint32_t current_epoch_;
  std::unordered_map<AnfNodePtr, AnfNodePtr> tuple_parameter_to_make_tuple_map_;
  std::set<AnfNodePtr> visited_nodes_;
  std::map<AnfNodePtr, AnfNodePtr> edge_to_;
  std::stack<AnfNodePtr> loop_nodes_;
  std::vector<AnfNodePtr> input_nodes_;
  std::vector<tensor::TensorPtr> input_tensors_;
  KernelMapTensor output_node_to_tensor_;
  std::unordered_map<uint32_t, std::weak_ptr<session::KernelGraph>> pre_graphs_;
  std::unordered_map<uint32_t, std::weak_ptr<session::KernelGraph>> post_graphs_;
  // The send/recv pairs inserted for allreduce; the key is the allreduce kernel, the first of the pair is the send
  // node, the second is the recv node.
  std::unordered_map<CNodePtr, std::pair<CNodePtr, CNodePtr>> allreduce_from_send_recv_pairs_;
  std::unordered_map<CNodePtr, std::pair<CNodePtr, CNodePtr>> allreduce_to_send_recv_pairs_;
  std::atomic<size_t> pre_graph_finished_count_{0};
  std::atomic<size_t> post_graph_finished_count_{0};
  bool first_step_{true};
  bool has_optimizer_{false};
  bool is_dynamic_shape_{false};

  // Indicate whether the graph, as a root graph, has recursion or multi-call.
  bool has_recursive_call_{false};
  bool has_subgraph_multicall_{false};

  // Number of labels. This is also the 'batch_num' for DavinciModel; it should be 1 if no labels are used for
  // control flow.
  uint32_t label_num_ = 1;

  // Whether all nodes of the graph are nop nodes.
  bool is_all_nop_node_{false};

  // Indicate whether the kernels in the graph acquire the Python GIL.
  bool is_need_gil_{false};
};
}  // namespace session
using KernelGraphPtr = std::shared_ptr<session::KernelGraph>;
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_BACKEND_SESSION_KERNEL_GRAPH_H