/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_GRAPH_COMPILER_H_
#define MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_GRAPH_COMPILER_H_

#include <vector>
#include <memory>
#include <string>
#include <utility>
#include <map>
#include <set>
#include "utils/hash_map.h"
#include "runtime/hardware/device_context.h"
#include "runtime/graph_scheduler/actor/actor_common.h"
#include "runtime/graph_scheduler/control_node_parser.h"
#include "backend/common/session/session_basic.h"
#include "backend/common/session/session_factory.h"
#include "ir/tensor.h"
#include "include/backend/visible.h"
#include "kernel/framework_utils.h"

namespace mindspore {
using device::DeviceContext;
using session::BackendOpRunInfo;
using session::CallBackFunc;
using session::GraphOutputInfo;
using session::InputInfo;
using session::KernelGraph;
using session::KernelWithIndex;
using tensor::TensorPtr;

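// Model, event, and stage names used to tag the phases of device initialization, graph
// compilation, and graph execution (e.g. for runtime profiling and reporting).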
const char kModelNameRuntime[] = "Runtime";
const char kEventDeviceInit[] = "DeviceInit";
const char kEventCompileGraph[] = "CompileGraph";
const char kEventRunGraph[] = "RunGraph";
const char kStageDeviceInit[] = "DeviceInit";
const char kStageCompileGraphs[] = "CompileGraphs";
const char kStageGraphPartition[] = "GraphPartition";
const char kStageConstructKernelGraph[] = "ConstructKernelGraph";
const char kStageOptimizeGraph[] = "OptimizeGraph";
const char kStageCreateKernel[] = "CreateKernel";
const char kStageGraphTransform[] = "GraphTransform";
const char kStageBuild[] = "Build";
const char kStageLink[] = "Link";
const char kStageOptimize[] = "Optimize";
const char kStageRunGraph[] = "RunGraph";
const char kStageGetInputs[] = "GetInputs";
const char kStageRun[] = "Run";
const char kStageConstructOutputs[] = "ConstructOutputs";
namespace runtime {
// Output positions of a kernel with index: the value vector<pos> records the positions of the
// kernel's output in the graph outputs. Generally there is only one branch, in which case the
// branch id is 0. In control flow there are multiple-branch scenarios, and pos represents the
// position of the kernel within its branch.
using KernelMapPosition = std::map<KernelWithIndex, std::vector<size_t>, session::KernelWithIndexCmp>;
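// Example entry (hypothetical node): if the 0th output of `add_node` appears at positions
// 0 and 2 of the graph outputs, the map would contain {{add_node, 0} -> {0, 2}}.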

// The graph compiler info generated by the graph compiler describes the executable graph.
// The device context is the unified interface for interacting with the device of the corresponding graph.
// The tensors mask is used to distinguish the input tensors' types.
// The input tensors are used to link graphs in the dynamic build scenario.
// The control nodes are used to link graphs in the control flow scenario.
// The control node parser is used to parse the edge info in control nodes.
// The origin parameters order is used to correspond to the input args.
// The origin outputs order is used to correspond to the output args.
// need_erase indicates that this GraphCompilerInfo object should be erased after the actor set runs.
struct BACKEND_EXPORT GraphCompilerInfo {
  GraphCompilerInfo(const std::vector<KernelGraphPtr> &graphs, const std::vector<DeviceContext *> &device_contexts,
                    const std::vector<std::vector<int64_t> *> &tensors_mask,
                    const std::vector<std::vector<TensorPtr> *> &input_tensors,
                    const std::vector<AnfNodePtr> &control_nodes,
                    const std::vector<AnfNodePtr> &origin_parameters_order, const ControlNodeParserPtr &parser,
                    const KernelMapPosition &origin_outputs_order, size_t outputs_num, size_t inputs_num,
                    const std::string &name, bool need_erase, GraphExecutionStrategy strategy, CompileFunc compile_func)
      : graphs_(graphs),
        device_contexts_(device_contexts),
        tensors_mask_(tensors_mask),
        input_tensors_(input_tensors),
        control_nodes_(control_nodes),
        control_node_parser_(parser),
        origin_parameters_order_(origin_parameters_order),
        origin_outputs_order_(origin_outputs_order),
        outputs_num_(outputs_num),
        inputs_num_(inputs_num),
        name_(name),
        need_erase_(need_erase),
        exist_flatten_concat_(false),
        strategy_(strategy),
        compile_func_(std::move(compile_func)) {}
  ~GraphCompilerInfo();
  std::vector<KernelGraphPtr> graphs_;
  std::vector<DeviceContext *> device_contexts_;
  std::vector<std::vector<int64_t> *> tensors_mask_;
  std::vector<std::vector<TensorPtr> *> input_tensors_;
  std::vector<AnfNodePtr> control_nodes_;
  ControlNodeParserPtr control_node_parser_;
  std::vector<AnfNodePtr> origin_parameters_order_;
  mutable mindspore::HashMap<AnfNodePtr, std::vector<std::pair<KernelWithIndex, KernelWithIndex>>>
    origin_parameters_to_backend_parameters_;
  KernelMapPosition origin_outputs_order_;
  size_t outputs_num_;
  size_t inputs_num_;
  std::string name_;
  bool need_erase_;
  mutable bool exist_flatten_concat_;
  mutable GraphExecutionStrategy strategy_;
  CompileFunc compile_func_;
};
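
// A minimal construction sketch (hypothetical values; graphs and device_contexts are assumed to
// be parallel vectors, i.e. device_contexts[i] executes graphs[i], and GraphExecutionStrategy is
// assumed to provide kPipeline):
//   GraphCompilerInfo info(graphs, device_contexts, tensors_mask, input_tensors, control_nodes,
//                          origin_parameters, parser, origin_outputs_order, outputs_num, inputs_num,
//                          "kernel_graph_0", false, GraphExecutionStrategy::kPipeline, compile_func);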

class GraphCompiler {
 public:
  GraphCompiler() { session_ = session::SessionFactory::Get().Create(kSessionBasic); }
  ~GraphCompiler() = default;

  // Construct the kernel graph from an anf node list and compile it in Graph mode;
  // the detailed implementation of graph compilation is in 'CompileGraphImpl'.
  GraphId CompileGraph(const GraphSegmentPtr &segment, const std::pair<AnfNodePtrList, AnfNodePtrList> &io_nodes,
                       const DeviceContext *device_context, device::RunMode run_mode, bool run_in_pynative = false);

  GraphId CompileGraph(const KernelGraphPtr &kernel_graph, const std::pair<AnfNodePtrList, AnfNodePtrList> &io_nodes,
                       const DeviceContext *device_context, device::RunMode run_mode, bool run_in_pynative);

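  // A hypothetical usage sketch (assumes `compiler`, `segment`, `inputs`, `outputs`, and
  // `device_context` are prepared by the caller, and that device::RunMode provides kKernelMode):
  //   GraphId id = compiler.CompileGraph(segment, {inputs, outputs}, device_context,
  //                                      device::RunMode::kKernelMode, /*run_in_pynative=*/false);
  //   KernelGraphPtr graph = compiler.Fetch(id);
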
  // For PyNative dynamic shape or dynamic structure.
  GraphId CompileDynamicGraph(const GraphSegmentPtr &segment, const AnfNodePtrList &outputs,
                              const DeviceContext *device_context);
  GraphId CompileDynamicGraph(const KernelGraphPtr &kernel_graph, const DeviceContext *device_context);

  // Construct the kernel graph from a function graph and compile it in Graph mode;
  // the detailed implementation of graph compilation is in 'CompileGraphImpl'.
  GraphId CompileWholeGraphForGraphRunMode(const FuncGraphPtr &func_graph, const DeviceContext *device_context);

  // Get the graph by graph id; return nullptr if it does not exist. Used in Graph mode.
  KernelGraphPtr Fetch(GraphId graph_id) const;

  // The following methods are used in PyNative back propagation to split the complete kernel graph
  // into single-op graphs; they will be moved to class MindRTBackend after the session module is deleted.

  // Cache the indexes of all parameter and output nodes of the kernel graph, used to get the parameters
  // of a single op and to recover the output of the original complete back propagation kernel graph.
  void GetParamAndOutputIndex(const KernelGraphPtr &graph, const std::vector<TensorPtr> &inputs,
                              VectorRef *const outputs, std::map<AnfNodePtr, size_t> *parameter_index,
                              std::map<KernelWithIndex, std::vector<std::vector<size_t>>> *output_indexes);

  // Get input tensors for single-op compilation and execution; input tensors may be converted from
  // value nodes and parameters in the graph, or from the previous kernel node's output.
  void GetSingleOpInputTensors(const CNodePtr &kernel,
                               const std::map<KernelWithIndex, tensor::BaseTensorPtr> &op_output,
                               const std::map<AnfNodePtr, size_t> &parameter_index,
                               const std::vector<TensorPtr> &graph_inputs, bool is_run_pyboost,
                               InputInfo *const input_info);
  // Get one input tensor for a single control op, such as bprop_cut.
  tensor::BaseTensorPtr GetSingleOpInputTensorByIndex(const CNodePtr &kernel,
                                                      const std::map<KernelWithIndex, tensor::BaseTensorPtr> &op_output,
                                                      const std::map<AnfNodePtr, size_t> &parameter_index,
                                                      const std::vector<TensorPtr> &graph_inputs,
                                                      InputInfo *const input_info, size_t input_index);

  // Get OpRunInfo and GraphInfo for single-op compilation and execution.
  void GetSingleOpRunInfoAndGraphInfo(const CNodePtr &kernel, const InputInfo &input_info,
                                      bool use_dynamic_shape_process, session::BackendOpRunInfoPtr *op_run_info,
                                      const GraphOutputInfo *const graph_output_info);

  // Calculate the ref count of PyNative back propagation operators.
  void CalculateRefCount(const KernelGraphPtr &graph, std::map<KernelWithIndex, size_t> *ref_count) const;

  // Calculate the forward op output ref count of the PyNative backward graph.
  void CalculateForwardOpOutputCount(const KernelGraphPtr &graph, const std::vector<tensor::TensorPtr> &inputs,
                                     std::map<std::string, size_t> *forward_op_output_tensor_id,
                                     const std::map<AnfNodePtr, size_t> &parameter_index) const;

  // Update the ref count of PyNative back propagation operators.
  void UpdateRefCount(const std::set<KernelWithIndex> &input_kernels_with_index,
                      std::map<KernelWithIndex, size_t> *ref_count,
                      std::map<KernelWithIndex, tensor::BaseTensorPtr> *op_output_map) const;

  // Update the forward op output ref count of the PyNative backward graph.
  void UpdateForwardOpOutputRefCount(const std::vector<ValuePtr> &input_values,
                                     std::map<std::string, size_t> *forward_op_output_tensor_id) const;

  // Handle the single-op output tensors and recover the output of the original complete kernel graph.
  void RecoverGraphOutput(const AnfNodePtr &kernel, const VectorRef &op_outputs,
                          const std::map<KernelWithIndex, size_t> &ref_count,
                          std::map<KernelWithIndex, tensor::BaseTensorPtr> *op_output_map,
                          GraphOutputInfo *const graph_output_info) const;
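
  // A hypothetical sketch of how the split-and-replay methods above fit together in a PyNative
  // bprop loop (all variable names are assumed, not part of this API):
  //   compiler.GetParamAndOutputIndex(graph, inputs, &outputs, &parameter_index, &output_indexes);
  //   for (const auto &kernel : graph->execution_order()) {
  //     InputInfo input_info;
  //     compiler.GetSingleOpInputTensors(kernel, op_output_map, parameter_index, inputs,
  //                                      /*is_run_pyboost=*/false, &input_info);
  //     // ... compile and run the single op, producing op_outputs ...
  //     compiler.RecoverGraphOutput(kernel, op_outputs, ref_count, &op_output_map, &graph_output_info);
  //   }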

  // Register a summary callback function, which is called in the final stage of the summary.
  void RegisterSummaryCallBackFunc(const CallBackFunc &callback) const;
  // Execute the graph summary.
  void Summary(const std::vector<KernelGraphPtr> &graphs) const;

  // The implementation of compiling a graph in Graph mode, including optimizing the graph,
  // setting operator info, creating kernels, and transforming the kernel graph to an ActorSet.
  GraphId CompileGraphImpl(const KernelGraphPtr &graph, const DeviceContext *device_context,
                           bool run_in_pynative = true) const;
  const session::SessionPtr &session_ptr() const { return session_; }

 private:
  DISABLE_COPY_AND_ASSIGN(GraphCompiler);

  // Create device addresses for all anf nodes of the graph.
  void CreateDeviceAddress(const KernelGraphPtr &graph, const DeviceContext *device_context) const;

  // Set the graph's dependencies on pre_graph and post_graph.
  void SetGraphDependency(const KernelGraphPtr &graph, const GraphSegmentPtr &segment) const;
  KernelGraphPtr ConstructKernelGraphForGraphRunMode(const FuncGraphPtr &func_graph,
                                                     const DeviceContext *device_context,
                                                     std::vector<KernelGraphPtr> *const all_graphs,
                                                     bool *const need_return_ahead);

  // The member variable 'session_' will be removed after the session module is removed.
  // For now, all GraphCompiler instances share the same 'session_'.
  session::SessionPtr session_;
  bool use_cache_to_compile_graph_ = false;
  bool export_compile_cache_ = false;
};

}  // namespace runtime
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_GRAPH_COMPILER_H_