1 /** 2 * Copyright 2021 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_GRAPH_COMPILER_H_ 18 #define MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_GRAPH_COMPILER_H_ 19 20 #include <vector> 21 #include <memory> 22 #include <string> 23 #include <utility> 24 #include <map> 25 #include <set> 26 #include "utils/hash_map.h" 27 #include "runtime/hardware/device_context.h" 28 #include "runtime/graph_scheduler/actor/actor_common.h" 29 #include "runtime/graph_scheduler/control_node_parser.h" 30 #include "backend/common/session/session_basic.h" 31 #include "backend/common/session/session_factory.h" 32 #include "ir/tensor.h" 33 #include "include/backend/visible.h" 34 #include "kernel/framework_utils.h" 35 36 namespace mindspore { 37 using device::DeviceContext; 38 using session::BackendOpRunInfo; 39 using session::CallBackFunc; 40 using session::GraphOutputInfo; 41 using session::InputInfo; 42 using session::KernelGraph; 43 using session::KernelWithIndex; 44 using tensor::TensorPtr; 45 46 const char kModelNameRuntime[] = "Runtime"; 47 const char kEventDeviceInit[] = "DeviceInit"; 48 const char kEventCompileGraph[] = "CompileGraph"; 49 const char kEventRunGraph[] = "RunGraph"; 50 const char kStageDeviceInit[] = "DeviceInit"; 51 const char kStageCompileGraphs[] = "CompileGraphs"; 52 const char kStageGraphPartition[] = "GraphPartition"; 53 const char kStageConstructKernelGraph[] = 
"ConstructKernelGraph"; 54 const char kStageOptimizeGraph[] = "OptimizeGraph"; 55 const char kStageCreateKernel[] = "CreateKernel"; 56 const char kStageGraphTransform[] = "GraphTransform"; 57 const char kStageBuild[] = "Build"; 58 const char kStageLink[] = "Link"; 59 const char kStageOptimize[] = "Optimize"; 60 const char kStageRunGraph[] = "RunGraph"; 61 const char kStageGetInputs[] = "GetInputs"; 62 const char kStageRun[] = "Run"; 63 const char kStageConstructOutputs[] = "ConstructOutputs"; 64 namespace runtime { 65 // Position of kernel with index, the value pair<branch_id, vector<pos>> means the branch id of the kernel and the pos 66 // of the kernel. Generally, there is only one branch, and the branch id is 0 at this time. In control flow, there are 67 // multiple branch scenarios, and pos represents the position of the kernel in the branch. 68 using KernelMapPosition = std::map<KernelWithIndex, std::vector<size_t>, session::KernelWithIndexCmp>; 69 70 // The graph compiler info generated by graph compiler is the express of executable graph. 71 // The device context is unified interface of interaction with device of corresponding graph. 72 // The tensors mask is used to distinguish input tensor's type. 73 // The input tensor is used to link graphs in the dynamic build scenario. 74 // The control node is used to link graphs in the control flow scenario. 75 // The control node parser is used to parse the edge info in control nodes. 76 // The origin parameters order is used to correspond to the input args. 77 // The origin outputs order is used to correspond to the output args. 78 // The need_erase means need erase this GraphCompilerInfo object after run actor set. 
struct BACKEND_EXPORT GraphCompilerInfo {
  GraphCompilerInfo(const std::vector<KernelGraphPtr> &graphs, const std::vector<DeviceContext *> &device_contexts,
                    const std::vector<std::vector<int64_t> *> &tensors_mask,
                    const std::vector<std::vector<TensorPtr> *> &input_tensors,
                    const std::vector<AnfNodePtr> &control_nodes,
                    const std::vector<AnfNodePtr> &origin_parameters_order, const ControlNodeParserPtr &parser,
                    const KernelMapPosition &origin_outputs_order, size_t outputs_num, size_t inputs_num,
                    const std::string &name, bool need_erase, GraphExecutionStrategy strategy, CompileFunc compile_func)
      : graphs_(graphs),
        device_contexts_(device_contexts),
        tensors_mask_(tensors_mask),
        input_tensors_(input_tensors),
        control_nodes_(control_nodes),
        control_node_parser_(parser),
        origin_parameters_order_(origin_parameters_order),
        origin_outputs_order_(origin_outputs_order),
        outputs_num_(outputs_num),
        inputs_num_(inputs_num),
        name_(name),
        need_erase_(need_erase),
        // Not a constructor parameter: starts false, mutated later (member is 'mutable').
        exist_flatten_concat_(false),
        strategy_(strategy),
        compile_func_(std::move(compile_func)) {}
  // Defined out of line (non-trivial cleanup lives in the .cc file).
  ~GraphCompilerInfo();
  // Compiled kernel graphs and the device context each graph runs on; the two
  // vectors are passed in together and presumably index-aligned -- verify in callers.
  std::vector<KernelGraphPtr> graphs_;
  std::vector<DeviceContext *> device_contexts_;
  std::vector<std::vector<int64_t> *> tensors_mask_;
  std::vector<std::vector<TensorPtr> *> input_tensors_;
  // Control-flow nodes used to link graphs in the control flow scenario.
  std::vector<AnfNodePtr> control_nodes_;
  ControlNodeParserPtr control_node_parser_;
  // Front-end parameter order, used to match the runtime input args.
  std::vector<AnfNodePtr> origin_parameters_order_;
  // Mapping from a front-end parameter to its backend parameter pairs; filled lazily,
  // hence 'mutable' so it can be updated through a const GraphCompilerInfo.
  mutable mindspore::HashMap<AnfNodePtr, std::vector<std::pair<KernelWithIndex, KernelWithIndex>>>
    origin_parameters_to_backend_parameters_;
  // Front-end output order, used to match the runtime output args.
  KernelMapPosition origin_outputs_order_;
  size_t outputs_num_;
  size_t inputs_num_;
  std::string name_;
  // When true, this GraphCompilerInfo should be erased after the actor set has run.
  bool need_erase_;
  mutable bool exist_flatten_concat_;
  mutable GraphExecutionStrategy strategy_;
  CompileFunc compile_func_;
};

// Front end of the runtime backend: turns ANF graph segments / function graphs into
// compiled kernel graphs, and provides the PyNative helpers used to run a graph
// one operator at a time.
class GraphCompiler {
 public:
  GraphCompiler() { session_ = session::SessionFactory::Get().Create(kSessionBasic); }
  ~GraphCompiler() = default;

  // Construct kernel graph from anf nodes list and compile kernel graph in Graph mode,
  // the detailed implementation of compiling graph is in 'CompileGraphImpl'.
  GraphId CompileGraph(const GraphSegmentPtr &segment, const std::pair<AnfNodePtrList, AnfNodePtrList> &io_nodes,
                       const DeviceContext *device_context, device::RunMode run_mode, bool run_in_pynative = false);

  // Overload for an already-constructed kernel graph.
  GraphId CompileGraph(const KernelGraphPtr &kernel_graph, const std::pair<AnfNodePtrList, AnfNodePtrList> &io_nodes,
                       const DeviceContext *device_context, device::RunMode run_mode, bool run_in_pynative);

  // For PyNative dynamic shape or dynamic structure.
  GraphId CompileDynamicGraph(const GraphSegmentPtr &segment, const AnfNodePtrList &outputs,
                              const DeviceContext *device_context);
  GraphId CompileDynamicGraph(const KernelGraphPtr &kernel_graph, const DeviceContext *device_context);

  // Construct kernel graph from function graph and compile kernel graph in Graph mode,
  // the detailed implementation of compiling graph is in 'CompileGraphImpl'.
  GraphId CompileWholeGraphForGraphRunMode(const FuncGraphPtr &func_graph, const DeviceContext *device_context);

  // Get graph by graph id, if not exist return nullptr, used in Graph mode.
  KernelGraphPtr Fetch(GraphId graph_id) const;

  // The following four methods used in PyNative back propagation to split complete kernel graph to single
  // op graph, and these methods will be removed to class MindRTBackend after deleting session module.

  // Cache index for all parameter and output nodes of kernel graph, used to get parameter of single op and
  // recover output of original complete back propagation kernel graph.
  void GetParamAndOutputIndex(const KernelGraphPtr &graph, const std::vector<TensorPtr> &inputs,
                              VectorRef *const outputs, std::map<AnfNodePtr, size_t> *parameter_index,
                              std::map<KernelWithIndex, std::vector<std::vector<size_t>>> *output_indexes);

  // Get input tensors for single op compile and run, input tensors may convert from value node and parameter in graph
  // and prev kernel node's output.
  void GetSingleOpInputTensors(const CNodePtr &kernel,
                               const std::map<KernelWithIndex, tensor::BaseTensorPtr> &op_output,
                               const std::map<AnfNodePtr, size_t> &parameter_index,
                               const std::vector<TensorPtr> &graph_inputs, bool is_run_pyboost,
                               InputInfo *const input_info);
  // Get one input tensor for single control op, such as bprop_cut.
  tensor::BaseTensorPtr GetSingleOpInputTensorByIndex(const CNodePtr &kernel,
                                                      const std::map<KernelWithIndex, tensor::BaseTensorPtr> &op_output,
                                                      const std::map<AnfNodePtr, size_t> &parameter_index,
                                                      const std::vector<TensorPtr> &graph_inputs,
                                                      InputInfo *const input_info, size_t input_index);

  // Get OpRunInfo and GraphInfo for single op compile and run.
  void GetSingleOpRunInfoAndGraphInfo(const CNodePtr &kernel, const InputInfo &input_info,
                                      bool use_dynamic_shape_process, session::BackendOpRunInfoPtr *op_run_info,
                                      const GraphOutputInfo *const graph_output_info);

  // Calculate ref count of PyNative back propagation operators.
  void CalculateRefCount(const KernelGraphPtr &graph, std::map<KernelWithIndex, size_t> *ref_count) const;

  // Calculate forward op output ref count of PyNative back graph.
  void CalculateForwardOpOutputCount(const KernelGraphPtr &graph, const std::vector<tensor::TensorPtr> &inputs,
                                     std::map<std::string, size_t> *forward_op_output_tensor_id,
                                     const std::map<AnfNodePtr, size_t> &parameter_index) const;

  // Update ref count of PyNative back propagation operators.
  void UpdateRefCount(const std::set<KernelWithIndex> &input_kernels_with_index,
                      std::map<KernelWithIndex, size_t> *ref_count,
                      std::map<KernelWithIndex, tensor::BaseTensorPtr> *op_output_map) const;

  // Update forward op output ref count of PyNative back graph.
  void UpdateForwardOpOutputRefCount(const std::vector<ValuePtr> &input_values,
                                     std::map<std::string, size_t> *forward_op_output_tensor_id) const;

  // Handle single op output tensor and recover output of original complete kernel graph.
  void RecoverGraphOutput(const AnfNodePtr &kernel, const VectorRef &op_outputs,
                          const std::map<KernelWithIndex, size_t> &ref_count,
                          std::map<KernelWithIndex, tensor::BaseTensorPtr> *op_output_map,
                          GraphOutputInfo *const graph_output_info) const;

  // Register a summary callback function, which is called in the final stages of summary.
  void RegisterSummaryCallBackFunc(const CallBackFunc &callback) const;
  // Execute graph summary.
  void Summary(const std::vector<KernelGraphPtr> &graphs) const;

  // The implementation of compiling graph in Graph Mode, including optimizing graph,
  // setting operator info, creating kernel and transforming kernel graph to ActorSet.
  GraphId CompileGraphImpl(const KernelGraphPtr &graph, const DeviceContext *device_context,
                           bool run_in_pynative = true) const;
  // Accessor for the shared session (see the note on 'session_' below).
  const session::SessionPtr &session_ptr() const { return session_; }

 private:
  DISABLE_COPY_AND_ASSIGN(GraphCompiler);

  // Create device address for all anf nodes of graph.
  void CreateDeviceAddress(const KernelGraphPtr &graph, const DeviceContext *device_context) const;

  // Set Graph's dependencies for pre_graph and post_graph.
  void SetGraphDependency(const KernelGraphPtr &graph, const GraphSegmentPtr &segment) const;
  KernelGraphPtr ConstructKernelGraphForGraphRunMode(const FuncGraphPtr &func_graph,
                                                     const DeviceContext *device_context,
                                                     std::vector<KernelGraphPtr> *const all_graphs,
                                                     bool *const need_return_ahead);

  // The member variable 'session_' will be removed after removing session module.
  // Now all the GraphCompiler share the same 'session_'.
  session::SessionPtr session_;
  bool use_cache_to_compile_graph_ = false;
  bool export_compile_cache_ = false;
};

}  // namespace runtime
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_GRAPH_COMPILER_H_