1 /** 2 * Copyright 2020 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_LITE_SRC_SUB_GRAPH_SPLIT_H_ 18 #define MINDSPORE_LITE_SRC_SUB_GRAPH_SPLIT_H_ 19 20 #include <stack> 21 #include <vector> 22 #include <map> 23 #include <set> 24 #include <unordered_map> 25 #include "include/model.h" 26 #include "src/lite_kernel.h" 27 #include "src/lite_model.h" 28 #include "src/inner_context.h" 29 #include "src/common/prim_util.h" 30 #include "nnacl/conv_parameter.h" 31 32 namespace mindspore::lite { 33 constexpr int kDefaultSubGraphSize = 2; 34 constexpr int kDefaultFirstSubgraph = 0; 35 constexpr int kDefaultSecondSubgraph = 1; 36 constexpr int kDefaultInputs = 1; 37 constexpr int kMaxMultyInNode = 20; 38 constexpr int kMaxSubGraphCount = 10; 39 constexpr int kMinSubgraphCost = 50; 40 constexpr double kDefaultGpu = 0.5; 41 class SearchSubGraph { 42 enum TensorType { NORMAL, CONST, INPUT }; 43 44 struct Tensor { 45 std::vector<uint32_t> in_nodes_; /* used current tensor as input */ 46 std::vector<uint32_t> out_nodes_; 47 TensorType type_; 48 }; 49 50 struct CostModel { 51 size_t mul_cost_ = 0; 52 size_t io_cost_ = 0; 53 54 CostModel operator+(const SearchSubGraph::CostModel &cost) { 55 CostModel result; 56 result.mul_cost_ = this->mul_cost_ + cost.mul_cost_; 57 result.io_cost_ = this->io_cost_ + cost.io_cost_; 58 return result; 59 } 60 CostModel operator-(const SearchSubGraph::CostModel &cost) { 61 CostModel result; 62 result.mul_cost_ = this->mul_cost_ - cost.mul_cost_; 63 result.io_cost_ = this->io_cost_ - cost.io_cost_; 64 return result; 65 } costCostModel66 int cost() { return io_cost_ + mul_cost_; } emptyCostModel67 void empty() { 68 io_cost_ = 0; 69 mul_cost_ = 0; 70 } 71 }; 72 73 struct Subgraph { 74 std::vector<uint32_t> nodes_; 75 std::vector<uint32_t> heads_; 76 std::vector<uint32_t> ends_; 77 bool search_terminate_ = false; 78 DeviceType device_; 79 size_t thread_; 80 CostModel cost_; 81 uint32_t tid_; /* 1 or 2 */ 82 }; 83 84 public: 85 SearchSubGraph(const InnerContext *context, Model *model, std::vector<lite::Tensor *> *src_tensors, 86 const std::map<int, OpParameter *> *op_parameters, std::vector<size_t> *output_nodes); 87 ~SearchSubGraph() = default; 88 89 public: 90 void SubGraphSplit(); 91 92 private: /* split by output */ 93 void SubGraphSplitByOutput(); 94 void InitSearchSubGraphByOutput(); 95 void InsertNode(uint32_t index, Subgraph *subgraph, uint32_t last_index); 96 97 private: /* split by middle */ 98 void SubGraphSplitByMiddle(); 99 void InitSearchSubGraphByMiddle(); 100 void SearchMultyInNodes(std::vector<uint32_t> *multy_in_nodes); 101 void InitMiddleSubgraph(std::vector<uint32_t> *multy_in_nodes); 102 void InsertNodeByMid(uint32_t node_index, Subgraph *subgraph, uint32_t last_index); 103 void InsertHeadNode(uint32_t index, Subgraph *subgraph); 104 void OptimizeAfterFusion(std::vector<Subgraph> *sub_graphs, uint32_t root_node_index); 105 106 private: /* split by offline */ 107 void SubGraphSplitByOffLineParallel(); 108 void UpdateOfflineParallelFlag(); 109 bool CheckIsParallelSubGraph(const std::vector<Subgraph> &subgraphs); 110 111 private: /* public graph func */ 112 void RemoveConstNode(std::vector<uint32_t> *nodes); 113 void InitSearchTensor(); 114 void InitMainGraphDevice(DeviceType dt = DT_CPU); 115 116 void InitSubgraphRuntimeInfo(std::vector<Subgraph> *sub_graphs); 117 void SubgraphFusion(std::vector<Subgraph> *sub_graphs); 118 void CalculateCostModel(std::vector<Subgraph> *sub_graphs); 119 void ConvertSubGraphToModel(std::vector<Subgraph> *sub_graphs); 120 bool ValidInParallel(); 121 void CheckSubHeadEnd(Subgraph *sub); 122 123 private: /* public schema func */ 124 void InsertParallelNode(uint32_t index, Subgraph *subgraph); 125 bool IsNodeSubGraphHead(uint32_t node_index, const std::vector<uint32_t> &ready_nodes); 126 bool IsNodeSubGraphHeadWithRoot(uint32_t node_index, const std::vector<uint32_t> &ready_nodes, 127 uint32_t root_node_index); 128 const schema::Primitive *CreatePartialPrimitive(int64_t subgraph_index); 129 130 private: /* public cost-model func */ 131 CostModel CalculateConv2DFusion(LiteGraph::Node *node); 132 void dfs(int i, int n, int current_sum, int except_value, int *min_value, std::vector<bool> *tmp_group, 133 std::vector<bool> *cor_group, std::vector<Subgraph> *sub_graphs); 134 135 private: 136 std::vector<size_t> *output_nodes_ = nullptr; 137 const InnerContext *context_ = nullptr; 138 std::vector<lite::Tensor *> *src_tensors_ = nullptr; 139 const std::map<int, OpParameter *> *op_parameters_ = nullptr; 140 LiteModel *model_ = nullptr; 141 std::vector<Tensor> tensors_; 142 std::vector<Subgraph> sub_graphs_; 143 std::unordered_map<uint32_t, std::vector<Subgraph>> node_sub_map_; 144 std::vector<LiteGraph::Node *> node_list_; 145 DeviceType major_dt_; 146 DeviceType minor_dt_; 147 size_t major_thread_; 148 size_t minor_thread_; 149 size_t total_cost_ = 0; 150 bool offline_parallel_enable_ = false; 151 }; 152 } // namespace mindspore::lite 153 154 #endif // MINDSPORE_LITE_SRC_SUB_GRAPH_SPLIT_H_ 155