/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_SUB_GRAPH_SPLIT_H_
#define MINDSPORE_LITE_SRC_SUB_GRAPH_SPLIT_H_

#include <stack>
#include <vector>
#include <map>
#include <set>
#include <unordered_map>
#include "include/model.h"
#include "src/lite_kernel.h"
#include "src/lite_model.h"
#include "src/inner_context.h"
#include "src/common/prim_util.h"
#include "nnacl/conv_parameter.h"

namespace mindspore::lite {
constexpr int kDefaultSubGraphSize = 2;
constexpr int kDefaultFirstSubgraph = 0;
constexpr int kDefaultSecondSubgraph = 1;
constexpr int kDefaultInputs = 1;
constexpr int kMaxMultyInNode = 20;
constexpr int kMaxSubGraphCount = 10;
constexpr int kMinSubgraphCost = 50;
constexpr double kDefaultGpu = 0.5;
class SearchSubGraph {
  enum TensorType { NORMAL, CONST, INPUT };

  struct Tensor {
    std::vector<uint32_t> in_nodes_; /* nodes that use the current tensor as input */
    std::vector<uint32_t> out_nodes_;
    TensorType type_;
  };

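  /* Execution-cost estimate for a group of nodes: multiply-accumulate cost plus tensor IO
   * cost. operator+ and operator- let costs be combined or separated as subgraphs are
   * fused or split. */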
  struct CostModel {
    size_t mul_cost_ = 0;
    size_t io_cost_ = 0;

    CostModel operator+(const SearchSubGraph::CostModel &cost) {
      CostModel result;
      result.mul_cost_ = this->mul_cost_ + cost.mul_cost_;
      result.io_cost_ = this->io_cost_ + cost.io_cost_;
      return result;
    }
    CostModel operator-(const SearchSubGraph::CostModel &cost) {
      CostModel result;
      result.mul_cost_ = this->mul_cost_ - cost.mul_cost_;
      result.io_cost_ = this->io_cost_ - cost.io_cost_;
      return result;
    }
    int cost() { return io_cost_ + mul_cost_; }
    void empty() {
      io_cost_ = 0;
      mul_cost_ = 0;
    }
  };

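  /* One candidate subgraph found during the search: the node indices it contains, its head
   * and end nodes, the device and thread count it is assigned to, and its accumulated cost. */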
  struct Subgraph {
    std::vector<uint32_t> nodes_;
    std::vector<uint32_t> heads_;
    std::vector<uint32_t> ends_;
    bool search_terminate_ = false;
    DeviceType device_;
    size_t thread_;
    CostModel cost_;
    uint32_t tid_; /* 1 or 2 */
  };

 public:
  SearchSubGraph(const InnerContext *context, Model *model, std::vector<lite::Tensor *> *src_tensors,
                 const std::map<int, OpParameter *> *op_parameters, std::vector<size_t> *output_nodes);
  ~SearchSubGraph() = default;

 public:
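  /* Typical usage, sketched from this header rather than from actual call sites:
   *   SearchSubGraph search(context, model, src_tensors, op_parameters, output_nodes);
   *   search.SubGraphSplit();
   * The private methods below implement the individual split strategies (by output,
   * by middle, and by offline parallel). */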
  void SubGraphSplit();

 private: /* split by output */
  void SubGraphSplitByOutput();
  void InitSearchSubGraphByOutput();
  void InsertNode(uint32_t index, Subgraph *subgraph, uint32_t last_index);

 private: /* split by middle */
  void SubGraphSplitByMiddle();
  void InitSearchSubGraphByMiddle();
  void SearchMultyInNodes(std::vector<uint32_t> *multy_in_nodes);
  void InitMiddleSubgraph(std::vector<uint32_t> *multy_in_nodes);
  void InsertNodeByMid(uint32_t node_index, Subgraph *subgraph, uint32_t last_index);
  void InsertHeadNode(uint32_t index, Subgraph *subgraph);
  void OptimizeAfterFusion(std::vector<Subgraph> *sub_graphs, uint32_t root_node_index);

 private: /* split by offline */
  void SubGraphSplitByOffLineParallel();
  void UpdateOfflineParallelFlag();
  bool CheckIsParallelSubGraph(const std::vector<Subgraph> &subgraphs);

 private: /* public graph func  */
  void RemoveConstNode(std::vector<uint32_t> *nodes);
  void InitSearchTensor();
  void InitMainGraphDevice(DeviceType dt = DT_CPU);

  void InitSubgraphRuntimeInfo(std::vector<Subgraph> *sub_graphs);
  void SubgraphFusion(std::vector<Subgraph> *sub_graphs);
  void CalculateCostModel(std::vector<Subgraph> *sub_graphs);
  void ConvertSubGraphToModel(std::vector<Subgraph> *sub_graphs);
  bool ValidInParallel();
  void CheckSubHeadEnd(Subgraph *sub);

 private: /* public schema func  */
  void InsertParallelNode(uint32_t index, Subgraph *subgraph);
  bool IsNodeSubGraphHead(uint32_t node_index, const std::vector<uint32_t> &ready_nodes);
  bool IsNodeSubGraphHeadWithRoot(uint32_t node_index, const std::vector<uint32_t> &ready_nodes,
                                  uint32_t root_node_index);
  const schema::Primitive *CreatePartialPrimitive(int64_t subgraph_index);

 private: /* public cost-model func  */
  CostModel CalculateConv2DFusion(LiteGraph::Node *node);
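  /* Recursive helper for distributing subgraphs between two groups; judging from the
   * signature, it searches for the subset whose summed cost is closest to except_value
   * (the implementation lives in the corresponding .cc file). */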
  void dfs(int i, int n, int current_sum, int except_value, int *min_value, std::vector<bool> *tmp_group,
           std::vector<bool> *cor_group, std::vector<Subgraph> *sub_graphs);

 private:
  std::vector<size_t> *output_nodes_ = nullptr;
  const InnerContext *context_ = nullptr;
  std::vector<lite::Tensor *> *src_tensors_ = nullptr;
  const std::map<int, OpParameter *> *op_parameters_ = nullptr;
  LiteModel *model_ = nullptr;
  std::vector<Tensor> tensors_;
  std::vector<Subgraph> sub_graphs_;
  std::unordered_map<uint32_t, std::vector<Subgraph>> node_sub_map_;
  std::vector<LiteGraph::Node *> node_list_;
  DeviceType major_dt_;
  DeviceType minor_dt_;
  size_t major_thread_;
  size_t minor_thread_;
  size_t total_cost_ = 0;
  bool offline_parallel_enable_ = false;
};
}  // namespace mindspore::lite

#endif  // MINDSPORE_LITE_SRC_SUB_GRAPH_SPLIT_H_