/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 #ifndef PARALLEL_AUTO_PARALLEL_EDGE_COSTMODEL_H_
18 #define PARALLEL_AUTO_PARALLEL_EDGE_COSTMODEL_H_
19 
20 #include <map>
21 #include <memory>
22 #include <string>
23 #include <utility>
24 #include <vector>
25 #include "frontend/parallel/auto_parallel/costmodel.h"
26 #include "frontend/parallel/ops_info/operator_info.h"
27 #include "frontend/parallel/tensor_layout/tensor_info.h"
28 #include "frontend/parallel/tensor_layout/tensor_layout.h"
29 #include "utils/ms_utils.h"
30 
31 namespace mindspore {
32 namespace parallel {
33 using CostPtrKey = std::pair<StrategyPtr, StrategyPtr>;
34 using OperatorInfoPtr = std::shared_ptr<mindspore::parallel::OperatorInfo>;
35 using EdgePtr = std::shared_ptr<mindspore::parallel::Edge>;
36 
37 class Edge {
38   // An 'Edge' connects two Operators in the CostGraph.
39  public:
Edge(const std::string & edge_name,const std::shared_ptr<OperatorInfo> & prev_op,const std::shared_ptr<OperatorInfo> & next_op,const size_t & output_index_,const size_t & input_index_,const bool & is_com)40   Edge(const std::string &edge_name, const std::shared_ptr<OperatorInfo> &prev_op,
41        const std::shared_ptr<OperatorInfo> &next_op, const size_t &output_index_, const size_t &input_index_,
42        const bool &is_com)
43       : edge_name_(edge_name),
44         prev_op_(prev_op),
45         next_op_(next_op),
46         prev_op_output_index_(output_index_),
47         next_op_input_index_(input_index_),
48         is_combined_(is_com) {
49     is_identity_edge = false;
50   }
51 
Edge(const std::string & edge_name,const std::shared_ptr<OperatorInfo> & prev_op,const std::shared_ptr<OperatorInfo> & next_op,const size_t & output_index_,const size_t & input_index_,const bool & is_com,const bool & is_iden)52   Edge(const std::string &edge_name, const std::shared_ptr<OperatorInfo> &prev_op,
53        const std::shared_ptr<OperatorInfo> &next_op, const size_t &output_index_, const size_t &input_index_,
54        const bool &is_com, const bool &is_iden)
55       : edge_name_(edge_name),
56         prev_op_(prev_op),
57         next_op_(next_op),
58         prev_op_output_index_(output_index_),
59         next_op_input_index_(input_index_),
60         is_combined_(is_com),
61         is_identity_edge(is_iden) {}
62 
Edge(const std::string & edge_name,const std::shared_ptr<OperatorInfo> & prev_op,const std::shared_ptr<OperatorInfo> & next_op,const std::vector<size_t> & output_indexs_,const std::vector<size_t> & input_indexs_,const bool & is_com)63   Edge(const std::string &edge_name, const std::shared_ptr<OperatorInfo> &prev_op,
64        const std::shared_ptr<OperatorInfo> &next_op, const std::vector<size_t> &output_indexs_,
65        const std::vector<size_t> &input_indexs_, const bool &is_com)
66       : edge_name_(edge_name),
67         prev_op_(prev_op),
68         next_op_(next_op),
69         pre_op_output_indexs_(output_indexs_),
70         next_op_input_indexs_(input_indexs_),
71         is_combined_(is_com) {
72     prev_op_output_index_ = 0;
73     next_op_input_index_ = 0;
74     is_identity_edge = false;
75   }
76 
77   ~Edge() = default;
prev_operator()78   std::shared_ptr<OperatorInfo> prev_operator() const { return prev_op_; }
next_operator()79   std::shared_ptr<OperatorInfo> next_operator() const { return next_op_; }
edge_name()80   std::string edge_name() const { return edge_name_; }
81   // Init cost_map_: for each output layout and input layout, calculate the cost
82   Status InitEdgeCost();
GetCostMap()83   std::map<CostPtrKey, CostPtrList> GetCostMap() { return cost_map_; }
84   CostPtr GetCostByStrategyPair(const CostPtrKey &);
85   StrategyPtr GetNextOpStrategyByPrevOpStrategyWithZeroComm(const StrategyPtr &);
86   StrategyPtr GetPrevOpStrategyByNextOpStrategyWithZeroComm(const StrategyPtr &);
87   void SetCostMapAndInputOutput(std::map<CostPtrKey, CostPtrList> &);
88   // For two operators u--->v, given the output tensor layout of u,
89   // and the input tensor layout of v, return the redistribution cost,
90   // and the op_list to carry out the redistribution.
91   Status GetRedistributionCost(const TensorLayout &prev_op_output_layout, const TensorLayout &next_op_input_layout,
92                                size_t, const TypePtr &type, CostPtr *cost);
93 
set_pre_op_output(const std::vector<std::pair<std::shared_ptr<Strategy>,std::vector<TensorInfo>>> & output_set)94   void set_pre_op_output(const std::vector<std::pair<std::shared_ptr<Strategy>, std::vector<TensorInfo>>> &output_set) {
95     pre_op_output_ = output_set;
96   }
set_next_op_input(const std::vector<std::pair<std::shared_ptr<Strategy>,std::vector<TensorInfo>>> & input_set)97   void set_next_op_input(const std::vector<std::pair<std::shared_ptr<Strategy>, std::vector<TensorInfo>>> &input_set) {
98     next_op_input_ = input_set;
99   }
100 
101   // Given a pair of output strategy and input strategy, return the corresponding costlist
102   CostPtrList GetCostList(StrategyPtr output_str, StrategyPtr input_str);
103 
prev_op_output()104   std::vector<std::pair<std::shared_ptr<Strategy>, std::vector<TensorInfo>>> prev_op_output() const {
105     return pre_op_output_;
106   }
next_op_input()107   std::vector<std::pair<std::shared_ptr<Strategy>, std::vector<TensorInfo>>> next_op_input() const {
108     return next_op_input_;
109   }
110 
is_combined()111   bool is_combined() const { return is_combined_; }
prev_op_output_index()112   size_t prev_op_output_index() const { return prev_op_output_index_; }
next_op_input_index()113   size_t next_op_input_index() const { return next_op_input_index_; }
prev_op_output_indexs()114   std::vector<size_t> prev_op_output_indexs() const { return pre_op_output_indexs_; }
next_op_input_indexs()115   std::vector<size_t> next_op_input_indexs() const { return next_op_input_indexs_; }
116 
117   CostPtrList CreateEdgeEliminationCostList(const StrategyPtr &output_st_ptr,
118                                             const std::vector<std::shared_ptr<Edge>> &edges,
119                                             const StrategyPtr &input_st_ptr);
120   // In the Edge Elimination operation in DP algorithm, 'edges' is replaced by a new edge. This method is used to
121   // set cost for this new edge
122   void EdgeEliminationSetNewCost(std::shared_ptr<OperatorInfo> u, const std::vector<std::shared_ptr<Edge>> &edges,
123                                  std::shared_ptr<OperatorInfo> v);
124   void CreateOpEliminationSubCostList(StrategyPtr op_strategy, const CostPtrList &left_cost_list,
125                                       const CostPtrList &middle_cost_list, const CostPtrList &right_cost_list,
126                                       CostPtrList *ret_cost_list);
127 
128   CostPtrList CreateOpEliminationCostList(const std::shared_ptr<Edge> &e1, const StrategyPtr &output_st_ptr,
129                                           const std::shared_ptr<OperatorInfo> &op, const std::shared_ptr<Edge> &e2,
130                                           const StrategyPtr &input_st_ptr);
131   // In the Operation Elimination operation in DP algorithm, 'op', 'e1' and 'e2' are replaced by a new edge.
132   // This method is used to set cost for this new edge
133   void OpEliminationSetNewCost(const std::shared_ptr<Edge> &e1, const std::shared_ptr<OperatorInfo> &op,
134                                const std::shared_ptr<Edge> &e2);
135 
set_selected_cost(const CostPtr & cost)136   void set_selected_cost(const CostPtr &cost) { selected_cost_ = cost; }
selected_cost()137   const CostPtr &selected_cost() const { return selected_cost_; }
set_parameter_involve(int64_t para_invol)138   void set_parameter_involve(int64_t para_invol) { is_output_parameter_involve_ = para_invol; }
139   // In the training phase, when the input of a operator contains WEIGHT or a output from other operators involving
140   // WEIGHT, then these input should stay in memory until it is used in the backward phase, which is kept in memory
141   // at the end of forward phase.
142   Status CalculateMemoryCost();
143   // In the inference phase,
144   Status CalculateMemoryCostForInference();
mark_output_critical()145   void mark_output_critical() { is_output_critical_ = 1; }
146   // Whether there exists any available strategy in 'cost_map_'
147   bool CheckStrategyCostPossibility() const;
148 
149  private:
150   std::string edge_name_;
151   std::shared_ptr<OperatorInfo> prev_op_, next_op_;
152   std::map<CostPtrKey, CostPtrList> cost_map_;
153   // pre_op_output_
154   std::vector<std::pair<std::shared_ptr<Strategy>, std::vector<TensorInfo>>> pre_op_output_;
155   std::vector<std::pair<std::shared_ptr<Strategy>, std::vector<TensorInfo>>> next_op_input_;
156   // the index of outputs of prev_op, and the index of inputs of next_op
157   size_t prev_op_output_index_, next_op_input_index_;
158 
159   // pre_op_output_indexs_ and next_op_input_indexs_ store the indices of inputs and outputs if is_combined = true
160   std::vector<size_t> pre_op_output_indexs_;
161   std::vector<size_t> next_op_input_indexs_;
162   // is this edge constructed by combining multiple edges? If is is, then is_combined = true, else is_combined = false
163   bool is_combined_;
164   // When a Parameter in the ANF graph being used by multiple operators, we include the Parameter in the costgraph by
165   // replace the Parameter by a TmpIdentity operator, and connecting this TmpIdentity operator with subsequent
166   // operators. The resulting edges are different from those normal edges, thus this Bool variable distinguishes them.
167   // If it is true, then we should guarantee that the strategy for output tensor consistent with the input tensor.
168   bool is_identity_edge;
169   CostPtr selected_cost_;
170   // In the training phase, 'is_output_parameter_involve_' is used to mark whether the output of the previous operator
171   // is parameter-involved
172   int64_t is_output_parameter_involve_ = -1;  // -1: unset; 0: not parameter_involved; 1: parameter_involved
173   // In the inference phase, this is used to mark whether the output of the previous operator is critical.
174   int64_t is_output_critical_ = 0;
175 };
176 }  // namespace parallel
177 }  // namespace mindspore
178 #endif  // PARALLEL_AUTO_PARALLEL_EDGE_COSTMODEL_H_
179