/**
 * Copyright 2019-2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_FRONTEND_PARALLEL_AUTO_PARALLEL_EDGE_COSTMODEL_H_
#define MINDSPORE_CCSRC_FRONTEND_PARALLEL_AUTO_PARALLEL_EDGE_COSTMODEL_H_

#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include <set>
#include "frontend/parallel/auto_parallel/costmodel.h"
#include "frontend/parallel/ops_info/operator_info.h"
#include "frontend/parallel/tensor_layout/tensor_info.h"
#include "frontend/parallel/tensor_layout/tensor_layout.h"
#include "frontend/parallel/graph_util/node_info.h"
#include "utils/ms_utils.h"

namespace mindspore {
namespace parallel {
using CostPtrKey = std::pair<StrategyPtr, StrategyPtr>;
using EdgePtr = std::shared_ptr<mindspore::parallel::Edge>;

struct OpsPtrCompare {
  bool operator()(const OperatorInfoPtr &a, const OperatorInfoPtr &b) const { return a->name().compare(b->name()) < 0; }
};

class Edge {
  // An 'Edge' connects two Operators in the CostGraph.
 public:
  Edge(const std::string &edge_name, const std::shared_ptr<OperatorInfo> &prev_op,
       const std::shared_ptr<OperatorInfo> &next_op, const size_t &output_index_, const size_t &input_index_,
       const bool &is_com)
      : edge_name_(edge_name),
        prev_op_(prev_op),
        next_op_(next_op),
        prev_op_output_index_(output_index_),
        next_op_input_index_(input_index_),
        is_combined_(is_com),
        is_identity_edge(false) {}

  Edge(const std::string &edge_name, const std::shared_ptr<OperatorInfo> &prev_op,
       const std::shared_ptr<OperatorInfo> &next_op, const size_t &output_index_, const size_t &input_index_,
       const bool &is_com, const bool &is_iden)
      : edge_name_(edge_name),
        prev_op_(prev_op),
        next_op_(next_op),
        prev_op_output_index_(output_index_),
        next_op_input_index_(input_index_),
        is_combined_(is_com),
        is_identity_edge(is_iden) {}

  Edge(const std::string &edge_name, const std::shared_ptr<OperatorInfo> &prev_op,
       const std::shared_ptr<OperatorInfo> &next_op, const std::vector<size_t> &output_indexs_,
       const std::vector<size_t> &input_indexs_, const bool &is_com)
      : edge_name_(edge_name),
        prev_op_(prev_op),
        next_op_(next_op),
        prev_op_output_index_(0),
        next_op_input_index_(0),
        pre_op_output_indexs_(output_indexs_),
        next_op_input_indexs_(input_indexs_),
        is_combined_(is_com),
        is_identity_edge(false) {}

  ~Edge() = default;
  std::shared_ptr<OperatorInfo> prev_operator() const { return prev_op_; }
  std::shared_ptr<OperatorInfo> next_operator() const { return next_op_; }
  std::string edge_name() const { return edge_name_; }
  // Init cost_map_: for each output layout and input layout, calculate the cost
  Status InitEdgeCost();
  std::map<CostPtrKey, CostPtrList> GetCostMap() { return cost_map_; }
  CostPtr GetCostByStrategyPair(const CostPtrKey &stra_pair);
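  // Illustrative query sketch, not part of the original header: cost_map_ is keyed by the pair
  // (strategy of prev_op, strategy of next_op). Assuming 'edge' is an initialized EdgePtr and
  // 'sp_u', 'sp_v' are candidate strategies of the two endpoint operators:
  //   CostPtrKey key = {sp_u, sp_v};
  //   CostPtr cost = edge->GetCostByStrategyPair(key);  // presumably a null CostPtr if the pair has no entry
  //   std::map<CostPtrKey, CostPtrList> all_costs = edge->GetCostMap();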

  StrategyPtr GetNextOpStrategyByPrevOpStrategyWithMiniComm(const StrategyPtr &prev_op_stra);
  StrategyPtr GetPrevOpStrategyByNextOpStrategyWithMiniComm(const StrategyPtr &next_op_stra);
  int64_t GetReshapeSWCIndexByNextOpStrategy(const StrategyPtr &next_op_stra);
  int64_t GetReshapeSWCIndexByPrevOpStrategy(const StrategyPtr &prev_op_stra);
  StrategyPtr GetPrevOpStrategyByReshapeSWCIndex(int64_t swc_index);
  StrategyPtr GetNextOpStrategyByReshapeSWCIndex(int64_t swc_index);
  bool CheckStrategyConsistency(StrategyPtr prev_stra, StrategyPtr next_stra,
                                std::set<OperatorInfoPtr> *_diff_stra_params);

  void SetCostMapAndInputOutput(const std::map<CostPtrKey, CostPtrList> &cost_map);
  // For two operators u--->v, given the output tensor layout of u,
  // and the input tensor layout of v, return the redistribution cost,
  // and the op_list to carry out the redistribution.
  Status GetRedistributionCost(const TensorLayout &prev_op_output_layout, const TensorLayout &next_op_input_layout,
                               size_t type_length, const TypePtr &type, CostPtr *cost);
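  // Illustrative call sketch, not part of the original header. 'edge', 'u_output_layout' and
  // 'v_input_layout' are assumed, already-built objects; kFloat32 is assumed to be the element
  // type of the tensor carried by this edge:
  //   CostPtr redistribution_cost = nullptr;
  //   if (edge->GetRedistributionCost(u_output_layout, v_input_layout, sizeof(float), kFloat32,
  //                                   &redistribution_cost) != SUCCESS) {
  //     // handle failure; on success, redistribution_cost holds the cost of converting u's output
  //     // layout into the input layout expected by v
  //   }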

  void set_pre_op_output(const std::vector<std::pair<std::shared_ptr<Strategy>, std::vector<TensorInfo>>> &output_set) {
    pre_op_output_ = output_set;
  }
  void set_next_op_input(const std::vector<std::pair<std::shared_ptr<Strategy>, std::vector<TensorInfo>>> &input_set) {
    next_op_input_ = input_set;
  }

  // Given a pair of output strategy and input strategy, return the corresponding costlist
  CostPtrList GetCostList(StrategyPtr output_str, StrategyPtr input_str);

  std::vector<std::pair<std::shared_ptr<Strategy>, std::vector<TensorInfo>>> prev_op_output() const {
    return pre_op_output_;
  }
  std::vector<std::pair<std::shared_ptr<Strategy>, std::vector<TensorInfo>>> next_op_input() const {
    return next_op_input_;
  }

  bool is_combined() const { return is_combined_; }
  size_t prev_op_output_index() const { return prev_op_output_index_; }
  size_t next_op_input_index() const { return next_op_input_index_; }
  std::vector<size_t> prev_op_output_indexs() const { return pre_op_output_indexs_; }
  std::vector<size_t> next_op_input_indexs() const { return next_op_input_indexs_; }

  CostPtrList CreateEdgeEliminationCostList(const StrategyPtr &output_st_ptr,
                                            const std::vector<std::shared_ptr<Edge>> &edges,
                                            const StrategyPtr &input_st_ptr) const;
  // In the Edge Elimination operation of the DP algorithm, the parallel edges in 'edges' are replaced by a single new
  // edge. This method is used to set the cost for this new edge.
  void EdgeEliminationSetNewCost(std::shared_ptr<OperatorInfo> u, const std::vector<std::shared_ptr<Edge>> &edges,
                                 std::shared_ptr<OperatorInfo> v);
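  // Illustrative sketch, not part of the original header: if two edges e_a and e_b (assumed names) both connect
  // u ---> v, Edge Elimination merges them into one edge. For each strategy pair (su, sv), the merged cost list is
  // built from the per-edge cost lists (see CreateEdgeEliminationCostList above) and stored in this edge's cost_map_.
  //   std::vector<std::shared_ptr<Edge>> parallel_edges = {e_a, e_b};
  //   merged_edge->EdgeEliminationSetNewCost(u, parallel_edges, v);
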
  void CreateOpEliminationSubCostList(StrategyPtr op_strategy, const CostPtrList &left_cost_list,
                                      const CostPtrList &middle_cost_list, const CostPtrList &right_cost_list,
                                      CostPtrList *ret_cost_list) const;

  CostPtrList CreateOpEliminationCostList(const std::shared_ptr<Edge> &e1, const StrategyPtr &output_st_ptr,
                                          const std::shared_ptr<OperatorInfo> &op, const std::shared_ptr<Edge> &e2,
                                          const StrategyPtr &input_st_ptr) const;
  // In the Operator Elimination operation of the DP algorithm, 'op', 'e1' and 'e2' are replaced by a single new edge.
  // This method is used to set the cost for this new edge.
  void OpEliminationSetNewCost(const std::shared_ptr<Edge> &e1, const std::shared_ptr<OperatorInfo> &op,
                               const std::shared_ptr<Edge> &e2);
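  // Illustrative sketch, not part of the original header: contracting the chain u --e1--> op --e2--> v into this
  // single edge u ---> v. For each strategy pair (su, sv), CreateOpEliminationCostList enumerates op's strategies and
  // folds the left, middle and right cost lists together via CreateOpEliminationSubCostList.
  //   contracted_edge->OpEliminationSetNewCost(e1, op, e2);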

  void set_selected_cost(const CostPtr &cost) { selected_cost_ = cost; }
  const CostPtr &selected_cost() const { return selected_cost_; }
  void set_parameter_involve(int64_t para_invol) { is_output_parameter_involve_ = para_invol; }
  // In the training phase, when an operator's input contains a WEIGHT, or an output from other operators that involve
  // WEIGHT, these inputs should stay in memory until they are used in the backward phase, i.e. they are still kept in
  // memory at the end of the forward phase.
  Status CalculateMemoryCost();
  // In the inference phase, calculate the memory cost of this edge.
  Status CalculateMemoryCostForInference();
  void mark_output_critical() { is_output_critical_ = 1; }
  // Whether there exists any available strategy in 'cost_map_'
  bool CheckStrategyCostPossibility() const;

 private:
  std::string edge_name_;
  std::shared_ptr<OperatorInfo> prev_op_, next_op_;
  std::map<CostPtrKey, CostPtrList> cost_map_;
  // For each candidate strategy of prev_op, the TensorInfos of its outputs; likewise, next_op_input_ holds the
  // TensorInfos of next_op's inputs for each of its candidate strategies.
  std::vector<std::pair<std::shared_ptr<Strategy>, std::vector<TensorInfo>>> pre_op_output_;
  std::vector<std::pair<std::shared_ptr<Strategy>, std::vector<TensorInfo>>> next_op_input_;
  // the index of prev_op's output, and the index of next_op's input
  size_t prev_op_output_index_, next_op_input_index_;

  // pre_op_output_indexs_ and next_op_input_indexs_ store the indices of prev_op's outputs and next_op's inputs when
  // is_combined_ = true
  std::vector<size_t> pre_op_output_indexs_;
  std::vector<size_t> next_op_input_indexs_;
  // Is this edge constructed by combining multiple edges? If it is, then is_combined_ = true; otherwise
  // is_combined_ = false.
  bool is_combined_;
  // When a Parameter in the ANF graph is used by multiple operators, we include the Parameter in the costgraph by
  // replacing the Parameter with a TmpIdentity operator and connecting this TmpIdentity operator to the subsequent
  // operators. The resulting edges are different from normal edges, thus this Boolean variable distinguishes them.
  // If it is true, then we should guarantee that the strategy for the output tensor is consistent with that for the
  // input tensor.
  bool is_identity_edge;
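  // For example (illustrative, not part of the original header): if Parameter P feeds both OpA and OpB, P is replaced
  // by one TmpIdentity operator, producing the edges TmpIdentity--->OpA and TmpIdentity--->OpB; both edges have
  // is_identity_edge == true, so the chosen strategies must keep the output tensor consistent with the input tensor.
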
  CostPtr selected_cost_ = nullptr;
  // In the training phase, 'is_output_parameter_involve_' is used to mark whether the output of the previous operator
  // is parameter-involved
  int64_t is_output_parameter_involve_ = -1;  // -1: unset; 0: not parameter_involved; 1: parameter_involved
  // In the inference phase, this is used to mark whether the output of the previous operator is critical.
  int64_t is_output_critical_ = 0;

  // Returns whether two double variables are equal.
  bool IsDoubleEqual(double x, double y) const { return std::abs(x - y) < EPS; }
};
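
// A minimal construction sketch (illustrative only, not part of the original header). 'matmul1' and 'matmul2' stand
// for already-created OperatorInfo instances whose candidate strategies have been generated; the registration calls on
// OperatorInfo are hypothetical and live outside this header.
//   auto edge = std::make_shared<Edge>("matmul1-matmul2", matmul1, matmul2,
//                                      /*output_index_=*/static_cast<size_t>(0),
//                                      /*input_index_=*/static_cast<size_t>(0),
//                                      /*is_com=*/false);
//   matmul1->AddSuccEdge(edge);  // hypothetical wiring API
//   matmul2->AddPrevEdge(edge);  // hypothetical wiring API
//   if (edge->InitEdgeCost() != SUCCESS) {
//     // no feasible strategy pair on this edge; handle the failure
//   }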
}  // namespace parallel
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_FRONTEND_PARALLEL_AUTO_PARALLEL_EDGE_COSTMODEL_H_