/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
#ifndef PARALLEL_AUTO_PARALLEL_REC_COST_H_
#define PARALLEL_AUTO_PARALLEL_REC_COST_H_

#include <algorithm>
#include <iostream>
#include <limits>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "frontend/parallel/auto_parallel/rec_core/rec_graph.h"
#include "frontend/parallel/auto_parallel/rec_core/rec_strategy.h"
#include "frontend/parallel/ops_info/operator_info.h"
#include "utils/check_convert_utils.h"

32 namespace mindspore {
33 namespace parallel {
34 #define DOUBLE_MAX (std::numeric_limits<double>::max)()
35 #define DOUBLE_LOWEST (std::numeric_limits<double>::lowest)()
36 #define DOUBLE_MIN (std::numeric_limits<double>::min)()
37 
38 constexpr size_t BMM_COEF = 1;
39 constexpr size_t REDIS_COEF = 16;
40 constexpr double EXPERT_COEF = 0.15;
41 constexpr size_t REPLICATE_BELOW = 25;
42 constexpr bool ONLY_REDIST_WITH_SAME_SHAPE = true;
43 constexpr size_t NUMBER_ASCEND_CORES = 32;
44 constexpr size_t NDIMS = 4;
45 constexpr float FL_TWO = 2.0;
46 
47 bool SameShape(const Shape4D &shape1, const Shape4D &shape2);
48 
49 double costOfDistributing(const TensorParam &t);
50 double minNodeSize(const Graph::NodeType &node);
51 
52 double CostRedis(const Graph::NodeType &node,
53                  const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
54                  const std::vector<std::vector<float>> &mode, const Graph &graph);
55 
56 double CostRedisWithAdjacentNode(const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
57                                  const std::vector<std::vector<float>> &mode, size_t i_strategy, size_t i_node,
58                                  double tensor_size, bool is_search_forward);
59 
60 // class CostMatMul is used to compute the cost of MatMul operator.
61 class CostMatMul {
62  public:
63   StrategyRec GetOptimalStr(const Graph::NodeType &node,
64                             const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
65                             const Graph &graph, const bool isTraining);
66 
67   double GetMaxCostIn(const OperatorRec &op);
68 
69  private:
StrConcatDimI(int64_t a,int64_t b)70   double StrConcatDimI(int64_t a, int64_t b) {
71     cost_in_i_ = (static_cast<double>(a) * static_cast<double>(b)) / FL_TWO;
72     const auto matmul_mem_coef = CostModelContext::GetInstance()->rp_matmul_mem_coef();
73     cost_in_i_ = cost_in_i_ * matmul_mem_coef;
74 
75     return cost_in_i_;
76   }
77 
StrConcatDimJ(int64_t a,int64_t b)78   double StrConcatDimJ(int64_t a, int64_t b) {
79     cost_in_j_ = (static_cast<double>(a) * static_cast<double>(b)) / FL_TWO;
80 
81     return cost_in_j_;
82   }
83 
StrReduceDimK(int64_t a,int64_t b)84   double StrReduceDimK(int64_t a, int64_t b) {
85     cost_in_k_ = (static_cast<double>(a) * static_cast<double>(b)) / FL_TWO;
86 
87     return cost_in_k_;
88   }
89 
StrRecom(const double & cost_if_cut_i,const double & cost_if_cut_j,const double & cost_if_cut_k)90   double StrRecom(const double &cost_if_cut_i, const double &cost_if_cut_j, const double &cost_if_cut_k) {
91     double min_size = cost_if_cut_i < cost_if_cut_j ? (cost_if_cut_i < cost_if_cut_k ? cost_if_cut_i : cost_if_cut_k)
92                                                     : (cost_if_cut_j < cost_if_cut_k ? cost_if_cut_j : cost_if_cut_k);
93     cost_in_r_ = min_size * min_size / REPLICATE_BELOW;
94 
95     return cost_in_r_;
96   }
97 
98   StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str) const;
99 
100   double cost_in_i_ = 0;
101 
102   double cost_in_j_ = 0;
103 
104   double cost_in_k_ = 0;
105 
106   double cost_in_r_ = 0;
107 };  // class CostMatMul is used to compute the cost of MatMul operator.
108 
109 bool SplitOnlyOneDimension(const Graph &graph, float str);
110 
111 // class CostBatchMatMul is used to compute the cost of MatMul operator.
112 class CostBatchMatMul {
113  public:
114   StrategyRec GetOptimalStr(const Graph::NodeType &node,
115                             const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
116                             const Graph &graph, const bool isTraining);
117   double GetMaxCostIn(const Graph::NodeType &node);
118 
119  private:
120   enum Axis { B, X, I, J, K, R };
121   size_t getBatchDimsSize(const OperatorRec &op);
122   double cost(Axis a, const Graph::NodeType &node);
123   StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str) const;
124 };  // class CostBatchMatMul is used to compute the cost of MatMul operator.
125 
126 // class CostConvolution is used to compute the cost of Conv operator.
127 class CostConvolution {
128  public:
129   StrategyRec GetOptimalStr(const Graph::NodeType &node,
130                             const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
131                             const Graph &graph, bool channel_partition);
132 
133   double GetMinCostIn(const Graph::NodeType &node);
134 
135  private:
StrDimB(int64_t TensorFilter)136   double StrDimB(int64_t TensorFilter) {
137     cost_in_b_ = static_cast<double>((TensorFilter) / FL_TWO);
138 
139     return cost_in_b_;
140   }
141 
StrDimI(int64_t TensorIn,int64_t TensorFilter)142   double StrDimI(int64_t TensorIn, int64_t TensorFilter) {
143     cost_in_i_ = static_cast<double>((TensorIn + TensorFilter) / FL_TWO);
144 
145     return cost_in_i_;
146   }
147 
StrDimJ(int64_t TensorIn,int64_t TensorFilter)148   double StrDimJ(int64_t TensorIn, int64_t TensorFilter) {
149     cost_in_j_ = static_cast<double>((TensorIn + TensorFilter) / FL_TWO);
150 
151     return cost_in_j_;
152   }
153 
StrDimK(int64_t TensorIn)154   double StrDimK(int64_t TensorIn) {
155     cost_in_k_ = static_cast<double>((TensorIn) / FL_TWO);
156 
157     return cost_in_k_;
158   }
159 
StrDimDI(int64_t TensorIn,int64_t TensorOut)160   double StrDimDI(int64_t TensorIn, int64_t TensorOut) {
161     cost_in_di_ = static_cast<double>((TensorIn + TensorOut) / FL_TWO);
162 
163     return cost_in_di_;
164   }
165 
StrDimDJ(int64_t TensorIn,int64_t TensorOut)166   double StrDimDJ(int64_t TensorIn, int64_t TensorOut) {
167     cost_in_dj_ = static_cast<double>((TensorIn + TensorOut) / FL_TWO);
168 
169     return cost_in_dj_;
170   }
171 
StrDimQ(int64_t TensorOut)172   double StrDimQ(int64_t TensorOut) {
173     cost_in_q_ = static_cast<double>((TensorOut) / FL_TWO);
174 
175     return cost_in_q_;
176   }
177 
178   StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str) const;
179 
180   double cost_in_b_ = 0;
181 
182   double cost_in_i_ = 0;
183 
184   double cost_in_j_ = 0;
185 
186   double cost_in_k_ = 0;
187 
188   double cost_in_di_ = 0;
189 
190   double cost_in_dj_ = 0;
191 
192   double cost_in_q_ = 0;
193 };  // class CostConvolution is used to compute the cost of Conv operator.
194 
195 // class CostPooling is used to compute the cost of Pooling operator.
196 class CostPooling {
197  public:
198   StrategyRec GetOptimalStr(const Graph::NodeType &node,
199                             const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
200                             const Graph &graph) const;
201 
GetMinCostIn()202   double GetMinCostIn() const { return cost_in_; }
203 
204  private:
205   StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str) const;
206 
207   double cost_in_ = 0;
208 };  // class CostPooling is used to compute the cost of Pooling operator.
209 
210 // class CostReshape is used to compute the cost of Reshape operator.
211 class CostReshape {
212  public:
213   StrategyRec GetOptimalStr(const Graph::NodeType &node) const;
214 
GetMinCostIn()215   double GetMinCostIn() const { return cost_in_; }
216 
217  private:
218   StrategyRec ChoseStr(StrategyRec str) const;
219 
220   double cost_in_ = 0;
221 };  // class CostReshape is used to compute the cost of Reshape operator.
222 
223 // class CostCommon is used to compute the cost of an element-wise operator
224 class CostCommon {
225  public:
226   virtual ~CostCommon() = default;
227 
228   virtual StrategyRec GetOptimalStr(const Graph::NodeType &node,
229                                     const std::vector<std::pair<std::string, StrategyRec>> &node_name_to_strategy,
230                                     const Graph &graph);
231 
GetMinCostIn()232   virtual double GetMinCostIn() const { return cost_in_; }
233 
234  protected:
235   virtual StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str);
236 
237   double cost_in_ = 0;
238 };  // class CostCommon is used to compute the cost of an element-wise operator
239 
240 // class CostBiasAdd is used to compute the cost of the addition between a tensor and a bias
241 class CostBiasAdd : public CostCommon {
242  protected:
243   StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str) override;
244 };
245 
246 // class CostAdd is used to compute the cost of Add operator.
247 class CostTensorAdd : public CostCommon {
248  protected:
249   StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str) override;
250 };
251 
252 // all the following operation are element-wise and have the same cost
253 class CostReLU : public CostCommon {};
254 class CostLog : public CostCommon {};
255 class CostExp : public CostCommon {};
256 class CostAdd : public CostCommon {};
257 class CostSub : public CostCommon {};
258 class CostMul : public CostCommon {};
259 class CostDiv : public CostCommon {};
260 class CostSqueeze : public CostCommon {};
261 class CostCast : public CostCommon {};
262 
263 // class BatchParallel is used to compute the cost of BatchParallel operator.
264 class CostBatchParallel {
265  public:
266   virtual ~CostBatchParallel() = default;
267 
268   virtual StrategyRec GetOptimalStr(const Graph::NodeType &node);
269 
GetMaxCostIn()270   virtual double GetMaxCostIn() const { return DOUBLE_MAX; }
271 
272  protected:
273   virtual StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str);
274 
275   double cost_in_ = 0;
276 };  // class BatchParallel is used to compute the cost of BatchParallel operator.
277 
278 class CostBatchNorm : public CostBatchParallel {};
279 class CostOneHot : public CostBatchParallel {};
280 class CostPRelu : public CostBatchParallel {};
281 class CostSoftmax : public CostBatchParallel {};
282 
283 class CostSoftmaxCrossEntropyWithLogits : public CostBatchParallel {
284  protected:
285   StrategyRec ChoseStr(const std::vector<double> &cost_op, StrategyRec str) override;
286 };
287 }  // namespace parallel
288 }  // namespace mindspore
289 #endif  // PARALLEL_AUTO_PARALLEL_REC_COST_H_
290