• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2019 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_CCSRC_FRONTEND_PARALLEL_COSTMODEL_CONTEXT_H_
18 #define MINDSPORE_CCSRC_FRONTEND_PARALLEL_COSTMODEL_CONTEXT_H_
19 
20 #include <memory>
21 #include <string>
22 #include <vector>
23 
24 #include "utils/log_adapter.h"
25 #include "utils/ms_context.h"
26 
27 namespace mindspore {
28 namespace parallel {
// Default values and constants for the cost-model context declared below.
// They stay as macros so that existing users of these names keep compiling.

// String literal "-"; by its name, the connector placed between two operator names.
#define OPERATOR_TO_OPERATOR_CONNECTOR "-"
// 1024^3 * 16 as a double (16 GiB if the unit is bytes -- TODO confirm the unit).
#define DEFAULT_DEVICE_MEMORY_CAPACITY (1024.0 * 1024.0 * 1024.0 * 16.0)
#define DEFAULT_COST_MODEL_ALPHA 1.0
#define DEFAULT_COST_MODEL_BETA_ASCEND 400.0  // for 'device_target = Ascend'
#define DEFAULT_COST_MODEL_BETA_GPU 50.0      // for 'device_target = GPU'
#define DEFAULT_COST_MODEL_GAMMA 0.001
#define DEFAULT_COST_MODEL_SIMPLIFY_CALCULATION true
#define DEFAULT_COST_MODEL_COMMUNI_THRESHOLD 2048.0
#define DEFAULT_COST_MODEL_COMMUNI_CONST 3072.0
#define DEFAULT_COST_MODEL_COMMUNI_BIAS 1024.0
#define DEFAULT_TENSOR_SLICE_ALIGNMENT_ENABLE false
#define DEFAULT_TENSOR_SLICE_ALIGNMENT_SIZE 16
#define DEFAULT_FULLY_USE_DEVICES true
#define DEFAULT_ELEMENTWISE_OP_STRA_FOLLOW false
#define DEFAULT_IS_MULTI_SUBGRAPHS false
// Values for the run phase: 0 is 'training', 1 is 'inference'.
#define TRAINING_PHASE 0
#define INFERENCE_PHASE 1
// No trailing semicolon: a macro that expands to `true;` cannot be used inside an
// expression, a condition or an argument list.
#define DEFAULT_TRIANGLE_STAR_STRATEGY_OVERWRITE true
#define DEFAULT_DP_ALGO_ENABLE_APPROX false
#define DEFAULT_DP_ALGO_APPROX_EPSILON 0.1
#define DEFAULT_DP_ALGO_SINGLE_LOOP true
50 
// Container for the tunable parameters of the parallel cost model and of the
// strategy-search (DP) algorithm.
//
// The constructor is private and copying is deleted, so callers obtain the
// object through GetInstance(). Every parameter has a setter (declared here,
// defined outside this header) and an inline const getter that returns the
// stored value.
class CostModelContext {
 public:
  ~CostModelContext() = default;
  CostModelContext(const CostModelContext &) = delete;
  CostModelContext &operator=(const CostModelContext &) = delete;

  // Reset routines; defined outside this header.
  void ResetCostModel();
  void ResetAlgoParameters();

  // Returns a shared pointer to a CostModelContext; defined outside this header.
  static std::shared_ptr<CostModelContext> GetInstance();
  void PrintCostModel();

  // Takes a device-target string; defined outside this header.
  void set_costmodel_context_for_device(const std::string &);

  // DEVICE_MEMORY_CAPACITY
  void set_device_memory_capacity(double);
  double device_memory_capacity() const { return device_memory_capacity_; }

  // COST_MODEL_ALPHA
  void set_costmodel_alpha(double);
  double costmodel_alpha() const { return costmodel_alpha_; }

  // COST_MODEL_BETA
  void set_costmodel_beta(double);
  double costmodel_beta() const { return costmodel_beta_; }

  // COST_MODEL_GAMMA
  void set_costmodel_gamma(double);
  double costmodel_gamma() const { return costmodel_gamma_; }

  // COST_MODEL_SIMPLIFY_CALCULATION
  void set_costmodel_simplify_cal(bool);
  bool costmodel_simplify_cal() const { return costmodel_simplify_cal_; }

  // COST_MODEL_COMMUNI_THRESHOLD
  void set_costmodel_communi_threshold(double);
  double costmodel_communi_threshold() const { return costmodel_communi_threshold_; }

  // COST_MODEL_COMMUNI_CONST
  void set_costmodel_communi_const(double);
  double costmodel_communi_const() const { return costmodel_communi_const_; }

  // COST_MODEL_COMMUNI_BIAS
  void set_costmodel_communi_bias(double);
  double costmodel_communi_bias() const { return costmodel_communi_bias_; }

  // MULTI_SUBGRAPHS
  void set_multi_subgraphs(bool);
  bool is_multi_subgraphs() const { return is_multi_subgraphs_; }

  // Allreduce-fusion parameters.
  void set_costmodel_allreduce_fusion_algorithm(int64_t);
  int64_t costmodel_allreduce_fusion_algorithm() const { return costmodel_allreduce_fusion_algorithm_; }

  void set_costmodel_allreduce_fusion_times(int64_t);
  int64_t costmodel_allreduce_fusion_times() const { return costmodel_allreduce_fusion_times_; }

  void set_costmodel_allreduce_fusion_tail_percent(double);
  double costmodel_allreduce_fusion_tail_percent() const { return costmodel_allreduce_fusion_tail_percent_; }

  void set_costmodel_allreduce_fusion_tail_time(double);
  double costmodel_allreduce_fusion_tail_time() const { return costmodel_allreduce_fusion_tail_time_; }

  void set_costmodel_allreduce_fusion_allreduce_inherent_time(double);
  double costmodel_allreduce_fusion_allreduce_inherent_time() const {
    return costmodel_allreduce_fusion_allreduce_inherent_time_;
  }

  void set_costmodel_allreduce_fusion_allreduce_bandwidth(double);
  double costmodel_allreduce_fusion_allreduce_bandwidth() const {
    return costmodel_allreduce_fusion_allreduce_bandwidth_;
  }

  void set_costmodel_allreduce_fusion_computation_time_parameter(double);
  double costmodel_allreduce_fusion_computation_time_parameter() const {
    return costmodel_allreduce_fusion_computation_time_parameter_;
  }

  // TENSOR_SLICE_ALIGNMENT_ENABLE
  void set_tensor_slice_alignment_enable(bool);
  bool tensor_slice_alignment_enable() const { return tensor_slice_alignment_enable_; }

  // TENSOR_SLICE_ALIGNMENT_SIZE
  void set_tensor_slice_alignment_size(size_t);
  size_t tensor_slice_alignment_size() const { return tensor_slice_alignment_size_; }

  // FULLY_USE_DEVICES
  void set_fully_use_device(bool);
  bool fully_use_device() const { return fully_use_device_; }

  // ELEMENTWISE_OP_STRA_FOLLOW
  void set_elementwise_stra_follow(bool);
  bool elementwise_stra_follow() const { return elementwise_stra_follow_; }

  // Triangle/star strategy overwrite switch (see the member comment below).
  void set_triangle_star_strategy_overwrite(bool);
  bool triangle_star_strategy_overwrite() const { return triangle_star_strategy_overwrite_; }

  // Run phase: 0 is 'training', 1 is 'inference'.
  void set_run_phase(int64_t);
  int64_t run_phase() const { return run_phase_; }

  // 'epsilon' used when APPROXIMATION is enabled in the DP algorithm.
  void set_dp_algo_approxi_epsilon(double);
  double dp_algo_approxi_epsilon() const { return dp_algo_approxi_epsilon_; }

  // Whether APPROXIMATION is enabled in the DP algorithm.
  void set_dp_algo_enable_approxi(bool);
  bool dp_algo_enable_approxi() const { return dp_algo_enable_approxi_; }

  // Whether a single suite of OperatorInfo is generated for a loop.
  void set_dp_algo_single_loop(bool);
  bool dp_algo_single_loop() const { return dp_algo_single_loop_; }

 private:
  CostModelContext();
  static std::shared_ptr<CostModelContext> cm_context_inst_;

  // NOTE: every member below is value-initialized (zero / false) so that a getter
  // can never read an indeterminate value, whatever the out-of-line constructor
  // assigns. These zeros are NOT the configured defaults; presumably the
  // constructor / reset routines assign those -- confirm in the implementation file.

  // DEVICE_MEMORY_CAPACITY
  double device_memory_capacity_{0.0};

  // COST_MODEL_ALPHA
  double costmodel_alpha_{0.0};

  // COST_MODEL_BETA
  double costmodel_beta_{0.0};

  // COST_MODEL_GAMMA
  double costmodel_gamma_{0.0};

  // COST_MODEL_SIMPLIFY_CALCULATION
  bool costmodel_simplify_cal_{false};

  // COST_MODEL_COMMUNI_THRESHOLD
  double costmodel_communi_threshold_{0.0};

  // COST_MODEL_COMMUNI_CONST
  double costmodel_communi_const_{0.0};

  // COST_MODEL_COMMUNI_BIAS
  double costmodel_communi_bias_{0.0};

  // MULTI_SUBGRAPHS
  bool is_multi_subgraphs_{false};

  // In the recovery phase of DP algorithm, when encountering triangle structure and star structure,
  // whether overwrite the right-node strategy
  bool triangle_star_strategy_overwrite_{false};

  // Whether to enable APPROXIMATION in the DP algorithm.
  bool dp_algo_enable_approxi_{false};

  // When APPROXIMATION is enabled in the DP algorithm, the 'epsilon' value used in the APPROXIMATION.
  double dp_algo_approxi_epsilon_{0.0};

  // Whether to generate a single suite of OperatorInfo for a loop.
  bool dp_algo_single_loop_{false};

  int64_t run_phase_{0};  // 0: 'training', 1: 'inference'

  int64_t costmodel_allreduce_fusion_algorithm_{0};

  int64_t costmodel_allreduce_fusion_times_{0};

  double costmodel_allreduce_fusion_tail_percent_{0.0};

  double costmodel_allreduce_fusion_tail_time_{0.0};

  double costmodel_allreduce_fusion_allreduce_inherent_time_{0.0};

  double costmodel_allreduce_fusion_allreduce_bandwidth_{0.0};

  double costmodel_allreduce_fusion_computation_time_parameter_{0.0};

  // TENSOR_SLICE_ALIGNMENT_ENABLE
  bool tensor_slice_alignment_enable_{false};

  // TENSOR_SLICE_ALIGNMENT_SIZE
  size_t tensor_slice_alignment_size_{0};

  // FULLY_USE_DEVICES
  bool fully_use_device_{false};

  // ELEMENTWISE_OP_STRA_FOLLOW
  bool elementwise_stra_follow_{false};
};
228 }  // namespace parallel
229 }  // namespace mindspore
230 
231 #endif  // MINDSPORE_CCSRC_FRONTEND_PARALLEL_COSTMODEL_CONTEXT_H_
232