• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2019 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_CCSRC_FRONTEND_PARALLEL_COSTMODEL_CONTEXT_H_
18 #define MINDSPORE_CCSRC_FRONTEND_PARALLEL_COSTMODEL_CONTEXT_H_
19 
20 #include <memory>
21 #include <string>
22 #include <vector>
23 #include "utils/log_adapter.h"
24 #include "utils/ms_context.h"
25 
26 namespace mindspore {
27 namespace parallel {
// Connector string "-" (the name suggests it joins two operator names; confirm at the use sites).
constexpr char OPERATOR_TO_OPERATOR_CONNECTOR[] = "-";

// ---------------------------------------------------------------------------------------------
// Default values, named after the cost-model parameters stored in CostModelContext.
// The boolean defaults are kept as macros (rather than constexpr) so that any existing
// preprocessor-level use in other translation units keeps working.
// ---------------------------------------------------------------------------------------------

// 16 * 1024^3; presumably a byte count (16 GiB) -- TODO confirm the unit.
constexpr float DEFAULT_DEVICE_MEMORY_CAPACITY = (1024.0 * 1024.0 * 1024.0 * 16.0);
constexpr float DEFAULT_COST_MODEL_ALPHA = 1.0;
constexpr float DEFAULT_COST_MODEL_BETA_ASCEND = 400.0;  // for 'device_target = Ascend'
constexpr float DEFAULT_COST_MODEL_BETA_GPU = 50.0;      // for 'device_target = GPU'
constexpr float DEFAULT_COST_MODEL_GAMMA = 0.001;
#define DEFAULT_COST_MODEL_SIMPLIFY_CALCULATION true
constexpr float DEFAULT_COST_MODEL_COMMUNI_THRESHOLD = 2048.0;
constexpr float DEFAULT_COST_MODEL_COMMUNI_CONST = 3072.0;
constexpr float DEFAULT_COST_MODEL_COMMUNI_BIAS = 1024.0;
#define DEFAULT_TENSOR_SLICE_ALIGNMENT_ENABLE false
constexpr size_t DEFAULT_TENSOR_SLICE_ALIGNMENT_SIZE = 16;
#define DEFAULT_FULLY_USE_DEVICES true
#define DEFAULT_ELEMENTWISE_OP_STRA_FOLLOW false
#define DEFAULT_IS_MULTI_SUBGRAPHS false
// Run-phase value for 'inference' (its counterpart TRAINING_PHASE is defined below).
#define INFERENCE_PHASE 1
// NOTE: deliberately no trailing ';'. The macro must expand to a plain expression so that it can
// be used inside conditions, comparisons and argument lists, not only as a full statement.
#define DEFAULT_TRIANGLE_STAR_STRATEGY_OVERWRITE true
#define DEFAULT_DP_ALGO_ENABLE_APPROX false
constexpr float DEFAULT_DP_ALGO_APPROX_EPSILON = 0.1;
constexpr float DEFAULT_RP_MATMUL_MEM_COEF = 0.1;
#define DEFAULT_DP_ALGO_SINGLE_LOOP false
// Run-phase value for 'training'.
constexpr int64_t TRAINING_PHASE = 0;
50 
/// Holder for the tunable parameters of the parallel cost model.
///
/// Instances cannot be copied and the constructor is private; the object is obtained through
/// GetInstance(). Every parameter is exposed as a `set_xxx()` / `xxx()` pair: setters are
/// defined out of line, getters simply return the stored field.
class CostModelContext {
 public:
  ~CostModelContext() = default;

  // Copying is disabled.
  CostModelContext(const CostModelContext &) = delete;
  CostModelContext &operator=(const CostModelContext &) = delete;

  /// Defined out of line; see the implementation file for which fields each reset touches.
  void ResetCostModel();
  void ResetAlgoParameters();

  /// Access point for the context object held in `cm_context_inst_`.
  static std::shared_ptr<CostModelContext> GetInstance();

  /// Defined out of line.
  void PrintCostModel();

  /// Adjusts the context for the given device target (defined out of line).
  void set_costmodel_context_for_device(const std::string &device_target);

  // ----- DEVICE_MEMORY_CAPACITY -----
  void set_device_memory_capacity(double dm_capacity);
  double device_memory_capacity() const { return device_memory_capacity_; }

  // ----- COST_MODEL_ALPHA -----
  void set_costmodel_alpha(double cm_alpha);
  double costmodel_alpha() const { return costmodel_alpha_; }

  // ----- COST_MODEL_BETA -----
  void set_costmodel_beta(double cm_beta);
  double costmodel_beta() const { return costmodel_beta_; }

  // ----- COST_MODEL_GAMMA -----
  void set_costmodel_gamma(double cm_gamma);
  double costmodel_gamma() const { return costmodel_gamma_; }

  // ----- COST_MODEL_SIMPLIFY_CALCULATION -----
  void set_costmodel_simplify_cal(bool cm_simplify);
  bool costmodel_simplify_cal() const { return costmodel_simplify_cal_; }

  // ----- COST_MODEL_COMMUNI_THRESHOLD -----
  void set_costmodel_communi_threshold(double cm_communi_th);
  double costmodel_communi_threshold() const { return costmodel_communi_threshold_; }

  // ----- COST_MODEL_COMMUNI_CONST -----
  void set_costmodel_communi_const(double cm_communi_const);
  double costmodel_communi_const() const { return costmodel_communi_const_; }

  // ----- COST_MODEL_COMMUNI_BIAS -----
  void set_costmodel_communi_bias(double cm_communi_bias);
  double costmodel_communi_bias() const { return costmodel_communi_bias_; }

  // ----- MULTI_SUBGRAPHS -----
  void set_multi_subgraphs(bool multi_graphs);
  bool is_multi_subgraphs() const { return is_multi_subgraphs_; }

  // ----- AllReduce fusion parameters -----
  void set_costmodel_allreduce_fusion_algorithm(int64_t algorithm);
  int64_t costmodel_allreduce_fusion_algorithm() const { return costmodel_allreduce_fusion_algorithm_; }

  void set_costmodel_allreduce_fusion_times(int64_t allreduce_fusion_times);
  int64_t costmodel_allreduce_fusion_times() const { return costmodel_allreduce_fusion_times_; }

  void set_costmodel_allreduce_fusion_tail_percent(double tail_percent);
  double costmodel_allreduce_fusion_tail_percent() const { return costmodel_allreduce_fusion_tail_percent_; }

  void set_costmodel_allreduce_fusion_tail_time(double tail_time);
  double costmodel_allreduce_fusion_tail_time() const { return costmodel_allreduce_fusion_tail_time_; }

  void set_costmodel_allreduce_fusion_allreduce_inherent_time(double allreduce_inherent_time);
  double costmodel_allreduce_fusion_allreduce_inherent_time() const {
    return costmodel_allreduce_fusion_allreduce_inherent_time_;
  }

  void set_costmodel_allreduce_fusion_allreduce_bandwidth(double allreduce_bandwidth);
  double costmodel_allreduce_fusion_allreduce_bandwidth() const {
    return costmodel_allreduce_fusion_allreduce_bandwidth_;
  }

  void set_costmodel_allreduce_fusion_computation_time_parameter(double computation_time_parameter);
  double costmodel_allreduce_fusion_computation_time_parameter() const {
    return costmodel_allreduce_fusion_computation_time_parameter_;
  }

  // ----- TENSOR_SLICE_ALIGNMENT_ENABLE -----
  void set_tensor_slice_alignment_enable(bool ts_align);
  bool tensor_slice_alignment_enable() const { return tensor_slice_alignment_enable_; }

  // ----- TENSOR_SLICE_ALIGNMENT_SIZE -----
  void set_tensor_slice_alignment_size(size_t ts_align_size);
  size_t tensor_slice_alignment_size() const { return tensor_slice_alignment_size_; }

  // ----- FULLY_USE_DEVICES -----
  void set_fully_use_device(bool fully_use);
  bool fully_use_device() const { return fully_use_device_; }

  // ----- ELEMENTWISE_OP_STRA_FOLLOW -----
  void set_elementwise_stra_follow(bool elementwise_follow);
  bool elementwise_stra_follow() const { return elementwise_stra_follow_; }

  // ----- Triangle/star strategy overwrite (DP algorithm recovery phase) -----
  void set_triangle_star_strategy_overwrite(bool overwrite);
  bool triangle_star_strategy_overwrite() const { return triangle_star_strategy_overwrite_; }

  // ----- Run phase (0: 'training', 1: 'inference') -----
  void set_run_phase(int64_t phase);
  int64_t run_phase() const { return run_phase_; }

  // ----- DP algorithm approximation epsilon -----
  void set_dp_algo_approxi_epsilon(double epsilon);
  double dp_algo_approxi_epsilon() const { return dp_algo_approxi_epsilon_; }

  // ----- RP algorithm MatMul memory coefficient -----
  void set_rp_matmul_mem_coef(double coef);
  double rp_matmul_mem_coef() const { return rp_matmul_mem_coef_; }

  // ----- DP algorithm approximation switch -----
  void set_dp_algo_enable_approxi(bool approxi);
  bool dp_algo_enable_approxi() const { return dp_algo_enable_approxi_; }

  // ----- DP algorithm single-loop switch -----
  void set_dp_algo_single_loop(bool single_loop);
  bool dp_algo_single_loop() const { return dp_algo_single_loop_; }

 private:
  // Only reachable from inside the class; defined out of line.
  CostModelContext();

  // Storage for the instance handed out by GetInstance().
  static std::shared_ptr<CostModelContext> cm_context_inst_;

  // NOTE: keep the declaration order of the fields below; it fixes the object layout and the
  // member initialization order.

  double device_memory_capacity_;  // DEVICE_MEMORY_CAPACITY

  double costmodel_alpha_;  // COST_MODEL_ALPHA

  double costmodel_beta_;  // COST_MODEL_BETA

  double costmodel_gamma_;  // COST_MODEL_GAMMA

  bool costmodel_simplify_cal_;  // COST_MODEL_SIMPLIFY_CALCULATION

  double costmodel_communi_threshold_;  // COST_MODEL_COMMUNI_THRESHOLD

  double costmodel_communi_const_;  // COST_MODEL_COMMUNI_CONST

  double costmodel_communi_bias_;  // COST_MODEL_COMMUNI_BIAS

  bool is_multi_subgraphs_;  // MULTI_SUBGRAPHS

  // During the recovery phase of the DP algorithm, whether the right-node strategy is
  // overwritten when a triangle structure or a star structure is encountered.
  bool triangle_star_strategy_overwrite_;

  // Turns APPROXIMATION on or off in the DP algorithm.
  bool dp_algo_enable_approxi_;

  // The 'epsilon' value used by the APPROXIMATION when it is enabled in the DP algorithm.
  double dp_algo_approxi_epsilon_;

  // Coefficient of the RP algorithm that adjusts the balance between data parallel and
  // model parallel.
  double rp_matmul_mem_coef_;

  // Whether a single suite of OperatorInfo is generated for a loop.
  bool dp_algo_single_loop_;

  int64_t run_phase_;  // 0: 'training', 1: 'inference'

  int64_t costmodel_allreduce_fusion_algorithm_;

  int64_t costmodel_allreduce_fusion_times_;

  double costmodel_allreduce_fusion_tail_percent_;

  double costmodel_allreduce_fusion_tail_time_;

  double costmodel_allreduce_fusion_allreduce_inherent_time_;

  double costmodel_allreduce_fusion_allreduce_bandwidth_;

  double costmodel_allreduce_fusion_computation_time_parameter_;

  bool tensor_slice_alignment_enable_;  // TENSOR_SLICE_ALIGNMENT_ENABLE

  size_t tensor_slice_alignment_size_;  // TENSOR_SLICE_ALIGNMENT_SIZE

  bool fully_use_device_;  // FULLY_USE_DEVICES

  bool elementwise_stra_follow_;  // ELEMENTWISE_OP_STRA_FOLLOW
};
234 }  // namespace parallel
235 }  // namespace mindspore
236 
237 #endif  // MINDSPORE_CCSRC_FRONTEND_PARALLEL_COSTMODEL_CONTEXT_H_
238