1 /** 2 * Copyright 2019 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_CCSRC_FRONTEND_PARALLEL_COSTMODEL_CONTEXT_H_ 18 #define MINDSPORE_CCSRC_FRONTEND_PARALLEL_COSTMODEL_CONTEXT_H_ 19 20 #include <memory> 21 #include <string> 22 #include <vector> 23 #include "utils/log_adapter.h" 24 #include "utils/ms_context.h" 25 26 namespace mindspore { 27 namespace parallel { 28 constexpr char OPERATOR_TO_OPERATOR_CONNECTOR[] = "-"; 29 constexpr float DEFAULT_DEVICE_MEMORY_CAPACITY = (1024.0 * 1024.0 * 1024.0 * 16.0); 30 constexpr float DEFAULT_COST_MODEL_ALPHA = 1.0; 31 constexpr float DEFAULT_COST_MODEL_BETA_ASCEND = 400.0; // for 'device_target = Ascend' 32 constexpr float DEFAULT_COST_MODEL_BETA_GPU = 50.0; // for 'device_target = GPU' 33 constexpr float DEFAULT_COST_MODEL_GAMMA = 0.001; 34 #define DEFAULT_COST_MODEL_SIMPLIFY_CALCULATION true 35 constexpr float DEFAULT_COST_MODEL_COMMUNI_THRESHOLD = 2048.0; 36 constexpr float DEFAULT_COST_MODEL_COMMUNI_CONST = 3072.0; 37 constexpr float DEFAULT_COST_MODEL_COMMUNI_BIAS = 1024.0; 38 #define DEFAULT_TENSOR_SLICE_ALIGNMENT_ENABLE false 39 constexpr size_t DEFAULT_TENSOR_SLICE_ALIGNMENT_SIZE = 16; 40 #define DEFAULT_FULLY_USE_DEVICES true 41 #define DEFAULT_ELEMENTWISE_OP_STRA_FOLLOW false 42 #define DEFAULT_IS_MULTI_SUBGRAPHS false 43 #define INFERENCE_PHASE 1 44 #define DEFAULT_TRIANGLE_STAR_STRATEGY_OVERWRITE true; 45 #define DEFAULT_DP_ALGO_ENABLE_APPROX false 46 constexpr float DEFAULT_DP_ALGO_APPROX_EPSILON = 0.1; 47 constexpr float DEFAULT_RP_MATMUL_MEM_COEF = 0.1; 48 #define DEFAULT_DP_ALGO_SINGLE_LOOP false 49 constexpr int64_t TRAINING_PHASE = 0; 50 51 class CostModelContext { 52 public: 53 ~CostModelContext() = default; 54 CostModelContext(const CostModelContext &) = delete; 55 CostModelContext &operator=(const CostModelContext &) = delete; 56 void ResetCostModel(); 57 void ResetAlgoParameters(); 58 59 static std::shared_ptr<CostModelContext> GetInstance(); 60 void PrintCostModel(); 61 62 void set_costmodel_context_for_device(const std::string &device_target); 63 // DEVICE_MEMORY_CAPACITY 64 void set_device_memory_capacity(double dm_capacity); device_memory_capacity()65 double device_memory_capacity() const { return device_memory_capacity_; } 66 67 // COST_MODEL_ALPHA 68 void set_costmodel_alpha(double cm_alpha); costmodel_alpha()69 double costmodel_alpha() const { return costmodel_alpha_; } 70 71 // COST_MODEL_BETA 72 void set_costmodel_beta(double cm_beta); costmodel_beta()73 double costmodel_beta() const { return costmodel_beta_; } 74 75 // COST_MODEL_GAMMA 76 void set_costmodel_gamma(double cm_gamma); costmodel_gamma()77 double costmodel_gamma() const { return costmodel_gamma_; } 78 79 // COST_MODEL_SIMPLIFY_CALCULATION 80 void set_costmodel_simplify_cal(bool cm_simplify); costmodel_simplify_cal()81 bool costmodel_simplify_cal() const { return costmodel_simplify_cal_; } 82 83 // COST_MODEL_COMMUNI_THRESHOLD 84 void set_costmodel_communi_threshold(double cm_communi_th); costmodel_communi_threshold()85 double costmodel_communi_threshold() const { return costmodel_communi_threshold_; } 86 87 // COST_MODEL_COMMUNI_CONST 88 void set_costmodel_communi_const(double cm_communi_const); costmodel_communi_const()89 double costmodel_communi_const() const { return costmodel_communi_const_; } 90 91 // COST_MODEL_COMMUNI_BIAS 92 void set_costmodel_communi_bias(double cm_communi_bias); costmodel_communi_bias()93 double costmodel_communi_bias() const { return costmodel_communi_bias_; } 94 95 void set_multi_subgraphs(bool multi_graphs); is_multi_subgraphs()96 bool is_multi_subgraphs() const { return is_multi_subgraphs_; } 97 98 void set_costmodel_allreduce_fusion_algorithm(int64_t algorithm); costmodel_allreduce_fusion_algorithm()99 int64_t costmodel_allreduce_fusion_algorithm() const { return costmodel_allreduce_fusion_algorithm_; } 100 101 void set_costmodel_allreduce_fusion_times(int64_t allreduce_fusion_times); costmodel_allreduce_fusion_times()102 int64_t costmodel_allreduce_fusion_times() const { return costmodel_allreduce_fusion_times_; } 103 104 void set_costmodel_allreduce_fusion_tail_percent(double tail_percent); costmodel_allreduce_fusion_tail_percent()105 double costmodel_allreduce_fusion_tail_percent() const { return costmodel_allreduce_fusion_tail_percent_; } 106 107 void set_costmodel_allreduce_fusion_tail_time(double tail_time); costmodel_allreduce_fusion_tail_time()108 double costmodel_allreduce_fusion_tail_time() const { return costmodel_allreduce_fusion_tail_time_; } 109 110 void set_costmodel_allreduce_fusion_allreduce_inherent_time(double allreduce_inherent_time); costmodel_allreduce_fusion_allreduce_inherent_time()111 double costmodel_allreduce_fusion_allreduce_inherent_time() const { 112 return costmodel_allreduce_fusion_allreduce_inherent_time_; 113 } 114 115 void set_costmodel_allreduce_fusion_allreduce_bandwidth(double allreduce_bandwidth); costmodel_allreduce_fusion_allreduce_bandwidth()116 double costmodel_allreduce_fusion_allreduce_bandwidth() const { 117 return costmodel_allreduce_fusion_allreduce_bandwidth_; 118 } 119 120 void set_costmodel_allreduce_fusion_computation_time_parameter(double computation_time_parameter); costmodel_allreduce_fusion_computation_time_parameter()121 double costmodel_allreduce_fusion_computation_time_parameter() const { 122 return costmodel_allreduce_fusion_computation_time_parameter_; 123 } 124 125 // TENSOR_SLICE_ALIGNMENT_ENABLE 126 void set_tensor_slice_alignment_enable(bool ts_align); tensor_slice_alignment_enable()127 bool tensor_slice_alignment_enable() const { return tensor_slice_alignment_enable_; } 128 129 // TENSOR_SLICE_ALIGNMENT_SIZE 130 void set_tensor_slice_alignment_size(size_t ts_align_size); tensor_slice_alignment_size()131 size_t tensor_slice_alignment_size() const { return tensor_slice_alignment_size_; } 132 133 // FULLY_USE_DEVICES 134 void set_fully_use_device(bool fully_use); fully_use_device()135 bool fully_use_device() const { return fully_use_device_; } 136 137 // ELEMENTWISE_OP_STRA_FOLLOW 138 void set_elementwise_stra_follow(bool elementwise_follow); elementwise_stra_follow()139 bool elementwise_stra_follow() const { return elementwise_stra_follow_; } 140 141 void set_triangle_star_strategy_overwrite(bool overwrite); triangle_star_strategy_overwrite()142 bool triangle_star_strategy_overwrite() const { return triangle_star_strategy_overwrite_; } 143 144 void set_run_phase(int64_t phase); run_phase()145 int64_t run_phase() const { return run_phase_; } 146 147 void set_dp_algo_approxi_epsilon(double epsilon); dp_algo_approxi_epsilon()148 double dp_algo_approxi_epsilon() const { return dp_algo_approxi_epsilon_; } 149 150 void set_rp_matmul_mem_coef(double coef); rp_matmul_mem_coef()151 double rp_matmul_mem_coef() const { return rp_matmul_mem_coef_; } 152 153 void set_dp_algo_enable_approxi(bool approxi); dp_algo_enable_approxi()154 bool dp_algo_enable_approxi() const { return dp_algo_enable_approxi_; } 155 156 void set_dp_algo_single_loop(bool single_loop); dp_algo_single_loop()157 bool dp_algo_single_loop() const { return dp_algo_single_loop_; } 158 159 private: 160 CostModelContext(); 161 static std::shared_ptr<CostModelContext> cm_context_inst_; 162 163 // DEVICE_MEMORY_CAPACITY 164 double device_memory_capacity_; 165 166 // COST_MODEL_ALPHA 167 double costmodel_alpha_; 168 169 // COST_MODEL_BETA 170 double costmodel_beta_; 171 172 // COST_MODEL_GAMMA 173 double costmodel_gamma_; 174 175 // COST_MODEL_SIMPLIFY_CALCULATION 176 bool costmodel_simplify_cal_; 177 178 // COST_MODEL_COMMUNI_THRESHOLD 179 double costmodel_communi_threshold_; 180 181 // COST_MODEL_COMMUNI_CONST 182 double costmodel_communi_const_; 183 184 // COST_MODEL_COMMUNI_BIAS 185 double costmodel_communi_bias_; 186 187 // MULTI_SUBGRAPHS 188 bool is_multi_subgraphs_; 189 190 // In the recovery phase of DP algorithm, when encountering triangle structure and star structure, 191 // whether overwrite the right-node strategy 192 bool triangle_star_strategy_overwrite_; 193 194 // Whether to enable APPROXIMATION in the DP algorithm. 195 bool dp_algo_enable_approxi_; 196 197 // When APPROXIMATION is enabled in the DP algorithm, the 'epsilon' value used in the APPROXIMATION. 198 double dp_algo_approxi_epsilon_; 199 200 // In RP algorithm, the coef value can adjust the balance between data parallel and model parallel. 201 double rp_matmul_mem_coef_; 202 203 // Whether to generate a single suite of OperatorInfo for a loop. 204 bool dp_algo_single_loop_; 205 206 int64_t run_phase_; // 0: 'training', 1: 'inference' 207 208 int64_t costmodel_allreduce_fusion_algorithm_; 209 210 int64_t costmodel_allreduce_fusion_times_; 211 212 double costmodel_allreduce_fusion_tail_percent_; 213 214 double costmodel_allreduce_fusion_tail_time_; 215 216 double costmodel_allreduce_fusion_allreduce_inherent_time_; 217 218 double costmodel_allreduce_fusion_allreduce_bandwidth_; 219 220 double costmodel_allreduce_fusion_computation_time_parameter_; 221 222 // TENSOR_SLICE_ALIGNMENT_ENABLE 223 bool tensor_slice_alignment_enable_; 224 225 // TENSOR_SLICE_ALIGNMENT_SIZE 226 size_t tensor_slice_alignment_size_; 227 228 // FULLY_USE_DEVICES 229 bool fully_use_device_; 230 231 // ELEMENTWISE_OP_STRA_FOLLOW 232 bool elementwise_stra_follow_; 233 }; 234 } // namespace parallel 235 } // namespace mindspore 236 237 #endif // MINDSPORE_CCSRC_FRONTEND_PARALLEL_COSTMODEL_CONTEXT_H_ 238