1 /** 2 * Copyright 2019 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_CCSRC_FRONTEND_PARALLEL_COSTMODEL_CONTEXT_H_ 18 #define MINDSPORE_CCSRC_FRONTEND_PARALLEL_COSTMODEL_CONTEXT_H_ 19 20 #include <memory> 21 #include <string> 22 #include <vector> 23 24 #include "utils/log_adapter.h" 25 #include "utils/ms_context.h" 26 27 namespace mindspore { 28 namespace parallel { 29 #define OPERATOR_TO_OPERATOR_CONNECTOR "-" 30 #define DEFAULT_DEVICE_MEMORY_CAPACITY (1024.0 * 1024.0 * 1024.0 * 16.0) 31 #define DEFAULT_COST_MODEL_ALPHA 1.0 32 #define DEFAULT_COST_MODEL_BETA_ASCEND 400.0 // for 'device_target = Ascend' 33 #define DEFAULT_COST_MODEL_BETA_GPU 50.0 // for 'device_target = GPU' 34 #define DEFAULT_COST_MODEL_GAMMA 0.001 35 #define DEFAULT_COST_MODEL_SIMPLIFY_CALCULATION true 36 #define DEFAULT_COST_MODEL_COMMUNI_THRESHOLD 2048.0 37 #define DEFAULT_COST_MODEL_COMMUNI_CONST 3072.0 38 #define DEFAULT_COST_MODEL_COMMUNI_BIAS 1024.0 39 #define DEFAULT_TENSOR_SLICE_ALIGNMENT_ENABLE false 40 #define DEFAULT_TENSOR_SLICE_ALIGNMENT_SIZE 16 41 #define DEFAULT_FULLY_USE_DEVICES true 42 #define DEFAULT_ELEMENTWISE_OP_STRA_FOLLOW false 43 #define DEFAULT_IS_MULTI_SUBGRAPHS false 44 #define TRAINING_PHASE 0 45 #define INFERENCE_PHASE 1 46 #define DEFAULT_TRIANGLE_STAR_STRATEGY_OVERWRITE true; 47 #define DEFAULT_DP_ALGO_ENABLE_APPROX false 48 #define DEFAULT_DP_ALGO_APPROX_EPSILON 0.1 49 #define DEFAULT_DP_ALGO_SINGLE_LOOP true 50 51 class CostModelContext { 52 public: 53 ~CostModelContext() = default; 54 CostModelContext(const CostModelContext &) = delete; 55 CostModelContext &operator=(const CostModelContext &) = delete; 56 void ResetCostModel(); 57 void ResetAlgoParameters(); 58 59 static std::shared_ptr<CostModelContext> GetInstance(); 60 void PrintCostModel(); 61 62 void set_costmodel_context_for_device(const std::string &); 63 // DEVICE_MEMORY_CAPACITY 64 void set_device_memory_capacity(double); device_memory_capacity()65 double device_memory_capacity() const { return device_memory_capacity_; } 66 67 // COST_MODEL_ALPHA 68 void set_costmodel_alpha(double); costmodel_alpha()69 double costmodel_alpha() const { return costmodel_alpha_; } 70 71 // COST_MODEL_BETA 72 void set_costmodel_beta(double); costmodel_beta()73 double costmodel_beta() const { return costmodel_beta_; } 74 75 // COST_MODEL_GAMMA 76 void set_costmodel_gamma(double); costmodel_gamma()77 double costmodel_gamma() const { return costmodel_gamma_; } 78 79 // COST_MODEL_SIMPLIFY_CALCULATION 80 void set_costmodel_simplify_cal(bool); costmodel_simplify_cal()81 bool costmodel_simplify_cal() const { return costmodel_simplify_cal_; } 82 83 // COST_MODEL_COMMUNI_THRESHOLD 84 void set_costmodel_communi_threshold(double); costmodel_communi_threshold()85 double costmodel_communi_threshold() const { return costmodel_communi_threshold_; } 86 87 // COST_MODEL_COMMUNI_CONST 88 void set_costmodel_communi_const(double); costmodel_communi_const()89 double costmodel_communi_const() const { return costmodel_communi_const_; } 90 91 // COST_MODEL_COMMUNI_BIAS 92 void set_costmodel_communi_bias(double); costmodel_communi_bias()93 double costmodel_communi_bias() const { return costmodel_communi_bias_; } 94 95 void set_multi_subgraphs(bool); is_multi_subgraphs()96 bool is_multi_subgraphs() const { return is_multi_subgraphs_; } 97 98 void set_costmodel_allreduce_fusion_algorithm(int64_t); costmodel_allreduce_fusion_algorithm()99 int64_t costmodel_allreduce_fusion_algorithm() const { return costmodel_allreduce_fusion_algorithm_; } 100 101 void set_costmodel_allreduce_fusion_times(int64_t); costmodel_allreduce_fusion_times()102 int64_t costmodel_allreduce_fusion_times() const { return costmodel_allreduce_fusion_times_; } 103 104 void set_costmodel_allreduce_fusion_tail_percent(double); costmodel_allreduce_fusion_tail_percent()105 double costmodel_allreduce_fusion_tail_percent() const { return costmodel_allreduce_fusion_tail_percent_; } 106 107 void set_costmodel_allreduce_fusion_tail_time(double); costmodel_allreduce_fusion_tail_time()108 double costmodel_allreduce_fusion_tail_time() const { return costmodel_allreduce_fusion_tail_time_; } 109 110 void set_costmodel_allreduce_fusion_allreduce_inherent_time(double); costmodel_allreduce_fusion_allreduce_inherent_time()111 double costmodel_allreduce_fusion_allreduce_inherent_time() const { 112 return costmodel_allreduce_fusion_allreduce_inherent_time_; 113 } 114 115 void set_costmodel_allreduce_fusion_allreduce_bandwidth(double); costmodel_allreduce_fusion_allreduce_bandwidth()116 double costmodel_allreduce_fusion_allreduce_bandwidth() const { 117 return costmodel_allreduce_fusion_allreduce_bandwidth_; 118 } 119 120 void set_costmodel_allreduce_fusion_computation_time_parameter(double); costmodel_allreduce_fusion_computation_time_parameter()121 double costmodel_allreduce_fusion_computation_time_parameter() const { 122 return costmodel_allreduce_fusion_computation_time_parameter_; 123 } 124 125 // TENSOR_SLICE_ALIGNMENT_ENABLE 126 void set_tensor_slice_alignment_enable(bool); tensor_slice_alignment_enable()127 bool tensor_slice_alignment_enable() const { return tensor_slice_alignment_enable_; } 128 129 // TENSOR_SLICE_ALIGNMENT_SIZE 130 void set_tensor_slice_alignment_size(size_t); tensor_slice_alignment_size()131 size_t tensor_slice_alignment_size() const { return tensor_slice_alignment_size_; } 132 133 // FULLY_USE_DEVICES 134 void set_fully_use_device(bool); fully_use_device()135 bool fully_use_device() const { return fully_use_device_; } 136 137 // ELEMENTWISE_OP_STRA_FOLLOW 138 void set_elementwise_stra_follow(bool); elementwise_stra_follow()139 bool elementwise_stra_follow() const { return elementwise_stra_follow_; } 140 141 void set_triangle_star_strategy_overwrite(bool); triangle_star_strategy_overwrite()142 bool triangle_star_strategy_overwrite() const { return triangle_star_strategy_overwrite_; } 143 144 void set_run_phase(int64_t); run_phase()145 int64_t run_phase() const { return run_phase_; } 146 147 void set_dp_algo_approxi_epsilon(double); dp_algo_approxi_epsilon()148 double dp_algo_approxi_epsilon() const { return dp_algo_approxi_epsilon_; } 149 150 void set_dp_algo_enable_approxi(bool); dp_algo_enable_approxi()151 bool dp_algo_enable_approxi() const { return dp_algo_enable_approxi_; } 152 153 void set_dp_algo_single_loop(bool); dp_algo_single_loop()154 bool dp_algo_single_loop() const { return dp_algo_single_loop_; } 155 156 private: 157 CostModelContext(); 158 static std::shared_ptr<CostModelContext> cm_context_inst_; 159 160 // DEVICE_MEMORY_CAPACITY 161 double device_memory_capacity_; 162 163 // COST_MODEL_ALPHA 164 double costmodel_alpha_; 165 166 // COST_MODEL_BETA 167 double costmodel_beta_; 168 169 // COST_MODEL_GAMMA 170 double costmodel_gamma_; 171 172 // COST_MODEL_SIMPLIFY_CALCULATION 173 bool costmodel_simplify_cal_; 174 175 // COST_MODEL_COMMUNI_THRESHOLD 176 double costmodel_communi_threshold_; 177 178 // COST_MODEL_COMMUNI_CONST 179 double costmodel_communi_const_; 180 181 // COST_MODEL_COMMUNI_BIAS 182 double costmodel_communi_bias_; 183 184 // MULTI_SUBGRAPHS 185 bool is_multi_subgraphs_; 186 187 // In the recovery phase of DP algorithm, when encountering triangle structure and star structure, 188 // whether overwrite the right-node strategy 189 bool triangle_star_strategy_overwrite_; 190 191 // Whether to enable APPROXIMATION in the DP algorithm. 192 bool dp_algo_enable_approxi_; 193 194 // When APPROXIMATION is enabled in the DP algorithm, the 'epsilon' value used in the APPROXIMATION. 195 double dp_algo_approxi_epsilon_; 196 197 // Whether to generate a single suite of OperatorInfo for a loop. 198 bool dp_algo_single_loop_; 199 200 int64_t run_phase_; // 0: 'training', 1: 'inference' 201 202 int64_t costmodel_allreduce_fusion_algorithm_; 203 204 int64_t costmodel_allreduce_fusion_times_; 205 206 double costmodel_allreduce_fusion_tail_percent_; 207 208 double costmodel_allreduce_fusion_tail_time_; 209 210 double costmodel_allreduce_fusion_allreduce_inherent_time_; 211 212 double costmodel_allreduce_fusion_allreduce_bandwidth_; 213 214 double costmodel_allreduce_fusion_computation_time_parameter_; 215 216 // TENSOR_SLICE_ALIGNMENT_ENABLE 217 bool tensor_slice_alignment_enable_; 218 219 // TENSOR_SLICE_ALIGNMENT_SIZE 220 size_t tensor_slice_alignment_size_; 221 222 // FULLY_USE_DEVICES 223 bool fully_use_device_; 224 225 // ELEMENTWISE_OP_STRA_FOLLOW 226 bool elementwise_stra_follow_; 227 }; 228 } // namespace parallel 229 } // namespace mindspore 230 231 #endif // MINDSPORE_CCSRC_FRONTEND_PARALLEL_COSTMODEL_CONTEXT_H_ 232