/**
 * Copyright 2021-2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_COMMON_GRAPH_KERNEL_GRAPH_KERNEL_FLAGS_H
#define MINDSPORE_CCSRC_COMMON_GRAPH_KERNEL_GRAPH_KERNEL_FLAGS_H

#include <map>
#include <memory>
#include <string>
#include <vector>
#include <utility>
#include "include/backend/visible.h"

namespace mindspore::graphkernel {
constexpr unsigned int OptLevel_0 = 0;  // Disabled
constexpr unsigned int OptLevel_1 = 1;  // Basic functions
constexpr unsigned int OptLevel_2 = 2;  // Default functions
constexpr unsigned int OptLevel_3 = 3;  // Experimental functions
constexpr unsigned int OptLevel_MAX = 4;

constexpr unsigned int OpLevel_0 = 0;
constexpr unsigned int OpLevel_1 = 1;
constexpr unsigned int OpLevel_2 = 2;
constexpr unsigned int OpLevel_MAX = 3;
constexpr unsigned int default_cpu_refer_tread_num = 8;

class BACKEND_EXPORT GraphKernelFlags {
 public:
  static const GraphKernelFlags &GetInstance();
  static void SaveJitConfig(const std::map<std::string, std::string> &jit_config);

  // Dump all flags to a JSON-format string.
  std::string DumpAllFlags() const;

#if defined(ENABLE_AKG) || defined(MSLITE_ENABLE_GRAPH_KERNEL)
  // Check whether graph_kernel is enabled.
  bool IsEnableGraphKernel() const { return opt_level > OptLevel_0; }
#else
  bool IsEnableGraphKernel() const { return false; }
#endif

  bool IsEnableKernelPacket() const;

  // Check whether GraphKernel supports the current situation.
  void CheckSupport() const;

  GraphKernelFlags(const GraphKernelFlags &flags) = delete;
  GraphKernelFlags(GraphKernelFlags &&flags) = delete;
  GraphKernelFlags &operator=(const GraphKernelFlags &flags) = delete;
  GraphKernelFlags &operator=(GraphKernelFlags &&flags) = delete;
  ~GraphKernelFlags() = default;

  /**
   * Dump info as human-readable text.
   * A directory "graph_kernel_dump" will be created, and all information will be dumped into this directory.
   */
  bool dump_as_text{false};

  /**
   * Enable stitch fusion in the graph kernel fusion strategy.
   *
   * Experimental feature, enabled by default when opt_level=3.
   */
  bool enable_stitch_fusion{false};

  /**
   * Enable recompute fusion in the graph kernel fusion strategy, enabled when opt_level>=2.
   */
  bool enable_recompute_fusion{false};

  /**
   * Enable parallel fusion in the graph kernel fusion strategy.
   *
   * Experimental feature, enabled by default when opt_level=3.
   */
  bool enable_parallel_fusion{false};

  /**
   * Parallelize AKG's operators by level.
   * 0: Parallelize operators by local data-relation analysis, with less memory influence.
   * 1: Parallelize operators by global analysis, with more memory influence.
   */
  unsigned int parallel_ops_level{OpLevel_0};
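
  /*
   * Flag-string sketch (illustration only, not normative): the switches above are normally
   * supplied through the "graph_kernel_flags" string, whose keys are assumed to mirror the
   * member names in this class, e.g.
   *
   *   --opt_level=3 --enable_parallel_fusion=true --parallel_ops_level=1
   *
   * The exact parsing rules are implemented in the corresponding .cc file.
   */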

  /**
   * Enable parallel op combination, default is false.
   */
  bool enable_parallel_op_combine{false};

  /**
   * Enable horizontal fusion in the graph kernel fusion strategy, default is false.
   */
  bool enable_horizontal_fusion{false};

  /**
   * Enable auto tensor inplace in graph kernel, default is false.
   */
  bool enable_auto_tensor_inplace{false};

  /**
   * Enable dynamic batch size for akg kernels, default is false.
   */
  bool enable_dynamic_batch{false};

  /**
   * Enable low precision for data transfer between graph kernels and for computation within graph kernels.
   *
   * Experimental feature, enabled by this flag.
   */
  bool enable_low_precision{false};

  /**
   * Enable debug mode for graph kernel.
   */
  bool enable_debug_mode{false};

  /**
   * Enable conv tuning on MindSpore Lite.
   */
  bool enable_lite_conv_tuning{false};

  /**
   * Enable vectorization on akg.
   */
  bool enable_vectorization{true};

  /**
   * Expand and cluster AKG's operators by level.
   */
  unsigned int fusion_ops_level{OpLevel_0};

  /**
   * Enable recompute fusion for CSR operations.
   */
  bool enable_csr_fusion{false};

  /**
   * Enable fusion for operators with dynamic shape inputs/outputs.
   */
  bool enable_dynamic_shape_fusion{false};

  /**
   * Optimization level, value from 0 to 3.
   * 0: Disable GraphKernel.
   * 1: Enable GraphKernel with basic features only.
   * 2: Enable GraphKernel with all stable features.
   * 3: Enable GraphKernel with all experimental features.
   * The default value is OptLevel_2 when the context "enable_graph_kernel" is set,
   * but if it is also changed in "graph_kernel_flags", then "graph_kernel_flags" prevails.
   */
  unsigned int opt_level{0};  // defaults to 0 or 2

  /**
   * Maximum number of dom ops to fuse with reduce. A valid value should be non-negative.
   * If set negative, the default value (20 on GPU/CPU, 10 on Ascend) will be used.
   */
  int reduce_fuse_depth{-1};

  /**
   * Online tuning level, value from 0 to 3.
   * 0: Disable online tuning.
   * 1-3: The higher the level, the larger the tuning space, and the more time it takes.
   */
  unsigned int online_tuning{0};

  /**
   * CPU reference thread number for conv and graph-split tuning, default is 8.
   */
  unsigned int cpu_refer_thread_num{default_cpu_refer_tread_num};

  /**
   * Threshold for detection of recompute's memory increment case, unit is byte.
   */
  int64_t recompute_increment_threshold{0};

  /**
   * Threshold for detection of recompute's memory peak case, unit is byte.
   */
  int64_t recompute_peak_threshold{0};

  /**
   * Threshold for the number of ops in a composite op.
   */
  int64_t composite_op_limit_size{200};

  /**
   * AKG's operator repository file path.
   */
  std::string repository_path;

  /**
   * Target info.
   * These flags can be used for cross-compiling. Available when the device target is CPU.
   * target_os: the operating system to run kernels on.
   * cpu_arch: the architecture; the default value is related to the build environment (e.g. "arm" or "x86_64").
   * cpu_feature: the instruction set to be used (e.g. "avx" or "avx512").
   * cpu_type: the CPU processor type (e.g. "core-avx2" or "skylake-avx512").
   */
  std::string target_os{"linux"};
  std::string cpu_arch;
  std::string cpu_feature;
  std::string cpu_type;
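
  /*
   * Cross-compiling sketch (illustration only, values taken from the comment above):
   * when the device target is CPU, the target info could be overridden as
   *
   *   --target_os=linux --cpu_arch=x86_64 --cpu_feature=avx512 --cpu_type=skylake-avx512
   *
   * assuming the same "--key=value" flag-string form as the other flags.
   */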

  /**
   * Kernel generator.
   * The generator used to compile kernels: AKG, MLIR, or DVM.
   */
  std::string kernel_generator{"AKG"};

  /**
   * Additional expanding operators (case sensitive).
   * The operators to be added to the default expanding-operator list.
   */
  std::vector<std::string> enable_expand_ops;

  /**
   * Expanding operators to be enabled (case sensitive).
   * Unlike "enable_expand_ops", the default list will be overwritten by this list.
   * Note that "enable_expand_ops" and "disable_expand_ops" will be ignored if this flag is set.
   */
  std::vector<std::string> enable_expand_ops_only;

  /**
   * Expanding operators to be disabled (case sensitive).
   * The behavior is undefined when this list overlaps with "enable_expand_ops".
   */
  std::vector<std::string> disable_expand_ops;

  /**
   * Additional clustering operators (case sensitive).
   * The operators to be added to the default clustering-operator list.
   */
  std::vector<std::string> enable_cluster_ops;

  /**
   * Clustering operators to be enabled (case sensitive).
   * Unlike "enable_cluster_ops", the default list will be overwritten by this list.
   * Note that "enable_cluster_ops" and "disable_cluster_ops" will be ignored if this flag is set.
   */
  std::vector<std::string> enable_cluster_ops_only;

  /**
   * Clustering operators to be disabled (case sensitive).
   * The behavior is undefined when this list overlaps with "enable_cluster_ops".
   */
  std::vector<std::string> disable_cluster_ops;

  /**
   * Arithmetic-simplify expressions to be enabled (case sensitive).
   * The default list will be overwritten by this list.
   * Note that "disable_simplify_exprs" will be ignored if this flag is set.
   */
  std::vector<std::string> enable_simplify_exprs_only;

  /**
   * Arithmetic-simplify expressions to be disabled (case sensitive).
   */
  std::vector<std::string> disable_simplify_exprs;

  /**
   * Passes to be enabled.
   * By default, the passes are controlled by "opt_level" and the target device;
   * users can manually enable some passes by setting this flag.
   * The format is "stage_id.pass_id" or "stage_name.pass_name", which corresponds to the ir filename.
   */
  std::vector<std::string> enable_pass;

  /**
   * Passes to be disabled.
   * By default, the passes are controlled by "opt_level" and the target device;
   * users can manually disable some passes by setting this flag.
   * The format is "stage_id.pass_id" or "stage_name.pass_name", which corresponds to the ir filename.
   */
  std::vector<std::string> disable_pass;

  /**
   * Cluster ops to run with AKG's CCE lib.
   */
  bool enable_cce_lib{false};
  std::vector<std::string> enable_cce_lib_ops;
  std::vector<std::string> enable_cce_lib_ops_only;
  std::vector<std::string> disable_cce_lib_ops;
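
  /*
   * List-flag sketch (assumption): list-valued flags such as "enable_expand_ops_only",
   * "disable_cluster_ops" or "enable_packet_ops_only" are expected as comma-separated
   * operator names, e.g.
   *
   *   --enable_expand_ops_only=Softmax,Sigmoid --disable_cluster_ops=Reshape
   *
   * The operator names here are placeholders; the accepted names depend on the backend.
   */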

  /**
   * The real kernels to be clustered by kernelpacket (case sensitive).
   */
  std::vector<std::string> enable_packet_ops_only;
  /**
   * The real kernels to be disabled by kernelpacket (case sensitive).
   */
  std::vector<std::string> disable_packet_ops;

 private:
  GraphKernelFlags(const std::string &graph_kernel_flags, bool enable_graph_kernel)
      : flags_cache_(graph_kernel_flags), enable_graph_kernel_(enable_graph_kernel) {}

  // Get the `graph_kernel_flags` string and the `enable_graph_kernel` context value.
  static std::pair<std::string, bool> GetGraphKernelConfig();
  static std::map<std::string, std::string> &GetJitConfig() {
    static std::map<std::string, std::string> jit_configs{};
    return jit_configs;
  }

  // Parse and refresh the flags.
  void Refresh();
  // Register the flags defined above.
  void RegisterFlags(std::map<std::string, std::string> *flag_map);

  // Cache the flag string to check whether the flags are changed.
  std::string flags_cache_;
  // Cache the enable_graph_kernel value to check whether the context is changed.
  bool enable_graph_kernel_;
};
}  // namespace mindspore::graphkernel
#endif  // MINDSPORE_CCSRC_COMMON_GRAPH_KERNEL_GRAPH_KERNEL_FLAGS_H
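
/*
 * Usage sketch (illustrative, not part of the original header): callers can read the flags
 * through the singleton declared above; only methods declared in this file are used here.
 *
 *   const auto &flags = mindspore::graphkernel::GraphKernelFlags::GetInstance();
 *   if (flags.IsEnableGraphKernel()) {
 *     flags.CheckSupport();                       // verify the current setup is supported
 *     std::string dumped = flags.DumpAllFlags();  // JSON-format snapshot of all flags
 *   }
 */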