• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2021-2023 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_CCSRC_COMMON_GRAPH_KERNEL_GRAPH_KERNEL_FLAGS_H
18 #define MINDSPORE_CCSRC_COMMON_GRAPH_KERNEL_GRAPH_KERNEL_FLAGS_H
19 
20 #include <map>
21 #include <memory>
22 #include <string>
23 #include <vector>
24 #include <utility>
25 #include "include/backend/visible.h"
26 
27 namespace mindspore::graphkernel {
// Optimization levels for the "opt_level" flag.
constexpr unsigned int OptLevel_0 = 0;  // Disabled
constexpr unsigned int OptLevel_1 = 1;  // Basic functions
constexpr unsigned int OptLevel_2 = 2;  // Default functions
constexpr unsigned int OptLevel_3 = 3;  // Experimental functions
constexpr unsigned int OptLevel_MAX = 4;  // One past the highest valid opt level

// Per-feature operator levels (used by e.g. "parallel_ops_level", "fusion_ops_level").
constexpr unsigned int OpLevel_0 = 0;
constexpr unsigned int OpLevel_1 = 1;
constexpr unsigned int OpLevel_2 = 2;
constexpr unsigned int OpLevel_MAX = 3;  // One past the highest valid op level

// Default value for the "cpu_refer_thread_num" flag.
constexpr unsigned int default_cpu_refer_thread_num = 8;
// Backward-compatible alias for the historical misspelling ("tread"); existing
// references keep working. Prefer default_cpu_refer_thread_num in new code.
constexpr unsigned int default_cpu_refer_tread_num = default_cpu_refer_thread_num;
39 
class BACKEND_EXPORT GraphKernelFlags {
 public:
  // Returns the process-wide flags instance.
  static const GraphKernelFlags &GetInstance();
  // Saves the jit config so it can be consulted when the flags are parsed.
  static void SaveJitConfig(const std::map<std::string, std::string> &jit_config);

  // Dump all flags to json-format string.
  std::string DumpAllFlags() const;

#if defined(ENABLE_AKG) || defined(MSLITE_ENABLE_GRAPH_KERNEL)
  // Check whether graph_kernel is enabled (any opt_level above Disabled).
  bool IsEnableGraphKernel() const { return opt_level > OptLevel_0; }
#else
  // Without a kernel compiler backend built in, graph kernel is always disabled.
  bool IsEnableGraphKernel() const { return false; }
#endif

  // Check whether kernelpacket clustering is enabled.
  bool IsEnableKernelPacket() const;

  // Check whether GraphKernel supports current situation.
  void CheckSupport() const;

  // Shared read-only instance: copy and move are forbidden.
  GraphKernelFlags(const GraphKernelFlags &flags) = delete;
  GraphKernelFlags(GraphKernelFlags &&flags) = delete;
  GraphKernelFlags &operator=(const GraphKernelFlags &flags) = delete;
  GraphKernelFlags &operator=(GraphKernelFlags &&flags) = delete;
  ~GraphKernelFlags() = default;

  /**
   * Dump info as human-readable text.
   * A directory "graph_kernel_dump" will be created, and all information will be dumped in this directory.
   */
  bool dump_as_text{false};

  /**
   * Enable stitch fusion in graph kernel fusion strategy.
   *
   * Experimental feature, enabled by default when opt_level=3.
   */
  bool enable_stitch_fusion{false};

  /**
   * Enable recompute fusion in graph kernel fusion strategy.
   * NOTE(review): original comment said enabled when "op_level>=2" — presumably
   * opt_level>=2; confirm against the flag-parsing implementation.
   */
  bool enable_recompute_fusion{false};

  /**
   * Enable parallel fusion in graph kernel fusion strategy.
   *
   * Experimental feature, enabled by default when opt_level=3.
   */
  bool enable_parallel_fusion{false};

  /**
   * Parallel AKG's operators by level.
   * 0: Parallel operators by local data relation analysis with less memory influence.
   * 1: Parallel operators with global analysis with more memory influence.
   */
  unsigned int parallel_ops_level{OpLevel_0};

  /**
   * Enable parallel op combination, default is false.
   */
  bool enable_parallel_op_combine{false};

  /**
   * Enable horizontal fusion in graph kernel fusion strategy, default is false.
   */
  bool enable_horizontal_fusion{false};

  /**
   * Enable auto tensor inplace in graph kernel, default is false.
   */
  bool enable_auto_tensor_inplace{false};

  /**
   * Enable dynamic batch size for akg kernels, default is false.
   */
  bool enable_dynamic_batch{false};

  /**
   * Enable low precision in data transferring between graph kernel and computing in graph kernel.
   * Experimental feature, enabled by the enable_low_precision flag.
   */
  bool enable_low_precision{false};

  /**
   * Enable debug mode for graph kernel.
   */
  bool enable_debug_mode{false};

  /**
   * Enable conv tuning on mindspore lite.
   */
  bool enable_lite_conv_tuning{false};

  /**
   * Enable vectorization on akg. Enabled by default.
   */
  bool enable_vectorization{true};

  /**
   * Expand and cluster AKG's operators by level.
   */
  unsigned int fusion_ops_level{OpLevel_0};

  /**
   * Enable recompute fusion for CSR operations.
   */
  bool enable_csr_fusion{false};

  /**
   * Enable fusion for operators with dynamic shape inputs/outputs.
   */
  bool enable_dynamic_shape_fusion{false};

  /**
   * Optimization level, value from 0 to 3.
   * 0: Disable GraphKernel
   * 1: Enable GraphKernel with basic features only.
   * 2: Enable GraphKernel with all stable features.
   * 3: Enable GraphKernel with all experimental features.
   * The default value is OptLevel_2 when the context "enable_graph_kernel" is set,
   * but if it's also changed in "graph_kernel_flags", then the "graph_kernel_flags" will prevail.
   */
  unsigned int opt_level{0};  // defaults 0 or 2

  /**
   * Maximum number of dom ops to fuse with reduce. Valid value should be non-negative.
   * If set negative, default value(20 on GPU/CPU, 10 on Ascend) will be used.
   */
  int reduce_fuse_depth{-1};

  /**
   * Online tuning level, value from 0 to 3.
   * 0: Disable online tuning
   * 1-3: The higher level, the larger tuning space, and the more time it takes.
   */
  unsigned int online_tuning{0};

  /**
   * Cpu refer thread num for conv and graph split tuning, default is 8.
   */
  unsigned int cpu_refer_thread_num{default_cpu_refer_tread_num};

  /**
   * Threshold for detection of recompute's memory increment case, unit is byte.
   */
  int64_t recompute_increment_threshold{0};

  /**
   * Threshold for detection of recompute's memory peak case, unit is byte.
   */
  int64_t recompute_peak_threshold{0};

  /**
   * Threshold for composite ops number.
   */
  int64_t composite_op_limit_size{200};

  /**
   * AKG's operator repository file path.
   */
  std::string repository_path;

  /**
   * Target info.
   * These flags can be used for cross-compiling. Available when the device target is cpu.
   * target_os: the operating system to run kernels.
   * cpu_arch: the architecture, default value is related to the building environment (e.g. "arm" or "x86_64")
   * cpu_feature: the instruction set to be used. (e.g. "avx" or "avx512")
   * cpu_type: the cpu processor type. (e.g. "core-avx2" or "skylake-avx512")
   */
  std::string target_os{"linux"};
  std::string cpu_arch;
  std::string cpu_feature;
  std::string cpu_type;

  /**
   * Kernel Generator.
   * The generator used to compile kernels, AKG or MLIR or DVM.
   */
  std::string kernel_generator{"AKG"};

  /**
   * Additional expanding operators (case sensitive).
   * The operators to be added into the default expanding operator list.
   */
  std::vector<std::string> enable_expand_ops;

  /**
   * Expanding operators to be enabled (case sensitive).
   * Unlike the "enable_expand_ops", the default list will be overwritten by this list.
   * Note that the "enable_expand_ops" and "disable_expand_ops" will be ignored if this flag is set.
   */
  std::vector<std::string> enable_expand_ops_only;

  /**
   * Expanding operators to be disabled (case sensitive).
   * The behavior is undefined when this list overlaps with "enable_expand_ops".
   */
  std::vector<std::string> disable_expand_ops;

  /**
   * Additional clustering operators (case sensitive).
   * The operators to be added into the default clustering operator list.
   */
  std::vector<std::string> enable_cluster_ops;

  /**
   * Clustering operators to be enabled (case sensitive).
   * Unlike the "enable_cluster_ops", the default list will be overwritten by this list.
   * Note that the "enable_cluster_ops" and "disable_cluster_ops" will be ignored if this flag is set.
   */
  std::vector<std::string> enable_cluster_ops_only;

  /**
   * Clustering operators to be disabled (case sensitive).
   * The behavior is undefined when this list overlaps with "enable_cluster_ops".
   */
  std::vector<std::string> disable_cluster_ops;

  /**
   * Arithmetic simplify expressions to be enabled (case sensitive).
   * The default list will be overwritten by this list.
   * Note that "disable_simplify_exprs" will be ignored if this flag is set.
   */
  std::vector<std::string> enable_simplify_exprs_only;

  /**
   * Arithmetic simplify expressions to be disabled (case sensitive).
   */
  std::vector<std::string> disable_simplify_exprs;

  /**
   * Passes to be enabled.
   * By default, the passes is controlled by "opt_level" and target device,
   * user can manually enable some passes by setting this flag.
   * The format is "stage_id.pass_id" or "stage_name.pass_name", which corresponds to the ir filename.
   */
  std::vector<std::string> enable_pass;

  /**
   * Passes to be disabled.
   * By default, the passes is controlled by "opt_level" and target device,
   * user can manually disable some passes by setting this flag.
   * The format is "stage_id.pass_id" or "stage_name.pass_name", which corresponds to the ir filename.
   */
  std::vector<std::string> disable_pass;

  /**
   * Cluster ops to run akg cce lib.
   */
  bool enable_cce_lib{false};
  std::vector<std::string> enable_cce_lib_ops;       // additions to the cce-lib op list
  std::vector<std::string> enable_cce_lib_ops_only;  // overrides the cce-lib op list when set
  std::vector<std::string> disable_cce_lib_ops;      // ops excluded from cce lib

  /**
   * The real kernel to be clustered by kernelpacket (case sensitive).
   */
  std::vector<std::string> enable_packet_ops_only;
  /**
   * The real kernel to be disabled by kernelpacket (case sensitive).
   */
  std::vector<std::string> disable_packet_ops;

 private:
  // Private ctor: instances are created only via GetInstance().
  GraphKernelFlags(const std::string &graph_kernel_flags, bool enable_graph_kernel)
      : flags_cache_(graph_kernel_flags), enable_graph_kernel_(enable_graph_kernel) {}

  // get the `graph_kernel_flags` and `enable_graph_kernel`
  static std::pair<std::string, bool> GetGraphKernelConfig();
  // Process-lifetime storage for the jit config saved by SaveJitConfig().
  static std::map<std::string, std::string> &GetJitConfig() {
    static std::map<std::string, std::string> jit_configs{};
    return jit_configs;
  }

  // parse and refresh the flags
  void Refresh();
  // register the flags defined above
  void RegisterFlags(std::map<std::string, std::string> *flag_map);

  // cache the flag string to check whether the flags is changed.
  std::string flags_cache_;
  // cache the enable_graph_kernel value to check whether the context is changed.
  bool enable_graph_kernel_;
};
328 }  // namespace mindspore::graphkernel
329 #endif  // MINDSPORE_CCSRC_COMMON_GRAPH_KERNEL_GRAPH_KERNEL_FLAGS_H
330