1 /**
2 * Copyright 2019 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_COMMON_UTILS_H_
18 #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_COMMON_UTILS_H_
19
#include <dirent.h>
#include <algorithm>
#include <iterator>
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include <nlohmann/json.hpp>
#include "backend/kernel_compiler/kernel.h"
#include "backend/kernel_compiler/oplib/opinfo.h"
#include "backend/kernel_compiler/kernel_build_info.h"
33
namespace mindspore {
namespace kernel {
// Default directory (relative to the working dir) where compiled kernel
// artifacts are cached; also the default base path of SaveJsonInfo() below.
constexpr auto kCceKernelMeta = "./kernel_meta/";
// Cache directory for compiled CUDA kernels.
constexpr auto kGpuKernelMeta = "./cuda_meta";
// Processor name strings used to tag kernels by target device.
constexpr auto kProcessorAiCore = "aicore";
constexpr auto kProcessorAiCpu = "aicpu";
constexpr auto kProcessorCuda = "cuda";
constexpr auto kProcessorUnknown = "unknown";
// File suffixes of the cached kernel meta files.
constexpr auto kJsonSuffix = ".json";
constexpr auto kInfoSuffix = ".info";
// Compile time limit — presumably seconds (600 = 10 min); confirm at use site.
constexpr unsigned int AUTODIFF_COMPILE_OVERTIME = 600;
// Attribute/argument key for the data format of an op.
constexpr auto kArgDataformat = "data_format";

// Device targets for which compiled kernels can be searched/cached.
// NOTE(review): a non-inline `const` vector in a header gives every
// translation unit its own copy; consider C++17 `inline` or a function-local
// static if the project's standard allows it.
const std::vector<std::string> support_devices = {"aicore", "aicpu", "cuda"};
48
// Runtime handle of a loaded kernel binary: the address of its launch stub
// plus the block dim it was compiled with (names suggest Ascend/TBE usage —
// confirm against the loader code).
struct KernelMetaInfo {
  uintptr_t func_stub_;  // address of the kernel's function stub
  uint32_t block_dim_;   // block dimension used when launching the kernel
};
using KernelMetaPtr = std::shared_ptr<KernelMetaInfo>;
54
// Process-wide singleton that indexes compiled kernel JSON files.
// Initialize() prepares `kernel_meta_path_` and flips `initialized_`;
// Search()/Insert() consult/extend the in-memory name -> json map
// (implementations live in the .cc file).
class KernelMeta {
 public:
  KernelMeta() = default;
  // Sets up the kernel meta path and marks the singleton as initialized.
  void Initialize();
  // Looks up `kernel_name`; presumably returns the cached kernel JSON
  // (empty string when absent) — confirm in the implementation.
  std::string Search(const std::string &kernel_name) const;
  // Records the kernel_name -> kernel_json mapping; returns false on failure.
  bool Insert(const std::string &kernel_name, const std::string &kernel_json);
  // Directory where kernel meta files are stored (set by Initialize()).
  std::string kernel_meta_path() const { return kernel_meta_path_; }
  bool initialized() const { return initialized_; }
  // Meyers singleton: function-local static, thread-safe init since C++11.
  static KernelMeta *GetInstance() {
    static KernelMeta kernel_meta;
    return &kernel_meta;
  }
  ~KernelMeta() = default;

 private:
  bool initialized_ = false;
  std::string kernel_meta_path_;
  // kernel name -> cached kernel JSON (path or content; see .cc file).
  std::unordered_map<std::string, std::string> kernel_meta_map_;
};
74
// True if a compiled kernel named `kernel_name` already exists in the cache.
bool CheckCache(const std::string &kernel_name);
// Looks up the cached kernel pack for `kernel_name` built for `processor`
// (one of the `support_devices` entries).
KernelPackPtr SearchCache(const std::string &kernel_name, const std::string &processor);
// Inserts the kernel built for `processor` into the cache and returns the
// resulting kernel pack.
KernelPackPtr InsertCache(const std::string &kernel_name, const std::string &processor);
// Converts a dtype name string (e.g. "float32") to the corresponding TypeId.
TypeId DtypeToTypeId(const std::string &dtypes);
// Converts a dtype name string to its abbreviated form (see .cc file).
std::string Dtype2ShortType(const std::string &dtypes);
// Converts a TypeId back to its string name; `unknown_as_default` presumably
// substitutes a default name for unknown ids — confirm in the implementation.
std::string TypeId2String(TypeId type_id, bool unknown_as_default = false);
// Number of bytes occupied by one element of the named dtype.
size_t GetDtypeNbyte(const std::string &dtypes);
// Computes the byte size of `shape` with element type `type_ptr` into
// `*size_i`; returns false on failure.
bool GetShapeSize(const std::vector<size_t> &shape, const TypePtr &type_ptr, int64_t *size_i);
// Parses op metadata of `kernel_node` for `processor` and appends the
// resulting kernel build infos to `*kernel_info_list`.
bool ParseMetadata(const CNodePtr &kernel_node, const std::shared_ptr<const OpInfo> &op_info_ptr, Processor processor,
                   std::vector<std::shared_ptr<KernelBuildInfo>> *const kernel_info_list);
// Writes `info` under `base_path` keyed by `json_name`; defaults to the CCE
// kernel meta directory.
void SaveJsonInfo(const std::string &json_name, const std::string &info, const std::string &base_path = kCceKernelMeta);
// Processor of `anf_node` as a string (see kProcessor* constants).
std::string GetProcessor(const AnfNodePtr &anf_node);
// Maps a processor name string to the Processor enum.
Processor GetProcessor(const string &processor);
// True when both shapes have identical rank and dimensions.
bool IsSameShape(const std::vector<size_t> &shape_a, const std::vector<size_t> &shape_b);
// Sign of `x` — presumably -1/0/+1; confirm in the implementation.
int Sign(float x);
// Resolves input `index` of `anf_node` to its producing node plus that
// node's output index.
std::pair<AnfNodePtr, size_t> GetKernelInput(const AnfNodePtr &anf_node, size_t index);
// Relates graph inputs in `input_list` to their uses inside `node_list`.
// NOTE(review): exact pairing semantics are defined in the .cc file.
std::vector<std::pair<AnfNodePtr, std::pair<size_t, size_t>>> GetInputIndex(const std::vector<AnfNodePtr> &node_list,
                                                                            const std::vector<AnfNodePtr> &input_list);
// Maps each graph output in `output_list` to its producing node and output
// index within `node_list`.
std::vector<std::pair<AnfNodePtr, size_t>> GetOutputIndex(const std::vector<AnfNodePtr> &node_list,
                                                          const std::vector<AnfNodePtr> &input_list,
                                                          const std::vector<AnfNodePtr> &output_list);
// Collects the real kernel nodes of `func_graph` into `*node_list`.
void GetValidKernelNodes(const FuncGraphPtr &func_graph, std::vector<AnfNodePtr> *node_list);
// Overload that additionally collects the graph's inputs and outputs.
void GetValidKernelNodes(const FuncGraphPtr &func_graph, std::vector<AnfNodePtr> *node_list,
                         std::vector<AnfNodePtr> *input_list, std::vector<AnfNodePtr> *output_list);
// Collects the output nodes of `func_graph` into `*output_list`.
void GetFuncGraphOutputNodes(const FuncGraphPtr &func_graph, std::vector<AnfNodePtr> *output_list);
// If input `input_idx` of `anf_node` carries a constant tensor value, dumps
// it into `*node_json` and returns true.
bool GetInputTensorValue(const AnfNodePtr &anf_node, size_t input_idx, nlohmann::json *const node_json);
// Collects the real outputs of `func_graph` as (node, output index) pairs.
void GetGraphRealOutput(const FuncGraphPtr &func_graph, std::vector<std::pair<AnfNodePtr, size_t>> *node_list);
// Whether `node` sits on a weight boundary — NOTE(review): precise meaning
// is defined by the implementation.
bool IsWeightBoundary(const AnfNodePtr &node);
// Reads the reduction "axis" attribute of `cnode` as a list of axes.
std::vector<int64_t> GetReduceAttrAxis(const CNodePtr &cnode);
// Processor string of `anf_node` (see kProcessor* constants).
std::string GetProcessorStr(const AnfNodePtr &anf_node);
// Derives the target processor (enum / string form) from the global context.
Processor GetProcessorFromContext();
std::string GetStrProcessorFromContext();
// Resize scale factor for in_size -> out_size; with `align_corners` the
// corner samples of input and output are aligned.
float Scaling(size_t in_size, size_t out_size, bool align_corners);
// Maps output grid coordinate `x` back onto the input grid via `scale`.
float ScaleGrid(const int x, const float scale);
// Bidirectional mapping between fusion-type names and FusionType values.
FusionType GetFusionTypeByName(const std::string &name);
std::string GetFusionNameByType(const kernel::FusionType &type);
// Precomputed data for one output position of a linear (lerp) resize: the
// two neighbouring source indices and the interpolation weight between them.
struct CachedInterpolation {
  size_t lower;  // index of the lower neighbouring source sample
  size_t upper;  // index of the upper neighbouring source sample
  float lerp;    // interpolation weight between lower and upper
};

// Fills `interpolation` with lower/upper/lerp entries for resizing a
// dimension of `in_size` samples to `out_size` samples using `scale`.
// NOTE(review): caller must supply enough CachedInterpolation entries
// (typically out_size + 1) — confirm against the implementation.
void ComputeInterpolationWeights(const size_t out_size, const size_t in_size, const float scale,
                                 CachedInterpolation *interpolation);
119
/// Joins the elements of `inputs` into a single "a, b, c"-style string.
/// Requires `operator<<(std::ostream &, const T &)`.
/// @param inputs values to render; may be empty.
/// @return comma-separated rendering, or "" for an empty vector.
/// NOTE: depends on <sstream> and <iterator>, which this header now includes
/// directly instead of relying on transitive includes.
template <typename T>
inline std::string Vector2Str(const std::vector<T> &inputs) {
  if (inputs.empty()) {
    return "";
  }
  std::ostringstream oss;
  // Stream every element but the last with a trailing ", ", then append the
  // final element without a separator.
  (void)std::copy(inputs.begin(), inputs.end() - 1, std::ostream_iterator<T>(oss, ", "));
  oss << inputs.back();
  return oss.str();
}
130
/// Bilinear interpolation between four corner values: first lerp along x on
/// the top and bottom edges, then lerp the two intermediate results along y.
/// `x_lerp`/`y_lerp` are the fractional weights toward the right/bottom.
template <typename T>
inline T ComputeLerp(T top_left, T top_right, T bottom_left, T bottom_right, T x_lerp, T y_lerp) {
  const T interp_top = top_left + (top_right - top_left) * x_lerp;
  const T interp_bottom = bottom_left + (bottom_right - bottom_left) * x_lerp;
  return interp_top + (interp_bottom - interp_top) * y_lerp;
}
137
// Casts each size_t dimension of `shape` to int64_t into `*long_shape`.
void CastShapeSizeToLong(const std::vector<size_t> &shape, std::vector<int64_t> *long_shape);
// Validates slice parameters (start/stop/step) against `input_shape`;
// presumably raises on invalid slices — confirm in the implementation.
void CheckSliceValid(const std::vector<int64_t> &start, const std::vector<int64_t> &stop,
                     const std::vector<int64_t> &step, const std::vector<int64_t> &input_shape);
// Flat element offset of the slice start, given per-dimension offsets.
size_t CalOffset(const std::vector<int64_t> &start, const std::vector<int64_t> &stop,
                 const std::vector<int64_t> &dim_offset);
// Per-dimension element strides (row-major) for `input_shape`.
std::vector<int64_t> CalDimOffset(const std::vector<int64_t> &input_shape);
// Number of contiguous elements to copy for one slice segment.
size_t GetCopySize(const std::vector<int64_t> &dim_offset, const std::vector<int64_t> &start,
                   const std::vector<int64_t> &stop);
// Size in bytes of a single element of type `t`.
size_t UnitSizeInBytes(const mindspore::TypeId &t);
147
// Raises MS_LOG(EXCEPTION) when a kernel receives a different number of
// inputs than its implementation expects. Arguments are parenthesized so
// callers may pass expressions.
#define CHECK_KERNEL_INPUTS_NUM(actual_inputs_num, expect_inputs_num, kernel_name)                     \
  do {                                                                                                 \
    if ((actual_inputs_num) != (expect_inputs_num)) {                                                  \
      MS_LOG(EXCEPTION) << (kernel_name) << " requires " << (expect_inputs_num) << " inputs, but got " \
                        << (actual_inputs_num) << ".";                                                 \
    }                                                                                                  \
  } while (0)

// Raises MS_LOG(EXCEPTION) when a kernel produces a different number of
// outputs than expected.
#define CHECK_KERNEL_OUTPUTS_NUM(actual_outputs_num, expect_outputs_num, kernel_name)                        \
  do {                                                                                                       \
    if ((actual_outputs_num) != (expect_outputs_num)) {                                                      \
      MS_LOG(EXCEPTION) << (kernel_name) << " should have " << (expect_outputs_num) << " outputs, but got "  \
                        << (actual_outputs_num) << ".";                                                      \
    }                                                                                                        \
  } while (0)

// Raises MS_LOG(EXCEPTION) when the provided workspace size differs from the
// size the kernel expects.
#define CHECK_KERNEL_WORKSPACE_SIZE(actual_size, expect_size, kernel_name)                                        \
  do {                                                                                                            \
    if ((actual_size) != (expect_size)) {                                                                         \
      MS_LOG(EXCEPTION) << (kernel_name) << " requires " << (expect_size) << " workspace, but got " << (actual_size) \
                        << ".";                                                                                   \
    }                                                                                                             \
  } while (0)
171 } // namespace kernel
172 } // namespace mindspore
173
174 #endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_COMMON_UTILS_H_
175