1 /**
2 * Copyright 2021-2023 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "tools/graph_kernel/converter/akg/utils.h"
18
19 #include <unistd.h>
20 #include <sys/wait.h>
21 #include <sys/stat.h>
22 #include <sys/mman.h>
23 #include <iostream>
24 #include <fstream>
25 #include <sstream>
26 #include <utility>
27 #include <algorithm>
28
29 #include "backend/common/graph_kernel/core/graph_kernel_utils.h"
30 #include "ir/anf.h"
31 #include "ir/func_graph.h"
32 #include "thread/threadpool.h"
33 #include "tools/common/tensor_util.h"
34 #include "utils/anf_utils.h"
35 #include "utils/file_utils.h"
36 #include "utils/log_adapter.h"
37 #include "utils/system/env.h"
38 #include "mindspore/ccsrc/include/common/debug/common.h"
39
40 namespace mindspore::graphkernel {
SaveJsonInfo(const std::string & json_name,const std::string & info)41 bool SaveJsonInfo(const std::string &json_name, const std::string &info) {
42 std::string path = json_name + ".info";
43 std::ofstream filewrite(path);
44 if (!filewrite.is_open()) {
45 MS_LOG(ERROR) << "Open file '" << path << "' failed!";
46 return false;
47 }
48 filewrite << info << std::endl;
49 filewrite.close();
50 return true;
51 }
52
SaveNodesInfo(const AnfNodePtrList & nodes,const std::string & dir,const DumpOption & option,std::map<AnfNodePtr,std::string> * node_kernel,std::set<std::string> * kernel_names)53 std::string SaveNodesInfo(const AnfNodePtrList &nodes, const std::string &dir, const DumpOption &option,
54 std::map<AnfNodePtr, std::string> *node_kernel, std::set<std::string> *kernel_names) {
55 auto dir_path = FileUtils::CreateNotExistDirs(dir);
56 if (!dir_path.has_value()) {
57 MS_LOG(ERROR) << "Failed to CreateNotExistDirs: " << dir;
58 return "";
59 }
60 std::set<std::string> unique_kernel_name;
61 for (const auto &node : nodes) {
62 graphkernel::GraphKernelJsonGenerator graph_kernel_json_generator(option);
63 auto fg = GetCNodeFuncGraph(node);
64 MS_EXCEPTION_IF_NULL(fg);
65 auto mng = fg->manager();
66 if (mng == nullptr) {
67 mng = Manage(fg, true);
68 fg->set_manager(mng);
69 }
70 std::vector<AnfNodePtr> node_list, input_list, output_list;
71 auto cnode = dyn_cast_ptr<CNode>(node);
72 auto use_akg_cce = false;
73 if (cnode != nullptr && cnode->HasAttr("use_akg_cce")) {
74 use_akg_cce = true;
75 }
76 GkUtils::GetValidKernelNodes(fg, &node_list, &input_list, &output_list);
77 (void)graph_kernel_json_generator.CollectFusedJson(node_list, input_list, output_list, use_akg_cce);
78 auto json_kernel_name = graph_kernel_json_generator.kernel_name();
79 if (node_kernel != nullptr) {
80 (*node_kernel)[node] = json_kernel_name;
81 }
82 if (!unique_kernel_name.insert(json_kernel_name).second) {
83 continue;
84 }
85 if (!SaveJsonInfo(dir_path.value() + "/" + json_kernel_name, graph_kernel_json_generator.kernel_json_str())) {
86 return "";
87 }
88 }
89 if (kernel_names != nullptr) {
90 *kernel_names = std::move(unique_kernel_name);
91 }
92 return dir_path.value();
93 }
94
GetCNodeDynamicInputIndex(const CNodePtr & cnode)95 std::string GetCNodeDynamicInputIndex(const CNodePtr &cnode) {
96 std::string dynamic_input_index;
97 auto cb = Callback::Instance();
98 for (size_t i = 1; i < cnode->size(); i++) {
99 if (cnode->input(i)->isa<CNode>() || cnode->input(i)->isa<Parameter>()) {
100 auto input_shape = cb->GetInputShape(cnode, i - 1);
101 if (input_shape.size() <= 0 || input_shape[0] != 1) {
102 MS_LOG(EXCEPTION) << "Dynamic inputs' batch size should be 1";
103 }
104 dynamic_input_index += std::to_string(i - 1) + ",";
105 }
106 }
107 return dynamic_input_index;
108 }
109
GetCNodeInputShapeStr(const CNodePtr & cnode)110 std::string GetCNodeInputShapeStr(const CNodePtr &cnode) {
111 std::string input_shape_str;
112 auto cb = Callback::Instance();
113 for (size_t i = 1; i < cnode->size(); i++) {
114 auto input_shape = cb->GetInputShape(cnode, i - 1);
115 input_shape_str += std::to_string(input_shape.size()) + ",";
116 for (auto &v : input_shape) {
117 input_shape_str += std::to_string(v) + ",";
118 }
119 }
120 return input_shape_str;
121 }
122
GetCNodeOutputShapeStr(const CNodePtr & cnode)123 std::string GetCNodeOutputShapeStr(const CNodePtr &cnode) {
124 std::string output_shape_str;
125 auto output_num = AnfUtils::GetOutputTensorNum(cnode);
126 auto cb = Callback::Instance();
127 for (size_t i = 0; i < output_num; i++) {
128 auto output_shape = cb->GetOutputShape(cnode, i);
129 output_shape_str += std::to_string(output_shape.size()) + ",";
130 for (auto &v : output_shape) {
131 output_shape_str += std::to_string(v) + ",";
132 }
133 }
134 return output_shape_str;
135 }
136
GetCNodeOutputTypeStr(const CNodePtr & cnode)137 std::string GetCNodeOutputTypeStr(const CNodePtr &cnode) {
138 std::string output_type_str;
139 auto output_num = AnfUtils::GetOutputTensorNum(cnode);
140 auto cb = Callback::Instance();
141 for (size_t i = 0; i < output_num; i++) {
142 auto output_type = cb->GetOutputType(cnode, i);
143 output_type_str += std::to_string(static_cast<int>(output_type)) + ",";
144 }
145 return output_type_str;
146 }
147
GetCNodeOutputFormatStr(const CNodePtr & cnode)148 std::string GetCNodeOutputFormatStr(const CNodePtr &cnode) {
149 std::string output_format_str;
150 auto output_num = AnfUtils::GetOutputTensorNum(cnode);
151 auto cb = Callback::Instance();
152 for (size_t i = 0; i < output_num; i++) {
153 auto output_format = cb->GetOutputFormat(cnode, i);
154 if (output_format == kOpFormat_NHWC) {
155 output_format_str += "1,";
156 } else { // default, NCHW
157 output_format_str += "0,";
158 }
159 }
160 return output_format_str;
161 }
162
CreateAkgKernelParameter(const FuncGraphPtr & func_graph,const std::string & path,const std::string & kernel_name)163 ParameterPtr CreateAkgKernelParameter(const FuncGraphPtr &func_graph, const std::string &path,
164 const std::string &kernel_name) {
165 MS_CHECK_TRUE_RET(func_graph != nullptr, nullptr);
166 auto param_node = func_graph->add_parameter();
167 MS_CHECK_TRUE_RET(param_node != nullptr, nullptr);
168 param_node->set_name(kernel_name);
169 if (path.empty()) {
170 return nullptr;
171 }
172 if (!Common::FileExists(path)) {
173 return nullptr;
174 }
175 auto akg_fd = open(path.c_str(), O_RDONLY);
176 struct stat sb;
177 if (akg_fd < 0) {
178 MS_LOG(ERROR) << "open " << path << " failed.";
179 return nullptr;
180 }
181 if (fstat(akg_fd, &sb) == -1) {
182 MS_LOG(ERROR) << "fstat " << path << " failed.";
183 return nullptr;
184 }
185 auto akg_mmap = mmap(NULL, sb.st_size, PROT_READ, MAP_SHARED, akg_fd, 0);
186 if (akg_mmap == nullptr) {
187 MS_LOG(ERROR) << "mmap " << path << " failed.";
188 return nullptr;
189 }
190 (void)close(akg_fd);
191 auto tensor_info = lite::CreateTensorInfo(akg_mmap, sb.st_size, {sb.st_size}, kNumberTypeUInt8);
192 if (tensor_info == nullptr) {
193 MS_LOG(ERROR) << "Create tensor info failed";
194 return nullptr;
195 }
196 (void)munmap(akg_mmap, sb.st_size);
197 auto status = lite::InitParameterFromTensorInfo(param_node, tensor_info);
198 if (status != lite::RET_OK) {
199 MS_LOG(ERROR) << "init parameter from tensor info failed";
200 return nullptr;
201 }
202 return param_node;
203 }
204
CompileSingleJson(const std::string & json_name)205 bool CompileSingleJson(const std::string &json_name) {
206 std::string attrs = "None";
207 std::ostringstream py_cmd;
208 py_cmd << kAddMSLiteAkg;
209 py_cmd << "from akg.ms import compilewithjsonname\n";
210 py_cmd << "if not compilewithjsonname(\'" << json_name << "\', " << attrs << "):\n";
211 py_cmd << " raise RuntimeError(\'Compile fail for json: " << json_name << "\')";
212 std::string cmd = "python -c \"" + py_cmd.str() + "\"";
213 auto ret = std::system(cmd.c_str());
214 if (!WIFEXITED(ret)) {
215 MS_LOG(ERROR) << "Python process start fail! process content is as follows:\n" << cmd;
216 return false;
217 }
218 if (WEXITSTATUS(ret) != 0) {
219 MS_LOG(ERROR) << "Failed to compile json: " << json_name;
220 return false;
221 }
222 return true;
223 }
224
RetStatus(const int status)225 bool RetStatus(const int status) {
226 if (WIFEXITED(status)) {
227 if (WEXITSTATUS(status) == 0) {
228 MS_LOG(INFO) << "compile all pass for subprocess!";
229 return true;
230 } else {
231 MS_LOG(ERROR) << "Some jsons compile fail, please check log!";
232 }
233 } else if (WIFSIGNALED(status)) {
234 MS_LOG(ERROR) << "compile stopped by signal, maybe cost too long time!";
235 } else if (WSTOPSIG(status)) {
236 MS_LOG(ERROR) << "compile process is stopped by others!";
237 } else {
238 MS_LOG(ERROR) << "unknown error in compiling!";
239 }
240 return false;
241 }
242
CompileJsonsInList(const std::string & dir_path,const std::vector<std::string> & json_list)243 bool CompileJsonsInList(const std::string &dir_path, const std::vector<std::string> &json_list) {
244 auto json_list_size = static_cast<int>(json_list.size());
245 auto thread_num = std::min(PROCESS_LIMIT, json_list_size);
246 if (thread_num == 0) {
247 return true;
248 }
249 auto func = [&](void *cdata, int task_id, float lhs_scale, float rhs_scale) -> int {
250 bool all_pass{true};
251 for (int j = task_id; j < json_list_size; j += PROCESS_LIMIT) {
252 auto res = CompileSingleJson(dir_path + "/" + json_list[j] + ".info");
253 if (!res) {
254 all_pass = false;
255 }
256 }
257 if (!all_pass) {
258 MS_LOG(ERROR) << "Some task failed.";
259 return lite::RET_ERROR;
260 }
261 return lite::RET_OK;
262 };
263 auto *pool = ThreadPool::CreateThreadPool(thread_num);
264 if (pool && pool->ParallelLaunch(func, nullptr, thread_num) == lite::RET_OK) {
265 return true;
266 }
267 return false;
268 }
269
// Rewrite kernel names in `node_kernel` to reuse previously tuned object files.
// For each node whose kernel name equals its func_graph's tuned signature, point
// it at the cached "best_split_<node_name>.o" object under `dir_path` when that
// file exists, and drop the signature from `kernel_names` so the kernel is not
// compiled again. Entries whose name mismatches their signature are only logged.
void ExcludeTunedObj(const std::string &dir_path, std::set<std::string> *kernel_names,
                     std::map<AnfNodePtr, std::string> *node_kernel) {
  auto fs = system::Env::GetFileSystem();
  std::map<std::string, std::string> tuned_obj_map;  // < tuned_signature, best split object name >
  for (auto &iter : *node_kernel) {
    auto fg = GetCNodeFuncGraph(iter.first);
    MS_EXCEPTION_IF_NULL(fg);
    auto tuned_sign = fg->has_attr(kTunedSign) ? GetValue<std::string>(fg->get_attr(kTunedSign)) : "";
    if (tuned_sign == iter.second) {
      // the kernel name is the same as signature, find cache.
      auto cache = tuned_obj_map.find(tuned_sign);
      if (cache != tuned_obj_map.end()) {
        // A node with the same signature was already resolved this call; reuse
        // its object name (may still be refined below if this node has its own).
        iter.second = cache->second;
      }
      if (!fg->has_attr(kAttrNodeName)) {
        continue;
      }
      auto best_split_kernel = std::string("best_split_") + GetValue<std::string>(fg->get_attr(kAttrNodeName));
      auto best_split_file = dir_path + "/" + best_split_kernel + ".o";
      if (!fs->FileExist(best_split_file)) {
        // No tuned object on disk; keep whatever name iter.second holds now.
        continue;
      }
      // the cache file exists, use it.
      tuned_obj_map[tuned_sign] = best_split_kernel;
      iter.second = best_split_kernel;
      // The signature no longer needs to be compiled as a fresh kernel.
      (void)kernel_names->erase(tuned_sign);
      MS_LOG(INFO) << "Reuse the object file " << best_split_file;
    } else {
      if (!tuned_sign.empty()) {
        MS_LOG(INFO) << "The kernel_name of " << iter.first->fullname_with_scope() << " mismatch its signature. "
                     << "kernel_name is " << iter.second << ", and tuned_signature is " << tuned_sign;
      }
    }
  }
}
305 } // namespace mindspore::graphkernel
306