• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2021-2023 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
#include "tools/graph_kernel/converter/akg/utils.h"

#include <fcntl.h>
#include <unistd.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <iostream>
#include <fstream>
#include <sstream>
#include <utility>
#include <algorithm>

#include "backend/common/graph_kernel/core/graph_kernel_utils.h"
#include "ir/anf.h"
#include "ir/func_graph.h"
#include "thread/threadpool.h"
#include "tools/common/tensor_util.h"
#include "utils/anf_utils.h"
#include "utils/file_utils.h"
#include "utils/log_adapter.h"
#include "utils/system/env.h"
#include "mindspore/ccsrc/include/common/debug/common.h"
39 
40 namespace mindspore::graphkernel {
SaveJsonInfo(const std::string & json_name,const std::string & info)41 bool SaveJsonInfo(const std::string &json_name, const std::string &info) {
42   std::string path = json_name + ".info";
43   std::ofstream filewrite(path);
44   if (!filewrite.is_open()) {
45     MS_LOG(ERROR) << "Open file '" << path << "' failed!";
46     return false;
47   }
48   filewrite << info << std::endl;
49   filewrite.close();
50   return true;
51 }
52 
/**
 * Dump the AKG kernel json for every graph-kernel node in `nodes` into directory `dir`.
 *
 * For each node: generate its fused json, record the node->kernel_name mapping (if
 * `node_kernel` is given), and write "<kernel_name>.info" once per unique kernel name.
 *
 * \param nodes         Graph-kernel cnodes whose sub-funcgraphs are serialized.
 * \param dir           Output directory; created if it does not exist.
 * \param option        Dump options forwarded to GraphKernelJsonGenerator.
 * \param node_kernel   Optional out-map: node -> generated kernel name.
 * \param kernel_names  Optional out-set: the unique kernel names that were saved.
 * \return The resolved directory path on success, "" on any failure.
 */
std::string SaveNodesInfo(const AnfNodePtrList &nodes, const std::string &dir, const DumpOption &option,
                          std::map<AnfNodePtr, std::string> *node_kernel, std::set<std::string> *kernel_names) {
  auto dir_path = FileUtils::CreateNotExistDirs(dir);
  if (!dir_path.has_value()) {
    MS_LOG(ERROR) << "Failed to CreateNotExistDirs: " << dir;
    return "";
  }
  std::set<std::string> unique_kernel_name;
  for (const auto &node : nodes) {
    graphkernel::GraphKernelJsonGenerator graph_kernel_json_generator(option);
    auto fg = GetCNodeFuncGraph(node);
    MS_EXCEPTION_IF_NULL(fg);
    // The json generator requires a managed funcgraph; attach a manager if missing.
    auto mng = fg->manager();
    if (mng == nullptr) {
      mng = Manage(fg, true);
      fg->set_manager(mng);
    }
    std::vector<AnfNodePtr> node_list, input_list, output_list;
    auto cnode = dyn_cast_ptr<CNode>(node);
    // Nodes tagged with "use_akg_cce" are compiled through the AKG CCE path.
    auto use_akg_cce = false;
    if (cnode != nullptr && cnode->HasAttr("use_akg_cce")) {
      use_akg_cce = true;
    }
    GkUtils::GetValidKernelNodes(fg, &node_list, &input_list, &output_list);
    (void)graph_kernel_json_generator.CollectFusedJson(node_list, input_list, output_list, use_akg_cce);
    auto json_kernel_name = graph_kernel_json_generator.kernel_name();
    // Record the mapping for every node, even when the json file was already written.
    if (node_kernel != nullptr) {
      (*node_kernel)[node] = json_kernel_name;
    }
    // Identical kernels share one json file; skip duplicates.
    if (!unique_kernel_name.insert(json_kernel_name).second) {
      continue;
    }
    if (!SaveJsonInfo(dir_path.value() + "/" + json_kernel_name, graph_kernel_json_generator.kernel_json_str())) {
      return "";
    }
  }
  if (kernel_names != nullptr) {
    *kernel_names = std::move(unique_kernel_name);
  }
  return dir_path.value();
}
94 
GetCNodeDynamicInputIndex(const CNodePtr & cnode)95 std::string GetCNodeDynamicInputIndex(const CNodePtr &cnode) {
96   std::string dynamic_input_index;
97   auto cb = Callback::Instance();
98   for (size_t i = 1; i < cnode->size(); i++) {
99     if (cnode->input(i)->isa<CNode>() || cnode->input(i)->isa<Parameter>()) {
100       auto input_shape = cb->GetInputShape(cnode, i - 1);
101       if (input_shape.size() <= 0 || input_shape[0] != 1) {
102         MS_LOG(EXCEPTION) << "Dynamic inputs' batch size should be 1";
103       }
104       dynamic_input_index += std::to_string(i - 1) + ",";
105     }
106   }
107   return dynamic_input_index;
108 }
109 
GetCNodeInputShapeStr(const CNodePtr & cnode)110 std::string GetCNodeInputShapeStr(const CNodePtr &cnode) {
111   std::string input_shape_str;
112   auto cb = Callback::Instance();
113   for (size_t i = 1; i < cnode->size(); i++) {
114     auto input_shape = cb->GetInputShape(cnode, i - 1);
115     input_shape_str += std::to_string(input_shape.size()) + ",";
116     for (auto &v : input_shape) {
117       input_shape_str += std::to_string(v) + ",";
118     }
119   }
120   return input_shape_str;
121 }
122 
GetCNodeOutputShapeStr(const CNodePtr & cnode)123 std::string GetCNodeOutputShapeStr(const CNodePtr &cnode) {
124   std::string output_shape_str;
125   auto output_num = AnfUtils::GetOutputTensorNum(cnode);
126   auto cb = Callback::Instance();
127   for (size_t i = 0; i < output_num; i++) {
128     auto output_shape = cb->GetOutputShape(cnode, i);
129     output_shape_str += std::to_string(output_shape.size()) + ",";
130     for (auto &v : output_shape) {
131       output_shape_str += std::to_string(v) + ",";
132     }
133   }
134   return output_shape_str;
135 }
136 
GetCNodeOutputTypeStr(const CNodePtr & cnode)137 std::string GetCNodeOutputTypeStr(const CNodePtr &cnode) {
138   std::string output_type_str;
139   auto output_num = AnfUtils::GetOutputTensorNum(cnode);
140   auto cb = Callback::Instance();
141   for (size_t i = 0; i < output_num; i++) {
142     auto output_type = cb->GetOutputType(cnode, i);
143     output_type_str += std::to_string(static_cast<int>(output_type)) + ",";
144   }
145   return output_type_str;
146 }
147 
GetCNodeOutputFormatStr(const CNodePtr & cnode)148 std::string GetCNodeOutputFormatStr(const CNodePtr &cnode) {
149   std::string output_format_str;
150   auto output_num = AnfUtils::GetOutputTensorNum(cnode);
151   auto cb = Callback::Instance();
152   for (size_t i = 0; i < output_num; i++) {
153     auto output_format = cb->GetOutputFormat(cnode, i);
154     if (output_format == kOpFormat_NHWC) {
155       output_format_str += "1,";
156     } else {  // default, NCHW
157       output_format_str += "0,";
158     }
159   }
160   return output_format_str;
161 }
162 
CreateAkgKernelParameter(const FuncGraphPtr & func_graph,const std::string & path,const std::string & kernel_name)163 ParameterPtr CreateAkgKernelParameter(const FuncGraphPtr &func_graph, const std::string &path,
164                                       const std::string &kernel_name) {
165   MS_CHECK_TRUE_RET(func_graph != nullptr, nullptr);
166   auto param_node = func_graph->add_parameter();
167   MS_CHECK_TRUE_RET(param_node != nullptr, nullptr);
168   param_node->set_name(kernel_name);
169   if (path.empty()) {
170     return nullptr;
171   }
172   if (!Common::FileExists(path)) {
173     return nullptr;
174   }
175   auto akg_fd = open(path.c_str(), O_RDONLY);
176   struct stat sb;
177   if (akg_fd < 0) {
178     MS_LOG(ERROR) << "open " << path << " failed.";
179     return nullptr;
180   }
181   if (fstat(akg_fd, &sb) == -1) {
182     MS_LOG(ERROR) << "fstat " << path << " failed.";
183     return nullptr;
184   }
185   auto akg_mmap = mmap(NULL, sb.st_size, PROT_READ, MAP_SHARED, akg_fd, 0);
186   if (akg_mmap == nullptr) {
187     MS_LOG(ERROR) << "mmap " << path << " failed.";
188     return nullptr;
189   }
190   (void)close(akg_fd);
191   auto tensor_info = lite::CreateTensorInfo(akg_mmap, sb.st_size, {sb.st_size}, kNumberTypeUInt8);
192   if (tensor_info == nullptr) {
193     MS_LOG(ERROR) << "Create tensor info failed";
194     return nullptr;
195   }
196   (void)munmap(akg_mmap, sb.st_size);
197   auto status = lite::InitParameterFromTensorInfo(param_node, tensor_info);
198   if (status != lite::RET_OK) {
199     MS_LOG(ERROR) << "init parameter from tensor info failed";
200     return nullptr;
201   }
202   return param_node;
203 }
204 
CompileSingleJson(const std::string & json_name)205 bool CompileSingleJson(const std::string &json_name) {
206   std::string attrs = "None";
207   std::ostringstream py_cmd;
208   py_cmd << kAddMSLiteAkg;
209   py_cmd << "from akg.ms import compilewithjsonname\n";
210   py_cmd << "if not compilewithjsonname(\'" << json_name << "\', " << attrs << "):\n";
211   py_cmd << "    raise RuntimeError(\'Compile fail for json: " << json_name << "\')";
212   std::string cmd = "python -c \"" + py_cmd.str() + "\"";
213   auto ret = std::system(cmd.c_str());
214   if (!WIFEXITED(ret)) {
215     MS_LOG(ERROR) << "Python process start fail! process content is as follows:\n" << cmd;
216     return false;
217   }
218   if (WEXITSTATUS(ret) != 0) {
219     MS_LOG(ERROR) << "Failed to compile json: " << json_name;
220     return false;
221   }
222   return true;
223 }
224 
RetStatus(const int status)225 bool RetStatus(const int status) {
226   if (WIFEXITED(status)) {
227     if (WEXITSTATUS(status) == 0) {
228       MS_LOG(INFO) << "compile all pass for subprocess!";
229       return true;
230     } else {
231       MS_LOG(ERROR) << "Some jsons compile fail, please check log!";
232     }
233   } else if (WIFSIGNALED(status)) {
234     MS_LOG(ERROR) << "compile stopped by signal, maybe cost too long time!";
235   } else if (WSTOPSIG(status)) {
236     MS_LOG(ERROR) << "compile process is stopped by others!";
237   } else {
238     MS_LOG(ERROR) << "unknown error in compiling!";
239   }
240   return false;
241 }
242 
CompileJsonsInList(const std::string & dir_path,const std::vector<std::string> & json_list)243 bool CompileJsonsInList(const std::string &dir_path, const std::vector<std::string> &json_list) {
244   auto json_list_size = static_cast<int>(json_list.size());
245   auto thread_num = std::min(PROCESS_LIMIT, json_list_size);
246   if (thread_num == 0) {
247     return true;
248   }
249   auto func = [&](void *cdata, int task_id, float lhs_scale, float rhs_scale) -> int {
250     bool all_pass{true};
251     for (int j = task_id; j < json_list_size; j += PROCESS_LIMIT) {
252       auto res = CompileSingleJson(dir_path + "/" + json_list[j] + ".info");
253       if (!res) {
254         all_pass = false;
255       }
256     }
257     if (!all_pass) {
258       MS_LOG(ERROR) << "Some task failed.";
259       return lite::RET_ERROR;
260     }
261     return lite::RET_OK;
262   };
263   auto *pool = ThreadPool::CreateThreadPool(thread_num);
264   if (pool && pool->ParallelLaunch(func, nullptr, thread_num) == lite::RET_OK) {
265     return true;
266   }
267   return false;
268 }
269 
/**
 * Reuse pre-tuned "best split" object files instead of recompiling kernels.
 *
 * For each entry in `node_kernel` whose funcgraph carries a tuned signature equal
 * to its generated kernel name, look for a cached "best_split_<node_name>.o" file
 * under `dir_path`. When found, remap the node to that object name and drop the
 * original kernel name from `kernel_names` so it is not compiled again.
 *
 * \param dir_path      Directory holding compiled/tuned object files.
 * \param kernel_names  In/out set of kernel names still pending compilation.
 * \param node_kernel   In/out map node -> kernel name; entries may be rewritten
 *                      to the reused best-split object name.
 */
void ExcludeTunedObj(const std::string &dir_path, std::set<std::string> *kernel_names,
                     std::map<AnfNodePtr, std::string> *node_kernel) {
  auto fs = system::Env::GetFileSystem();
  std::map<std::string, std::string> tuned_obj_map;  // < tuned_signature, best split object name >
  for (auto &iter : *node_kernel) {
    auto fg = GetCNodeFuncGraph(iter.first);
    MS_EXCEPTION_IF_NULL(fg);
    auto tuned_sign = fg->has_attr(kTunedSign) ? GetValue<std::string>(fg->get_attr(kTunedSign)) : "";
    if (tuned_sign == iter.second) {
      // the kernel name is the same as signature, find cache.
      auto cache = tuned_obj_map.find(tuned_sign);
      if (cache != tuned_obj_map.end()) {
        iter.second = cache->second;
      }
      // The node name is required to build the best-split file name; without it
      // we keep whatever mapping (cached or original) is already set.
      if (!fg->has_attr(kAttrNodeName)) {
        continue;
      }
      auto best_split_kernel = std::string("best_split_") + GetValue<std::string>(fg->get_attr(kAttrNodeName));
      auto best_split_file = dir_path + "/" + best_split_kernel + ".o";
      if (!fs->FileExist(best_split_file)) {
        continue;
      }
      // the cache file exists, use it.
      tuned_obj_map[tuned_sign] = best_split_kernel;
      iter.second = best_split_kernel;
      // The original kernel no longer needs compilation.
      (void)kernel_names->erase(tuned_sign);
      MS_LOG(INFO) << "Reuse the object file " << best_split_file;
    } else {
      if (!tuned_sign.empty()) {
        MS_LOG(INFO) << "The kernel_name of " << iter.first->fullname_with_scope() << " mismatch its signature. "
                     << "kernel_name is " << iter.second << ", and tuned_signature is " << tuned_sign;
      }
    }
  }
}
305 }  // namespace mindspore::graphkernel
306