• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2023 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "plugin/device/gpu/kernel/dynamic_akg/dynamic_akg_gpu_kernel_mod.h"
18 #include <fstream>
19 #include <numeric>
20 #include <functional>
21 #include <chrono>
22 #include <cmath>
23 #include <algorithm>
24 #include <map>
25 #include "nlohmann/json.hpp"
26 #include "utils/ms_utils.h"
27 #include "kernel/framework_utils.h"
28 #include "mindspore/ccsrc/include/common/debug/common.h"
29 #include "plugin/device/gpu/hal/device/gpu_common.h"
30 
namespace mindspore {
namespace kernel {
using std::fstream;
using std::string;
using std::unordered_map;
using std::vector;

// Json key of the optional statically-tiled variant embedded in a dynamic kernel's json.
constexpr auto kStaticTileImpl = "StaticTileImpl";
// Json key holding operator support metadata (e.g. "OperatorType" used in InitAkgKernelImpls).
constexpr auto kSupportInfo = "SupportInfo";

// Sentinel values stored in arg_size_vec_ and interpreted at launch time:
// kRemove marks an argument slot filled with a placeholder device pointer,
// kKeep marks a slot that receives the next real input/output device address.
// Any other value is passed through as a literal (shape) argument.
constexpr int64_t kRemove = -100000;
constexpr int64_t kKeep = -99999;
43 
// Process-wide manager shared by all DynamicAkgGpuKernelMod instances; it owns the
// table of loaded CUDA modules/functions populated by GetCUResult.
DynamicAkgGpuKernelManagerPtr DynamicAkgGpuKernelMod::kernel_manager_ = std::make_shared<DynamicAkgGpuKernelManager>();
DynamicAkgGpuKernelManager::DynamicAkgGpuKernelManager() {}
46 
GetCUResult(const char * kernel_content,bool force_reload,vector<uint32_t> * thread_info,CUfunction * func,const string kernel_name)47 CUresult DynamicAkgGpuKernelManager::GetCUResult(const char *kernel_content, bool force_reload,
48                                                  vector<uint32_t> *thread_info, CUfunction *func,
49                                                  const string kernel_name) {
50   string fn = kernel_name;
51   CUmodule module;
52   CUjit_option options[] = {};
53   void *optionValues[] = {};
54   CUresult result = cuModuleLoadDataEx(&module, kernel_content, 0, options, optionValues);
55   if (result != CUDA_SUCCESS) {
56     const char *msg = nullptr;
57     cuGetErrorName(result, &msg);
58     MS_LOG(ERROR) << "cuModuleLoadDataEx failed. Kernel name: << " << fn << ". Error message: " << msg;
59     return result;
60   }
61   result = cuModuleGetFunction(func, module, fn.c_str());
62   if (result != CUDA_SUCCESS) {
63     const char *msg = nullptr;
64     cuGetErrorName(result, &msg);
65     MS_LOG(ERROR) << "cuModuleGetFunction failed. Kernel name: << " << fn << ". Error message: " << msg;
66     return result;
67   }
68   infotable_[fn] = std::make_shared<GpuKernelMeta>(*func, module, *thread_info);
69   return result;
70 }
71 
GetFunction(const KernelPackPtr & kernel_pack,bool force_reload,vector<uint32_t> * thread_info,CUfunction * func,const string kernel_name)72 CUresult DynamicAkgGpuKernelManager::GetFunction(const KernelPackPtr &kernel_pack, bool force_reload,
73                                                  vector<uint32_t> *thread_info, CUfunction *func,
74                                                  const string kernel_name) {
75   if (kernel_pack->GetJson() == nullptr || kernel_pack->GetJson()->contents == nullptr ||
76       kernel_pack->GetKernel() == nullptr || kernel_pack->GetKernel()->contents == nullptr) {
77     MS_LOG(ERROR) << "Invalid kernel pack, json or kernel is nullptr of kernel : " << kernel_name << ".\n";
78     return CUDA_ERROR_INVALID_IMAGE;
79   }
80   return GetCUResult(&kernel_pack->GetKernel()->contents[0], force_reload, thread_info, func, kernel_name);
81 }
82 
UpdateShapeList(const std::vector<KernelTensor * > & inputs,const std::vector<KernelTensor * > & outputs)83 void DynamicAkgGpuKernelMod::UpdateShapeList(const std::vector<KernelTensor *> &inputs,
84                                              const std::vector<KernelTensor *> &outputs) {
85   shape_list_.clear();
86   for (size_t i = 0; i < inputs.size(); i++) {
87     auto in_shape = inputs[i]->GetShapeVector();
88     (void)shape_list_.emplace_back(in_shape);
89   }
90   for (size_t i = 0; i < outputs.size(); i++) {
91     auto out_shape = outputs[i]->GetShapeVector();
92     (void)shape_list_.emplace_back(out_shape);
93   }
94   for (auto it : kernel_map_) {
95     it.second->shape_list_ = shape_list_;
96   }
97   MS_LOG(INFO) << "Done UpdateShapeList for " << kernel_name_ << " shape_list_ = " << shape_list_;
98 }
99 
UpdateStaticShapeMappingInfo()100 void DynamicAkgGpuKernelMod::UpdateStaticShapeMappingInfo() {
101   thread_info_.clear();
102   thread_info_.emplace_back(parsed_js_[kBlockIdxX]);
103   thread_info_.emplace_back(parsed_js_[kBlockIdxY]);
104   thread_info_.emplace_back(parsed_js_[kBlockIdxZ]);
105   thread_info_.emplace_back(parsed_js_[kThreadIdxX]);
106   thread_info_.emplace_back(parsed_js_[kThreadIdxY]);
107   thread_info_.emplace_back(parsed_js_[kThreadIdxZ]);
108   MS_LOG(INFO) << "Done UpdateStaticShapeMappingInfo for " << kernel_name_;
109 }
110 
DynamicAkgGpuKernelMod(const KernelPackPtr & kernel_pack)111 DynamicAkgGpuKernelMod::DynamicAkgGpuKernelMod(const KernelPackPtr &kernel_pack) : kernel_pack_(kernel_pack) {
112   if (kernel_pack != nullptr) {
113     auto js = kernel_pack->GetJson();
114     if (js != nullptr) {
115       parsed_js_ = nlohmann::json::parse(js->contents, js->contents + js->len);
116       kernel_name_ = parsed_js_["kernelName"];
117     }
118   }
119 }
120 
CheckJsonParsed()121 void DynamicAkgGpuKernelMod::CheckJsonParsed() {
122   if (parsed_js_ != nullptr) {
123     return;
124   }
125   if (kernel_pack_ == nullptr) {
126     MS_EXCEPTION(RuntimeError) << "Invalid kernel pack for kernel: " << kernel_name_ << ".";
127   }
128   auto js = kernel_pack_->GetJson();
129   if (js == nullptr) {
130     MS_EXCEPTION(RuntimeError) << "Invalid kernel pack, json is nullptr for kernel: " << kernel_name_ << ".";
131   }
132   parsed_js_ = nlohmann::json::parse(js->contents, js->contents + js->len);
133 }
134 
Initialize()135 void DynamicAkgGpuKernelMod::Initialize() {
136   CheckJsonParsed();
137   if (is_dynamic_) {
138     InitAkgKernelImpls();
139   } else {
140     UpdateStaticShapeMappingInfo();
141     const size_t thread_info_num = 6;
142     if (thread_info_.size() != thread_info_num ||
143         (std::any_of(thread_info_.begin(), thread_info_.end(), [](uint32_t t) { return t <= 0; }))) {
144       MS_EXCEPTION(ValueError) << "For " << kernel_name_
145                                << ", gpu mapping config must be updated to 6 positive numbers before "
146                                << "launch, but got thread_info = " << thread_info_;
147     }
148   }
149 }
150 
InitAkgKernelImpls()151 void DynamicAkgGpuKernelMod::InitAkgKernelImpls() {
152   kernel_map_[kernel_name_] = std::make_shared<DynamicTileImpl>(kernel_name_, parsed_js_);
153   if (parsed_js_.find(kStaticTileImpl) != parsed_js_.end()) {
154     auto static_kernel_json = parsed_js_.at(kStaticTileImpl);
155     auto static_kernel_name = static_kernel_json["kernelName"];
156     kernel_map_[static_kernel_name] = std::make_shared<StaticTileImpl>(static_kernel_name, static_kernel_json);
157   }
158   for (auto it : kernel_map_) {
159     it.second->InitJsonShapeInformation();
160     it.second->InitJsonMappingInformation();
161     if (it.second->parsed_js_[kSupportInfo]["OperatorType"] == "Reduce") {
162       it.second->preprocessDynamicReduceTiling();
163     }
164   }
165   MS_LOG(INFO) << "InitAkgKernelImpls " << kernel_map_.size();
166 }
167 
SelectKernelImpl()168 AkgKernelImplInfoPtr DynamicAkgGpuKernelMod::SelectKernelImpl() {
169   if (kernel_map_.find(kernel_name_) == kernel_map_.end()) {
170     MS_EXCEPTION(RuntimeError) << "No default kernel for " << kernel_name_;
171   }
172   auto default_kernel = kernel_map_[kernel_name_];
173   AkgKernelImplInfoPtr static_kernel = nullptr;
174   for (auto it : kernel_map_) {
175     if (it.second == default_kernel) {
176       continue;
177     }
178     static_kernel = it.second;
179   }
180   if (static_kernel == nullptr) {
181     MS_LOG(DEBUG) << "For " << kernel_name_ << ", only have default kernel, return";
182     return default_kernel;
183   }
184   if (default_kernel->runtime_vars_.empty()) {
185     MS_LOG(DEBUG) << "For " << kernel_name_ << ", default kernel is static tile, return";
186     return default_kernel;
187   }
188   static_kernel->Init();
189   default_kernel->Init();
190   for (auto it : default_kernel->runtime_vars_) {
191     MS_LOG(INFO) << "Runtime var: " << it.second->ToString();
192     bool is_thread = it.second->curr_map_id >= AKG_KERNEL_MOD_TX_IDX && it.second->curr_map_id <= AKG_KERNEL_MOD_TZ_IDX;
193     const int64_t upper_bound_limit = 32;
194     if (is_thread && it.second->upper_bound <= upper_bound_limit) {
195       return static_kernel;
196     }
197   }
198   MS_LOG(INFO) << kernel_name_ << " use dynamic tile kernel, shape " << shape_list_ << "; Static thread info "
199                << static_kernel->thread_info_;
200   return kernel_map_[kernel_name_];
201 }
202 
Resize(const std::vector<KernelTensor * > & inputs,const std::vector<KernelTensor * > & outputs)203 int DynamicAkgGpuKernelMod::Resize(const std::vector<KernelTensor *> &inputs,
204                                    const std::vector<KernelTensor *> &outputs) {
205   int ret = KernelMod::Resize(inputs, outputs);
206   UpdateShapeList(inputs, outputs);
207   kernel_impl_ = SelectKernelImpl();
208   kernel_impl_->Resize();
209   MS_LOG(INFO) << "Done resize for DynamicAkgGpuKernelMod for " << kernel_name_;
210   return ret;
211 }
212 
Launch(const std::vector<KernelTensor * > & inputs,const std::vector<KernelTensor * > & workspace,const std::vector<KernelTensor * > & outputs,void * stream_ptr)213 bool DynamicAkgGpuKernelMod::Launch(const std::vector<KernelTensor *> &inputs,
214                                     const std::vector<KernelTensor *> &workspace,
215                                     const std::vector<KernelTensor *> &outputs, void *stream_ptr) {
216   if (kernel_impl_ != nullptr) {
217     kernel_name_ = kernel_impl_->kernel_name_;
218     thread_info_ = kernel_impl_->thread_info_;
219     arg_size_vec_ = kernel_impl_->arg_size_vec_;
220   }
221 
222   if (stream_ptr == 0) {
223     MS_LOG(ERROR) << "stream_ptr should not be nullptr. Kernel name: " << kernel_name_;
224     return false;
225   }
226   if (kernel_pack_ == nullptr) {
227     MS_LOG(ERROR) << "kernel pack should not be nullptr. Kernel name: " << kernel_name_;
228     return false;
229   }
230   MS_LOG(INFO) << "Start Launch for " << kernel_name_;
231   CUresult result;
232   if (kernel_addr_ == nullptr) {
233     result = kernel_manager_->GetFunction(kernel_pack_, false, &thread_info_, &kernel_addr_, kernel_name_);
234     if (result != CUDA_SUCCESS) {
235       const char *msg = nullptr;
236       cuGetErrorName(result, &msg);
237       MS_LOG(ERROR) << "Get function " << kernel_name_ << " failed. Error message: " << msg;
238       return false;
239     }
240   }
241 
242   vector<void *> runtimeargs;
243   vector<void *> dev_addrs;
244   vector<size_t> shp;
245   CUdeviceptr dev_ptr_fake;
246   if (is_dynamic_) {
247     runtimeargs.reserve(arg_size_vec_.size());
248     dev_addrs.reserve(arg_size_vec_.size());
249     shp.reserve(arg_size_vec_.size());
250     size_t inum = 0;
251     size_t onum = 0;
252     for (size_t idx = 0; idx < arg_size_vec_.size(); idx++) {
253       if (arg_size_vec_[idx] == kRemove) {
254         runtimeargs.push_back(reinterpret_cast<void *>(&dev_ptr_fake));
255       } else if (arg_size_vec_[idx] == kKeep) {
256         if (inum < inputs.size()) {
257           runtimeargs.push_back(reinterpret_cast<void *>(&dev_addrs.emplace_back(inputs[inum]->device_ptr())));
258           inum++;
259         } else if (onum < outputs.size()) {
260           runtimeargs.push_back(reinterpret_cast<void *>(&dev_addrs.emplace_back(outputs[onum]->device_ptr())));
261           onum++;
262         }
263       } else {
264         size_t arg_size = static_cast<size_t>(arg_size_vec_[idx]);
265         runtimeargs.push_back(reinterpret_cast<void *>(&shp.emplace_back(arg_size)));
266       }
267     }
268   } else {
269     runtimeargs.reserve(inputs.size() + outputs.size() + workspace.size());
270     dev_addrs.reserve(inputs.size() + outputs.size() + workspace.size());
271     (void)std::transform(std::begin(inputs), std::end(inputs), std::back_inserter(runtimeargs),
272                          [&dev_addrs](const KernelTensor *input) {
273                            return reinterpret_cast<void *>(&dev_addrs.emplace_back(input->device_ptr()));
274                          });
275     (void)std::transform(std::begin(outputs), std::end(outputs), std::back_inserter(runtimeargs),
276                          [&dev_addrs](const KernelTensor *output) {
277                            return reinterpret_cast<void *>(&dev_addrs.emplace_back(output->device_ptr()));
278                          });
279     if (!workspace.empty()) {
280       (void)std::transform(std::begin(workspace), std::end(workspace), std::back_inserter(runtimeargs),
281                            [&dev_addrs](const KernelTensor *ws) {
282                              return reinterpret_cast<void *>(&dev_addrs.emplace_back(ws->device_ptr()));
283                            });
284     }
285   }
286 
287   result = cuLaunchKernel(kernel_addr_, thread_info_[AKG_KERNEL_MOD_BX_IDX], thread_info_[AKG_KERNEL_MOD_BY_IDX],
288                           thread_info_[AKG_KERNEL_MOD_BZ_IDX], thread_info_[AKG_KERNEL_MOD_TX_IDX],
289                           thread_info_[AKG_KERNEL_MOD_TY_IDX], thread_info_[AKG_KERNEL_MOD_TZ_IDX], 0,
290                           reinterpret_cast<CUstream>(stream_ptr), reinterpret_cast<void **>(&runtimeargs[0]), 0);
291   if (result != CUDA_SUCCESS) {
292     const char *msg = nullptr;
293     cuGetErrorName(result, &msg);
294     MS_LOG(ERROR) << "Launch kernel failed. Kernel name: " << kernel_name_ << ". cuLaunchKernel error message: " << msg;
295     return false;
296   }
297   MS_LOG(INFO) << "End Launch for " << kernel_name_;
298   return true;
299 }
300 }  // namespace kernel
301 }  // namespace mindspore
302