• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020-2022 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_GPU_AKG_GPU_KERNEL_MOD_H_
18 #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_GPU_AKG_GPU_KERNEL_MOD_H_
#include <cuda.h>
#include <fstream>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "kernel/kernel.h"
#include "plugin/device/gpu/kernel/gpu_kernel_mod.h"
#include "kernel/common_utils.h"
27 
28 namespace mindspore {
29 namespace kernel {
30 using std::fstream;
31 using std::string;
32 using std::vector;
33 
34 const int MAX_REGISTER_PER_THREAD_BLOCK = 65536;
35 const int REGISTER_UNIT_IN_WARP = 256;
36 const int WARP_SIZE = 32;
37 const int WARP_ALLOC_GRAN = 4;
38 const int AKG_KERNEL_MOD_BX_IDX = 0;
39 const int AKG_KERNEL_MOD_BY_IDX = 1;
40 const int AKG_KERNEL_MOD_BZ_IDX = 2;
41 const int AKG_KERNEL_MOD_TX_IDX = 3;
42 const int AKG_KERNEL_MOD_TY_IDX = 4;
43 const int AKG_KERNEL_MOD_TZ_IDX = 5;
44 
45 struct GpuKernelMeta {
46   CUfunction func_addr_;
47   CUmodule module_;
48   std::vector<uint32_t> thread_info_;
GpuKernelMetaGpuKernelMeta49   GpuKernelMeta(CUfunction funcAddr, CUmodule module, const std::vector<uint32_t> &thread_info)
50       : func_addr_(funcAddr), module_(module), thread_info_(thread_info) {}
51 };
52 using GpuKernelMetaPtr = std::shared_ptr<GpuKernelMeta>;
53 
54 class AkgGpuKernelManager {
55  public:
56   AkgGpuKernelManager();
~AkgGpuKernelManager()57   virtual ~AkgGpuKernelManager() {
58     for (auto iter = infotable_.begin(); iter != infotable_.end(); ++iter) {
59       CUresult ret = cuModuleUnload(iter->second->module_);
60       if (ret != CUDA_SUCCESS && ret != CUDA_ERROR_DEINITIALIZED) {
61         const char *msg = nullptr;
62         cuGetErrorName(ret, &msg);
63         MS_LOG(ERROR) << "Unload GPU Module failed. cuModuleUnload error message: " << msg;
64       }
65     }
66   }
67   CUresult GetFunction(const KernelPackPtr &kernel_pack, bool force_reload, std::vector<uint32_t> *thread_info,
68                        CUfunction *func);
69 
70  private:
71   std::unordered_map<std::string, GpuKernelMetaPtr> infotable_;
72 };
73 using AkgGpuKernelManagerPtr = std::shared_ptr<AkgGpuKernelManager>;
74 
75 class AkgGpuKernelMod : public GpuKernelMod {
76  public:
77   explicit AkgGpuKernelMod(const KernelPackPtr &kernel_pack);
~AkgGpuKernelMod()78   virtual ~AkgGpuKernelMod() {}
79 
80   bool Launch(const std::vector<KernelTensor *> &inputs, const std::vector<KernelTensor *> &workspace,
81               const std::vector<KernelTensor *> &outputs, void *stream_ptr) override;
82 
83   static AkgGpuKernelManagerPtr kernel_manager_;
GetOpSupport()84   std::vector<KernelAttr> GetOpSupport() override { return {}; }
85 
86  private:
87   KernelPackPtr kernel_pack_;
88   std::vector<uint32_t> thread_info_;
89   CUfunction kernel_addr_{nullptr};
90 };
91 class AkgGpuKernelModDebug : public AkgGpuKernelMod {
92  public:
AkgGpuKernelModDebug(const KernelPackPtr & kernel_pack)93   explicit AkgGpuKernelModDebug(const KernelPackPtr &kernel_pack) : AkgGpuKernelMod(kernel_pack) {}
~AkgGpuKernelModDebug()94   virtual ~AkgGpuKernelModDebug() {}
Launch(const std::vector<KernelTensor * > & inputs,const std::vector<KernelTensor * > & workspace,const std::vector<KernelTensor * > & outputs,void * stream_ptr)95   bool Launch(const std::vector<KernelTensor *> &inputs, const std::vector<KernelTensor *> &workspace,
96               const std::vector<KernelTensor *> &outputs, void *stream_ptr) {
97     auto ptr = reinterpret_cast<CUstream>(stream_ptr);
98     CUresult before_launch = cuStreamSynchronize(ptr);
99     const char *msg = nullptr;
100     if (before_launch != CUDA_SUCCESS) {
101       cuGetErrorName(before_launch, &msg);
102       MS_LOG(ERROR) << "before_launch sycn failed, Kernel name is : " << kernel_name_ << ", Error message: " << msg;
103     }
104     auto result = AkgGpuKernelMod::Launch(inputs, workspace, outputs, stream_ptr);
105     CUresult after_launch = cuStreamSynchronize(ptr);
106     if (after_launch != CUDA_SUCCESS) {
107       cuGetErrorName(after_launch, &msg);
108       MS_LOG(ERROR) << "after_launch sycn failed, Kernel name is : " << kernel_name_ << ", Error message: " << msg;
109     }
110     return result;
111   }
112 };
113 using AkgGpuKernelModPtr = std::shared_ptr<AkgGpuKernelMod>;
114 }  // namespace kernel
115 }  // namespace mindspore
116 
117 #endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_GPU_AKG_GPU_KERNEL_MOD_H_
118