/**
 * Copyright 2020-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_GPU_AKG_GPU_KERNEL_MOD_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_GPU_AKG_GPU_KERNEL_MOD_H_
#include <cuda.h>
#include <string>
#include <vector>
#include <unordered_map>
#include <memory>
#include "kernel/kernel.h"
#include "plugin/device/gpu/kernel/gpu_kernel_mod.h"
#include "kernel/common_utils.h"

namespace mindspore {
namespace kernel {
using std::fstream;
using std::string;
using std::vector;

// Register-file and warp-allocation constants used when checking kernel launch configurations.
const int MAX_REGISTER_PER_THREAD_BLOCK = 65536;
const int REGISTER_UNIT_IN_WARP = 256;
const int WARP_SIZE = 32;
const int WARP_ALLOC_GRAN = 4;
// Offsets of the launch dimensions stored in thread_info_: grid dims (x, y, z), then block dims (x, y, z).
const int AKG_KERNEL_MOD_BX_IDX = 0;
const int AKG_KERNEL_MOD_BY_IDX = 1;
const int AKG_KERNEL_MOD_BZ_IDX = 2;
const int AKG_KERNEL_MOD_TX_IDX = 3;
const int AKG_KERNEL_MOD_TY_IDX = 4;
const int AKG_KERNEL_MOD_TZ_IDX = 5;

// Cached CUDA function handle, module, and launch dimensions for a compiled AKG kernel.
struct GpuKernelMeta {
  CUfunction func_addr_;
  CUmodule module_;
  std::vector<uint32_t> thread_info_;
  GpuKernelMeta(CUfunction funcAddr, CUmodule module, const std::vector<uint32_t> &thread_info)
      : func_addr_(funcAddr), module_(module), thread_info_(thread_info) {}
};
using GpuKernelMetaPtr = std::shared_ptr<GpuKernelMeta>;

// Loads AKG-generated CUDA modules, caches the resulting kernel functions, and unloads the modules on destruction.
class AkgGpuKernelManager {
 public:
  AkgGpuKernelManager();
  virtual ~AkgGpuKernelManager() {
    for (auto iter = infotable_.begin(); iter != infotable_.end(); ++iter) {
      CUresult ret = cuModuleUnload(iter->second->module_);
      if (ret != CUDA_SUCCESS && ret != CUDA_ERROR_DEINITIALIZED) {
        const char *msg = nullptr;
        cuGetErrorName(ret, &msg);
        MS_LOG(ERROR) << "Unload GPU Module failed. cuModuleUnload error message: " << msg;
      }
    }
  }
  CUresult GetFunction(const KernelPackPtr &kernel_pack, bool force_reload, std::vector<uint32_t> *thread_info,
                       CUfunction *func);

 private:
  std::unordered_map<std::string, GpuKernelMetaPtr> infotable_;
};
using AkgGpuKernelManagerPtr = std::shared_ptr<AkgGpuKernelManager>;

// Kernel mod that launches an AKG-compiled GPU kernel described by a KernelPack.
class AkgGpuKernelMod : public GpuKernelMod {
 public:
  explicit AkgGpuKernelMod(const KernelPackPtr &kernel_pack);
  virtual ~AkgGpuKernelMod() {}

  bool Launch(const std::vector<KernelTensor *> &inputs, const std::vector<KernelTensor *> &workspace,
              const std::vector<KernelTensor *> &outputs, void *stream_ptr) override;

  static AkgGpuKernelManagerPtr kernel_manager_;
  std::vector<KernelAttr> GetOpSupport() override { return {}; }

 private:
  KernelPackPtr kernel_pack_;
  std::vector<uint32_t> thread_info_;
  CUfunction kernel_addr_{nullptr};
};

// Debug variant that synchronizes the stream before and after the launch to help localize kernel failures.
class AkgGpuKernelModDebug : public AkgGpuKernelMod {
 public:
  explicit AkgGpuKernelModDebug(const KernelPackPtr &kernel_pack) : AkgGpuKernelMod(kernel_pack) {}
  virtual ~AkgGpuKernelModDebug() {}
  bool Launch(const std::vector<KernelTensor *> &inputs, const std::vector<KernelTensor *> &workspace,
              const std::vector<KernelTensor *> &outputs, void *stream_ptr) {
    auto ptr = reinterpret_cast<CUstream>(stream_ptr);
    CUresult before_launch = cuStreamSynchronize(ptr);
    const char *msg = nullptr;
    if (before_launch != CUDA_SUCCESS) {
      cuGetErrorName(before_launch, &msg);
      MS_LOG(ERROR) << "Stream sync failed before launch, kernel name is: " << kernel_name_
                    << ", error message: " << msg;
    }
    auto result = AkgGpuKernelMod::Launch(inputs, workspace, outputs, stream_ptr);
    CUresult after_launch = cuStreamSynchronize(ptr);
    if (after_launch != CUDA_SUCCESS) {
      cuGetErrorName(after_launch, &msg);
      MS_LOG(ERROR) << "Stream sync failed after launch, kernel name is: " << kernel_name_
                    << ", error message: " << msg;
    }
    return result;
  }
};
using AkgGpuKernelModPtr = std::shared_ptr<AkgGpuKernelMod>;
}  // namespace kernel
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_AKG_GPU_AKG_GPU_KERNEL_MOD_H_