1 /** 2 * Copyright 2019-2020 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 #ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNEL_H_ 17 #define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNEL_H_ 18 #include <vector> 19 #include <string> 20 #include <memory> 21 #include "nlohmann/json.hpp" 22 #include "ir/anf.h" 23 #include "ir/dtype.h" 24 #include "utils/utils.h" 25 #include "ir/tensor.h" 26 #include "abstract/dshape.h" 27 #include "utils/log_adapter.h" 28 #include "runtime/device/executor/dynamic_kernel.h" 29 30 #ifdef _MSC_VER 31 #undef OPAQUE 32 #endif 33 34 namespace mindspore { 35 enum KernelType : int { 36 UNKNOWN_KERNEL_TYPE = 0, 37 AKG_KERNEL, 38 AICPU_KERNEL, 39 RT_KERNEL, 40 HCCL_KERNEL, 41 TBE_KERNEL, 42 HOST_KERNEL, 43 CPU_KERNEL, 44 }; 45 46 namespace kernel { 47 // Supported fusion type 48 enum FusionType { 49 CONV = 0, 50 ELEMWISE, 51 COMMREDUCE, 52 SEGMENT, 53 OPAQUE, 54 BN_UPDATE_GRAD, 55 BN_GRAD_REDUCE, 56 LAYER_NORM_GRAD, 57 L2LOSS_MUL_ADDN, 58 PURE_BROADCAST, 59 INPLACE, 60 MATMUL, 61 MATMUL_V2, 62 GEMM, 63 CONV2D_BACKPROP_INPUT, 64 CONV2D_BACKPROP_FILTER, 65 CONV3D_BACKPROP_INPUT, 66 CONV3D_BACKPROP_FILTER, 67 CUBE_LAYER_NORM, 68 BN_REDUCE, 69 BN_UPDATE, 70 SOFTMAX_CROSS_ENTROPY_WITH_LOGITS, 71 L2_NORMALIZE, 72 SOFTMAX, 73 L2_LOSS, 74 ASCEND_QUANT, 75 ASCEND_DEQUANT, 76 ASCEND_ANTI_QUANT, 77 STRIDED_READ, 78 STRIDED_WRITE, 79 ASCEND_DEQUANT_S16, 80 ASCEND_REQUANT, 81 ASCEND_REQUANT_S16, 82 
MAX_POOL, 83 DEPTHWISECONV, 84 CONV3D, 85 POOL2D, 86 POOL3D, 87 READ_SELECT, 88 WRITE_SELECT, 89 COSINE_EMBEDDING_LOSS, 90 DILATION_PATTERN, 91 BROAD_CAST, 92 BATCH_MATMUL, 93 CONFUSION_TRANSPOSE, 94 UNKNOWN_FUSION_TYPE = -1, 95 }; 96 97 enum OpPattern { 98 kCommonPattern = 0, 99 kFormatAgnosticPattern = 1, 100 kBroadcastPattern = 2, 101 kReducePattern = 3, 102 }; 103 104 // Backend processor 105 enum Processor { 106 UNKNOWN = -1, 107 AICORE = 0, 108 AICPU, 109 CUDA, 110 }; 111 112 struct FlexArray { 113 size_t len; 114 char contents[]; 115 }; 116 117 struct KernelJsonInfo { 118 std::string bin_file_name; 119 std::string bin_file_suffix; 120 uint32_t block_dim; 121 std::string kernel_name; 122 std::string magic; 123 std::vector<size_t> parameters; 124 std::string sha256; 125 std::vector<size_t> workspaces; 126 uint32_t op_para_size; KernelJsonInfoKernelJsonInfo127 KernelJsonInfo() : block_dim(0), op_para_size(0) {} 128 }; 129 130 class KernelPack { 131 public: KernelPack()132 KernelPack() : json_(nullptr), kernel_(nullptr) {} 133 KernelPack(const KernelPack &) = default; 134 KernelJsonInfo kernel_json_info() const; 135 bool LoadKernelMeta(const std::string &json_f); 136 bool ReadFromJsonFile(const std::string &json_f, const std::string &processor); GetJson()137 const FlexArray *GetJson() const { return json_; } GetKernel()138 const FlexArray *GetKernel() const { return kernel_; } ~KernelPack()139 ~KernelPack() { 140 if (json_ != nullptr) { 141 delete[] json_; 142 json_ = nullptr; 143 } 144 if (kernel_ != nullptr) { 145 delete[] kernel_; 146 kernel_ = nullptr; 147 } 148 } 149 150 private: 151 bool ReadFromJsonFileHelper(std::ifstream &kernel_bin); 152 void ParseKernelJson(const nlohmann::json &js); 153 KernelJsonInfo kernel_json_info_; 154 FlexArray *json_; 155 FlexArray *kernel_; 156 }; 157 using KernelPackPtr = std::shared_ptr<KernelPack>; 158 159 /** 160 * @brief base class for autotensor kernel and cce kernel. 
 */
// One device memory block handed to a kernel launch: raw pointer + byte size.
// Address does not own the memory it points at.
struct Address {
  Address() : addr(nullptr), size(0) {}
  Address(void *address_addr, size_t address_size) : addr(address_addr), size(address_size) {}
  void *addr;   // start of the buffer
  size_t size;  // buffer size in bytes
};
using AddressPtr = std::shared_ptr<Address>;

// The memory info of kernel launch.
struct KernelLaunchInfo {
  std::vector<AddressPtr> inputs_;      // input buffers
  std::vector<AddressPtr> outputs_;     // output buffers
  std::vector<AddressPtr> workspaces_;  // scratch buffers
};

// Abstract base class for a launchable kernel. Concrete backends implement
// the three size queries (so the runtime can allocate buffers) and Launch().
class KernelMod {
 public:
  // Byte size of each input/output/workspace buffer the kernel expects.
  virtual const std::vector<size_t> &GetInputSizeList() const = 0;
  virtual const std::vector<size_t> &GetOutputSizeList() const = 0;
  virtual const std::vector<size_t> &GetWorkspaceSizeList() const = 0;
  // Execute the kernel on the given buffers; stream_ptr is an opaque
  // backend stream/queue handle. Returns true on success.
  virtual bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                      const std::vector<AddressPtr> &outputs, void *stream_ptr) = 0;
  // Optional dynamic-shape support; the default (nullptr) means the kernel
  // has no dynamic-kernel implementation.
  virtual device::DynamicKernelPtr GenDynamicKernel(const CNodePtr &cnode_ptr, void *stream_ptr) { return nullptr; }
  // Optional extra parameters; empty by default.
  virtual std::vector<size_t> GenParameters() { return {}; }
  // Optional hook for subclasses to free backend resources; no-op here.
  virtual void ReleaseResource() {}

  virtual ~KernelMod() = default;
  void set_unique_name(const std::string &unique_name) { unique_name_ = unique_name; }
  void set_fullname(const std::string &fullname) { fullname_ = fullname; }
  // NOTE(review): presumably flags kernels carrying a monad (side-effect
  // ordering) input/output — confirm with callers.
  void set_is_monad(bool is_monad) { is_monad_ = is_monad; }
  // Cached launch addresses; set by the runtime before the kernel is launched.
  void set_inputs_addr(const std::vector<AddressPtr> &addr) { inputs_addr_ = addr; }
  void set_workspaces_addr(const std::vector<AddressPtr> &addr) { workspaces_addr_ = addr; }
  void set_outputs_addr(const std::vector<AddressPtr> &addr) { outputs_addr_ = addr; }
  const std::vector<AddressPtr> &GetInputsAddr() { return inputs_addr_; }
  const std::vector<AddressPtr> &GetWorkSpacesAddr() { return workspaces_addr_; }
  const std::vector<AddressPtr> &GetOutputsAddr() { return outputs_addr_; }
  void SetStream(void *stream) { stream_ = stream; }
  void *GetStream() const { return stream_; }

 protected:
  std::string kernel_name_;
  std::string unique_name_;
  std::string fullname_;
  bool is_monad_{false};
  void *stream_{nullptr};  // opaque backend stream handle, not owned

 private:
  std::vector<AddressPtr> inputs_addr_;
  std::vector<AddressPtr> workspaces_addr_;
  std::vector<AddressPtr> outputs_addr_;
};
using KernelModPtr = std::shared_ptr<KernelMod>;
}  // namespace kernel
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_KERNEL_H_