/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_SUB_GRAPH_KERNEL_H_
#define MINDSPORE_LITE_SRC_RUNTIME_SUB_GRAPH_KERNEL_H_

#include <atomic>
#include <utility>
#include <string>
#include <vector>
#include <map>
#include <memory>
#include "src/executor/kernel_exec.h"
#include "src/litert/executor.h"
#include "src/common/log_adapter.h"
#include "src/common/version_manager.h"
#include "src/litert/cpu_info.h"
#if defined(ENABLE_ARM) && defined(ENABLE_FP16)
#include "nnacl/constant_of_shape_parameter.h"
#endif

namespace mindspore::kernel {
// Stores the original data pointer and allocator of a subgraph input tensor,
// so that PreProcess can swap in a converted buffer and PostProcess can
// restore the original afterwards.
struct DataStore {
  void *data_ = nullptr;
  Allocator *allocator_ = nullptr;
  bool own_data_ = true;
  static DataStore *CreateDataStore(void *data = nullptr, bool own_data = true, Allocator *data_allocator = nullptr,
                                    Allocator *allocator = nullptr) {
    DataStore *data_store = nullptr;
    if (allocator == nullptr) {
      data_store = static_cast<DataStore *>(malloc(sizeof(DataStore)));
    } else {
      data_store = static_cast<DataStore *>(allocator->Malloc(sizeof(DataStore)));
    }
    if (data_store == nullptr) {
      MS_LOG(ERROR) << "Malloc data_store failed";
      return nullptr;
    }
    data_store->data_ = data;
    data_store->own_data_ = own_data;
    data_store->allocator_ = data_allocator;
    return data_store;
  }
};

typedef struct KernelsArray {
  struct KernelsArrayUnit {
    std::vector<KernelExec *> kernels = {};
    std::vector<size_t> input_indexs = {};
    std::vector<size_t> output_indexs = {};
  };
  std::vector<KernelsArrayUnit> units = {};
  std::vector<size_t> graph_input = {};
} KernelsArray;
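// Illustrative only: a minimal sketch of how a DataStore is typically used to
// stash a subgraph input before an in-place dtype conversion and restore it
// afterwards. The tensor accessors (data(), own_data(), set_data()) mirror
// lite::Tensor, but the surrounding control flow and names such as
// `ctx_allocator` are assumptions, not the actual PreProcess/PostProcess code.
//
//   void *origin = in_tensor->data();
//   DataStore *saved = DataStore::CreateDataStore(origin, in_tensor->own_data(),
//                                                 in_tensor->allocator(), ctx_allocator);
//   if (saved == nullptr) {
//     return lite::RET_NULL_PTR;  // allocating the bookkeeping struct failed
//   }
//   origin_input_data_.push_back(saved);
//   // ... swap in a converted (e.g. fp16) buffer and run, then restore:
//   in_tensor->set_data(saved->data_, saved->own_data_);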
class SubGraphKernel : public KernelExec {
 public:
  SubGraphKernel(std::vector<KernelExec *> in_kernels, std::vector<KernelExec *> out_kernels,
                 std::vector<KernelExec *> nodes, MSKernel *kernel)
      : KernelExec(std::shared_ptr<MSKernel>(kernel)),
        nodes_(std::move(nodes)),
        in_nodes_(std::move(in_kernels)),
        out_nodes_(std::move(out_kernels)) {
    subgraph_type_ = kCpuFP32SubGraph;
    desc_.data_type = kNumberTypeFloat32;
  }

  ~SubGraphKernel() override {
    for (auto *node : nodes_) {
      delete node;
    }
    nodes_.clear();
    for (auto *tensor : tensors_) {
      delete tensor;
    }
    tensors_.clear();
  }

  bool IsReady(const std::vector<lite::Tensor *> &scope_tensors) override {
    return std::all_of(this->in_nodes_.begin(), this->in_nodes_.end(),
                       [&](KernelExec *kernel) { return kernel->IsReady(scope_tensors); });
  }

  // Called while compiling the graph. The base implementation is a no-op;
  // subclasses are expected to call Prepare() on each node.
  int Prepare() override { return RET_OK; }

  // Runs the subgraph without per-node callbacks.
  int Execute() override { return Execute(nullptr, nullptr); }

  int Execute(const KernelCallBack &before, const KernelCallBack &after) override;

  int ReSize() override;

  int InferShape() override;

  virtual int MallocSubgraphInputs();

  void InitOutTensorInitRefCount(const std::vector<KernelExec *> *mask_kernels) override;

  void InitInputTensorInitRefCount();

  virtual int SetFp16Attr() { return mindspore::lite::RET_OK; }

  std::string ToString() const override;

  void set_nodes(const std::vector<KernelExec *> &node) { this->nodes_ = node; }

  std::vector<KernelExec *> &nodes() { return this->nodes_; }

  const std::vector<KernelExec *> &immutable_nodes() const { return this->nodes_; }

  void DropNode(KernelExec *node);

  std::vector<KernelExec *> in_nodes() const { return this->in_nodes_; }

  std::vector<KernelExec *> out_nodes() const { return this->out_nodes_; }

  std::vector<InferTensor *> &tensors() { return this->tensors_; }

  void SetInNodes(const std::vector<KernelExec *> &in_nodes) { in_nodes_ = in_nodes; }

  void SetOutNodes(const std::vector<KernelExec *> &out_nodes) { out_nodes_ = out_nodes; }

  void SetTensors(const std::vector<InferTensor *> &tensors) { tensors_ = tensors; }

  void SetSchemaVersion(int schema_version) { schema_version_ = schema_version; }

  int TopologicalSortNodes();

  void InsertInEdge(KernelExec *kernel, KernelExec *replace_kernel, const size_t &tensor_index);

  void InsertOutEdge(KernelExec *kernel, KernelExec *replace_kernel, const size_t &tensor_index);

  void UpdateInOutKernels(KernelExec *in_kernel, std::vector<KernelExec *> out_kernels, KernelExec *in_post_kernel,
                          KernelExec *out_pre_kernel);

  int UpdateInOutTensors(KernelExec *in_kernel, const std::vector<KernelExec *> &out_kernels, lite::Tensor *in_tensor,
                         lite::Tensor *out_tensor, bool keep_input);

  int DeleteSingleWayNode(KernelExec *kernel, bool keep_input);

  inline bool GetGraphChanged() const { return graph_changed_; }

  inline void SetGraphChanged(bool flag) { graph_changed_ = flag; }

  int SubGraphSplitByOperator(KernelsArray *out_kernels);

 protected:
  std::vector<KernelExec *> nodes_{};
  // entry nodes within nodes_
  std::vector<KernelExec *> in_nodes_{};
  // exit nodes within nodes_
  std::vector<KernelExec *> out_nodes_{};
  std::vector<InferTensor *> tensors_{};
  mindspore::lite::Executor *executor_ = nullptr;
  int schema_version_ = lite::SCHEMA_VERSION::SCHEMA_CUR;
  bool graph_changed_ = false;
};
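// Illustrative lifecycle sketch (not part of this header): a scheduler-built
// SubGraphKernel is prepared once, resized when input shapes change, and
// executed per inference, optionally with per-node callbacks. The `graph`
// pointer and the error-handling placeholders are hypothetical.
//
//   if (graph->Prepare() != RET_OK) { /* compile-time failure */ }
//   if (graph->ReSize() != RET_OK) { /* shape propagation failed */ }
//   KernelCallBack before = nullptr;  // e.g. a lambda for per-node profiling
//   KernelCallBack after = nullptr;
//   if (graph->Execute(before, after) != RET_OK) { /* runtime failure */ }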
class CpuSubGraph : public SubGraphKernel {
 public:
  CpuSubGraph(std::vector<KernelExec *> in_kernels, std::vector<KernelExec *> out_kernels,
              std::vector<KernelExec *> nodes, MSKernel *kernel)
      : SubGraphKernel(std::move(in_kernels), std::move(out_kernels), std::move(nodes), kernel) {
    subgraph_type_ = kCpuFP32SubGraph;
    desc_.arch = kernel::KERNEL_ARCH::kCPU;
  }

  ~CpuSubGraph() override { delete this->executor_; }
  int Prepare() override;
  int SetFp16Attr() override { return SubGraphKernel::SetFp16Attr(); }
  int Execute() override { return Execute(nullptr, nullptr); }
  int Execute(const KernelCallBack &before, const KernelCallBack &after) override;
};

class CpuFp32SubGraph : public CpuSubGraph {
 public:
  CpuFp32SubGraph(std::vector<KernelExec *> in_kernels, std::vector<KernelExec *> out_kernels,
                  std::vector<KernelExec *> nodes, MSKernel *kernel)
      : CpuSubGraph(std::move(in_kernels), std::move(out_kernels), std::move(nodes), kernel) {
    subgraph_type_ = kCpuFP32SubGraph;
    static std::atomic_int index = {0};
    this->set_name("CpuFP32SubGraph" + std::to_string(index++));
    desc_.data_type = kNumberTypeFloat32;
  }
  ~CpuFp32SubGraph() override = default;
};

#if defined(ENABLE_ARM) && defined(ENABLE_FP16)
class CpuFp16SubGraph : public CpuSubGraph {
 public:
  CpuFp16SubGraph(std::vector<KernelExec *> in_kernels, std::vector<KernelExec *> out_kernels,
                  std::vector<KernelExec *> nodes, MSKernel *kernel)
      : CpuSubGraph(std::move(in_kernels), std::move(out_kernels), std::move(nodes), kernel) {
    subgraph_type_ = kCpuFP16SubGraph;
    static std::atomic_int index = {0};
    this->set_name("CpuFP16SubGraph" + std::to_string(index++));
    desc_.data_type = kNumberTypeFloat16;
  }

  ~CpuFp16SubGraph() override = default;
  int SetFp16Attr() override {
    const auto *context = this->Context();
    MS_ASSERT(context != nullptr);
    support_fp16_ = context->device_and_pkg_support_fp16_;
    return CpuSubGraph::SetFp16Attr();
  }

#if !defined(ENABLE_MINDRT)
  int Execute(const KernelCallBack &before, const KernelCallBack &after) override;
#endif

  int Prepare() override {
    auto ret = CpuSubGraph::Prepare();
    if (ret != RET_OK) {
      return ret;
    }
    // Rewrite nodes that carry an explicit float32 destination type so the
    // whole subgraph stays in float16.
    for (auto &node : this->nodes_) {
      if (node->type() == schema::PrimitiveType_Cast) {
        constexpr size_t kCastInputNum = 2;  // input tensor + destination type
        auto inputs = node->in_tensors();
        MS_ASSERT(inputs.size() >= kCastInputNum);
        auto dst_tensor = inputs[1];
        MS_ASSERT(dst_tensor != nullptr);
        MS_ASSERT(dst_tensor->data_type() == kNumberTypeInt32);
        MS_ASSERT(dst_tensor->data() != nullptr);
        MS_ASSERT(dst_tensor->ElementsNum() == 1);
        auto *dst_data = reinterpret_cast<int32_t *>(dst_tensor->data());
        if (dst_data[0] == kNumberTypeFloat32) {
          dst_data[0] = kNumberTypeFloat16;
        }
        auto outputs = node->out_tensors();
        MS_ASSERT(outputs.size() == 1);
        auto output = outputs.front();
        MS_ASSERT(output != nullptr);
        if (output->data_type() == kNumberTypeFloat32) {
          output->set_data_type(kNumberTypeFloat16);
        }
      } else if (node->type() == schema::PrimitiveType_ConstantOfShape) {
        auto param = node->op_parameter();
        MS_ASSERT(param != nullptr);
        auto *cos_param = reinterpret_cast<ConstantOfShapeParameter *>(param);
        if (static_cast<TypeId>(cos_param->data_type_) == kNumberTypeFloat32) {
          cos_param->data_type_ = kNumberTypeFloat16;
        }
        auto outputs = node->out_tensors();
        MS_ASSERT(outputs.size() == 1);
        auto output = outputs.front();
        MS_ASSERT(output != nullptr);
        if (output->data_type() == kNumberTypeFloat32) {
          output->set_data_type(kNumberTypeFloat16);
        }
      }
    }
    return RET_OK;
  }

 private:
#if !defined(ENABLE_MINDRT)
  int PreProcess();
  int PostProcess();
  void FreeOriginInputData();
  std::vector<DataStore *> origin_input_data_{};
#endif
  bool support_fp16_ = false;
};
#endif
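// Why CpuFp16SubGraph::Prepare() rewrites Cast and ConstantOfShape: both ops
// record an explicit destination dtype (Cast in its second input tensor,
// ConstantOfShape in its OpParameter), so converting tensor buffers alone is
// not enough -- the recorded dtype would push intermediate results back to
// fp32 mid-graph. Retargeting kNumberTypeFloat32 to kNumberTypeFloat16 in the
// op attributes and in each node's output tensor descriptor keeps the entire
// subgraph in fp16.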
class CustomSubGraph : public SubGraphKernel {
 public:
  CustomSubGraph(std::vector<KernelExec *> in_kernels, std::vector<KernelExec *> out_kernels,
                 std::vector<KernelExec *> nodes, MSKernel *kernel)
      : SubGraphKernel(std::move(in_kernels), std::move(out_kernels), std::move(nodes), kernel) {
    subgraph_type_ = kCustomSubGraph;
    desc_.arch = kernel::KERNEL_ARCH::kCustom;
  }

  ~CustomSubGraph() override { delete this->executor_; }
  int Prepare() override;
  int Execute() override { return Execute(nullptr, nullptr); }
  int Execute(const KernelCallBack &before, const KernelCallBack &after) override;
};

class AclSubGraph : public SubGraphKernel {
 public:
  AclSubGraph(std::vector<KernelExec *> in_kernels, std::vector<KernelExec *> out_kernels,
              std::vector<KernelExec *> nodes, Kernel *kernel)
      : SubGraphKernel(std::move(in_kernels), std::move(out_kernels), std::move(nodes), kernel) {
    subgraph_type_ = kAclSubGraph;
    desc_.arch = kernel::KERNEL_ARCH::kACL;
  }

  ~AclSubGraph() override { delete this->executor_; }
  int Prepare() override;
  int SetFp16Attr() override { return SubGraphKernel::SetFp16Attr(); }
  int Execute() override { return Execute(nullptr, nullptr); }
  int Execute(const KernelCallBack &before, const KernelCallBack &after) override;
};
}  // namespace mindspore::kernel
#endif  // MINDSPORE_LITE_SRC_RUNTIME_SUB_GRAPH_KERNEL_H_