/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_SUB_GRAPH_KERNEL_H_
#define MINDSPORE_LITE_SRC_RUNTIME_SUB_GRAPH_KERNEL_H_

#include <algorithm>  // std::all_of in SubGraphKernel::IsReady
#include <atomic>
#include <cstdlib>  // malloc in DataStore::CreateDataStore
#include <utility>
#include <string>
#include <vector>
#include <map>
#include <memory>
#include "src/executor/kernel_exec.h"
#include "src/litert/executor.h"
#include "src/common/log_adapter.h"
#include "src/common/version_manager.h"
#include "src/litert/cpu_info.h"
#if defined(ENABLE_ARM) && defined(ENABLE_FP16)
#include "nnacl/constant_of_shape_parameter.h"
#endif

namespace mindspore::kernel {
// Stores the original data pointer and allocator of a subgraph input tensor so
// that PreProcess can swap the buffer out and PostProcess can restore it.
struct DataStore {
  void *data_ = nullptr;
  Allocator *allocator_ = nullptr;
  bool own_data_ = true;
  // Records `data` together with `data_allocator`, the allocator that owns it.
  // The DataStore object itself is placed with `allocator` when one is given,
  // otherwise with plain malloc.
  static DataStore *CreateDataStore(void *data = nullptr, bool own_data = true, Allocator *data_allocator = nullptr,
                                    Allocator *allocator = nullptr) {
    DataStore *data_store = nullptr;
    if (allocator == nullptr) {
      data_store = static_cast<DataStore *>(malloc(sizeof(DataStore)));
    } else {
      data_store = static_cast<DataStore *>(allocator->Malloc(sizeof(DataStore)));
    }
    if (data_store == nullptr) {
      MS_LOG(ERROR) << "Malloc data_store failed";
      return nullptr;
    }
    data_store->data_ = data;
    data_store->own_data_ = own_data;
    data_store->allocator_ = data_allocator;
    return data_store;
  }
};
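
// Illustrative sketch (not part of the runtime API): a caller that wants to
// overwrite a tensor's buffer can first snapshot it in a DataStore and restore
// it afterwards. `tensor` is assumed to be a lite::Tensor with the usual
// data()/own_data()/allocator() accessors.
//
//   DataStore *backup = DataStore::CreateDataStore(tensor->data(), tensor->own_data(),
//                                                  tensor->allocator());
//   if (backup == nullptr) {
//     return RET_ERROR;
//   }
//   // ... replace the tensor's buffer (e.g. with an FP16 copy) and run ...
//   tensor->set_data(backup->data_);  // restore the original buffer
//   free(backup);                     // created without an allocator above, so free() matches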

// Result container for SubGraphKernel::SubGraphSplitByOperator: the subgraph's
// kernels partitioned into units, with each unit's input/output tensor indices
// and the indices of the graph inputs.
struct KernelsArray {
  struct KernelsArrayUnit {
    std::vector<KernelExec *> kernels = {};
    std::vector<size_t> input_indexs = {};
    std::vector<size_t> output_indexs = {};
  };
  std::vector<KernelsArrayUnit> units = {};
  std::vector<size_t> graph_input = {};
};
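
// Sketch of consuming a split result (illustrative only; `graph` is assumed to
// be a prepared SubGraphKernel *):
//
//   KernelsArray split;
//   if (graph->SubGraphSplitByOperator(&split) == RET_OK) {
//     for (const auto &unit : split.units) {
//       for (KernelExec *k : unit.kernels) {
//         // inspect k together with unit.input_indexs / unit.output_indexs
//       }
//     }
//   }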

class SubGraphKernel : public KernelExec {
 public:
  SubGraphKernel(std::vector<KernelExec *> in_kernels, std::vector<KernelExec *> out_kernels,
                 std::vector<KernelExec *> nodes, MSKernel *kernel)
      : KernelExec(std::shared_ptr<MSKernel>(kernel)),
        nodes_(std::move(nodes)),
        in_nodes_(std::move(in_kernels)),
        out_nodes_(std::move(out_kernels)) {
    subgraph_type_ = kCpuFP32SubGraph;
    desc_.data_type = kNumberTypeFloat32;
  }

  ~SubGraphKernel() override {
    for (auto *node : nodes_) {
      delete node;
    }
    nodes_.clear();
    for (auto *tensor : tensors_) {
      delete tensor;
    }
    tensors_.clear();
  }

  bool IsReady(const std::vector<lite::Tensor *> &scope_tensors) override {
    return std::all_of(this->in_nodes_.begin(), this->in_nodes_.end(),
                       [&](KernelExec *kernel) { return kernel->IsReady(scope_tensors); });
  }

  // Called while compiling the graph; this base implementation does nothing,
  // subclasses prepare their own nodes.
  int Prepare() override { return RET_OK; }
  // Called before Run.
  int Execute() override { return Execute(nullptr, nullptr); }

  int Execute(const KernelCallBack &before, const KernelCallBack &after) override;

  int ReSize() override;

  int InferShape() override;

  virtual int MallocSubgraphInputs();

  void InitOutTensorInitRefCount(const std::vector<KernelExec *> *mask_kernels) override;

  void InitInputTensorInitRefCount();

  virtual int SetFp16Attr() { return mindspore::lite::RET_OK; }

  std::string ToString() const override;

  void set_nodes(const std::vector<KernelExec *> &node) { this->nodes_ = node; }

  std::vector<KernelExec *> &nodes() { return this->nodes_; }

  const std::vector<KernelExec *> &immutable_nodes() const { return this->nodes_; }

  void DropNode(KernelExec *node);

  std::vector<KernelExec *> in_nodes() const { return this->in_nodes_; }

  std::vector<KernelExec *> out_nodes() const { return this->out_nodes_; }

  std::vector<InferTensor *> &tensors() { return this->tensors_; }

  void SetInNodes(const std::vector<KernelExec *> &in_nodes) { in_nodes_ = in_nodes; }

  void SetOutNodes(const std::vector<KernelExec *> &out_nodes) { out_nodes_ = out_nodes; }

  void SetTensors(const std::vector<InferTensor *> &tensors) { tensors_ = tensors; }

  void SetSchemaVersion(int schema_version) { schema_version_ = schema_version; }

  int TopologicalSortNodes();

  void InsertInEdge(KernelExec *kernel, KernelExec *replace_kernel, const size_t &tensor_index);

  void InsertOutEdge(KernelExec *kernel, KernelExec *replace_kernel, const size_t &tensor_index);

  void UpdateInOutKernels(KernelExec *in_kernel, std::vector<KernelExec *> out_kernels, KernelExec *in_post_kernel,
                          KernelExec *out_pre_kernel);

  int UpdateInOutTensors(KernelExec *in_kernel, const std::vector<KernelExec *> &out_kernels, lite::Tensor *in_tensor,
                         lite::Tensor *out_tensor, bool keep_input);

  int DeleteSingleWayNode(KernelExec *kernel, bool keep_input);

  inline bool GetGraphChanged() const { return graph_changed_; }

  inline void SetGraphChanged(bool flag) { graph_changed_ = flag; }

  int SubGraphSplitByOperator(KernelsArray *out_kernels);

 protected:
  std::vector<KernelExec *> nodes_{};
  // entry nodes of the subgraph (a subset of nodes_)
  std::vector<KernelExec *> in_nodes_{};
  // exit nodes of the subgraph (a subset of nodes_)
  std::vector<KernelExec *> out_nodes_{};
  std::vector<InferTensor *> tensors_{};
  mindspore::lite::Executor *executor_ = nullptr;
  int schema_version_ = lite::SCHEMA_VERSION::SCHEMA_CUR;
  bool graph_changed_ = false;
};
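
// Sketch of running a subgraph with per-node callbacks (illustrative only;
// assumes a prepared SubGraphKernel *graph, and that KernelCallBack, declared
// alongside KernelExec, takes the node's input tensors, output tensors, and a
// callback-param struct carrying the node name):
//
//   KernelCallBack before = [](const std::vector<lite::Tensor *> &ins,
//                              const std::vector<lite::Tensor *> &outs,
//                              const MSCallBackParam &param) {
//     MS_LOG(INFO) << "about to run node " << param.node_name;
//     return true;
//   };
//   graph->Execute(before, nullptr);  // nullptr: no after-node callback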

class CpuSubGraph : public SubGraphKernel {
 public:
  CpuSubGraph(std::vector<KernelExec *> in_kernels, std::vector<KernelExec *> out_kernels,
              std::vector<KernelExec *> nodes, MSKernel *kernel)
      : SubGraphKernel(std::move(in_kernels), std::move(out_kernels), std::move(nodes), kernel) {
    subgraph_type_ = kCpuFP32SubGraph;
    desc_.arch = kernel::KERNEL_ARCH::kCPU;
  }

  ~CpuSubGraph() override { delete this->executor_; }
  int Prepare() override;
  int SetFp16Attr() override { return SubGraphKernel::SetFp16Attr(); }
  int Execute() override { return Execute(nullptr, nullptr); }
  int Execute(const KernelCallBack &before, const KernelCallBack &after) override;
};

class CpuFp32SubGraph : public CpuSubGraph {
 public:
  CpuFp32SubGraph(std::vector<KernelExec *> in_kernels, std::vector<KernelExec *> out_kernels,
                  std::vector<KernelExec *> nodes, MSKernel *kernel)
      : CpuSubGraph(std::move(in_kernels), std::move(out_kernels), std::move(nodes), kernel) {
    subgraph_type_ = kCpuFP32SubGraph;
    static std::atomic_int index = {0};
    this->set_name("CpuFP32SubGraph" + std::to_string(index++));
    desc_.data_type = kNumberTypeFloat32;
  }
  ~CpuFp32SubGraph() override = default;
};

#if defined(ENABLE_ARM) && defined(ENABLE_FP16)
class CpuFp16SubGraph : public CpuSubGraph {
 public:
  CpuFp16SubGraph(std::vector<KernelExec *> in_kernels, std::vector<KernelExec *> out_kernels,
                  std::vector<KernelExec *> nodes, MSKernel *kernel)
      : CpuSubGraph(std::move(in_kernels), std::move(out_kernels), std::move(nodes), kernel) {
    subgraph_type_ = kCpuFP16SubGraph;
    static std::atomic_int index = {0};
    this->set_name("CpuFP16SubGraph" + std::to_string(index++));
    desc_.data_type = kNumberTypeFloat16;
  }

  ~CpuFp16SubGraph() override = default;
  int SetFp16Attr() override {
    const auto *context = this->Context();
    MS_ASSERT(context != nullptr);
    support_fp16_ = context->device_and_pkg_support_fp16_;
    return CpuSubGraph::SetFp16Attr();
  }

#if !defined(ENABLE_MINDRT)
  int Execute(const KernelCallBack &before, const KernelCallBack &after) override;
#endif

  int Prepare() override {
    auto ret = CpuSubGraph::Prepare();
    if (ret != RET_OK) {
      return ret;
    }
    for (auto &node : this->nodes_) {
      if (node->type() == schema::PrimitiveType_Cast) {
        // Retarget Cast nodes whose destination type is FP32 so that they cast
        // to FP16 instead, keeping the whole subgraph in FP16.
        auto inputs = node->in_tensors();
        constexpr size_t kCastInputNum = 2;
        MS_ASSERT(inputs.size() >= kCastInputNum);
        auto dst_tensor = inputs[1];
        MS_ASSERT(dst_tensor != nullptr);
        MS_ASSERT(dst_tensor->data_type() == kNumberTypeInt32);
        MS_ASSERT(dst_tensor->data() != nullptr);
        MS_ASSERT(dst_tensor->ElementsNum() == 1);
        auto *dst_data = reinterpret_cast<int32_t *>(dst_tensor->data());
        if (dst_data[0] == kNumberTypeFloat32) {
          dst_data[0] = kNumberTypeFloat16;
        }
        auto outputs = node->out_tensors();
        MS_ASSERT(outputs.size() == 1);
        auto output = outputs.front();
        MS_ASSERT(output != nullptr);
        if (output->data_type() == kNumberTypeFloat32) {
          output->set_data_type(kNumberTypeFloat16);
        }
      } else if (node->type() == schema::PrimitiveType_ConstantOfShape) {
        // Likewise retarget ConstantOfShape nodes that would emit FP32 data.
        auto param = node->op_parameter();
        MS_ASSERT(param != nullptr);
        if (static_cast<TypeId>(reinterpret_cast<ConstantOfShapeParameter *>(param)->data_type_) ==
            kNumberTypeFloat32) {
          reinterpret_cast<ConstantOfShapeParameter *>(param)->data_type_ = kNumberTypeFloat16;
        }
        auto outputs = node->out_tensors();
        MS_ASSERT(outputs.size() == 1);
        auto output = outputs.front();
        MS_ASSERT(output != nullptr);
        if (output->data_type() == kNumberTypeFloat32) {
          output->set_data_type(kNumberTypeFloat16);
        }
      }
    }
    return RET_OK;
  }

 private:
#if !defined(ENABLE_MINDRT)
  int PreProcess();
  int PostProcess();
  void FreeOriginInputData();
  std::vector<DataStore *> origin_input_data_{};
#endif
  bool support_fp16_ = false;
};
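
// Effect of CpuFp16SubGraph::Prepare() on type-fixing nodes, sketched on a
// hypothetical graph (tensor names are illustrative only):
//
//   before Prepare:  x(FP16) -> Cast(dst_type = kNumberTypeFloat32) -> y(FP32)
//   after Prepare:   x(FP16) -> Cast(dst_type = kNumberTypeFloat16) -> y(FP16)
//
// ConstantOfShape gets the same rewrite through its ConstantOfShapeParameter,
// so no FP32 tensor is materialized inside the FP16 subgraph.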
#endif

class CustomSubGraph : public SubGraphKernel {
 public:
  CustomSubGraph(std::vector<KernelExec *> in_kernels, std::vector<KernelExec *> out_kernels,
                 std::vector<KernelExec *> nodes, MSKernel *kernel)
      : SubGraphKernel(std::move(in_kernels), std::move(out_kernels), std::move(nodes), kernel) {
    subgraph_type_ = kCustomSubGraph;
    desc_.arch = kernel::KERNEL_ARCH::kCustom;
  }

  ~CustomSubGraph() override { delete this->executor_; }
  int Prepare() override;
  int Execute() override { return Execute(nullptr, nullptr); }
  int Execute(const KernelCallBack &before, const KernelCallBack &after) override;
};

class AclSubGraph : public SubGraphKernel {
 public:
  AclSubGraph(std::vector<KernelExec *> in_kernels, std::vector<KernelExec *> out_kernels,
              std::vector<KernelExec *> nodes, Kernel *kernel)
      : SubGraphKernel(std::move(in_kernels), std::move(out_kernels), std::move(nodes), kernel) {
    subgraph_type_ = kAclSubGraph;
    desc_.arch = kernel::KERNEL_ARCH::kACL;
  }

  ~AclSubGraph() override { delete this->executor_; }
  int Prepare() override;
  int SetFp16Attr() override { return SubGraphKernel::SetFp16Attr(); }
  int Execute() override { return Execute(nullptr, nullptr); }
  int Execute(const KernelCallBack &before, const KernelCallBack &after) override;
};
}  // namespace mindspore::kernel
#endif  // MINDSPORE_LITE_SRC_RUNTIME_SUB_GRAPH_KERNEL_H_