• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2020 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/executor/sub_graph_kernel.h"
18 #include <algorithm>
19 #include <fstream>
20 #include <queue>
21 #include "src/tensor.h"
22 #include "src/tensorlist.h"
23 #ifdef ENABLE_FP16
24 #include "src/litert/kernel/cpu/fp16/fp16_op_handler.h"
25 #endif
26 #include "src/common/version_manager.h"
27 #include "src/common/tensor_util.h"
28 #include "src/common/file_utils.h"
29 #include "src/common/utils.h"
30 #include "src/litert/kernel_exec_util.h"
31 
32 namespace mindspore::kernel {
33 using mindspore::lite::RET_ERROR;
34 using mindspore::lite::RET_INFER_ERR;
35 using mindspore::lite::RET_INFER_INVALID;
36 using mindspore::lite::RET_OK;
37 
ToString() const38 std::string SubGraphKernel::ToString() const {
39   std::ostringstream oss;
40   oss << "===============================================" << std::endl
41       << "Subgraph type : " << this->subgraph_type_ << std::endl;
42   oss << this->in_tensors().size() << " Subgraph inputTensors:" << std::endl;
43   for (auto tensor : in_tensors()) {
44     oss << tensor->ToString() << std::endl;
45   }
46   oss << std::endl << this->out_tensors().size() << " Subgraph outputTensors:" << std::endl;
47   for (auto tensor : out_tensors()) {
48     oss << tensor->ToString() << std::endl;
49   }
50   oss << std::endl << this->in_nodes_.size() << " Subgraph input nodes:" << std::endl;
51   for (auto kernel : this->in_nodes_) {
52     oss << "***********************************************" << std::endl;
53     oss << kernel->ToString() << std::endl;
54   }
55   oss << std::endl << this->out_nodes_.size() << " Subgraph output nodes:" << std::endl;
56   for (auto kernel : this->out_nodes_) {
57     oss << "***********************************************" << std::endl;
58     oss << kernel->ToString() << std::endl;
59   }
60   oss << std::endl << nodes_.size() << " nodes in subgraph:" << std::endl;
61   for (auto kernel : this->nodes_) {
62     oss << "***********************************************" << std::endl;
63     oss << kernel->ToString() << std::endl;
64   }
65   return oss.str();
66 }
67 
Execute(const KernelCallBack & before,const KernelCallBack & after)68 int SubGraphKernel::Execute(const KernelCallBack &before, const KernelCallBack &after) {
69   if (this->executor_ == nullptr) {
70     MS_LOG(ERROR) << "executor is nullptr";
71     return RET_ERROR;
72   }
73   auto ret = executor_->Run(this->in_tensors(), this->out_tensors(), this->nodes_, before, after);
74   if (ret != RET_OK) {
75     MS_LOG(ERROR) << "Run sub graph failed: " << ret;
76     return ret;
77   }
78 
79   return lite::RET_OK;
80 }
81 
InferShape()82 int SubGraphKernel::InferShape() {
83   int infer_ret = RET_OK;
84   for (auto kernel : nodes_) {
85     MS_ASSERT(kernel != nullptr);
86     auto ret = kernel->InferShape();
87     if (ret == RET_INFER_INVALID) {
88       MS_LOG(INFO) << "InferShape shouldn't be done before runtime, type:" << kernel->type() << "flag set to false.";
89       infer_ret = RET_INFER_INVALID;
90     } else if (ret != RET_OK) {
91       MS_LOG(ERROR) << "InferShape failed, type: " << kernel->type() << ", name:" << kernel->name();
92       return RET_INFER_ERR;
93     }
94   }
95   return infer_ret;
96 }
97 
ReSize()98 int SubGraphKernel::ReSize() {
99   for (auto kernel : nodes_) {
100     MS_CHECK_FALSE_MSG(kernel == nullptr, RET_ERROR, "input kernel is nullptr.");
101     MS_CHECK_FALSE_MSG(kernel->subgraph_type() != kernel::kNotSubGraph, RET_ERROR,
102                        "all nodes in should be kernel in subgraph kernels");
103     std::vector<lite::Tensor *> inputs = kernel->in_tensors();
104     std::vector<lite::Tensor *> outputs = kernel->out_tensors();
105     for (auto &output : outputs) {
106       output->FreeData();
107     }
108     auto ret = kernel->InferShape();
109     if (ret == RET_INFER_INVALID) {
110       MS_LOG(INFO) << "InferShape shouldn't be done before runtime, type:" << kernel->type() << "flag set to false.";
111     } else if (ret != RET_OK) {
112       MS_LOG(ERROR) << "InferShape failed, type: " << kernel->type() << ", name: " << kernel->name();
113       return RET_INFER_ERR;
114     }
115     if (ret == RET_OK) {
116       ret = kernel->ReSize();
117       if (ret != RET_OK) {
118         MS_LOG(ERROR) << "kernel " << kernel->name() << " resize fail!ret = " << ret;
119         return ret;
120       }
121     }
122   }
123   return RET_OK;
124 }
125 
MallocSubgraphInputs()126 int SubGraphKernel::MallocSubgraphInputs() {
127   for (auto input : in_tensors()) {
128     auto ret = lite::MallocTensorData(input);
129     if (ret != RET_OK) {
130       return ret;
131     }
132   }
133   return RET_OK;
134 }
135 
InitInputTensorInitRefCount()136 void SubGraphKernel::InitInputTensorInitRefCount() {
137   for (auto &input : this->in_tensors()) {
138     int input_init_refcount = input->init_ref_count();
139     for (auto *node : nodes_) {
140       input_init_refcount +=
141         static_cast<int>(std::count_if(node->in_tensors().begin(), node->in_tensors().end(),
142                                        [&input](const lite::Tensor *item) { return item == input; }));
143     }
144     input->set_init_ref_count(input_init_refcount);
145   }
146 }
147 
InitOutTensorInitRefCount(const std::vector<KernelExec * > * mask_kernels)148 void SubGraphKernel::InitOutTensorInitRefCount(const std::vector<KernelExec *> *mask_kernels) {
149   for (auto *node : nodes_) {
150     node->InitOutTensorInitRefCount(mask_kernels);
151   }
152   for (auto &output : this->out_tensors()) {
153     if (output->init_ref_count() == 0) {  // true only when output is also an input and only exist in control-flow model
154       output->set_init_ref_count(1);
155     }
156   }
157 }
158 
TopologicalSortNodes()159 int SubGraphKernel::TopologicalSortNodes() {
160   in_nodes_ = kernel::KernelExecUtil::SubgraphInputNodes(nodes_);
161   auto ret = KernelExecUtil::TopologicalSortNodes(&nodes_, in_nodes_);
162   if (ret != RET_OK) {
163     MS_LOG(ERROR) << "TopologicalSortNodes failed";
164   }
165   return ret;
166 }
167 
// Inserts replace_kernel on the in-edge of `kernel` at input slot `tensor_index`:
// before: in_kernel -> kernel; after: in_kernel -> replace_kernel -> kernel.
// replace_kernel is a kernel with one input tensor and one output tensor.
void SubGraphKernel::InsertInEdge(KernelExec *kernel, KernelExec *replace_kernel, const size_t &tensor_index) {
  auto in_kernel = KernelExecUtil::FindInKernelForInTensor(kernel, kernel->in_tensors().at(tensor_index));
  if (in_kernel != nullptr) {
    in_kernel->RemoveOutKernel(kernel);  // Assume there is only one tensor between in_kernel and kernel.
    in_kernel->AddOutKernel(replace_kernel);
    auto in_tensors = kernel->in_tensors();
    // Drop the kernel-level link only if this tensor was the sole connection
    // between in_kernel and kernel (the same tensor may feed several slots).
    if (std::count(in_tensors.begin(), in_tensors.end(), in_tensors[tensor_index]) == 1) {
      kernel->RemoveInKernel(in_kernel);
    }
    replace_kernel->AddInKernel(in_kernel);
  }
  replace_kernel->AddOutKernel(kernel);
  kernel->AddInKernel(replace_kernel);
  // Re-route the input slot to read replace_kernel's (single) output tensor.
  kernel->set_in_tensor(replace_kernel->out_tensors().at(0), tensor_index);

  nodes_.push_back(replace_kernel);
}
186 
// Inserts replace_kernel on the out-edge of `kernel` at output slot `tensor_index`:
// before: kernel -> post_kernels; after: kernel -> replace_kernel -> post_kernels.
// replace_kernel is a kernel with one input tensor and one output tensor.
void SubGraphKernel::InsertOutEdge(KernelExec *kernel, KernelExec *replace_kernel, const size_t &tensor_index) {
  auto out_kernels = KernelExecUtil::FindOutKernelsForOutTensor(kernel, kernel->out_tensors().at(tensor_index));
  // Re-link every consumer of the output tensor to read from replace_kernel.
  for (const auto &post_kernel : out_kernels) {
    post_kernel->RemoveInKernel(kernel);  // Assume there is only one tensor between kernel and post_kernel.
    post_kernel->AddInKernel(replace_kernel);
    kernel->RemoveOutKernel(post_kernel);
    replace_kernel->AddOutKernel(post_kernel);
  }
  replace_kernel->AddInKernel(kernel);
  kernel->AddOutKernel(replace_kernel);
  // Re-route the output slot so `kernel` now writes into replace_kernel's
  // (single) input tensor.
  kernel->set_out_tensor(replace_kernel->in_tensors().at(0), tensor_index);

  nodes_.push_back(replace_kernel);
}
202 
// in_kernel -> in_post_kernel -> out_pre_kernel -> out_kernels.
// remove in_post_kernel and out_pre_kernel, link in_kernel and out_kernels.
// in_post_kernel and out_pre_kernel can be the same kernel sometimes.
void SubGraphKernel::UpdateInOutKernels(KernelExec *in_kernel, std::vector<KernelExec *> out_kernels,
                                        KernelExec *in_post_kernel, KernelExec *out_pre_kernel) {
  // Detach every consumer from out_pre_kernel and, when a producer exists,
  // link the consumer straight to in_kernel.
  for (const auto &out_kernel : out_kernels) {
    out_kernel->RemoveInKernel(out_pre_kernel);
    out_pre_kernel->RemoveOutKernel(out_kernel);
    if (in_kernel != nullptr) {
      out_kernel->AddInKernel(in_kernel);
      in_kernel->AddOutKernel(out_kernel);
    }
  }

  // Break the edge between the two kernels being removed (no-op when they are
  // the same kernel).
  if (in_post_kernel != out_pre_kernel) {
    in_post_kernel->RemoveOutKernel(out_pre_kernel);
    out_pre_kernel->RemoveInKernel(in_post_kernel);
  }

  // If in_post_kernel became a dead end (no consumers and not a subgraph
  // output node), detach it from its producer as well.
  if (in_post_kernel->out_kernels().empty() && in_kernel != nullptr && !lite::IsContain(out_nodes_, in_post_kernel)) {
    in_kernel->RemoveOutKernel(in_post_kernel);
    in_post_kernel->RemoveInKernel(in_kernel);
  }

  // update subgraph input node
  if (lite::IsContain(in_nodes_, in_post_kernel)) {
    // The consumers now sit at the subgraph input boundary.
    for (const auto &out_kernel : out_kernels) {
      in_nodes_.push_back(out_kernel);
    }
    if (in_post_kernel->out_kernels().empty() && !lite::IsContain(out_nodes_, in_post_kernel)) {
      (void)lite::VectorErase(&in_nodes_, in_post_kernel);
    }
  }

  // update subgraph output node
  if (lite::IsContain(out_nodes_, out_pre_kernel) && in_kernel != nullptr) {
    // The producer becomes the new output node; fully detach in_post_kernel.
    in_post_kernel->RemoveInKernel(in_kernel);
    in_kernel->RemoveOutKernel(in_post_kernel);
    out_nodes_.push_back(in_kernel);
    if (out_pre_kernel->in_kernels().empty() && !lite::IsContain(in_nodes_, out_pre_kernel)) {
      (void)lite::VectorErase(&out_nodes_, out_pre_kernel);
    }
  }
}
247 
// Update tensor according to the subgraph.
// Because the model input must be subgraph input, and the model output must be subgraph output.
// Exactly one of the two tensors around a removed kernel survives:
// - reserve_input: rewire all post kernels to read in_tensor instead of out_tensor;
// - otherwise: make in_kernel produce out_tensor and patch its consumers.
int SubGraphKernel::UpdateInOutTensors(KernelExec *in_kernel, const std::vector<KernelExec *> &out_kernels,
                                       lite::Tensor *in_tensor, lite::Tensor *out_tensor, bool keep_input) {
  // Keep the input tensor when explicitly requested (unless out_tensor is a
  // subgraph output, which must be preserved) or when in_tensor is itself a
  // subgraph input (which must be preserved too).
  auto reserve_input = (keep_input && !lite::IsContain(out_tensors(), out_tensor)) ||
                       (!keep_input && lite::IsContain(in_tensors(), in_tensor));
  if (reserve_input) {
    for (const auto &post_kernel : out_kernels) {
      CHECK_NULL_RETURN(post_kernel);
      // The same tensor may feed several input slots of one kernel; patch all.
      auto indexes = post_kernel->FindAllInTensorIndex(out_tensor);
      for (auto &index : indexes) {
        post_kernel->set_in_tensor(in_tensor, index);
      }
    }
  } else {
    CHECK_NULL_RETURN(in_kernel);
    // Replace in_kernel's output tensor with out_tensor ...
    auto index = in_kernel->FindOutTensorIndex(in_tensor);
    in_kernel->set_out_tensor(out_tensor, index);

    // ... and update every consumer of in_tensor accordingly.
    for (const auto &out_kernel : in_kernel->out_kernels()) {
      if (lite::IsContain(out_kernel->in_tensors(), in_tensor)) {
        auto input_indexes = out_kernel->FindAllInTensorIndex(in_tensor);
        for (auto input_index : input_indexes) {
          out_kernel->set_in_tensor(out_tensor, input_index);
        }
      }
    }
  }
  return RET_OK;
}
278 
// Remove a single way kernel.
// Before removing, pre_kernel -> in_tensor -> kernel -> out_tensor -> post_kernel.
// Keep_input is true, reserve the input tensor: pre_kernel -> in_tensor -> post_kernel.
// Keep_input is false, reserve the output tensor: pre_kernel -> out_tensor -> post_kernel.
int SubGraphKernel::DeleteSingleWayNode(KernelExec *kernel, bool keep_input) {
  // A kernel that is both an input and an output node is the subgraph's only
  // kernel; removing it would leave the subgraph empty, so bail out.
  if (lite::IsContain(in_nodes_, kernel) && lite::IsContain(out_nodes_, kernel)) {
    MS_LOG(INFO) << "A single kernel subgraph can't delete this kernel.";
    return RET_OK;
  }
  // Single-way node: only the first input and first output tensor matter.
  auto in_tensor = kernel->in_tensors().at(0);
  auto out_tensor = kernel->out_tensors().at(0);
  auto in_kernel = KernelExecUtil::FindInKernelForInTensor(kernel, in_tensor);
  auto out_kernels = KernelExecUtil::FindOutKernelsForOutTensor(kernel, out_tensor);
  if (in_kernel == nullptr && out_kernels.empty()) {
    MS_LOG(INFO) << "A single kernel model can't delete this kernel.";
    return RET_OK;
  }

  // update kernel link
  UpdateInOutKernels(in_kernel, out_kernels, kernel, kernel);

  // update tensor link
  auto ret = UpdateInOutTensors(in_kernel, out_kernels, in_tensor, out_tensor, keep_input);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Update tensor failed when removing kernel " << kernel->name();
    return RET_ERROR;
  }
  // Detach the kernel from all node lists before destroying it.
  DropNode(kernel);
  delete kernel;
  return RET_OK;
}
310 
DropNode(KernelExec * node)311 void SubGraphKernel::DropNode(KernelExec *node) {
312   lite::VectorErase(&nodes_, node);
313   lite::VectorErase(&in_nodes_, node);
314   lite::VectorErase(&out_nodes_, node);
315 }
316 
// Splits the subgraph into units of maximal single-way kernel chains and
// stores them in kernels_array, together with unit-to-unit connections
// (input_indexs / output_indexs) and the indexes of units fed purely by
// graph inputs or constants.
int SubGraphKernel::SubGraphSplitByOperator(KernelsArray *kernels_array) {
  kernels_array->units.clear();
  auto graph_input = this->in_tensors();
  // Work on a copy: entries are nulled out as kernels get absorbed into chains.
  std::vector<KernelExec *> nodes_tmp = nodes_;
  size_t kernels_num = nodes_tmp.size();
  for (size_t kernel_index = 0; kernel_index < kernels_num; kernel_index++) {
    auto kernel = nodes_tmp[kernel_index];
    if (kernel == nullptr) {
      // Already absorbed into an earlier chain.
      continue;
    }
    MS_CHECK_TRUE_MSG(kernel->subgraph_type() == kernel::kNotSubGraph, RET_ERROR, "node cannot be a subgraph.");
    // Start a new unit with this kernel as its head.
    kernels_array->units.push_back({});
    size_t now_index = kernels_array->units.size() - 1;
    kernels_array->units.at(now_index).kernels.push_back(kernel);
    // Wire this unit to every earlier unit that produces one of its inputs.
    for (auto in_kernel : kernel->in_kernels()) {
      for (size_t i = 0; i < now_index; i++) {
        if (lite::IsContain(kernels_array->units.at(i).kernels, in_kernel)) {
          kernels_array->units.at(now_index).input_indexs.push_back(i);
          kernels_array->units.at(i).output_indexs.push_back(now_index);
        }
      }
    }
    // A unit counts as a graph-input unit when every one of its input tensors
    // is a graph input or a constant.
    bool is_graph_input = true;
    for (auto &in_tensor : kernel->in_tensors()) {
      if (!(lite::IsContain(graph_input, in_tensor) || in_tensor->IsGraphInput() || in_tensor->IsConst())) {
        is_graph_input = false;
      }
    }
    if (is_graph_input) {
      if (kernel->in_kernels().size() != 0) {
        MS_LOG(ERROR) << "graph input node in_kernels num invalid!";
        return RET_ERROR;
      }
      kernels_array->graph_input.push_back(now_index);
    } else if (kernel->in_kernels().size() == 0) {
      // A non-input kernel with no producers means the graph is malformed.
      MS_LOG(ERROR) << "graph input node invalid!";
      return RET_ERROR;
    }
    // Sanity check: in/out kernel links must be symmetric in both directions.
    MS_CHECK_TRUE_MSG(std::find_if(kernel->in_kernels().begin(), kernel->in_kernels().end(),
                                   [kernel](KernelExec *in_kernel) {
                                     return !lite::IsContain(in_kernel->out_kernels(), kernel);
                                   }) == kernel->in_kernels().end(),
                      RET_ERROR, "Invalid input and output structure of nodes in the graph.");
    MS_CHECK_TRUE_MSG(std::find_if(kernel->out_kernels().begin(), kernel->out_kernels().end(),
                                   [kernel](KernelExec *out_kernel) {
                                     return !lite::IsContain(out_kernel->in_kernels(), kernel);
                                   }) == kernel->out_kernels().end(),
                      RET_ERROR, "Invalid input and output structure of nodes in the graph.");
    // Greedily extend the chain while the link is strictly one-to-one.
    while ((kernel->out_kernels().size() == 1) && (kernel->out_kernels().front()->in_kernels().size() == 1)) {
      kernel = kernel->out_kernels().front();
      size_t i;
      for (i = kernel_index + 1; i < kernels_num; i++) {
        if (nodes_tmp[i] == kernel) {
          break;
        }
      }
      if (i < kernels_num) {
        // Mark the absorbed kernel so the outer loop skips it.
        nodes_tmp[i] = nullptr;
      } else {
        MS_LOG(ERROR) << "graph structure invalid!";
        return RET_ERROR;
      }
      kernels_array->units.at(now_index).kernels.push_back(kernel);
    }
  }
  return RET_OK;
}
384 
Prepare()385 int CustomSubGraph::Prepare() {
386   auto ret = SubGraphKernel::Prepare();
387   if (ret != RET_OK) {
388     return ret;
389   }
390   if (nodes_.size() < 1) {
391     return RET_OK;
392   }
393   auto provider = nodes_[0]->desc().provider;
394   auto context = this->Context();
395   AllocatorPtr allocator = context->allocator;
396   auto iter = std::find_if(context->device_list_.begin(), context->device_list_.end(),
397                            [&provider](const auto &dev) { return dev.provider_ == provider; });
398   if (iter != context->device_list_.end()) {
399     allocator = iter->allocator_;
400   }
401 
402   for (size_t i = 0; i < nodes_.size() - 1; ++i) {
403     auto node = nodes_[i];
404     for (auto tensor : node->out_tensors()) {
405       MS_ASSERT(tensor != nullptr);
406       if (tensor->allocator() == nullptr) {
407         tensor->set_allocator(allocator);
408       }
409     }
410   }
411 
412   auto node = nodes_[nodes_.size() - 1];
413   for (auto tensor : node->out_tensors()) {
414     MS_ASSERT(tensor != nullptr);
415     if (tensor->allocator() == nullptr) {
416       tensor->set_allocator(context->allocator);
417     }
418   }
419   return RET_OK;
420 }
421 
Execute(const KernelCallBack & before,const KernelCallBack & after)422 int CustomSubGraph::Execute(const KernelCallBack &before, const KernelCallBack &after) {
423   for (auto kernel : nodes_) {
424     MS_ASSERT(kernel != nullptr);
425     auto ret = kernel->Execute(before, after);
426     if (ret != RET_OK) {
427       MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name();
428       return ret;
429     }
430   }
431 
432   return RET_OK;
433 }
434 
Prepare()435 int CpuSubGraph::Prepare() {
436   auto ret = SubGraphKernel::Prepare();
437   if (ret != RET_OK) {
438     return ret;
439   }
440   for (auto node : nodes_) {
441     for (auto tensor : node->out_tensors()) {
442       MS_ASSERT(tensor != nullptr);
443       if (tensor->allocator() == nullptr) {
444         tensor->set_allocator(this->Context()->allocator);
445       }
446     }
447   }
448   for (auto &out : this->out_tensors()) {
449     if (out->allocator() == nullptr) {
450       out->set_allocator(this->Context()->allocator);
451     }
452   }
453   return RET_OK;
454 }
455 
Execute(const KernelCallBack & before,const KernelCallBack & after)456 int CpuSubGraph::Execute(const KernelCallBack &before, const KernelCallBack &after) {
457   MS_ASSERT(this->Context()->allocator.get() != nullptr);
458   for (auto *kernel : nodes_) {
459     MS_ASSERT(kernel != nullptr);
460     auto ret = kernel->Execute(before, after);
461     if (ret != RET_OK) {
462       MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name();
463       return ret;
464     }
465   }
466   return RET_OK;
467 }
468 
469 #if defined(ENABLE_ARM) && defined(ENABLE_FP16) && !defined(ENABLE_MINDRT)
Execute(const KernelCallBack & before,const KernelCallBack & after)470 int CpuFp16SubGraph::Execute(const KernelCallBack &before, const KernelCallBack &after) {
471   MS_ASSERT(this->Context()->allocator.get() != nullptr);
472   auto ret = this->PreProcess();
473   if (RET_OK != ret) {
474     MS_LOG(ERROR) << "PreProcess kernel failed, name: " << this->name();
475     return ret;
476   }
477   for (auto *kernel : nodes_) {
478     MS_ASSERT(kernel != nullptr);
479     ret = kernel->Execute(before, after);
480     if (ret != RET_OK) {
481       MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name();
482       return ret;
483     }
484   }
485   ret = this->PostProcess();
486   if (RET_OK != ret) {
487     MS_LOG(ERROR) << "PostProcess kernel failed, name: " << this->name();
488     return ret;
489   }
490   return RET_OK;
491 }
492 
FreeOriginInputData()493 void CpuFp16SubGraph::FreeOriginInputData() {
494   for (auto *data_store : this->origin_input_data_) {
495     if (data_store == nullptr) {
496       continue;
497     }
498     // free data in data_store
499     if (data_store->data_ != nullptr) {
500       if (data_store->allocator_ == nullptr) {
501         free(data_store->data_);
502       } else {
503         data_store->allocator_->Free(data_store->data_);
504       }
505     }
506     // free data_store
507     free(data_store);
508     data_store = nullptr;
509   }
510   this->origin_input_data_.clear();
511 }
512 
// Converts fp32 input tensors to fp16 in place before running the subgraph.
// The original fp32 buffers are parked in origin_input_data_ (one entry per
// input, nullptr for non-fp32 inputs) so PostProcess can free or restore them.
int CpuFp16SubGraph::PreProcess() {
  if (!support_fp16_) {
    MS_LOG(ERROR) << "Unsupported fp16 in this devices";
    return RET_ERROR;
  }
  MS_ASSERT(origin_input_data_.empty());
  for (auto tensor : this->in_tensors()) {
    MS_ASSERT(tensor != nullptr);
    if (tensor->data_type() == kNumberTypeFloat32) {
      auto float32_data = tensor->data();
      MS_ASSERT(float32_data != nullptr);
      auto tensor_own_data = tensor->own_data();
      // Detach the fp32 buffer and switch the tensor to fp16 before calling
      // Size(), so the new buffer is sized for the fp16 type.
      tensor->set_data(nullptr);
      tensor->set_data_type(TypeId::kNumberTypeFloat16);
      auto tmp_data = malloc(tensor->Size());
      if (tmp_data == nullptr) {
        MS_LOG(ERROR) << "malloc data failed";
        // Roll back the fp32 buffers saved so far.
        this->FreeOriginInputData();
        return RET_ERROR;
      }
      tensor->set_data(tmp_data);
      MS_ASSERT(tensor->data() != nullptr);
      Float32ToFloat16_fp16_handler(float32_data, tensor->data(), tensor->ElementsNum(), support_fp16_);
      // Record the original buffer with its ownership flag and allocator so it
      // can be freed/restored correctly later.
      auto *data_store = DataStore::CreateDataStore(float32_data, tensor_own_data, tensor->allocator().get());
      if (data_store == nullptr) {
        MS_LOG(ERROR) << "Create DataStore failed";
        this->FreeOriginInputData();
        return RET_ERROR;
      }
      // The fp16 buffer came from malloc, not from an allocator.
      tensor->set_allocator(nullptr);
      origin_input_data_.emplace_back(data_store);
    } else {
      // Keep index alignment with in_tensors(): non-fp32 inputs get a null slot.
      origin_input_data_.emplace_back(nullptr);
    }
  }
  // Flip intermediate fp32 outputs to fp16 so downstream kernels run in fp16.
  // Cast nodes keep their declared output type untouched.
  for (auto kernel : this->nodes_) {
    for (auto tensor : kernel->out_tensors()) {
      if (kernel->type() == schema::PrimitiveType_Cast) {
        continue;
      }
      if (tensor->data_type() == kNumberTypeFloat32) {
        tensor->set_data_type(kNumberTypeFloat16);
      }
    }
  }
  return RET_OK;
}
560 
// Converts fp16 output tensors back to fp32 and restores the original fp32
// input buffers saved by PreProcess, then releases the saved records.
int CpuFp16SubGraph::PostProcess() {
  if (!support_fp16_) {
    MS_LOG(ERROR) << "Unsupported fp16 in this devices";
    return RET_ERROR;
  }
  for (auto tensor : this->out_tensors()) {
    MS_ASSERT(tensor != nullptr);
    if (tensor->data_type() == kNumberTypeFloat16) {
      auto float16_data = tensor->data();
      MS_ASSERT(float16_data != nullptr);
      // Detach the fp16 buffer and switch back to fp32 before calling Size(),
      // so the replacement buffer is sized for fp32.
      tensor->set_data(nullptr);
      tensor->set_data_type(TypeId::kNumberTypeFloat32);
      auto tmp_data = malloc(tensor->Size());
      if (tmp_data == nullptr) {
        MS_LOG(ERROR) << "malloc data failed";
        // Free the detached fp16 buffer before bailing out to avoid a leak.
        if (this->context_ != nullptr && this->context_->allocator != nullptr) {
          this->context_->allocator->Free(float16_data);
        } else {
          free(float16_data);
        }
        return RET_ERROR;
      }
      tensor->set_data(tmp_data);
      MS_ASSERT(tensor->data() != nullptr);
      Float16ToFloat32_fp16_handler(float16_data, tensor->data(), tensor->ElementsNum(), support_fp16_);
      // Release the fp16 buffer with the allocator that produced it (if any).
      if (tensor->allocator() != nullptr) {
        tensor->allocator()->Free(float16_data);
      } else {
        free(float16_data);
      }
      tensor->set_allocator(nullptr);
    }
  }
  MS_ASSERT(this->origin_input_data_.size() == this->in_tensors().size());
  // Restore the fp32 input buffers that PreProcess parked aside.
  for (size_t i = 0; i < this->in_tensors().size(); i++) {
    auto tensor = in_tensors().at(i);
    MS_ASSERT(tensor != nullptr);
    auto origin_tensor_data = origin_input_data_.at(i);
    if (tensor->data_type() == kNumberTypeFloat16 && origin_tensor_data != nullptr) {
      // NOTE(review): restoration is gated on "original data was not owned OR
      // the tensor still holds (fp16) data"; when the condition is false the
      // saved fp32 buffer is only freed by FreeOriginInputData below — confirm
      // this is the intended semantics.
      if (!origin_tensor_data->own_data_ || (tensor->data() != nullptr)) {
        MS_ASSERT(tensor != nullptr);
        // Drop the temporary fp16 buffer and hand the fp32 one back.
        free(tensor->data());
        MS_ASSERT(origin_tensor_data->data_ != nullptr);
        tensor->set_data(origin_tensor_data->data_, origin_tensor_data->own_data_);
        tensor->set_data_type(kNumberTypeFloat32);
        // Ownership returned to the tensor; prevent a double free below.
        origin_tensor_data->data_ = nullptr;
      }
    }
  }
  this->FreeOriginInputData();
  return RET_OK;
}
613 #endif
614 
Prepare()615 int AclSubGraph::Prepare() {
616   auto ret = SubGraphKernel::Prepare();
617   if (ret != RET_OK) {
618     return ret;
619   }
620   for (auto node : nodes_) {
621     for (auto tensor : node->out_tensors()) {
622       MS_ASSERT(tensor != nullptr);
623       if (tensor->allocator() == nullptr) {
624         tensor->set_allocator(this->Context()->allocator);
625       }
626     }
627   }
628   for (auto &out : this->out_tensors()) {
629     if (out->allocator() == nullptr) {
630       out->set_allocator(this->Context()->allocator);
631     }
632   }
633   return RET_OK;
634 }
635 
Execute(const KernelCallBack & before,const KernelCallBack & after)636 int AclSubGraph::Execute(const KernelCallBack &before, const KernelCallBack &after) {
637   MS_ASSERT(this->Context()->allocator.get() != nullptr);
638   for (auto *kernel : nodes_) {
639     MS_ASSERT(kernel != nullptr);
640     auto ret = kernel->Execute(before, after);
641     if (ret != RET_OK) {
642       MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name();
643       return ret;
644     }
645   }
646   return RET_OK;
647 }
648 }  // namespace mindspore::kernel
649