/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/executor/sub_graph_kernel.h"
#include <algorithm>
#include <fstream>
#include <queue>
#include "src/tensor.h"
#include "src/tensorlist.h"
#ifdef ENABLE_FP16
#include "src/litert/kernel/cpu/fp16/fp16_op_handler.h"
#endif
#include "src/common/version_manager.h"
#include "src/common/tensor_util.h"
#include "src/common/file_utils.h"
#include "src/common/utils.h"
#include "src/litert/kernel_exec_util.h"

namespace mindspore::kernel {
using mindspore::lite::RET_ERROR;
using mindspore::lite::RET_INFER_ERR;
using mindspore::lite::RET_INFER_INVALID;
using mindspore::lite::RET_OK;

std::string SubGraphKernel::ToString() const {
  std::ostringstream oss;
  oss << "===============================================" << std::endl
      << "Subgraph type : " << this->subgraph_type_ << std::endl;
  oss << this->in_tensors().size() << " Subgraph inputTensors:" << std::endl;
  for (auto tensor : in_tensors()) {
    oss << tensor->ToString() << std::endl;
  }
  oss << std::endl << this->out_tensors().size() << " Subgraph outputTensors:" << std::endl;
  for (auto tensor : out_tensors()) {
    oss << tensor->ToString() << std::endl;
  }
  oss << std::endl << this->in_nodes_.size() << " Subgraph input nodes:" << std::endl;
  for (auto kernel : this->in_nodes_) {
    oss << "***********************************************" << std::endl;
    oss << kernel->ToString() << std::endl;
  }
  oss << std::endl << this->out_nodes_.size() << " Subgraph output nodes:" << std::endl;
  for (auto kernel : this->out_nodes_) {
    oss << "***********************************************" << std::endl;
    oss << kernel->ToString() << std::endl;
  }
  oss << std::endl << nodes_.size() << " nodes in subgraph:" << std::endl;
  for (auto kernel : this->nodes_) {
    oss << "***********************************************" << std::endl;
    oss << kernel->ToString() << std::endl;
  }
  return oss.str();
}

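// Run every node in this subgraph through executor_, forwarding the optional
// before/after callbacks to each kernel.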
int SubGraphKernel::Execute(const KernelCallBack &before, const KernelCallBack &after) {
  if (this->executor_ == nullptr) {
    MS_LOG(ERROR) << "executor is nullptr";
    return RET_ERROR;
  }
  auto ret = executor_->Run(this->in_tensors(), this->out_tensors(), this->nodes_, before, after);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Run sub graph failed: " << ret;
    return ret;
  }

  return lite::RET_OK;
}

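// Infer the shape of every node. Returns RET_INFER_INVALID if any node defers
// shape inference to runtime, RET_INFER_ERR on a hard failure, RET_OK otherwise.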
int SubGraphKernel::InferShape() {
  int infer_ret = RET_OK;
  for (auto kernel : nodes_) {
    MS_ASSERT(kernel != nullptr);
    auto ret = kernel->InferShape();
    if (ret == RET_INFER_INVALID) {
      MS_LOG(INFO) << "InferShape shouldn't be done before runtime, type: " << kernel->type()
                   << ", flag set to false.";
      infer_ret = RET_INFER_INVALID;
    } else if (ret != RET_OK) {
      MS_LOG(ERROR) << "InferShape failed, type: " << kernel->type() << ", name: " << kernel->name();
      return RET_INFER_ERR;
    }
  }
  return infer_ret;
}

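// Re-infer shapes and resize every node. Output data is freed first so buffers
// are re-allocated with the new shapes on the next run; kernels whose shape
// inference is deferred to runtime are skipped rather than resized.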
int SubGraphKernel::ReSize() {
  for (auto kernel : nodes_) {
    MS_CHECK_FALSE_MSG(kernel == nullptr, RET_ERROR, "input kernel is nullptr.");
    MS_CHECK_FALSE_MSG(kernel->subgraph_type() != kernel::kNotSubGraph, RET_ERROR,
                       "all nodes in a subgraph should be non-subgraph kernels");
    std::vector<lite::Tensor *> inputs = kernel->in_tensors();
    std::vector<lite::Tensor *> outputs = kernel->out_tensors();
    for (auto &output : outputs) {
      output->FreeData();
    }
    auto ret = kernel->InferShape();
    if (ret == RET_INFER_INVALID) {
      MS_LOG(INFO) << "InferShape shouldn't be done before runtime, type: " << kernel->type()
                   << ", flag set to false.";
    } else if (ret != RET_OK) {
      MS_LOG(ERROR) << "InferShape failed, type: " << kernel->type() << ", name: " << kernel->name();
      return RET_INFER_ERR;
    }
    if (ret == RET_OK) {
      ret = kernel->ReSize();
      if (ret != RET_OK) {
        MS_LOG(ERROR) << "kernel " << kernel->name() << " resize failed, ret = " << ret;
        return ret;
      }
    }
  }
  return RET_OK;
}

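// Allocate data for every subgraph input tensor.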
int SubGraphKernel::MallocSubgraphInputs() {
  for (auto input : in_tensors()) {
    auto ret = lite::MallocTensorData(input);
    if (ret != RET_OK) {
      return ret;
    }
  }
  return RET_OK;
}

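// Add the number of in-graph consumers of each subgraph input tensor to its
// initial reference count; duplicated uses within one node are counted per use.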
void SubGraphKernel::InitInputTensorInitRefCount() {
  for (auto &input : this->in_tensors()) {
    int input_init_refcount = input->init_ref_count();
    for (auto *node : nodes_) {
      input_init_refcount +=
        static_cast<int>(std::count_if(node->in_tensors().begin(), node->in_tensors().end(),
                                       [&input](const lite::Tensor *item) { return item == input; }));
    }
    input->set_init_ref_count(input_init_refcount);
  }
}

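// Initialize the output-tensor reference counts of all nodes. A subgraph output
// left with a count of 0 (it is also an input, which only happens in
// control-flow models) is pinned at 1 so it is not freed prematurely.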
void SubGraphKernel::InitOutTensorInitRefCount(const std::vector<KernelExec *> *mask_kernels) {
  for (auto *node : nodes_) {
    node->InitOutTensorInitRefCount(mask_kernels);
  }
  for (auto &output : this->out_tensors()) {
    if (output->init_ref_count() == 0) {  // true only when the output is also an input, which only exists in
                                          // control-flow models
      output->set_init_ref_count(1);
    }
  }
}

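// Recompute the subgraph input nodes and sort nodes_ into execution order.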
int SubGraphKernel::TopologicalSortNodes() {
  in_nodes_ = kernel::KernelExecUtil::SubgraphInputNodes(nodes_);
  auto ret = KernelExecUtil::TopologicalSortNodes(&nodes_, in_nodes_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "TopologicalSortNodes failed";
  }
  return ret;
}

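// Insert replace_kernel on the input edge `tensor_index` of `kernel`:
//   in_kernel -> kernel   becomes   in_kernel -> replace_kernel -> kernel.
// Illustrative usage (a sketch with hypothetical kernels, not taken from the
// original source), e.g. inserting a cast in front of a conv's first input:
//   subgraph->InsertInEdge(conv_kernel, cast_kernel, 0);
//   (void)subgraph->TopologicalSortNodes();  // nodes_ order may be stale after the insert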
void SubGraphKernel::InsertInEdge(KernelExec *kernel, KernelExec *replace_kernel, const size_t &tensor_index) {
  // replace_kernel is a kernel with one input tensor and one output tensor
  auto in_kernel = KernelExecUtil::FindInKernelForInTensor(kernel, kernel->in_tensors().at(tensor_index));
  if (in_kernel != nullptr) {
    in_kernel->RemoveOutKernel(kernel);  // Assume there is only one tensor between in_kernel and kernel.
    in_kernel->AddOutKernel(replace_kernel);
    auto in_tensors = kernel->in_tensors();
    if (std::count(in_tensors.begin(), in_tensors.end(), in_tensors[tensor_index]) == 1) {
      kernel->RemoveInKernel(in_kernel);
    }
    replace_kernel->AddInKernel(in_kernel);
  }
  replace_kernel->AddOutKernel(kernel);
  kernel->AddInKernel(replace_kernel);
  kernel->set_in_tensor(replace_kernel->out_tensors().at(0), tensor_index);

  nodes_.push_back(replace_kernel);
}

void SubGraphKernel::InsertOutEdge(KernelExec *kernel, KernelExec *replace_kernel, const size_t &tensor_index) {
  // replace_kernel is a kernel with one input tensor and one output tensor
  auto out_kernels = KernelExecUtil::FindOutKernelsForOutTensor(kernel, kernel->out_tensors().at(tensor_index));
  for (const auto &post_kernel : out_kernels) {
    post_kernel->RemoveInKernel(kernel);  // Assume there is only one tensor between kernel and post_kernel.
    post_kernel->AddInKernel(replace_kernel);
    kernel->RemoveOutKernel(post_kernel);
    replace_kernel->AddOutKernel(post_kernel);
  }
  replace_kernel->AddInKernel(kernel);
  kernel->AddOutKernel(replace_kernel);
  kernel->set_out_tensor(replace_kernel->in_tensors().at(0), tensor_index);

  nodes_.push_back(replace_kernel);
}

// in_kernel -> in_post_kernel -> out_pre_kernel -> out_kernels.
// Remove in_post_kernel and out_pre_kernel, then link in_kernel to out_kernels.
// in_post_kernel and out_pre_kernel can sometimes be the same kernel.
void SubGraphKernel::UpdateInOutKernels(KernelExec *in_kernel, std::vector<KernelExec *> out_kernels,
                                        KernelExec *in_post_kernel, KernelExec *out_pre_kernel) {
  for (const auto &out_kernel : out_kernels) {
    out_kernel->RemoveInKernel(out_pre_kernel);
    out_pre_kernel->RemoveOutKernel(out_kernel);
    if (in_kernel != nullptr) {
      out_kernel->AddInKernel(in_kernel);
      in_kernel->AddOutKernel(out_kernel);
    }
  }

  if (in_post_kernel != out_pre_kernel) {
    in_post_kernel->RemoveOutKernel(out_pre_kernel);
    out_pre_kernel->RemoveInKernel(in_post_kernel);
  }

  if (in_post_kernel->out_kernels().empty() && in_kernel != nullptr && !lite::IsContain(out_nodes_, in_post_kernel)) {
    in_kernel->RemoveOutKernel(in_post_kernel);
    in_post_kernel->RemoveInKernel(in_kernel);
  }

  // update subgraph input node
  if (lite::IsContain(in_nodes_, in_post_kernel)) {
    for (const auto &out_kernel : out_kernels) {
      in_nodes_.push_back(out_kernel);
    }
    if (in_post_kernel->out_kernels().empty() && !lite::IsContain(out_nodes_, in_post_kernel)) {
      (void)lite::VectorErase(&in_nodes_, in_post_kernel);
    }
  }

  // update subgraph output node
  if (lite::IsContain(out_nodes_, out_pre_kernel) && in_kernel != nullptr) {
    in_post_kernel->RemoveInKernel(in_kernel);
    in_kernel->RemoveOutKernel(in_post_kernel);
    out_nodes_.push_back(in_kernel);
    if (out_pre_kernel->in_kernels().empty() && !lite::IsContain(in_nodes_, out_pre_kernel)) {
      (void)lite::VectorErase(&out_nodes_, out_pre_kernel);
    }
  }
}

// Update tensors according to the subgraph: the model input must be a subgraph
// input, and the model output must be a subgraph output.
int SubGraphKernel::UpdateInOutTensors(KernelExec *in_kernel, const std::vector<KernelExec *> &out_kernels,
                                       lite::Tensor *in_tensor, lite::Tensor *out_tensor, bool keep_input) {
  auto reserve_input = (keep_input && !lite::IsContain(out_tensors(), out_tensor)) ||
                       (!keep_input && lite::IsContain(in_tensors(), in_tensor));
  if (reserve_input) {
    for (const auto &post_kernel : out_kernels) {
      CHECK_NULL_RETURN(post_kernel);
      auto indexes = post_kernel->FindAllInTensorIndex(out_tensor);
      for (auto &index : indexes) {
        post_kernel->set_in_tensor(in_tensor, index);
      }
    }
  } else {
    CHECK_NULL_RETURN(in_kernel);
    auto index = in_kernel->FindOutTensorIndex(in_tensor);
    in_kernel->set_out_tensor(out_tensor, index);

    for (const auto &out_kernel : in_kernel->out_kernels()) {
      if (lite::IsContain(out_kernel->in_tensors(), in_tensor)) {
        auto input_indexes = out_kernel->FindAllInTensorIndex(in_tensor);
        for (auto input_index : input_indexes) {
          out_kernel->set_in_tensor(out_tensor, input_index);
        }
      }
    }
  }
  return RET_OK;
}

// Remove a single-way kernel.
// Before removing: pre_kernel -> in_tensor -> kernel -> out_tensor -> post_kernel.
// If keep_input is true, the input tensor is kept: pre_kernel -> in_tensor -> post_kernel.
// If keep_input is false, the output tensor is kept: pre_kernel -> out_tensor -> post_kernel.
int SubGraphKernel::DeleteSingleWayNode(KernelExec *kernel, bool keep_input) {
  if (lite::IsContain(in_nodes_, kernel) && lite::IsContain(out_nodes_, kernel)) {
    MS_LOG(INFO) << "Can't delete this kernel: the subgraph consists of a single kernel.";
    return RET_OK;
  }
  auto in_tensor = kernel->in_tensors().at(0);
  auto out_tensor = kernel->out_tensors().at(0);
  auto in_kernel = KernelExecUtil::FindInKernelForInTensor(kernel, in_tensor);
  auto out_kernels = KernelExecUtil::FindOutKernelsForOutTensor(kernel, out_tensor);
  if (in_kernel == nullptr && out_kernels.empty()) {
    MS_LOG(INFO) << "Can't delete this kernel: the model consists of a single kernel.";
    return RET_OK;
  }

  // update kernel links
  UpdateInOutKernels(in_kernel, out_kernels, kernel, kernel);

  // update tensor links
  auto ret = UpdateInOutTensors(in_kernel, out_kernels, in_tensor, out_tensor, keep_input);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Update tensor failed when removing kernel " << kernel->name();
    return RET_ERROR;
  }
  DropNode(kernel);
  delete kernel;
  return RET_OK;
}
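// Illustrative usage (a sketch with a hypothetical kernel, not taken from the
// original source), e.g. removing a redundant single-input/single-output cast
// while keeping its input tensor:
//   if (subgraph->DeleteSingleWayNode(cast_kernel, /*keep_input=*/true) != RET_OK) {
//     MS_LOG(ERROR) << "delete cast kernel failed";
//   }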

void SubGraphKernel::DropNode(KernelExec *node) {
  lite::VectorErase(&nodes_, node);
  lite::VectorErase(&in_nodes_, node);
  lite::VectorErase(&out_nodes_, node);
}

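// Split the subgraph into units of single-in/single-out kernel chains. Each
// unit records the units it consumes (input_indexs) and the units that consume
// it (output_indexs); units fed only by graph inputs or constants are listed in
// kernels_array->graph_input.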
int SubGraphKernel::SubGraphSplitByOperator(KernelsArray *kernels_array) {
  kernels_array->units.clear();
  auto graph_input = this->in_tensors();
  std::vector<KernelExec *> nodes_tmp = nodes_;
  size_t kernels_num = nodes_tmp.size();
  for (size_t kernel_index = 0; kernel_index < kernels_num; kernel_index++) {
    auto kernel = nodes_tmp[kernel_index];
    if (kernel == nullptr) {
      continue;
    }
    MS_CHECK_TRUE_MSG(kernel->subgraph_type() == kernel::kNotSubGraph, RET_ERROR, "node cannot be a subgraph.");
    kernels_array->units.push_back({});
    size_t now_index = kernels_array->units.size() - 1;
    kernels_array->units.at(now_index).kernels.push_back(kernel);
    for (auto in_kernel : kernel->in_kernels()) {
      for (size_t i = 0; i < now_index; i++) {
        if (lite::IsContain(kernels_array->units.at(i).kernels, in_kernel)) {
          kernels_array->units.at(now_index).input_indexs.push_back(i);
          kernels_array->units.at(i).output_indexs.push_back(now_index);
        }
      }
    }
    bool is_graph_input = true;
    for (auto &in_tensor : kernel->in_tensors()) {
      if (!(lite::IsContain(graph_input, in_tensor) || in_tensor->IsGraphInput() || in_tensor->IsConst())) {
        is_graph_input = false;
      }
    }
    if (is_graph_input) {
      if (kernel->in_kernels().size() != 0) {
        MS_LOG(ERROR) << "graph input node in_kernels num invalid!";
        return RET_ERROR;
      }
      kernels_array->graph_input.push_back(now_index);
    } else if (kernel->in_kernels().size() == 0) {
      MS_LOG(ERROR) << "graph input node invalid!";
      return RET_ERROR;
    }
    MS_CHECK_TRUE_MSG(std::find_if(kernel->in_kernels().begin(), kernel->in_kernels().end(),
                                   [kernel](KernelExec *in_kernel) {
                                     return !lite::IsContain(in_kernel->out_kernels(), kernel);
                                   }) == kernel->in_kernels().end(),
                      RET_ERROR, "Invalid input and output structure of nodes in the graph.");
    MS_CHECK_TRUE_MSG(std::find_if(kernel->out_kernels().begin(), kernel->out_kernels().end(),
                                   [kernel](KernelExec *out_kernel) {
                                     return !lite::IsContain(out_kernel->in_kernels(), kernel);
                                   }) == kernel->out_kernels().end(),
                      RET_ERROR, "Invalid input and output structure of nodes in the graph.");
    while ((kernel->out_kernels().size() == 1) && (kernel->out_kernels().front()->in_kernels().size() == 1)) {
      kernel = kernel->out_kernels().front();
      size_t i;
      for (i = kernel_index + 1; i < kernels_num; i++) {
        if (nodes_tmp[i] == kernel) {
          break;
        }
      }
      if (i < kernels_num) {
        nodes_tmp[i] = nullptr;
      } else {
        MS_LOG(ERROR) << "graph structure invalid!";
        return RET_ERROR;
      }
      kernels_array->units.at(now_index).kernels.push_back(kernel);
    }
  }
  return RET_OK;
}

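// Bind intermediate tensors to the allocator of the device matching the first
// node's provider; the last node's outputs fall back to the context allocator.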
int CustomSubGraph::Prepare() {
  auto ret = SubGraphKernel::Prepare();
  if (ret != RET_OK) {
    return ret;
  }
  if (nodes_.size() < 1) {
    return RET_OK;
  }
  auto provider = nodes_[0]->desc().provider;
  auto context = this->Context();
  AllocatorPtr allocator = context->allocator;
  auto iter = std::find_if(context->device_list_.begin(), context->device_list_.end(),
                           [&provider](const auto &dev) { return dev.provider_ == provider; });
  if (iter != context->device_list_.end()) {
    allocator = iter->allocator_;
  }

  for (size_t i = 0; i < nodes_.size() - 1; ++i) {
    auto node = nodes_[i];
    for (auto tensor : node->out_tensors()) {
      MS_ASSERT(tensor != nullptr);
      if (tensor->allocator() == nullptr) {
        tensor->set_allocator(allocator);
      }
    }
  }

  auto node = nodes_[nodes_.size() - 1];
  for (auto tensor : node->out_tensors()) {
    MS_ASSERT(tensor != nullptr);
    if (tensor->allocator() == nullptr) {
      tensor->set_allocator(context->allocator);
    }
  }
  return RET_OK;
}

int CustomSubGraph::Execute(const KernelCallBack &before, const KernelCallBack &after) {
  for (auto kernel : nodes_) {
    MS_ASSERT(kernel != nullptr);
    auto ret = kernel->Execute(before, after);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name();
      return ret;
    }
  }

  return RET_OK;
}

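// Assign the context allocator to every node output tensor and subgraph output
// tensor that does not have an allocator yet.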
int CpuSubGraph::Prepare() {
  auto ret = SubGraphKernel::Prepare();
  if (ret != RET_OK) {
    return ret;
  }
  for (auto node : nodes_) {
    for (auto tensor : node->out_tensors()) {
      MS_ASSERT(tensor != nullptr);
      if (tensor->allocator() == nullptr) {
        tensor->set_allocator(this->Context()->allocator);
      }
    }
  }
  for (auto &out : this->out_tensors()) {
    if (out->allocator() == nullptr) {
      out->set_allocator(this->Context()->allocator);
    }
  }
  return RET_OK;
}

int CpuSubGraph::Execute(const KernelCallBack &before, const KernelCallBack &after) {
  MS_ASSERT(this->Context()->allocator.get() != nullptr);
  for (auto *kernel : nodes_) {
    MS_ASSERT(kernel != nullptr);
    auto ret = kernel->Execute(before, after);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name();
      return ret;
    }
  }
  return RET_OK;
}

#if defined(ENABLE_ARM) && defined(ENABLE_FP16) && !defined(ENABLE_MINDRT)
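// Execution brackets the fp16 graph with dtype conversions: PreProcess casts
// fp32 inputs to fp16, the nodes run, and PostProcess casts the outputs back to
// fp32 and restores the original input buffers.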
int CpuFp16SubGraph::Execute(const KernelCallBack &before, const KernelCallBack &after) {
  MS_ASSERT(this->Context()->allocator.get() != nullptr);
  auto ret = this->PreProcess();
  if (RET_OK != ret) {
    MS_LOG(ERROR) << "PreProcess kernel failed, name: " << this->name();
    return ret;
  }
  for (auto *kernel : nodes_) {
    MS_ASSERT(kernel != nullptr);
    ret = kernel->Execute(before, after);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name();
      return ret;
    }
  }
  ret = this->PostProcess();
  if (RET_OK != ret) {
    MS_LOG(ERROR) << "PostProcess kernel failed, name: " << this->name();
    return ret;
  }
  return RET_OK;
}

void CpuFp16SubGraph::FreeOriginInputData() {
  for (auto *data_store : this->origin_input_data_) {
    if (data_store == nullptr) {
      continue;
    }
    // free the data held by data_store
    if (data_store->data_ != nullptr) {
      if (data_store->allocator_ == nullptr) {
        free(data_store->data_);
      } else {
        data_store->allocator_->Free(data_store->data_);
      }
    }
    // free data_store itself
    free(data_store);
    data_store = nullptr;
  }
  this->origin_input_data_.clear();
}

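// Cast every fp32 input tensor to fp16 in a freshly malloc'ed buffer, stashing
// the original fp32 data (and its allocator) in origin_input_data_ so that
// PostProcess can restore it. Node output tensors are retyped to fp16, except
// the outputs of Cast kernels, which keep their declared type.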
int CpuFp16SubGraph::PreProcess() {
  if (!support_fp16_) {
    MS_LOG(ERROR) << "fp16 is not supported on this device";
    return RET_ERROR;
  }
  MS_ASSERT(origin_input_data_.empty());
  for (auto tensor : this->in_tensors()) {
    MS_ASSERT(tensor != nullptr);
    if (tensor->data_type() == kNumberTypeFloat32) {
      auto float32_data = tensor->data();
      MS_ASSERT(float32_data != nullptr);
      auto tensor_own_data = tensor->own_data();
      tensor->set_data(nullptr);
      tensor->set_data_type(TypeId::kNumberTypeFloat16);
      auto tmp_data = malloc(tensor->Size());
      if (tmp_data == nullptr) {
        MS_LOG(ERROR) << "malloc data failed";
        this->FreeOriginInputData();
        return RET_ERROR;
      }
      tensor->set_data(tmp_data);
      MS_ASSERT(tensor->data() != nullptr);
      Float32ToFloat16_fp16_handler(float32_data, tensor->data(), tensor->ElementsNum(), support_fp16_);
      auto *data_store = DataStore::CreateDataStore(float32_data, tensor_own_data, tensor->allocator().get());
      if (data_store == nullptr) {
        MS_LOG(ERROR) << "Create DataStore failed";
        this->FreeOriginInputData();
        return RET_ERROR;
      }
      tensor->set_allocator(nullptr);
      origin_input_data_.emplace_back(data_store);
    } else {
      origin_input_data_.emplace_back(nullptr);
    }
  }
  for (auto kernel : this->nodes_) {
    for (auto tensor : kernel->out_tensors()) {
      if (kernel->type() == schema::PrimitiveType_Cast) {
        continue;
      }
      if (tensor->data_type() == kNumberTypeFloat32) {
        tensor->set_data_type(kNumberTypeFloat16);
      }
    }
  }
  return RET_OK;
}

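// Cast every fp16 output tensor back to fp32, free the temporary fp16 buffers,
// and hand the stashed fp32 data back to the input tensors.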
int CpuFp16SubGraph::PostProcess() {
  if (!support_fp16_) {
    MS_LOG(ERROR) << "fp16 is not supported on this device";
    return RET_ERROR;
  }
  for (auto tensor : this->out_tensors()) {
    MS_ASSERT(tensor != nullptr);
    if (tensor->data_type() == kNumberTypeFloat16) {
      auto float16_data = tensor->data();
      MS_ASSERT(float16_data != nullptr);
      tensor->set_data(nullptr);
      tensor->set_data_type(TypeId::kNumberTypeFloat32);
      auto tmp_data = malloc(tensor->Size());
      if (tmp_data == nullptr) {
        MS_LOG(ERROR) << "malloc data failed";
        if (this->context_ != nullptr && this->context_->allocator != nullptr) {
          this->context_->allocator->Free(float16_data);
        } else {
          free(float16_data);
        }
        return RET_ERROR;
      }
      tensor->set_data(tmp_data);
      MS_ASSERT(tensor->data() != nullptr);
      Float16ToFloat32_fp16_handler(float16_data, tensor->data(), tensor->ElementsNum(), support_fp16_);
      if (tensor->allocator() != nullptr) {
        tensor->allocator()->Free(float16_data);
      } else {
        free(float16_data);
      }
      tensor->set_allocator(nullptr);
    }
  }
  MS_ASSERT(this->origin_input_data_.size() == this->in_tensors().size());
  for (size_t i = 0; i < this->in_tensors().size(); i++) {
    auto tensor = in_tensors().at(i);
    MS_ASSERT(tensor != nullptr);
    auto origin_tensor_data = origin_input_data_.at(i);
    if (tensor->data_type() == kNumberTypeFloat16 && origin_tensor_data != nullptr) {
      if (!origin_tensor_data->own_data_ || (tensor->data() != nullptr)) {
        MS_ASSERT(tensor != nullptr);
        free(tensor->data());
        MS_ASSERT(origin_tensor_data->data_ != nullptr);
        tensor->set_data(origin_tensor_data->data_, origin_tensor_data->own_data_);
        tensor->set_data_type(kNumberTypeFloat32);
        origin_tensor_data->data_ = nullptr;
      }
    }
  }
  this->FreeOriginInputData();
  return RET_OK;
}
#endif

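// Like CpuSubGraph::Prepare: give every output tensor that lacks an allocator
// the context allocator.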
int AclSubGraph::Prepare() {
  auto ret = SubGraphKernel::Prepare();
  if (ret != RET_OK) {
    return ret;
  }
  for (auto node : nodes_) {
    for (auto tensor : node->out_tensors()) {
      MS_ASSERT(tensor != nullptr);
      if (tensor->allocator() == nullptr) {
        tensor->set_allocator(this->Context()->allocator);
      }
    }
  }
  for (auto &out : this->out_tensors()) {
    if (out->allocator() == nullptr) {
      out->set_allocator(this->Context()->allocator);
    }
  }
  return RET_OK;
}

int AclSubGraph::Execute(const KernelCallBack &before, const KernelCallBack &after) {
  MS_ASSERT(this->Context()->allocator.get() != nullptr);
  for (auto *kernel : nodes_) {
    MS_ASSERT(kernel != nullptr);
    auto ret = kernel->Execute(before, after);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name();
      return ret;
    }
  }
  return RET_OK;
}
}  // namespace mindspore::kernel