/**
 * Copyright 2021-2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <utility>
#include <algorithm>
#include "src/litert/lite_mindrt.h"
#include "mindrt/include/mindrt.hpp"
#include "src/litert/kernel_exec_util.h"
#include "src/common/tensor_util.h"
#include "src/common/common.h"
#include "src/litert/inner_allocator.h"
#include "src/litert/kernel/cpu/base/partial_fusion.h"
#include "src/control_flow/control_actor_creator.h"

namespace mindspore::lite {
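// Actor entry point. Buffers each arriving OpData until all of the kernel's
// input tensors for this sequential run are present, then prepares the input
// data, executes the kernel, and forwards the outputs to downstream actors.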
void LiteOpActor::RunOpData(OpData<lite::Tensor> *inputs, OpContext<lite::Tensor> *context) {
  if (inputs == nullptr || context == nullptr) {
    MS_LOG(ERROR) << "param is nullptr.";
    return;
  }
  auto op_uuid = context->sequential_num_;
  input_op_datas_[op_uuid].push_back(inputs);
  inputs_data_[inputs->index_] = inputs->data_;
  if (input_op_datas_[op_uuid].size() < kernel_->in_tensors().size()) {
    return;
  }

  auto ret = InitInputData();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "init input data failed, kernel name: " << kernel_->name();
    context->SetFailed(ret);
    return;
  }

  ret = kernel_->Execute(*(reinterpret_cast<const KernelCallBack *>(context->kernel_call_back_before_)),
                         *(reinterpret_cast<const KernelCallBack *>(context->kernel_call_back_after_)));
  input_op_datas_.erase(op_uuid);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "run kernel failed, name: " << kernel_->name();
    context->SetFailed(ret);
    return;
  }
  AsyncOutput(context);
  SetOutputData(context);
}

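// Returns true when this_input_tensor is neither a graph input nor produced
// by any other kernel in `kernels`, i.e. the tensor has no upstream sender
// and can be adjusted in place instead of being duplicated.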
bool OfflineIsolated(const std::vector<kernel::KernelExec *> &kernels, const kernel::KernelExec &this_kernel,
                     const lite::Tensor &this_input_tensor) {
  if (this_input_tensor.IsGraphInput()) {
    return false;
  }
  for (auto &kernel : kernels) {
    if (kernel == &this_kernel) {
      continue;
    }
    if (std::any_of(kernel->out_tensors().begin(), kernel->out_tensors().end(),
                    [&this_input_tensor](const lite::Tensor *tensor) { return tensor == &this_input_tensor; })) {
      return false;
    }
  }
  return true;
}

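// Chooses the data type for a duplicated subgraph input. Custom and ACL
// subgraphs keep the tensor's own type; elsewhere, float16/float32 tensors
// adopt the kernel's registered data type, except inside a GPU fp16 subgraph
// where only graph inputs and outputs are converted.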
TypeId GetSubgraphInTensorDataType(const kernel::KernelExec *kernel, const lite::Tensor *tensor) {
  if (kernel == nullptr || tensor == nullptr) {
    return kTypeUnknown;
  }
#ifdef ENABLE_LITE_ACL
  if (kernel->subgraph_type() == kernel::kCustomSubGraph) {
    return tensor->data_type();
  }
#endif
  if (kernel->subgraph_type() == kernel::kAclSubGraph) {
    return tensor->data_type();
  }
  if (kernel->subgraph_type() != kernel::kGpuFp16SubGraph || tensor->IsGraphInput() || tensor->IsGraphOutput()) {
    if (tensor->data_type() == kNumberTypeFloat16 || tensor->data_type() == kNumberTypeFloat32) {
      return kernel->desc().data_type;
    }
  }
  return tensor->data_type();
}

int LiteOpActor::PreInit(std::vector<std::shared_ptr<LiteOpActor>> *actors,
                         std::unordered_map<Tensor *, Tensor *> *input_map) {
  if (actors == nullptr || input_map == nullptr) {
    return RET_ERROR;
  }
  return IsolateInputData(actors, input_map);
}

int LiteOpActor::PostInit() { return PrepareOutputData(); }

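// Isolates the subgraph's input tensors: tensors with no upstream sender are
// retyped and given the context allocator in place, while the rest are
// replaced with duplicates whose new->old mapping is recorded in
// isolate_input_map_ and mirrored in the context link info.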
int LiteOpActor::IsolateInputData(std::vector<std::shared_ptr<LiteOpActor>> *actors,
                                  std::unordered_map<Tensor *, Tensor *> *input_map) {
  if (actors == nullptr || input_map == nullptr) {
    return RET_ERROR;
  }
  isolate_input_map_ = input_map;
  std::vector<kernel::KernelExec *> kernels{};
  std::transform(actors->begin(), actors->end(), std::back_inserter(kernels),
                 [](const std::shared_ptr<LiteOpActor> &actor) { return actor->kernel_; });
  size_t in_tensor_size = kernel_->in_tensors().size();
  for (size_t i = 0; i < in_tensor_size; i++) {
    Tensor *old_tensor = kernel_->in_tensors()[i];

    if (OfflineIsolated(kernels, *kernel_, *old_tensor)) {
      if (old_tensor->data_type() == kNumberTypeFloat16 || old_tensor->data_type() == kNumberTypeFloat32) {
        old_tensor->set_data_type(kernel_->desc().data_type);
      }
      SetTensorListTensorDataType(kernel_->desc().data_type, old_tensor);
      if (kernel_->Context() == nullptr) {
        MS_LOG(ERROR) << "kernel_->Context() is nullptr.";
        return RET_NULL_PTR;
      }
      old_tensor->set_allocator(kernel_->Context()->allocator);
      continue;
    }

    TypeId new_data_type = GetSubgraphInTensorDataType(kernel_, old_tensor);
    Tensor *new_tensor =
      new (std::nothrow) Tensor(new_data_type, old_tensor->shape(), old_tensor->format(), old_tensor->category());
    if (new_tensor == nullptr) {
      MS_LOG(ERROR) << "new Tensor failed.";
      return RET_NULL_PTR;
    }
    new_tensor->set_allocator(old_tensor->allocator());
    if (new_tensor->allocator() == nullptr && kernel_->Context() != nullptr &&
        kernel_->desc().arch != kernel::kDelegate) {
      new_tensor->set_allocator(kernel_->Context()->allocator);
    }

    new_tensor->set_tensor_name(kernel_->name() + "_duplicate_" + old_tensor->tensor_name());
    for (LiteQuantParam quant : old_tensor->quant_params()) {
      new_tensor->AddQuantParam(quant);
    }
    isolate_input_map_->insert(std::make_pair(new_tensor, old_tensor));
    auto ret = kernel::KernelExecUtil::ReplaceSubGraphNodesInTensor(kernel_, old_tensor, new_tensor);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "ReplaceSubGraphNodesInTensor failed.";
      return ret;
    }

    // for the case that a subgraph input is also a subgraph output, replace old_tensor with new_tensor
    ctx_->ReplaceLinkInfoSenderWithNewOne(new_tensor, old_tensor);

    // keep new link info for the isolated-input-data case
    ctx_->SetLinkInfo(old_tensor, new_tensor);

    // set the subgraph input for copying data
    kernel_->set_in_tensor(new_tensor, i);
  }

  for (auto &item : *isolate_input_map_) {
    ctx_->ReplaceLinkInfoReceiverWithNewOne(item.first, item.second);
  }

  return RET_OK;
}

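// Applies a graph-input resize to the isolated duplicates: the duplicate of
// every resized source tensor is freed and given the new shape.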
int LiteOpActor::ResizeGraphInput(const std::vector<mindspore::lite::Tensor *> &inputs,
                                  const std::vector<std::vector<int>> &dims) {
  for (auto map : *isolate_input_map_) {
    auto isolate_tensor = map.first;
    auto src_tensor = map.second;
    for (size_t i = 0; i < inputs.size(); i++) {
      if (src_tensor == inputs[i]) {
        isolate_tensor->FreeData();
        isolate_tensor->set_shape(dims[i]);
      }
    }
  }
  return RET_OK;
}

int LiteOpActor::CompileArrow(const std::unordered_map<void *, std::set<std::pair<AID, size_t>>> &receivers_map) {
  auto ret = UpdateActorOutput();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "update actor output failed.";
    return ret;
  }

  return CompileArrowThroughOutputTensors(receivers_map);
}

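// For a subgraph whose output is a control-flow call node, rewrites the actor
// outputs: the call node's output tensors are dropped and the single partial
// node's input tensors are exposed instead; the partial and call nodes are
// then removed from the subgraph.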
int LiteOpActor::UpdateActorOutput() {
  if (kernel_->desc().arch == kernel::kDelegate) {
    MS_LOG(DEBUG) << "no need for delegate kernel.";
    return RET_OK;
  }
  auto *subgraph_kernel = reinterpret_cast<kernel::SubGraphKernel *>(kernel_);
  if (subgraph_kernel == nullptr) {
    MS_LOG(INFO) << "kernel is not a subgraph kernel, no partial call.";
    return RET_OK;
  }
  auto output_kernels = subgraph_kernel->out_nodes();
  std::vector<kernel::KernelExec *> call_kernels{};
  for (auto output_kernel : output_kernels) {
    if (output_kernel->type() == schema::PrimitiveType_Call) {
      call_kernels.push_back(output_kernel);
    }
  }
  if (call_kernels.empty()) {
    MS_LOG(DEBUG) << "subgraph does not end with a call kernel, no need to update output.";
    return RET_OK;
  }
  if (call_kernels.size() != 1) {
    MS_LOG(ERROR) << "multiple call kernels in one subgraph are not supported.";
    return RET_NOT_SUPPORT;
  }
  call_node_ = call_kernels.front();

  // erase the call node's output tensors from the actor outputs
  auto origin_output_tensors = kernel_->out_tensors();
  auto call_output_tensors = call_node_->out_tensors();

  for (auto iter = origin_output_tensors.begin(); iter != origin_output_tensors.end();) {
    if (IsContain(call_output_tensors, *iter)) {
      iter = origin_output_tensors.erase(iter);
    } else {
      ++iter;
    }
  }

  auto partial_nodes = kernel::KernelExecUtil::GetCallInputPartials(call_node_);
  if (partial_nodes.size() != 1) {
    MS_LOG(ERROR) << "expected exactly one partial node as the call node's input, got " << partial_nodes.size();
    return RET_ERROR;
  }
  partial_node_ = partial_nodes.front();
  (void)std::copy(partial_node_->in_tensors().begin(), partial_node_->in_tensors().end(),
                  std::back_inserter(origin_output_tensors));

  kernel_->set_out_tensors(origin_output_tensors);

  subgraph_kernel->DropNode(partial_node_);
  subgraph_kernel->DropNode(call_node_);
  return RET_OK;
}

bool LiteOpActor::ArrowHasCompiled(const AID &actor_name, size_t to_index,
                                   const std::unordered_map<AID, std::set<size_t>> &receiver_index_set) {
  auto iter = receiver_index_set.find(actor_name);
  if (iter != receiver_index_set.end()) {
    return iter->second.find(to_index) != iter->second.end();
  }
  return false;
}

void LiteOpActor::MarkArrowAsCompiled(const AID *actor_name, size_t to_index,
                                      std::unordered_map<AID, std::set<size_t>> *receiver_index_set) {
  if (actor_name == nullptr || receiver_index_set == nullptr) {
    return;
  }
  if (receiver_index_set->find(*actor_name) == receiver_index_set->end()) {
    std::set<size_t> tmp{to_index};
    receiver_index_set->insert(std::pair<AID, std::set<size_t>>(*actor_name, tmp));
  } else {
    (void)receiver_index_set->at(*actor_name).insert(to_index);
  }
}

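// Creates a data arrow for every (receiver actor, input index) pair that
// consumes one of this actor's output tensors, using receiver_index_set to
// skip arrows that were already compiled.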
int LiteOpActor::CreateCommonArrow(const std::unordered_map<void *, std::set<std::pair<AID, size_t>>> &receivers_map,
                                   const std::set<void *> &receiver_tensors, const size_t &output_index,
                                   std::unordered_map<AID, std::set<size_t>> *receiver_index_set) {
  if (receiver_index_set == nullptr) {
    return RET_ERROR;
  }
  std::unordered_map<void *, std::set<std::pair<AID, size_t>>>::const_iterator iter;
  for (auto receiver_tensor : receiver_tensors) {
    iter = receivers_map.find(receiver_tensor);
    if (iter == receivers_map.end()) {
      MS_LOG(DEBUG) << "not a useful receiver.";
      continue;
    }
    auto receiver_set = iter->second;
    for (auto item : receiver_set) {
      if (ArrowHasCompiled(item.first, item.second, *receiver_index_set)) {
        continue;
      }
      MarkArrowAsCompiled(&(item.first), item.second, receiver_index_set);
      auto arrow = std::make_shared<DataArrow>(output_index, item.first, item.second);
      MS_CHECK_TRUE_MSG(arrow != nullptr, RET_ERROR, "create arrow failed.");
      output_data_arrows_.push_back(arrow);
    }
  }
  return RET_OK;
}

int LiteOpActor::CreateEmptyArrow(const size_t &output_index) {
  AID non;
  auto arrow = std::make_shared<DataArrow>(output_index, non, output_index);
  MS_CHECK_TRUE_MSG(arrow != nullptr, RET_ERROR, "create arrow failed.");
  output_data_arrows_.push_back(arrow);
  return RET_OK;
}

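// Wires each output tensor to its receivers. Tensors without link info get an
// empty placeholder arrow, which is resolved when the graph runs.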
int LiteOpActor::CompileArrowThroughOutputTensors(
  const std::unordered_map<void *, std::set<std::pair<AID, size_t>>> &receivers_map) {
  auto output_tensors = this->kernel_->out_tensors();
  auto output_tensors_size = output_tensors.size();

  std::unordered_map<AID, std::set<size_t>> receiver_index_set{};
  for (size_t i = 0; i < output_tensors_size; ++i) {
    auto receiver_tensors = ctx_->GetLinkInfo(output_tensors[i]);
    if (receiver_tensors.empty()) {
      MS_LOG(DEBUG) << "create when running.";
      auto ret = CreateEmptyArrow(i);
      if (ret != RET_OK) {
        MS_LOG(ERROR) << "CreateEmptyArrow failed, output tensor name: " << output_tensors[i]->tensor_name();
        return ret;
      }
      continue;
    }
    auto ret = CreateCommonArrow(receivers_map, receiver_tensors, i, &receiver_index_set);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "CreateCommonArrow failed, output tensor name: " << output_tensors[i]->tensor_name();
      return ret;
    }
  }
  return RET_OK;
}

int LiteOpActor::SetInputShape() {
  auto ret = RET_OK;
  for (size_t i = 0; i < inputs_data_.size(); ++i) {
    auto &input_tensor = kernel_->in_tensors()[i];
    if (input_tensor->shape() == inputs_data_[i]->shape()) {
      continue;
    }
    ret = SetTensorShape(input_tensor, inputs_data_[i]);
    MS_CHECK_FALSE_MSG(ret != RET_OK, ret, "set input shape failed.");
  }
  return RET_OK;
}

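// Transfers each buffered input into the corresponding subgraph input tensor:
// cast when the data types differ, copy for graph inputs and allocator-less
// (e.g. delegate output) tensors, and move otherwise.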
int LiteOpActor::AssignInputData() {
  auto ret = RET_OK;
  for (size_t i = 0; i < inputs_data_.size(); ++i) {
    auto dst_tensor = kernel_->in_tensors()[i];
    auto src_tensor = inputs_data_[i];
    dst_tensor->set_shape_changed(src_tensor->get_shape_changed());
    if (dst_tensor->init_ref_count() == 0) {
      src_tensor->DecRefCount();
      continue;
    }
    if (NeedCastData(dst_tensor, src_tensor)) {
      ret = CastTensorData(dst_tensor, src_tensor, support_fp16_);
      MS_CHECK_FALSE_MSG(ret != RET_OK, ret, "CastTensorData failed.");
      continue;
    }
    // same data type: copy for graph inputs and delegate graph kernel output tensors, move otherwise
    if (src_tensor->allocator() == nullptr || src_tensor->IsGraphInput()) {
      ret = SetTensorData(dst_tensor, src_tensor);
      MS_CHECK_FALSE_MSG(ret != RET_OK, ret, "SetTensorData failed.");
    } else {
      ret = MoveTensorData(dst_tensor, src_tensor);
      MS_CHECK_FALSE_MSG(ret != RET_OK, ret, "MoveTensorData failed.");
    }
  }
  return ret;
}

bool LiteOpActor::NeedResize() {
  for (size_t i = 0; i < inputs_data_.size(); ++i) {
    auto &subgraph_input = kernel_->in_tensors()[i];
    auto &cur_input = inputs_data_[i];
    if (!IsSameShape(subgraph_input, cur_input)) {
      return true;
    }
  }
  return false;
}

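// Validates the buffered inputs, syncs their shapes into the subgraph, and,
// when any shape changed, reallocates the subgraph inputs before assigning
// data and resizing the subgraph afterwards.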
int LiteOpActor::InitInputData() {
  for (size_t i = 0; i < inputs_data_.size(); ++i) {
    if (inputs_data_[i] == nullptr) {
      MS_LOG(ERROR) << "inputs_data_ nullptr, index: " << i;
      return RET_ERROR;
    }
  }
  bool need_resize = NeedResize();
  auto ret = SetInputShape();
  MS_CHECK_FALSE_MSG(ret != RET_OK, ret, "Set input shape failed.");
  if (need_resize) {
    auto subgraph_kernel = reinterpret_cast<kernel::SubGraphKernel *>(kernel_);
    MS_CHECK_FALSE_MSG(subgraph_kernel == nullptr, RET_ERROR, "Lite actor, cast kernel to subgraph kernel failed.");
    ret = subgraph_kernel->MallocSubgraphInputs();
    MS_CHECK_FALSE_MSG(ret != RET_OK, ret, "Subgraph kernel MallocSubgraphInputs failed.");
  }
  ret = AssignInputData();
  MS_CHECK_FALSE_MSG(ret != RET_OK, ret, "Subgraph kernel AssignInputData failed.");
  if (need_resize) {
    auto subgraph_kernel = reinterpret_cast<kernel::SubGraphKernel *>(kernel_);
    ret = subgraph_kernel->ReSize();
    MS_CHECK_FALSE_MSG((ret != RET_OK) && (ret != RET_INFER_INVALID), ret, "Subgraph kernel ReSize failed.");
  }
  return RET_OK;
}

void LiteOpActor::AsyncOutput(OpContext<Tensor> *context) {
  if (context == nullptr) {
    return;
  }
  auto output_size = output_data_arrows_.size();
  for (size_t i = 0; i < output_size; ++i) {
    auto data = outputs_data_[i];
    Async(output_data_arrows_[i]->to_op_id_, get_actor_mgr(), &mindspore::OpActor<Tensor>::RunOpData, data.get(),
          context);
  }
}

void LiteOpActor::AddResultIndex(size_t index, size_t tensor_index) {
  results_index_.push_back(index);
  results_tensor_index_.push_back(tensor_index);
}

void LiteOpActor::SetOutputData(const OpContext<Tensor> *context) {
  if (context == nullptr) {
    return;
  }
  for (auto index : results_index_) {
    context->SetResult(index, RET_OK);
  }
}

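// Pre-builds one OpData per output arrow so that AsyncOutput only needs to
// forward the prepared data at run time.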
int LiteOpActor::PrepareOutputData() {
  outputs_data_.resize(output_data_arrows_.size());
  for (size_t i = 0; i < output_data_arrows_.size(); i++) {
    auto &arrow = output_data_arrows_[i];
    auto data = std::make_shared<OpData<Tensor>>(this->GetAID(), (kernel_->out_tensors()).at(arrow->from_output_index_),
                                                 static_cast<int>(arrow->to_input_index_));
    if (MS_UNLIKELY(data == nullptr)) {
      MS_LOG(ERROR) << "new output_data failed.";
      return RET_NULL_PTR;
    }
    outputs_data_[i] = data;
  }
  return RET_OK;
}

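// Wraps each subgraph kernel in an actor (CreateActor, from
// control_actor_creator.h, is expected to return a control-flow-specific
// actor where needed), binds the thread pool and actor manager, and spawns
// every actor.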
std::vector<std::shared_ptr<LiteOpActor>> CreateOpActor(const std::vector<kernel::KernelExec *> &kernels,
                                                        lite::InnerContext *ctx, const std::shared_ptr<ActorMgr> &mgr) {
  MS_CHECK_TRUE_RET(ctx != nullptr, {});
  std::vector<std::shared_ptr<LiteOpActor>> actors;
  ActorThreadPool *thread_pool = reinterpret_cast<ActorThreadPool *>(ctx->thread_pool_);
  if (thread_pool == nullptr) {
    MS_LOG(ERROR) << "thread pool is nullptr";
    return actors;
  }
  actors.reserve(kernels.size());
  for (auto &kernel : kernels) {
    // make the subgraph name (actor name) unique
    kernel->set_name(kernel->name() + "_" + std::to_string(actor_count++));
    std::shared_ptr<LiteOpActor> actor = CreateActor(kernel, ctx);
    if (actor == nullptr) {
      MS_LOG(ERROR) << "create LiteOpActor failed: " << kernel->name();
      actors.clear();
      return actors;
    }
    actor->set_thread_pool(thread_pool);
    actor->set_actor_mgr(mgr);
    actors.push_back(actor);
  }

  for (auto &actor : actors) {
    (void)mindspore::Spawn(actor);
  }
  return actors;
}

int MindrtInit() { return mindspore::Initialize("", "", "", ""); }

void MindrtTerminate(const std::vector<std::shared_ptr<LiteOpActor>> &actor_list,
                     const std::shared_ptr<ActorMgr> &actor_mgr) {
  for (const auto &actor : actor_list) {
    mindspore::Terminate(actor->GetAID(), actor_mgr);
  }
}
}  // namespace mindspore::lite