/**
 * Copyright 2021-2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <utility>
#include <algorithm>
#include "src/litert/lite_mindrt.h"
#include "mindrt/include/mindrt.hpp"
#include "src/litert/kernel_exec_util.h"
#include "src/common/tensor_util.h"
#include "src/common/common.h"
#include "src/litert/inner_allocator.h"
#include "src/litert/kernel/cpu/base/partial_fusion.h"
#include "src/control_flow/control_actor_creator.h"

namespace mindspore::lite {
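// Actor entry point: cache each incoming OpData until all of the kernel's inputs have
// arrived for this execution (identified by the context's sequential number), then
// prepare the input tensors, execute the subgraph kernel and forward its outputs.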
void LiteOpActor::RunOpData(OpData<lite::Tensor> *inputs, OpContext<lite::Tensor> *context) {
  if (inputs == nullptr || context == nullptr) {
    MS_LOG(ERROR) << "param is nullptr.";
    return;
  }
  auto op_uuid = context->sequential_num_;
  input_op_datas_[op_uuid].push_back(inputs);
  inputs_data_[inputs->index_] = inputs->data_;
  if (input_op_datas_[op_uuid].size() < kernel_->in_tensors().size()) {
    return;
  }

  auto ret = InitInputData();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "init input data failed, kernel name: " << kernel_->name();
    context->SetFailed(ret);
    return;
  }

  ret = kernel_->Execute(*(reinterpret_cast<const KernelCallBack *>(context->kernel_call_back_before_)),
                         *(reinterpret_cast<const KernelCallBack *>(context->kernel_call_back_after_)));
  input_op_datas_.erase(op_uuid);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "run kernel failed, name: " << kernel_->name();
    context->SetFailed(ret);
    return;
  }
  AsyncOutput(context);
  SetOutputData(context);
  return;
}

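// Returns true when the input tensor can be isolated offline, i.e. it is neither a graph
// input nor produced by any other kernel in the actor set (typically a constant tensor).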
bool OfflineIsolated(const std::vector<kernel::KernelExec *> &kernels, const kernel::KernelExec &this_kernel,
                     const lite::Tensor &this_input_tensor) {
  if (this_input_tensor.IsGraphInput()) {
    return false;
  }
  for (auto &kernel : kernels) {
    if (kernel == &this_kernel) {
      continue;
    }
    if (std::any_of(kernel->out_tensors().begin(), kernel->out_tensors().end(),
                    [&this_input_tensor](const lite::Tensor *tensor) { return tensor == &this_input_tensor; })) {
      return false;
    }
  }
  return true;
}

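// Choose the data type for the duplicated subgraph input tensor: float inputs normally
// follow the kernel's desc data type, while ACL/custom subgraphs and the internal tensors
// of a GPU fp16 subgraph keep the original tensor's type.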
TypeId GetSubgraphInTensorDataType(const kernel::KernelExec *kernel, const lite::Tensor *tensor) {
  if (kernel == nullptr || tensor == nullptr) {
    return kTypeUnknown;
  }
#ifdef ENABLE_LITE_ACL
  if (kernel->subgraph_type() == kernel::kCustomSubGraph) {
    return tensor->data_type();
  }
#endif
  if (kernel->subgraph_type() == kernel::kAclSubGraph) {
    return tensor->data_type();
  }
  if (kernel->subgraph_type() != kernel::kGpuFp16SubGraph || tensor->IsGraphInput() || tensor->IsGraphOutput()) {
    if (tensor->data_type() == kNumberTypeFloat16 || tensor->data_type() == kNumberTypeFloat32) {
      return kernel->desc().data_type;
    }
  }
  return tensor->data_type();
}

int LiteOpActor::PreInit(std::vector<std::shared_ptr<LiteOpActor>> *actors,
                         std::unordered_map<Tensor *, Tensor *> *input_map) {
  if (actors == nullptr || input_map == nullptr) {
    return RET_ERROR;
  }
  return IsolateInputData(actors, input_map);
}
int LiteOpActor::PostInit() { return PrepareOutputData(); }

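// Isolate the subgraph inputs: constants are converted in place, while tensors fed by
// upstream actors are replaced with duplicates so incoming data can be cast or copied
// into the subgraph without touching the upstream tensors. The link info in the context
// is updated so that data arrows are still compiled against the right tensors.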
int LiteOpActor::IsolateInputData(std::vector<std::shared_ptr<LiteOpActor>> *actors,
                                  std::unordered_map<Tensor *, Tensor *> *input_map) {
  if (actors == nullptr || input_map == nullptr) {
    return RET_ERROR;
  }
  isolate_input_map_ = input_map;
  std::vector<kernel::KernelExec *> kernels{};
  std::transform(actors->begin(), actors->end(), std::back_inserter(kernels),
                 [](const std::shared_ptr<LiteOpActor> &actor) { return actor->kernel_; });
  size_t in_tensor_size = kernel_->in_tensors().size();
  for (size_t i = 0; i < in_tensor_size; i++) {
    Tensor *old_tensor = kernel_->in_tensors()[i];

    if (OfflineIsolated(kernels, *kernel_, *old_tensor)) {
      if (old_tensor->data_type() == kNumberTypeFloat16 || old_tensor->data_type() == kNumberTypeFloat32) {
        old_tensor->set_data_type(kernel_->desc().data_type);
      }
      SetTensorListTensorDataType(kernel_->desc().data_type, old_tensor);
      if (kernel_->Context() == nullptr) {
        MS_LOG(ERROR) << "kernel_->Context() is nullptr.";
        return RET_NULL_PTR;
      }
      old_tensor->set_allocator(kernel_->Context()->allocator);
      continue;
    }

    TypeId new_data_type = GetSubgraphInTensorDataType(kernel_, old_tensor);
    Tensor *new_tensor =
      new (std::nothrow) Tensor(new_data_type, old_tensor->shape(), old_tensor->format(), old_tensor->category());
    if (new_tensor == nullptr) {
      MS_LOG(ERROR) << "new Tensor failed.";
      return RET_NULL_PTR;
    }
    new_tensor->set_allocator(old_tensor->allocator());
    if (new_tensor->allocator() == nullptr && kernel_->Context() != nullptr &&
        kernel_->desc().arch != kernel::kDelegate) {
      new_tensor->set_allocator(kernel_->Context()->allocator);
    }

    new_tensor->set_tensor_name(kernel_->name() + "_duplicate_" + old_tensor->tensor_name());
    for (LiteQuantParam quant : old_tensor->quant_params()) {
      new_tensor->AddQuantParam(quant);
    }
    isolate_input_map_->insert(std::make_pair(new_tensor, old_tensor));
    auto ret = kernel::KernelExecUtil::ReplaceSubGraphNodesInTensor(kernel_, old_tensor, new_tensor);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "ReplaceSubGraphNodesInTensor failed.";
      return ret;
    }

    // for case that subgraph input is subgraph output, replace old_tensor with new_tensor
    ctx_->ReplaceLinkInfoSenderWithNewOne(new_tensor, old_tensor);

    // keep new link info for isolate input data case.
    ctx_->SetLinkInfo(old_tensor, new_tensor);

    /* set subgraph input for copy data */
    kernel_->set_in_tensor(new_tensor, i);
  }

  for (auto &item : *isolate_input_map_) {
    ctx_->ReplaceLinkInfoReceiverWithNewOne(item.first, item.second);
  }

  return RET_OK;
}

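// Propagate new graph-input shapes to the corresponding isolated input tensors.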
int LiteOpActor::ResizeGraphInput(const std::vector<mindspore::lite::Tensor *> &inputs,
                                  const std::vector<std::vector<int>> &dims) {
  for (auto map : *isolate_input_map_) {
    auto isolate_tensor = map.first;
    auto src_tensor = map.second;
    for (size_t i = 0; i < inputs.size(); i++) {
      if (src_tensor == inputs[i]) {
        isolate_tensor->FreeData();
        isolate_tensor->set_shape(dims[i]);
      }
    }
  }
  return RET_OK;
}

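// Fix up the actor's output tensors (for subgraphs ending in a call node) and then
// compile the output data arrows towards the receiving actors.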
int LiteOpActor::CompileArrow(const std::unordered_map<void *, std::set<std::pair<AID, size_t>>> &receivers_map) {
  auto ret = UpdateActorOutput();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "update actor output failed.";
    return ret;
  }

  return CompileArrowThroughOutputTensors(receivers_map);
}

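// If the subgraph ends with a single Call node, drop the call and its partial node and
// expose the partial's input tensors as the subgraph outputs instead of the call outputs.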
int LiteOpActor::UpdateActorOutput() {
  if (kernel_->desc().arch == kernel::kDelegate) {
    MS_LOG(DEBUG) << "no need for delegate kernel.";
    return RET_OK;
  }
  auto *subgraph_kernel = reinterpret_cast<kernel::SubGraphKernel *>(kernel_);
  if (subgraph_kernel == nullptr) {
    MS_LOG(INFO) << "kernel is not subgraph kernel, no partial call.";
    return RET_OK;
  }
  auto output_kernels = subgraph_kernel->out_nodes();
  std::vector<kernel::KernelExec *> call_kernels{};
  for (auto output_kernel : output_kernels) {
    if (output_kernel->type() == schema::PrimitiveType_Call) {
      call_kernels.push_back(output_kernel);
    }
  }
  if (call_kernels.empty()) {
    MS_LOG(DEBUG) << "not end with call kernel, no need to update output.";
    return RET_OK;
  }
  if (call_kernels.size() != 1) {
    MS_LOG(ERROR) << "multiple call kernels in one subgraph are not supported.";
    return RET_NOT_SUPPORT;
  }
  call_node_ = call_kernels.front();

  // erase the call node's output tensors from the subgraph outputs
  auto origin_output_tensors = kernel_->out_tensors();
  auto call_output_tensors = call_node_->out_tensors();

  for (auto iter = origin_output_tensors.begin(); iter != origin_output_tensors.end();) {
    if (IsContain(call_output_tensors, *iter)) {
      iter = origin_output_tensors.erase(iter);
    } else {
      ++iter;
    }
  }

  auto partial_nodes = kernel::KernelExecUtil::GetCallInputPartials(call_node_);
  if (partial_nodes.size() != 1) {
    MS_LOG(ERROR) << "expected exactly one partial node feeding the call node.";
    return RET_ERROR;
  }
  partial_node_ = partial_nodes.front();
  (void)std::copy(partial_node_->in_tensors().begin(), partial_node_->in_tensors().end(),
                  std::back_inserter(origin_output_tensors));

  kernel_->set_out_tensors(origin_output_tensors);

  subgraph_kernel->DropNode(partial_node_);
  subgraph_kernel->DropNode(call_node_);
  return RET_OK;
}

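// Check whether an arrow to the given receiver actor and input index has already been compiled.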
bool LiteOpActor::ArrowHasCompiled(const AID &actor_name, size_t to_index,
                                   const std::unordered_map<AID, std::set<size_t>> &receiver_index_set) {
  auto iter = receiver_index_set.find(actor_name);
  if (iter != receiver_index_set.end()) {
    return iter->second.find(to_index) != iter->second.end();
  }
  return false;
}

void LiteOpActor::MarkArrowAsCompiled(const AID *actor_name, size_t to_index,
                                      std::unordered_map<AID, std::set<size_t>> *receiver_index_set) {
  if (actor_name == nullptr || receiver_index_set == nullptr) {
    return;
  }
  if (receiver_index_set->find(*actor_name) == receiver_index_set->end()) {
    std::set<size_t> tmp{to_index};
    receiver_index_set->insert(std::pair<AID, std::set<size_t>>(*actor_name, tmp));
  } else {
    (void)receiver_index_set->at(*actor_name).insert(to_index);
  }
}

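// Create one data arrow for every (receiver actor, input index) pair that consumes the
// output tensor at output_index, skipping pairs for which an arrow already exists.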
int LiteOpActor::CreateCommonArrow(const std::unordered_map<void *, std::set<std::pair<AID, size_t>>> &receivers_map,
                                   const std::set<void *> &receiver_tensors, const size_t &output_index,
                                   std::unordered_map<AID, std::set<size_t>> *receiver_index_set) {
  if (receiver_index_set == nullptr) {
    return RET_ERROR;
  }
  std::unordered_map<void *, std::set<std::pair<AID, size_t>>>::const_iterator iter;
  for (auto receiver_tensor : receiver_tensors) {
    iter = receivers_map.find(receiver_tensor);
    if (iter == receivers_map.end()) {
      MS_LOG(DEBUG) << "not a useful receiver.";
      continue;
    }
    auto receiver_set = iter->second;
    for (auto item : receiver_set) {
      if (ArrowHasCompiled(item.first, item.second, *receiver_index_set)) {
        continue;
      }
      MarkArrowAsCompiled(&(item.first), item.second, receiver_index_set);
      auto arrow = std::make_shared<DataArrow>(output_index, item.first, item.second);
      MS_CHECK_TRUE_MSG(arrow != nullptr, RET_ERROR, "create arrow failed.");
      output_data_arrows_.push_back(arrow);
    }
  }
  return RET_OK;
}

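// No receiver is known for this output tensor at compile time; create a placeholder
// arrow with an empty AID.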
int LiteOpActor::CreateEmptyArrow(const size_t &output_index) {
  AID non;
  auto arrow = std::make_shared<DataArrow>(output_index, non, output_index);
  MS_CHECK_TRUE_MSG(arrow != nullptr, RET_ERROR, "create arrow failed.");
  output_data_arrows_.push_back(arrow);
  return RET_OK;
}

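// Walk the kernel's output tensors and compile a data arrow to every receiver recorded
// in the context's link info; outputs without receivers get a placeholder arrow.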
int LiteOpActor::CompileArrowThroughOutputTensors(
  const std::unordered_map<void *, std::set<std::pair<AID, size_t>>> &receivers_map) {
  auto output_tensors = this->kernel_->out_tensors();
  auto output_tensors_size = output_tensors.size();

  std::unordered_map<AID, std::set<size_t>> receiver_index_set{};
  for (size_t i = 0; i < output_tensors_size; ++i) {
    auto receiver_tensors = ctx_->GetLinkInfo(output_tensors[i]);
    if (receiver_tensors.empty()) {
      MS_LOG(DEBUG) << "create when running.";
      auto ret = CreateEmptyArrow(i);
      if (ret != RET_OK) {
        MS_LOG(ERROR) << "CreateEmptyArrow failed, output tensor name: " << output_tensors[i]->tensor_name();
        return ret;
      }
      continue;
    }
    auto ret = CreateCommonArrow(receivers_map, receiver_tensors, i, &receiver_index_set);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "CreateCommonArrow failed, output tensor name: " << output_tensors[i]->tensor_name();
      return ret;
    }
  }
  return RET_OK;
}

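// Sync the shape of each incoming data tensor onto the corresponding subgraph input tensor.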
int LiteOpActor::SetInputShape() {
  auto ret = RET_OK;
  for (size_t i = 0; i < inputs_data_.size(); ++i) {
    auto &input_tensor = kernel_->in_tensors()[i];
    if (input_tensor->shape() == inputs_data_[i]->shape()) {
      continue;
    }
    ret = SetTensorShape(input_tensor, inputs_data_[i]);
    MS_CHECK_FALSE_MSG(ret != RET_OK, ret, "set input shape failed.");
  }
  return RET_OK;
}

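// Hand the incoming data over to the subgraph input tensors: cast when the data types
// differ; otherwise set the data directly (graph inputs and tensors without an
// allocator) or move it from the source tensor.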
int LiteOpActor::AssignInputData() {
  auto ret = RET_OK;
  for (size_t i = 0; i < inputs_data_.size(); ++i) {
    auto dst_tensor = kernel_->in_tensors()[i];
    auto src_tensor = inputs_data_[i];
    dst_tensor->set_shape_changed(src_tensor->get_shape_changed());
    if (dst_tensor->init_ref_count() == 0) {
      src_tensor->DecRefCount();
      continue;
    }
    if (NeedCastData(dst_tensor, src_tensor)) {
      ret = CastTensorData(dst_tensor, src_tensor, support_fp16_);
      MS_CHECK_FALSE_MSG(ret != RET_OK, ret, "CastTensorData failed.");
      continue;
    }
    /* same data-type */
    if (src_tensor->allocator() == nullptr || src_tensor->IsGraphInput()) {
      // delegate graph kernel output tensor
      ret = SetTensorData(dst_tensor, src_tensor);
      MS_CHECK_FALSE_MSG(ret != RET_OK, ret, "SetTensorData failed.");
    } else {
      ret = MoveTensorData(dst_tensor, src_tensor);
      MS_CHECK_FALSE_MSG(ret != RET_OK, ret, "MoveTensorData failed.");
    }
  }
  return ret;
}

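// The subgraph needs a resize when any incoming tensor's shape differs from the
// corresponding subgraph input tensor's shape.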
bool LiteOpActor::NeedResize() {
  for (size_t i = 0; i < inputs_data_.size(); ++i) {
    auto &subgraph_input = kernel_->in_tensors()[i];
    auto &cur_input = inputs_data_[i];
    if (!IsSameShape(subgraph_input, cur_input)) {
      return true;
    }
  }
  return false;
}

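// Prepare the subgraph inputs for execution: validate the cached data, update input
// shapes, allocate subgraph inputs when a resize is needed, assign the data and resize.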
int LiteOpActor::InitInputData() {
  for (size_t i = 0; i < inputs_data_.size(); ++i) {
    if (inputs_data_[i] == nullptr) {
      MS_LOG(ERROR) << "inputs_data_ nullptr, index: " << i;
      return RET_ERROR;
    }
  }
  bool need_resize = NeedResize();
  auto ret = SetInputShape();
  MS_CHECK_FALSE_MSG(ret != RET_OK, ret, "Set input shape failed.");
  if (need_resize) {
    auto subgraph_kernel = reinterpret_cast<kernel::SubGraphKernel *>(kernel_);
    MS_CHECK_FALSE_MSG(subgraph_kernel == nullptr, RET_ERROR, "Lite actor, cast kernel to subgraph kernel failed.");
    ret = subgraph_kernel->MallocSubgraphInputs();
    MS_CHECK_FALSE_MSG(ret != RET_OK, ret, "Subgraph kernel MallocSubgraphInputs failed.");
  }
  ret = AssignInputData();
  MS_CHECK_FALSE_MSG(ret != RET_OK, ret, "Subgraph kernel AssignInputData failed.");
  if (need_resize) {
    auto subgraph_kernel = reinterpret_cast<kernel::SubGraphKernel *>(kernel_);
    ret = subgraph_kernel->ReSize();
    MS_CHECK_FALSE_MSG((ret != RET_OK) && (ret != RET_INFER_INVALID), ret, "Subgraph kernel Resize failed.");
  }
  return RET_OK;
}

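// Asynchronously send each prepared OpData to the receiver actor recorded in its arrow.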
void LiteOpActor::AsyncOutput(OpContext<Tensor> *context) {
  if (context == nullptr) {
    return;
  }
  auto output_size = output_data_arrows_.size();
  for (size_t i = 0; i < output_size; ++i) {
    auto data = outputs_data_[i];
    Async(output_data_arrows_[i]->to_op_id_, get_actor_mgr(), &mindspore::OpActor<Tensor>::RunOpData, data.get(),
          context);
  }
}

void LiteOpActor::AddResultIndex(size_t index, size_t tensor_index) {
  results_index_.push_back(index);
  results_tensor_index_.push_back(tensor_index);
}

void LiteOpActor::SetOutputData(const OpContext<Tensor> *context) {
  if (context == nullptr) {
    return;
  }
  for (auto index : results_index_) {
    context->SetResult(index, RET_OK);
  }
}

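// Pre-build one OpData per output arrow so AsyncOutput only needs to send it.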
int LiteOpActor::PrepareOutputData() {
  outputs_data_.resize(output_data_arrows_.size());
  for (size_t i = 0; i < output_data_arrows_.size(); i++) {
    auto &arrow = output_data_arrows_[i];
    auto data = std::make_shared<OpData<Tensor>>(this->GetAID(), (kernel_->out_tensors()).at(arrow->from_output_index_),
                                                 static_cast<int>(arrow->to_input_index_));
    if (MS_UNLIKELY(data == nullptr)) {
      MS_LOG(ERROR) << "new output_data failed.";
      return RET_NULL_PTR;
    }
    outputs_data_[i] = data;
  }
  return RET_OK;
}

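// Create one actor per subgraph kernel via CreateActor, bind it to the thread pool and
// actor manager, and spawn it. The kernel name is made unique so it can double as the
// actor name.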
std::vector<std::shared_ptr<LiteOpActor>> CreateOpActor(const std::vector<kernel::KernelExec *> &kernels,
                                                        lite::InnerContext *ctx, const std::shared_ptr<ActorMgr> &mgr) {
  MS_CHECK_TRUE_RET(ctx != nullptr, {});
  std::vector<std::shared_ptr<LiteOpActor>> actors;
  ActorThreadPool *thread_pool = reinterpret_cast<ActorThreadPool *>(ctx->thread_pool_);
  if (thread_pool == nullptr) {
    MS_LOG(ERROR) << "thread pool is nullptr";
    return actors;
  }
  actors.reserve(kernels.size());
  for (auto &kernel : kernels) {
    /* make subgraph name (actor name) unique */
    kernel->set_name(kernel->name() + "_" + std::to_string(actor_count++));
    std::shared_ptr<LiteOpActor> actor = CreateActor(kernel, ctx);
    if (actor == nullptr) {
      MS_LOG(ERROR) << "create LiteOpActor failed: " << kernel->name();
      actors.clear();
      return actors;
    }
    actor->set_thread_pool(thread_pool);
    actor->set_actor_mgr(mgr);
    actors.push_back(actor);
  }

  for (auto &actor : actors) {
    (void)mindspore::Spawn(actor);
  }
  return actors;
}

int MindrtInit() { return mindspore::Initialize("", "", "", ""); }

void MindrtTerminate(const std::vector<std::shared_ptr<LiteOpActor>> &actor_list,
                     const std::shared_ptr<ActorMgr> &actor_mgr) {
  for (const auto &actor : actor_list) {
    mindspore::Terminate(actor->GetAID(), actor_mgr);
  }
}
}  // namespace mindspore::lite