/**
 * Copyright 2020-2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "src/litert/mindrt_executor.h"
#include <algorithm>
#include <list>
#include <queue>
#include <memory>
#include "src/litert/lite_mindrt.h"
#include "include/errorcode.h"
#include "src/common/common.h"
#include "src/common/tensor_util.h"
#ifdef ENABLE_FP16
#include "nnacl/base/cast_base.h"
#endif
#include "nnacl/nnacl_common.h"
#include "src/litert/kernel_exec_util.h"

namespace mindspore::lite {
namespace {
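// Collects a future from every graph-output promise, then asynchronously dispatches each input
// OpData to its target actor. The returned future completes once all output promises are set.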
template <typename T>
Future<std::list<int>> MindrtAsyncRun(const std::vector<OpDataPtr<T>> &input_data, OpContext<T> *context,
                                      const std::shared_ptr<ActorMgr> &actor_mgr) {
  std::list<Future<int>> futures;
  auto promises = *(context->results_);
  (void)std::transform(promises.begin(), promises.end(), std::back_inserter(futures),
                       [](const Promise<int> &promise) { return promise.GetFuture(); });
  Future<std::list<int>> collect = mindspore::Collect<int>(futures);

  for (auto data : input_data) {
    Async(data->op_id_, actor_mgr, &mindspore::OpActor<T>::RunOpData, data.get(), context);
  }

  return collect;
}

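// Synchronous wrapper around MindrtAsyncRun: builds the OpContext with one promise per graph
// output, launches the asynchronous run, and blocks until every output promise is resolved.
// Returns 0 on success and -1 if any actor reported a failure.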
template <typename T>
int MindrtRun(const std::vector<OpDataPtr<T>> &input_data, std::vector<OpDataPtr<T>> *output_data,
              const void *kernel_call_back_before, const void *kernel_call_back_after,
              const std::shared_ptr<ActorMgr> &actor_mgr) {
  OpContext<T> context;
  std::vector<Promise<int>> promises(output_data->size());
  context.sequential_num_ = RandInt::Instance().Get();
  context.results_ = &promises;
  context.output_data_ = output_data;
  context.kernel_call_back_before_ = kernel_call_back_before;
  context.kernel_call_back_after_ = kernel_call_back_after;

  auto collect = MindrtAsyncRun<T>(input_data, &context, actor_mgr);
  collect.Wait();
  if (!collect.IsOK()) {
    return -1;
  }

  return 0;
}
}  // namespace

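// Builds the OpData list that feeds graph input tensors to their consuming actors: for each
// kernel input that is (or maps to) a graph input, records the target actor and input slot.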
int MindrtExecutor::PrepareGraphInput(const std::vector<kernel::KernelExec *> &kernels,
                                      const std::vector<Tensor *> &inputs) {
  auto kernels_size = kernels.size();
  for (size_t j = 0; j < kernels_size; ++j) {
    auto in_tensor_size = kernels[j]->in_tensors().size();
    for (size_t k = 0; k < in_tensor_size; ++k) {
      auto tensor = kernels[j]->in_tensors()[k];
      if (!tensor->IsGraphInput()) {
        // extendrt may create isolate_input_map_ outside of the executor
        auto input = isolate_input_map_->find(tensor);
        if (input == isolate_input_map_->end() || !input->second->IsGraphInput()) {
          continue;
        }
        tensor = input->second;
      }
      size_t idx = std::find(inputs.begin(), inputs.end(), tensor) - inputs.begin();
      if (idx == inputs.size()) {
        MS_LOG(ERROR) << "The input is not found.";
        return RET_ERROR;
      }
      auto data = std::make_shared<OpData<Tensor>>(op_actors_[j]->GetAID(), inputs.at(idx), static_cast<int>(k));
      if (MS_UNLIKELY(data == nullptr)) {
        MS_LOG(ERROR) << "new OpData failed.";
        return RET_NULL_PTR;
      }
      (void)input_data_.emplace_back(data);
    }
  }
  return RET_OK;
}

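// Builds the OpData list for graph outputs: for every graph output tensor, locates the kernel
// output slot that produces its isolated counterpart and registers a result index on that actor.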
int MindrtExecutor::PrepareGraphOutput(const std::vector<kernel::KernelExec *> &kernels,
                                       const std::vector<Tensor *> &outputs) {
  auto outputs_size = outputs.size();
  for (size_t i = 0; i < outputs_size; ++i) {
    Tensor *graph_output_tensor = outputs[i];
    if (graph_output_tensor->IsGraphInput()) {
      continue;
    }
    auto current_output_map =
      std::find_if(isolate_output_map_->begin(), isolate_output_map_->end(), [&](const auto output_map_tensor) {
        if (graph_output_tensor == output_map_tensor.second) {
          return true;
        }
        return false;
      });
    MS_ASSERT(current_output_map != isolate_output_map_->end());
    Tensor *subgraph_output_tensor = current_output_map->first;
    auto kernels_size = kernels.size();
    for (size_t j = 0; j < kernels_size; ++j) {
      auto out_tensor_size = kernels[j]->out_tensors().size();
      for (size_t k = 0; k < out_tensor_size; ++k) {
        if (subgraph_output_tensor != kernels[j]->out_tensors()[k]) {
          continue;
        }
        auto data =
          std::make_shared<OpData<Tensor>>(op_actors_[j]->GetAID(), subgraph_output_tensor, static_cast<int>(k));
        if (MS_UNLIKELY(data == nullptr)) {
          MS_LOG(ERROR) << "new OpData failed.";
          return RET_NULL_PTR;
        }
        op_actors_[j]->AddResultIndex(output_data_.size(), k);
        (void)output_data_.emplace_back(data);
      }
    }
  }
  if (output_data_.empty()) {
    MS_LOG(ERROR) << "output_data_ cannot be empty.";
    return RET_ERROR;
  }
  return RET_OK;
}

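// Forwards new input shapes to every actor so each one can resize its graph inputs.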
int MindrtExecutor::Resize(const std::vector<mindspore::lite::Tensor *> &inputs,
                           const std::vector<std::vector<int>> &dims) {
  for (auto actor : op_actors_) {
    actor->ResizeGraphInput(inputs, dims);
  }
  return RET_OK;
}

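// Pre-run initialization: if extendrt already supplied an isolate input map, just register the
// tensor links and hand the map to each actor; otherwise let every actor isolate its own inputs.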
int MindrtExecutor::PreInitActors() {
  // extendrt may create isolate_input_map_ outside of the executor
  if (!isolate_input_map_->empty()) {
    for (auto &iter : *isolate_input_map_) {
      ctx_->SetLinkInfo(iter.second, iter.first);
    }
    for (const auto &actor : op_actors_) {
      actor->set_isolate_input_map(isolate_input_map_);
    }
    return RET_OK;
  }
  for (const auto &actor : op_actors_) {
    int ret = actor->PreInit(&op_actors_, isolate_input_map_);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "IsolateInputData failed, actor aid: " << actor->GetAID();
      return ret;
    }
  }
  return RET_OK;
}

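// Maps each input tensor to the set of (actor AID, input slot) pairs that consume it;
// LinkActors uses this map to compile the data arrows between actors.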
std::unordered_map<void *, std::set<std::pair<AID, size_t>>> MindrtExecutor::BuildReceiverMap() {
  std::unordered_map<void *, std::set<std::pair<AID, size_t>>> receivers_map{};

  for (auto op_actor : op_actors_) {
    auto input_tensors = op_actor->GetKernel()->in_tensors();
    for (size_t i = 0; i < input_tensors.size(); ++i) {
      auto key = input_tensors[i];
      auto pair = std::make_pair(op_actor->GetAID(), i);
      auto iter = receivers_map.find(key);
      if (iter != receivers_map.end()) {
        (void)iter->second.emplace(pair);
      } else {
        std::set<std::pair<AID, size_t>> tmp_set{pair};
        receivers_map[key] = tmp_set;
      }
    }
  }
  return receivers_map;
}

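// Compiles the data arrows (sender-to-receiver links) for every actor from the receiver map.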
int MindrtExecutor::LinkActors() {
  auto receivers_map = BuildReceiverMap();
  for (auto &&op_actor : op_actors_) {
    auto ret = op_actor->CompileArrow(receivers_map);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "actor: " << op_actor->GetAID() << " compile arrow failed.";
      return ret;
    }
  }
  return RET_OK;
}

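// Runs each actor's PostInit step once all actors have been linked.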
int MindrtExecutor::PostInitActors() {
  for (auto &&actor : op_actors_) {
    auto ret = actor->PostInit();
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "PostInit failed, actor aid: " << actor->GetAID();
      return ret;
    }
  }
  return RET_OK;
}

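// Top-level setup: creates the actor manager and one actor per kernel, prepares the graph
// input/output OpData, then runs the pre-init -> link -> post-init pipeline.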
int MindrtExecutor::Prepare(const std::vector<kernel::KernelExec *> &kernels, const std::vector<Tensor *> &inputs,
                            const std::vector<Tensor *> &outputs, lite::InnerContext *ctx) {
  MS_ASSERT(ctx != nullptr);
  ctx_ = ctx;
  actor_mgr_ = std::make_shared<ActorMgr>();
  if (actor_mgr_ == nullptr) {
    MS_LOG(ERROR) << "make_shared ActorMgr failed!";
    return RET_ERROR;
  }

  op_actors_ = CreateOpActor(kernels, ctx, actor_mgr_);
  if (op_actors_.size() != kernels.size()) {
    MS_LOG(ERROR) << "CreateOpActor failed! actor num: " << op_actors_.size() << ", kernels num: " << kernels.size();
    return RET_ERROR;
  }

  auto ret = PrepareGraphInput(kernels, inputs);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "PrepareGraphInput failed! ret: " << ret;
    return ret;
  }

  ret = PrepareGraphOutput(kernels, outputs);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "PrepareGraphOutput failed! ret: " << ret;
    return ret;
  }

  ret = PreInitActors();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "PreInitActors failed! ret: " << ret;
    return ret;
  }

  ret = LinkActors();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "LinkActors failed! ret: " << ret;
    return ret;
  }

  ret = PostInitActors();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "PostInitActors failed! ret: " << ret;
    return ret;
  }
  return RET_OK;
}

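// Moves each isolated output tensor's result into the user-visible graph output tensor.
// Fp16 results are converted when the destination expects fp32; otherwise the data pointer is
// shared or handed over, depending on whether the source tensor has an allocator.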
int MindrtExecutor::TransferGraphOutput() {
  for (auto tensor_map : *isolate_output_map_) {
    auto dst_tensor = tensor_map.second;
    auto src_tensor = tensor_map.first;
    if (dst_tensor->data_type() == kNumberTypeGLUInt && src_tensor->data_type() == kNumberTypeGLUInt) {
      continue;
    }
    dst_tensor->set_shape(src_tensor->shape());
    /* the dst tensor is freed in FreeOutputTensor */
    if (src_tensor->data_type() == kNumberTypeFloat16 && dst_tensor->data_type() == kNumberTypeFloat32) {
      auto ret = dst_tensor->MallocData();
      if (ret != RET_OK) {
        MS_LOG(ERROR) << "MallocData failed";
        return ret;
      }
#ifdef ENABLE_FP16
      Fp16ToFloat32(reinterpret_cast<float16_t *>(src_tensor->MutableData()),
                    reinterpret_cast<float *>(dst_tensor->data()), dst_tensor->ElementsNum());
#else
      auto src_data = reinterpret_cast<const uint16_t *>(src_tensor->MutableData());
      auto dst_data = reinterpret_cast<float *>(dst_tensor->data());
      for (int i = 0; i < dst_tensor->ElementsNum(); i++) {
        dst_data[i] = ShortToFloat32(src_data[i]);
      }
#endif
    } else {
      if (dst_tensor->allocator() != src_tensor->allocator()) {
        dst_tensor->set_allocator(src_tensor->allocator());
      }
      if (src_tensor->allocator() != nullptr) {
        dst_tensor->set_data(src_tensor->data());
        dst_tensor->set_own_data(src_tensor->IsConst() ? false : src_tensor->own_data());
      } else {
        dst_tensor->set_data(src_tensor->data());
        src_tensor->set_data(nullptr);
      }
    }
    src_tensor->DecRefCount();
  }
  return RET_OK;
}

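// Called before each run: releases the previous output buffers, or re-binds user-provided /
// shared (zero-copy) buffers to the source tensors so actors write directly into them.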
void MindrtExecutor::FreeOutputTensor() {
  for (auto &&tensor_map : *isolate_output_map_) {
    auto src_tensor = tensor_map.first;
    auto dst_tensor = tensor_map.second;
    if (dst_tensor->data_type() == kNumberTypeGLUInt && src_tensor->data_type() == kNumberTypeGLUInt) {
      continue;
    }

    if ((dst_tensor->allocator() != nullptr && dst_tensor->own_data()) || dst_tensor->data() == nullptr) {
      MS_LOG(DEBUG) << "free data";
      dst_tensor->FreeData();
    } else if (dst_tensor->data() != nullptr && dst_tensor->data_type() == src_tensor->data_type()) {
      if (dst_tensor->allocator() == nullptr) {
        /* user set graph-output-tensor from outside */
        MS_LOG(DEBUG) << "user set graph-output-tensor from outside";
        src_tensor->set_data(dst_tensor->data());
        src_tensor->set_own_data(false);
        src_tensor->set_allocator(nullptr);
      } else if (dst_tensor->allocator() == src_tensor->allocator()) {
        /* nnrt npu zero copy scene */
        MS_LOG(DEBUG) << "zero copy data";
        src_tensor->set_data(dst_tensor->data());
        src_tensor->set_own_data(dst_tensor->own_data());
      }
    }
  }
}

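// Executes the graph: raises the thread-pool spin count when no delegate is in use, frees or
// re-binds the output buffers, dispatches every input OpData through the actor runtime and
// waits for all results, then transfers them into the user-visible output tensors.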
int MindrtExecutor::Run(const std::vector<Tensor *> &in_tensors, const std::vector<Tensor *> &out_tensors,
                        const std::vector<kernel::KernelExec *> &kernels, const KernelCallBack &before,
                        const KernelCallBack &after) {
  CHECK_NULL_RETURN(ctx_);
  auto thread_pool = ctx_->thread_pool_;
  CHECK_NULL_RETURN(thread_pool);
  if (ctx_->delegate == nullptr) {
    thread_pool->SetSpinCountMaxValue();
  }

  FreeOutputTensor();

  auto ret = MindrtRun<Tensor>(input_data_, &output_data_, &before, &after, actor_mgr_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "MindrtRun failed";
    return ret;
  }

  ret = TransferGraphOutput();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "TransferGraphOutput failed";
    return ret;
  }

  thread_pool->SetSpinCountMinValue();
  return RET_OK;
}
}  // namespace mindspore::lite