1 /**
2 * Copyright 2020-2023 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/litert/lite_session.h"
18 #include <set>
19 #include <vector>
20 #include <utility>
21 #include <fstream>
22 #include <algorithm>
23 #include "src/litert/pack_weight_manager.h"
24 #include "src/litert/runtime_pass.h"
25 #include "include/errorcode.h"
26 #include "src/common/log_adapter.h"
27 #include "src/litert/scheduler.h"
28 #include "src/litert/inner_allocator.h"
29 #include "src/litert/executor.h"
30 #include "src/common/context_util.h"
31 #include "src/common/utils.h"
32 #include "src/common/graph_util.h"
33 #include "src/common/tensor_util.h"
34 #include "src/common/file_utils.h"
35 #include "src/common/mmap_utils.h"
36 #include "src/litert/lite_model.h"
37 #include "src/litert/weight_decoder.h"
38 #include "src/litert/runtime_allocator.h"
39 #include "src/litert/kernel_exec_util.h"
40 #include "src/litert/cpu_info.h"
41 #ifndef CUSTOM_KERNEL_REGISTRY_CLIP
42 #include "src/registry/register_kernel_impl.h"
43 #endif
44 #ifdef ENABLE_MINDRT
45 #include "src/litert/mindrt_executor.h"
46 #endif
47 #ifdef SUPPORT_NPU
48 #include "src/litert/delegate/npu/npu_delegate.h"
49 #endif
50 #ifdef GPU_OPENCL
51 #include "src/litert/kernel/opencl/opencl_subgraph.h"
52 #endif
53 #ifdef GPU_TENSORRT
54 #include "src/litert/delegate/tensorrt/tensorrt_delegate.h"
55 #endif
56 #ifdef SUPPORT_NNAPI
57 #include "src/litert/delegate/nnapi/nnapi_delegate.h"
58 #endif
59 #ifdef ENABLE_COREML
60 #include "src/litert/delegate/coreml/coreml_delegate.h"
61 #endif
62 #include "src/litert/runtime_convert.h"
63 #include "extendrt/mindir_loader/model_loader.h"
64 #ifndef __ANDROID__
65 #include "kernel/ascend/plugin/ascend_kernel_plugin.h"
66 #endif
67 #if defined(PARALLEL_INFERENCE) && defined(ENABLE_MINDRT)
68 #include "thread/parallel_thread_pool_manager.h"
69 #endif
70 #include "src/litert/runtime_packed_node_pass.h"
71 #ifdef SUPPORT_NNRT
72 #include "src/litert/delegate/nnrt/nnrt_delegate.h"
73 #include "src/litert/delegate/nnrt/nnrt_allocator.h"
74 #endif
75
76 using AbstractBaseModel = mindspore::infer::AbstractBaseModel;
77
78 namespace mindspore {
79 #ifdef USE_GLOG
80 extern "C" {
81 extern void mindspore_log_init();
82 }
83 #endif
84 namespace lite {
85 namespace {
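// Checks whether any custom kernel creator is registered for the "CPU" arch in the kernel
// registry; always returns false when the custom-kernel registry is clipped out of the build.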
86 bool ExistCustomCpuKernel() {
87 #ifndef CUSTOM_KERNEL_REGISTRY_CLIP
88 const std::string kArchCPU = "CPU";
89 auto custom_kernel_creators = registry::RegistryKernelImpl::GetInstance()->GetCustomKernelCreators();
90 for (const auto &custom_kernel_creator : custom_kernel_creators) { // <provider, <arch, <type, CreateKernel*>>>
91 if (custom_kernel_creator.second.empty()) {
92 continue;
93 }
94 if (std::any_of(
95 custom_kernel_creator.second.begin(), custom_kernel_creator.second.end(),
96 [kArchCPU](const std::pair<std::string, std::unordered_map<std::string, registry::CreateKernel *>> &pair) {
97 return pair.first == kArchCPU && !pair.second.empty();
98 })) {
99 return true;
100 }
101 }
102 #endif
103 return false;
104 }
105 } // namespace
106
107 LiteSession::LiteSession() {
108 #ifdef USE_GLOG
109 mindspore::mindspore_log_init();
110 #endif
111 this->is_running_.store(false);
112 }
113
114 int LiteSession::CheckTensorValid(lite::Tensor *dst_tensor) {
115 MS_ASSERT(dst_tensor != nullptr);
116 if (dst_tensor->data_type() == kObjectTypeTensorType) {
117 return RET_OK;
118 }
119 if (dst_tensor->IsGraphInput() || dst_tensor->IsGraphOutput()) {
120 return RET_OK;
121 }
122 if (dst_tensor->IsConst() == false && dst_tensor->data() != nullptr) {
123 return RET_ERROR;
124 }
125 return RET_OK;
126 }
127
128 void LiteSession::ConvertTensorsQuantParam(const schema::Tensor *src_tensor, lite::Tensor *dst_tensor) {
129 MS_ASSERT(src_tensor != nullptr);
130 MS_ASSERT(dst_tensor != nullptr);
131 auto quant_params = src_tensor->quantParams();
132 if (quant_params != nullptr) {
133 for (size_t j = 0; j < quant_params->size(); j++) {
134 auto quant_param = quant_params->Get(j);
135 LiteQuantParam quant_arg{};
136 if (quant_param == nullptr) {
137 quant_arg.inited = false;
138 } else {
139 quant_arg.inited = true;
140 quant_arg.bitNum = quant_param->numBits();
141 quant_arg.scale = quant_param->scale();
142 quant_arg.zeroPoint = quant_param->zeroPoint();
143 quant_arg.var_corr = quant_param->varCorr();
144 quant_arg.mean_corr = quant_param->meanCorr();
145 quant_arg.roundType = quant_param->roundType();
146 quant_arg.multiplier = quant_param->multiplier();
147 quant_arg.dstDtype = quant_param->dstDtype();
148 quant_arg.min = quant_param->min();
149 quant_arg.max = quant_param->max();
150 }
151 dst_tensor->AddQuantParam(quant_arg);
152 }
153 }
154 auto quant_clusters = src_tensor->quantClusters();
155 if (quant_clusters != nullptr) {
156 std::vector<float> clusters;
157 for (size_t j = 0; j < quant_clusters->size(); j++) {
158 clusters.push_back(quant_clusters->Get(j));
159 }
160 dst_tensor->set_quant_clusters(clusters);
161 }
162 }
163
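// Copies or decompresses the constant data of the schema tensor at 'tensor_index' into 'dst_tensor'.
// Tensor-list tensors are decoded from the raw buffer; for FSE-compressed weights (kFSEInfer) the
// raw buffer is attached as-is, since the compress type is recorded on the tensor in ConvertTensor.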
164 int LiteSession::ConvertTensorsData(const lite::LiteModel *model, size_t tensor_index, lite::Tensor *dst_tensor) {
165 MS_ASSERT(model != nullptr);
166 MS_ASSERT(dst_tensor != nullptr);
167 auto src_tensor = model->GetSchemaTensor(tensor_index);
168 if (src_tensor == nullptr || src_tensor->handler() == nullptr || src_tensor->data() == nullptr ||
169 src_tensor->length() == 0) {
170 MS_LOG(DEBUG) << "No valid data converted.";
171 return RET_OK;
172 }
173
174 /* tensor list convert */
175 if (dst_tensor->data_type() == kObjectTypeTensorType) {
176 const int *src_data = reinterpret_cast<const int *>(src_tensor->data());
177 return DecodeTensorLsit(dst_tensor, src_data, src_tensor->length());
178 }
179
180 /* normal tensor check */
181 auto shape_info = dst_tensor->shape();
182 if (shape_info.end() !=
183 std::find_if(shape_info.begin(), shape_info.end(), [](const int shape) { return shape <= 0; })) {
184 MS_LOG(ERROR) << "Invalid shape size, tensor name: " << src_tensor->handler()->name();
185 return RET_ERROR;
186 }
187
188 int compress_type = src_tensor->handler()->weightQuantCompressType();
189 int ret = RET_NO_CHANGE;
190 if (compress_type != kFSEInfer) {
191 ret = WeightDecoder::DecompressTensor(*src_tensor, dst_tensor);
192 }
193 if (ret == RET_NO_CHANGE) {
194 if (dst_tensor->Size() == 0 || src_tensor->length() < dst_tensor->Size()) {
195 MS_LOG(ERROR) << "Tensor data shape invalid";
196 return RET_ERROR;
197 }
198 auto data_pair = src_tensor->ReleaseData();
199 dst_tensor->set_data(data_pair.second);
200 dst_tensor->set_own_data(data_pair.first);
201 } else if (ret != RET_OK) {
202 MS_LOG(ERROR) << "Decompress tensor data failed: " << ret;
203 return ret;
204 }
205 return RET_OK;
206 }
207
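// Builds a runtime lite::Tensor (or TensorList) from a schema::Tensor: validates the data type and
// dims, derives the shape (string tensors use the data length), and carries over the tensor name,
// format and weight-quant compress information.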
208 lite::Tensor *LiteSession::ConvertTensor(const schema::Tensor &src_tensor) {
209 int32_t data_type = src_tensor.dataType();
210 if (data_type <= kTypeUnknown || data_type >= kMonadTypeEnd) {
211 MS_LOG(ERROR) << "invalid data type. " << data_type;
212 return nullptr;
213 }
214 auto src_category = TensorCategory(src_tensor);
215 std::vector<int> shape;
216 if (src_tensor.dims() == nullptr) {
217 MS_LOG(DEBUG) << "Dims of src_tensor is nullptr";
218 }
219 if (src_tensor.dims() != nullptr) {
220 if (src_tensor.dataType() == kObjectTypeString && src_tensor.data() != nullptr) {
221 shape.push_back(src_tensor.data()->size());
222 } else {
223 for (size_t j = 0; j < src_tensor.dims()->size(); j++) {
224 shape.push_back(src_tensor.dims()->data()[j]);
225 }
226 }
227 if (std::any_of(shape.begin(), shape.end(), [](const int &element) { return element < 0 && element != -1; })) {
228 MS_LOG(ERROR) << "Dims of src_tensor is unsupported";
229 return nullptr;
230 }
231 }
232 lite::Tensor *dst_tensor = nullptr;
233 if (TypeId(data_type) == kObjectTypeTensorType) {
234 MS_CHECK_TRUE_RET(src_tensor.data() != nullptr, nullptr);
235 MS_CHECK_TRUE_RET(src_tensor.data()->size() > 0, nullptr);
236 auto src_data = src_tensor.data()->data();
237 dst_tensor = CreateTensorList(shape, src_category, src_data);
238 } else {
239 dst_tensor = new (std::nothrow)
240 Tensor(TypeId(data_type), shape, static_cast<mindspore::Format>(src_tensor.format()), src_category);
241 }
242 if (dst_tensor == nullptr) {
243 MS_LOG(ERROR) << "create dst_tensor is nullptr.";
244 return nullptr;
245 }
246 if (src_tensor.name() != nullptr) {
247 dst_tensor->set_tensor_name(src_tensor.name()->str());
248 }
249 auto compress_type = static_cast<CompressType>(src_tensor.weightQuantCompressType());
250 if (compress_type == kFSEInfer) {
251     dst_tensor->set_compress_type(compress_type);
252 dst_tensor->set_compressed_size(src_tensor.data()->size());
253 }
254 return dst_tensor;
255 }
256
257 int LiteSession::ConvertTensors(const lite::Model *model) {
258 MS_ASSERT(model != nullptr);
259 auto lite_model = reinterpret_cast<const lite::LiteModel *>(model);
260 uint32_t tensor_count = model->graph_.all_tensors_.size();
261 auto model_input_indices = model->graph_.input_indices_;
262 auto model_output_indices = model->graph_.output_indices_;
263
264 for (uint32_t i = 0; i < tensor_count; ++i) {
265 auto *src_tensor = model->graph_.all_tensors_[i];
266 if (src_tensor == nullptr) {
267 MS_LOG(ERROR) << i << "th tensor in model is nullptr";
268 return RET_NULL_PTR;
269 }
270 auto *dst_tensor = ConvertTensor(*src_tensor);
271 if (dst_tensor == nullptr) {
272 MS_LOG(ERROR) << "Convert new " << i << "th tensor failed!";
273 return RET_NULL_PTR;
274 }
275 auto ret = ConvertTensorsData(lite_model, i, dst_tensor);
276 if (ret != RET_OK) {
277 MS_LOG(ERROR) << "Convert data of " << i << "th tensor failed";
278 delete dst_tensor;
279 return ret;
280 }
281 ConvertTensorsQuantParam(src_tensor, dst_tensor);
282 if (IsContain(model_input_indices, i)) {
283 dst_tensor->set_category(Category::GRAPH_INPUT);
284 }
285 if (IsContain(model_output_indices, i)) {
286       // A tensor that is both a graph input and a graph output is treated as an input.
287 if (!dst_tensor->IsGraphInput()) {
288 dst_tensor->set_category(Category::GRAPH_OUTPUT);
289 }
290 }
291
292 ret = CheckTensorValid(dst_tensor);
293 if (ret != RET_OK) {
294 MS_LOG(ERROR) << "Check " << i << "th tensor failed";
295 delete dst_tensor;
296 return ret;
297 }
298
299 this->tensors_.emplace_back(dst_tensor);
300 }
301 return RET_OK;
302 }
303
304 void LiteSession::InitGraphInputTensors(const lite::Model *model) {
305 MS_ASSERT(model != nullptr);
306 auto graph_in_size = model->graph_.input_indices_.size();
307 for (size_t i = 0; i < graph_in_size; ++i) {
308 auto in_tensor_idx = model->graph_.input_indices_[i];
309 MS_ASSERT(in_tensor_idx < this->tensors_.size());
310 auto *in_tensor = this->tensors_.at(in_tensor_idx);
311 MS_ASSERT(in_tensor != nullptr);
312 this->inputs_.emplace_back(in_tensor);
313 }
314 }
315
316 void LiteSession::InitGraphInputMSTensors() {
317 MS_ASSERT(this->input_vec_.empty());
318 for (auto &input_tensor : this->inputs_) {
319 MS_ASSERT(input_tensor != nullptr);
320 this->input_vec_.emplace_back(input_tensor);
321 }
322 }
323
324 void LiteSession::InitGraphOutputTensors(const lite::Model *model) {
325 MS_ASSERT(model != nullptr);
326 MS_ASSERT(this->outputs_.empty());
327 auto graph_out_size = model->graph_.output_indices_.size();
328 for (size_t i = 0; i < graph_out_size; ++i) {
329 auto out_tensor_idx = model->graph_.output_indices_[i];
330 MS_ASSERT(out_tensor_idx < this->tensors_.size());
331 auto *out_tensor = this->tensors_.at(out_tensor_idx);
332 MS_ASSERT(out_tensor != nullptr);
333 this->outputs_.emplace_back(out_tensor);
334 }
335 }
336
337 void LiteSession::InitGraphInputMap(const lite::Model *model) {
338 MS_ASSERT(model != nullptr);
339 MS_ASSERT(this->input_map_.empty());
340 MS_ASSERT(this->input_shape_map_.empty());
341 auto graph_input_node_indexes = GetGraphInputNodes(model);
342 auto graph_in_size = model->graph_.input_indices_.size();
343 for (auto in_node_index : graph_input_node_indexes) {
344 auto in_node = model->graph_.all_nodes_[in_node_index];
345 MS_ASSERT(in_node != nullptr);
346 auto in_size = in_node->input_indices_.size();
347 for (size_t i = 0; i < in_size; ++i) {
348 if (this->input_map_.find(in_node->name_ + std::to_string(i)) != this->input_map_.end()) {
349 MS_LOG(ERROR) << "cant find input " << in_node->name_ + std::to_string(i) << "at input_map_";
350 return;
351 }
352 auto in_tensor_index = size_t(in_node->input_indices_[i]);
353 bool is_graph_input = false;
354 for (size_t j = 0; j < graph_in_size; ++j) {
355 if (in_tensor_index == model->graph_.input_indices_[j]) {
356 is_graph_input = true;
357 break;
358 }
359 }
360 if (!is_graph_input) {
361 continue;
362 }
363 MS_ASSERT(in_tensor_index < this->tensors_.size());
364 auto *in_tensor = this->tensors_.at(in_tensor_index);
365 if (in_tensor == nullptr) {
366 MS_LOG(ERROR) << "in_tensor is null!";
367 return;
368 }
369 auto tensor_name = in_node->name_ + std::to_string(i);
370 this->input_map_[tensor_name] = in_tensor;
371 this->input_shape_map_[in_tensor] = in_tensor->shape();
372 if (!in_tensor->tensor_name().empty()) {
373 this->input_map_[in_tensor->tensor_name()] = in_tensor;
374 }
375 }
376 }
377
378 for (auto input_tensor : this->inputs_) {
379 MS_ASSERT(input_tensor != nullptr);
380 if (this->input_map_.find(input_tensor->tensor_name()) == this->input_map_.end()) {
381 this->input_map_[input_tensor->tensor_name()] = input_tensor;
382 }
383 if (this->input_shape_map_.find(input_tensor) == this->input_shape_map_.end()) {
384 this->input_shape_map_[input_tensor] = input_tensor->shape();
385 }
386 }
387 }
388
389 void LiteSession::InitGraphOutputNodeMap(const lite::Model *model) {
390 MS_ASSERT(model != nullptr);
391 auto graph_output_node_indexes = GetGraphOutputNodes(model);
392 auto graph_out_size = model->graph_.output_indices_.size();
393 for (auto out_node_index : graph_output_node_indexes) {
394 auto out_node = model->graph_.all_nodes_[out_node_index];
395 MS_ASSERT(out_node != nullptr);
396 auto out_size = out_node->output_indices_.size();
397 for (size_t i = 0; i < out_size; ++i) {
398 auto out_tensor_index = out_node->output_indices_[i];
399 bool is_graph_output = false;
400 for (size_t j = 0; j < graph_out_size; ++j) {
401 if (out_tensor_index == model->graph_.output_indices_[j]) {
402 is_graph_output = true;
403 break;
404 }
405 }
406 if (!is_graph_output) {
407 continue;
408 }
409 MS_ASSERT(out_tensor_index < this->tensors_.size());
410 auto *out_tensor = this->tensors_.at(out_tensor_index);
411 if (out_tensor == nullptr) {
412 MS_LOG(ERROR) << "out_tensor is null!";
413 return;
414 }
415 this->output_node_map_[out_node->name_].emplace_back(out_tensor);
416 }
417 }
418 }
419
420 void LiteSession::InitGraphOutputTensorMap(const lite::Model *model) {
421 MS_ASSERT(model != nullptr);
422 MS_ASSERT(this->output_tensor_map_.empty());
423 auto graph_out_size = model->graph_.output_indices_.size();
424 for (size_t i = 0; i < graph_out_size; ++i) {
425 size_t graph_out_index = model->graph_.output_indices_[i];
426 MS_ASSERT(graph_out_index < this->tensors_.size());
427 auto *out_tensor = this->tensors_.at(graph_out_index);
428 if (out_tensor == nullptr) {
429 MS_LOG(ERROR) << "out_tensor is null!";
430 return;
431 }
432 if (!out_tensor->tensor_name().empty()) {
433 this->output_tensor_map_.insert(std::make_pair(out_tensor->tensor_name(), out_tensor));
434 this->output_tensor_names_.emplace_back(out_tensor->tensor_name());
435 } else {
436 this->output_tensor_map_.insert(std::make_pair(std::to_string(graph_out_index), out_tensor));
437 this->output_tensor_names_.emplace_back(std::to_string(graph_out_index));
438 }
439 }
440 }
441
442 void LiteSession::InitGraphInOutTensorsMap(const lite::Model *model) {
443 InitGraphInputMSTensors();
444 InitGraphInputMap(model);
445 InitGraphOutputNodeMap(model);
446 InitGraphOutputTensorMap(model);
447 }
448
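// Duplicates every graph output tensor: kernels are rewired to compute into a new "_duplicate"
// tensor while the original tensor in outputs_ stays user-facing. The pairs are recorded in
// isolate_graph_output_map_, which is later handed to the MindRT executor.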
449 int LiteSession::IsolateOutputTensor() {
450 for (Tensor *src_tensor : outputs_) {
451 if (src_tensor->IsGraphInput()) {
452 continue;
453 }
454 Tensor *new_tensor = new (std::nothrow)
455 Tensor(src_tensor->data_type(), src_tensor->shape(), src_tensor->format(), Category::GRAPH_OUTPUT);
456 if (MS_UNLIKELY(new_tensor == nullptr)) {
457 MS_LOG(ERROR) << "duplicate new output failed.";
458 return RET_NULL_PTR;
459 }
460 new_tensor->set_allocator(src_tensor->allocator()); /* GPU use opencl allocator */
461 new_tensor->set_tensor_name(src_tensor->tensor_name() + "_duplicate");
462 for (LiteQuantParam quant : src_tensor->quant_params()) {
463 new_tensor->AddQuantParam(quant);
464 }
465 new_tensor->set_init_ref_count(src_tensor->init_ref_count());
466
467 /* src tensor set for graph calculate */
468 if (src_tensor->data_type() == kNumberTypeFloat16) {
469 src_tensor->set_data_type(kNumberTypeFloat32);
470 }
471 src_tensor->set_ref_count(1);
472
473 isolate_graph_output_map_.insert(std::make_pair(new_tensor, src_tensor));
474
475 /* set new tensor for calculate */
476 for (auto subgraph : kernels_) {
477 /* subgraph input and output */
478 auto in_size = subgraph->in_tensors().size();
479 for (size_t i = 0; i < in_size; ++i) {
480 if (subgraph->in_tensors()[i] == src_tensor) {
481 subgraph->set_in_tensor(new_tensor, i);
482 }
483 }
484 auto out_size = subgraph->out_tensors().size();
485 for (size_t i = 0; i < out_size; ++i) {
486 if (subgraph->out_tensors()[i] == src_tensor) {
487 subgraph->set_out_tensor(new_tensor, i);
488 }
489 }
490 if (subgraph->desc().arch == kernel::kDelegate) {
491 continue;
492 }
493 /* node input and output */
494 auto nodes = reinterpret_cast<kernel::SubGraphKernel *>(subgraph)->nodes();
495 auto nodes_size = nodes.size();
496 for (size_t i = 0; i < nodes_size; ++i) {
497 auto node = nodes[i];
498 out_size = node->out_tensors().size();
499 for (size_t j = 0; j < out_size; ++j) {
500 if (node->out_tensors()[j] == src_tensor) {
501 node->set_out_tensor(new_tensor, j);
502 break;
503 }
504 }
505 in_size = node->in_tensors().size();
506 for (size_t j = 0; j < in_size; ++j) {
507 if (node->in_tensors()[j] == src_tensor) {
508 node->set_in_tensor(new_tensor, j);
509 }
510 }
511 }
512 }
513 }
514
515 UpdateLinkInfoForIsolateOutput();
516 return RET_OK;
517 }
518
519 void LiteSession::UpdateLinkInfoForIsolateOutput() {
520 for (auto &item : isolate_graph_output_map_) {
521 context_->ReplaceLinkInfoReceiverWithNewOne(item.first, item.second);
522 }
523 return;
524 }
525
526 void LiteSession::FreePackOpWeight(const std::vector<kernel::KernelExec *> &kernels) {
527   // To reduce runtime RAM usage,
528   // free the pack-op weights, because packed ops will not access the origin weights at runtime.
529 for (auto *kernel : kernels) {
530 MS_ASSERT(kernel != nullptr);
531 if (kernel->subgraph_type() == kernel::kNotSubGraph) {
532 if (!IsPackedOp(static_cast<int>(kernel::SchemaType(kernel->type())))) {
533 continue;
534 }
535 } else {
536 auto subgraph = reinterpret_cast<kernel::SubGraphKernel *>(kernel);
537 FreePackOpWeight(subgraph->nodes());
538 }
539 auto inputs = kernel->in_tensors();
540 for (auto *tensor : inputs) {
541 MS_ASSERT(tensor != nullptr);
542 if (!tensor->IsConst() || tensor->ref_count() >= 1) {
543 continue;
544 }
545 tensor->FreeData();
546 }
547 }
548 }
549
550 void LiteSession::MarkSharedWeight(const std::vector<kernel::KernelExec *> &kernels) {
551   // To reduce runtime RAM usage,
552   // mark weights shared with non-packed ops (by increasing their ref count) so FreePackOpWeight keeps them.
553 for (auto *kernel : kernels) {
554 MS_ASSERT(kernel != nullptr);
555 if (kernel->subgraph_type() == kernel::kNotSubGraph) {
556 if (IsPackedOp(static_cast<int>(kernel::SchemaType(kernel->type())))) {
557 continue;
558 }
559 } else {
560 auto subgraph = reinterpret_cast<kernel::SubGraphKernel *>(kernel);
561 MarkSharedWeight(subgraph->nodes());
562 }
563 auto inputs = kernel->in_tensors();
564 for (auto *tensor : inputs) {
565 MS_ASSERT(tensor != nullptr);
566 if (tensor->IsConst()) {
567 tensor->IncRefCount();
568 }
569 }
570 }
571 }
572
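// Compile pipeline: PreCheck -> ConvertTensors -> init graph in/out tensors -> schedule kernels ->
// PrepareKernels -> InitExecutor -> mark shared / free packed weights.
//
// Illustrative caller-side sketch (not part of this file; the model/context setup is an assumption):
//   auto context = std::make_shared<lite::InnerContext>();
//   auto *session = new (std::nothrow) lite::LiteSession();
//   if (session != nullptr && session->Init(context) == RET_OK && session->CompileGraph(model) == RET_OK) {
//     // session is ready for RunGraph()
//   }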
573 int LiteSession::CompileGraph(Model *model) {
574 auto ret = PreCheck(model);
575 if (ret != RET_OK) {
576 MS_LOG(ERROR) << "schedule check failed: " << ret;
577 is_running_.store(false);
578 return ret;
579 }
580
581 if (model->model_type_ != ModelType_MSLite) {
582 ret = reinterpret_cast<AbstractBaseModel *>(model)->ConvertTensors(&this->tensors_);
583 } else {
584 // Convert to abstract base model interface
585 ret = ConvertTensors(model);
586 context_->set_schema_version(reinterpret_cast<LiteModel *>(model)->GetSchemaVersion());
587 }
588 if (ret != RET_OK) {
589 MS_LOG(ERROR) << "ConvertTensors failed: " << ret;
590 is_running_.store(false);
591 return ret;
592 }
593 ret = lite::PackWeightManager::GetInstance()->StoreOriginTensorData(model, &tensors_);
594 if (ret != RET_OK) {
595 MS_LOG(ERROR) << "StoreOriginTensorData failed.";
596 is_running_.store(false);
597 return RET_ERROR;
598 }
599 InitGraphInputTensors(model);
600 InitGraphOutputTensors(model);
601
602 PackedNodePass::GetInstance().Run(model, tensors_);
603
604 // scheduler kernels
605 Scheduler scheduler(context_.get(), ms_context_, model, &tensors_, &inputs_, &outputs_, is_train_session_,
606 &is_infershape_, &is_control_flow_, &infer_along_running_, execution_plan_, delegate_,
607 delegate_device_type_);
608 scheduler.SetupSchedulerCb(std::move(sched_cb_));
609 scheduler.SetConfig(config_info_);
610 ret = scheduler.Schedule(&kernels_);
611 if (ret != RET_OK) {
612 MS_LOG(ERROR) << "Schedule kernels failed: " << ret;
613 is_running_.store(false);
614 return ret;
615 }
616 if (ms_context_->GetThreadNum() == 1 && !context_->IsCpuFloat16Enabled() && is_control_flow_) {
617 context_->DeleteThreadPool();
618 (void)context_->CreateThreadPool(is_control_flow_);
619 }
620
621 infer_along_running_ = infer_along_running_ && !is_control_flow_ && !is_train_session_ && (is_infershape_ != RET_OK);
622 InitGraphInOutTensorsMap(model);
623
624 non_tail_call_kernels_ = scheduler.NonTailCallNodes();
625
626 ret = PrepareKernels(model);
627 if (ret != RET_OK) {
628 MS_LOG(ERROR) << "Prepare kernels failed: " << ret;
629 is_running_.store(false);
630 return ret;
631 }
632
633 if (is_train_session_ || is_prepare_session_) {
634 is_running_.store(false);
635 return RET_OK;
636 }
637
638 ret = InitExecutor();
639 if (ret != RET_OK) {
640 MS_LOG(ERROR) << "InitExecutor failed: " << ret;
641 is_running_.store(false);
642 return ret;
643 }
644
645 MarkSharedWeight(kernels_);
646 FreePackOpWeight(kernels_);
647
648 infer_along_running_ = infer_along_running_ && (runtime_allocator_ == nullptr);
649 if (infer_along_running_) {
650 this->context_->set_infer_checker(InferCheckerAll);
651 }
652 is_running_.store(false);
653 return RET_OK;
654 }
655
656 bool LiteSession::IsIsolatedSubGraph(const kernel::KernelExec *kernel) {
657 auto cur_in_tensors = kernel->in_tensors();
658 for (auto cur_kernel : this->kernels_) {
659 if (cur_kernel == kernel) {
660 continue;
661 }
662 auto out_tensors = cur_kernel->out_tensors();
663 for (auto tensor : cur_in_tensors) {
664 if (IsContain(out_tensors, tensor)) {
665 return false;
666 }
667 }
668 }
669 return true;
670 }
671
672 int LiteSession::SetAllocatorForDelegateKernels(const kernel::KernelExec *kernel) {
673 if (kernel == nullptr) {
674 return RET_NULL_PTR;
675 }
676 for (auto input : kernel->in_tensors()) {
677 CHECK_NULL_RETURN(input);
678 input->set_allocator(this->context_->allocator);
679 }
680 for (auto output : kernel->out_tensors()) {
681 CHECK_NULL_RETURN(output);
682 output->set_allocator(this->context_->allocator);
683 }
684 return RET_OK;
685 }
686
687 int LiteSession::CreateNNRTDelegate() {
688 #ifdef SUPPORT_NNRT
689 auto iter = std::find_if(context_->device_list_.begin(), context_->device_list_.end(),
690 [](DeviceContext &device) { return device.device_type_ == lite::DT_NNRT; });
691   if (iter == context_->device_list_.end()) {
692     MS_LOG(ERROR) << "No NNRT device info found";
693 return RET_ERROR;
694 }
695
696 delegate_ = std::make_shared<NNRTDelegate>(iter->device_info_.nnrt_device_info_);
697 if (delegate_ == nullptr) {
698 MS_LOG(ERROR) << "New NNRT delegate failed";
699 return RET_ERROR;
700 }
701 delegate_device_type_ = DT_NNRT;
702 this->context_->delegate = delegate_;
703 #endif
704 return RET_OK;
705 }
706
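// Dumps the subgraph topology to ./graph.dot in Graphviz format, labelling each edge with the
// connecting tensor's shape ("N*H*W*C"). The file can then be rendered with, for example:
//   dot -Tpng graph.dot -o graph.png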
707 int LiteSession::DrawGraph(kernel::SubGraphKernel *graph) {
708 if (graph == nullptr) {
709 return RET_NULL_PTR;
710 }
711 // create and open .dot file
712 std::ofstream dotfile;
713 dotfile.open("./graph.dot", std::ios::out | std::ios::trunc);
714 if (!dotfile.is_open()) {
715 MS_LOG(ERROR) << "create or open dotfile failed.";
716 return RET_ERROR;
717 }
718 // write data to .dot file
719 dotfile << "digraph " << graph->name() << " {\n";
720 for (auto node : graph->nodes()) {
721 std::replace(node->name().begin(), node->name().end(), '/', '-');
722 // first node
723 if (node->in_kernels().empty()) {
724 dotfile << "\tinput->" << node->name();
725 dotfile << "[label=\"";
726 std::vector<int> input_shapes = node->in_tensors().front()->shape();
727 for (auto iter = input_shapes.begin(); iter != input_shapes.end(); iter++) {
728 if (iter == input_shapes.end() - 1) {
729 dotfile << *iter;
730 } else {
731 dotfile << *iter << "*";
732 }
733 }
734 dotfile << "\"]\n";
735 continue;
736 }
737
738 for (size_t i = 0; i < node->in_kernels().size(); ++i) {
739 dotfile << "\t" << node->in_kernels()[i]->name() << "->" << node->name() << "[label=\"";
740 std::vector<int32_t> in_kernel_shapes = node->in_tensors()[i]->shape();
741
742 for (auto iter = in_kernel_shapes.begin(); iter != in_kernel_shapes.end(); iter++) {
743 if (iter == in_kernel_shapes.end() - 1) {
744 dotfile << *iter;
745 } else {
746 dotfile << *iter << "*";
747 }
748 }
749 dotfile << "\"]\n";
750 }
751 // last node
752 if (node->out_kernels().empty()) {
753 dotfile << "\t" << node->name() << "->output";
754 dotfile << "[label=\"";
755 std::vector<int32_t> out_shapes = node->out_tensors().front()->shape();
756 for (auto iter = out_shapes.begin(); iter != out_shapes.end(); iter++) {
757 if (iter == out_shapes.end() - 1) {
758 dotfile << *iter;
759 } else {
760 dotfile << *iter << "*";
761 }
762 }
763 dotfile << "\"]\n";
764 }
765 }
766 dotfile.close();
767 return RET_OK;
768 }
769
770 void LiteSession::SetInitRefCountOfPartialSubgraphInputs(const Model *model) {
771 if (model == nullptr) {
772 return;
773 }
774 constexpr size_t kFirstPartialSubgraphIndex = 1U;
775 const auto &sub_graphs = model->graph_.sub_graphs_;
776 // Find out partial subgraph's inputs and set their 'init_ref_count' to INT_MAX to avoid trigger 'FreeData()'.
777 // Here start with index:1 to skip main subgraph.
778 for (size_t i = kFirstPartialSubgraphIndex; i < sub_graphs.size(); i++) {
779 for (auto index : sub_graphs[i]->input_indices_) {
780 tensors_[index]->set_init_ref_count(INT_MAX);
781 }
782 }
783 }
784
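// Links kernels (in_kernels/out_kernels) within and across subgraphs, initializes tensor
// init_ref_count, assigns the session allocator to delegate kernels, runs the packed-node pass on
// CPU subgraph nodes and finally calls Prepare() on every kernel.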
785 int LiteSession::PrepareKernels(const Model *model) {
786 // find kernel's in_kernels and out_kernels in every subgraph
787 kernel::KernelExecUtil::FindAllInoutKernelsInSubgraphKernel(this->kernels_);
788 // find in_kernels and out_kernels between subgraph kernels
789 kernel::KernelExecUtil::FindAllInoutKernels(this->kernels_);
790
791 // init init_ref_count for subgraphs and kernels
792 auto ret = SetTensorInitRefCount();
793 if (ret != RET_OK) {
794 MS_LOG(ERROR) << "SetTensorInitRefCount failed.";
795 return ret;
796 }
797   // When running a control-flow model, if a partial subgraph's input is also its output,
798   // 'init_ref_count' is not correctly initialized in 'SetTensorInitRefCount()', which would cause the
799   // input tensor's data pointer to be referenced after it is reset to NULL once its ref_count drops to 0.
800   // Here we set the partial input tensors' 'init_ref_count' to INT_MAX to avoid that case.
801 SetInitRefCountOfPartialSubgraphInputs(model);
802
803 for (auto kernel : this->kernels_) {
804 if (kernel->desc().arch == kernel::kDelegate) {
805 ret = SetAllocatorForDelegateKernels(kernel);
806 if (ret != RET_OK) {
807 MS_LOG(ERROR) << "Prepare kernel " << kernel->name() << " failed: " << ret;
808 return ret;
809 }
810 }
811
812 if (!is_train_session_ && kernel->desc().arch != kernel::kDelegate && kernel->desc().arch != kernel::kGPU) {
813 auto subgraph_kernel = static_cast<kernel::SubGraphKernel *>(kernel);
814 if (subgraph_kernel == nullptr) {
815 MS_LOG(ERROR) << "kernel: " << kernel->name() << " not is subgraph kernel.";
816 return RET_ERROR;
817 }
818 for (auto &node : subgraph_kernel->nodes()) {
819 ret = PackKernelExec(node, tensors_);
820 if (ret != RET_OK) {
821 MS_LOG(ERROR) << "Pack KernelExec failed.";
822 return ret;
823 }
824 ret = node->Prepare();
825 if (ret != RET_OK) {
826 MS_LOG(ERROR) << "node: " << node->name() << " prepare failed.";
827 return ret;
828 }
829 }
830 }
831
832 #if (defined DEBUG) && (defined MSLITE_EXPORT_COMPUTE_IR)
833 auto subgraph_kernel = static_cast<kernel::SubGraphKernel *>(kernel);
834 ret = DrawGraph(subgraph_kernel);
835 if (ret != RET_OK) {
836 MS_LOG(ERROR) << "graph: " << kernel->name() << " draw failed.";
837 }
838 #endif
839
840 ret = kernel->Prepare();
841 if (ret != RET_OK) {
842 MS_LOG(ERROR) << "Prepare kernel " << kernel->name() << " failed: " << ret;
843 return ret;
844 }
845 }
846 return RET_OK;
847 }
848
849 int LiteSession::SetTensorInitRefCount() {
850 for (auto *kernel : this->kernels_) {
851 kernel->InitOutTensorInitRefCount();
852 if (kernel->desc().arch == kernel::kDelegate) {
853 continue;
854 }
855 if (IsIsolatedSubGraph(kernel)) {
856 static_cast<kernel::SubGraphKernel *>(kernel)->InitInputTensorInitRefCount();
857 }
858 }
859
860 if (!non_tail_call_kernels_.empty()) {
861 return SetNonTaiCallSubgraphOutputInitRefCount();
862 }
863 return RET_OK;
864 }
865
866 int LiteSession::SetNonTaiCallSubgraphOutputInitRefCount() {
867 for (auto call_kernel : non_tail_call_kernels_) {
868 auto call_output = call_kernel->out_tensors();
869 auto all_out_subgraphs = kernel::KernelExecUtil::GetCallInputPartialsCorrespondingOutputSubgraph(call_kernel);
870 for (auto subgraph : all_out_subgraphs) {
871 MS_CHECK_TRUE_MSG(subgraph->out_tensors().size() == call_output.size(), RET_ERROR,
872 "non tail call output size is not same as subgraph output.");
873 std::set<Tensor *> subgraph_outputs_set{};
874 for (size_t i = 0; i < subgraph->out_tensors().size(); ++i) {
875 auto output = subgraph->out_tensors()[i];
876 if (subgraph_outputs_set.find(output) == subgraph_outputs_set.end()) {
877 output->set_init_ref_count(1);
878 (void)subgraph_outputs_set.insert(output);
879 } else {
880 output->set_init_ref_count(output->init_ref_count() + 1);
881 }
882 }
883 }
884 }
885 return RET_OK;
886 }
887
888 std::vector<mindspore::lite::Tensor *> LiteSession::GetInputs() const { return this->input_vec_; }
889
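// RunGraph is not re-entrant: the is_running_ flag rejects concurrent calls. Inputs are validated
// against the shapes recorded at compile/resize time, and the optional 'before'/'after' callbacks
// are forwarded to the executor, which invokes them around each kernel.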
890 int LiteSession::RunGraph(const KernelCallBack &before, const KernelCallBack &after) {
891 bool expected = false;
892 if (!is_running_.compare_exchange_strong(expected, true)) {
893 MS_LOG(ERROR) << "Not support multi-threading";
894 return RET_ERROR;
895 }
896 #if defined(PARALLEL_INFERENCE) && defined(ENABLE_MINDRT)
897 ParallelThreadPoolManager::GetInstance()->ActivatePool(runner_id_, worker_id_);
898 #endif
899 STATUS ret = CheckTensorsInvalid(inputs_);
900 if (MS_UNLIKELY(ret != RET_OK)) {
901 is_running_.store(false);
902 MS_LOG(ERROR) << "CheckInputs failed.";
903 return ret;
904 }
905 ret = CheckGraphInputShapes(inputs_, input_shape_map_);
906 if (MS_UNLIKELY(ret != RET_OK)) {
907 is_running_.store(false);
908 MS_LOG(ERROR) << "Check graph input shapes failed.";
909 return ret;
910 }
911 MS_ASSERT(this->context_ != nullptr);
912 ret = executor_->Run(this->inputs_, this->outputs_, this->kernels_, before, after);
913 if (MS_UNLIKELY(ret != RET_OK)) {
914 MS_LOG(ERROR) << "RunGraph failed : " << ret;
915 }
916 if (infer_along_running_) {
917 this->context_->set_infer_checker(InferCheckerInput);
918 for (auto input : inputs_) {
919 input->set_shape_changed(false);
920 }
921 }
922 #if defined(PARALLEL_INFERENCE) && defined(ENABLE_MINDRT)
923 ParallelThreadPoolManager::GetInstance()->SetFreePool(runner_id_, worker_id_);
924 #endif
925 is_running_.store(false);
926 return ret;
927 }
928
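// Reads the inner model-parallel-runner section of config_info_ (runner id, worker id, worker
// number and per-worker thread limits) and, when thread-pool sharing is enabled, initializes the
// ParallelThreadPoolManager with these settings.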
929 int LiteSession::InitSharedThreadPool() {
930 int workers_num = -1;
931 int remaining_thread_num = -1;
932 int thread_num_limit = -1;
933 bool enable_shared_pool = false;
934 if (config_info_ != nullptr) {
935 auto runner_info_item = config_info_->find(kInnerModelParallelRunnerSection);
936 if (runner_info_item != config_info_->end()) {
937 auto item_runner = runner_info_item->second.find(kInnerRunnerIDKey);
938 if (item_runner != runner_info_item->second.end()) {
939 runner_id_ = runner_info_item->second.at(kInnerRunnerIDKey);
940 }
941 auto shared_pool_item = runner_info_item->second.find(kEnableSharedThreadPoolKey);
942 if (shared_pool_item != runner_info_item->second.end() &&
943 runner_info_item->second.at(kEnableSharedThreadPoolKey) == "true") {
944 workers_num = std::atoi(runner_info_item->second.at(kInnerWorkerNumKey).c_str());
945 remaining_thread_num = std::atoi(runner_info_item->second.at(kThreadNumRemainingPerWorkerKey).c_str());
946 thread_num_limit = std::atoi(runner_info_item->second.at(kThreadNumLimitPerWorkerKey).c_str());
947 worker_id_ = std::atoi(runner_info_item->second.at(kInnerModelIDKey).c_str());
948 enable_shared_pool = true;
949 }
950 }
951 }
952 MS_LOG(INFO) << "runner id: " << runner_id_ << " enable_shared_pool: " << enable_shared_pool
953 << " workers_num: " << workers_num << " thread_num_limit: " << thread_num_limit
954 << " remaining_thread_num: " << remaining_thread_num;
955 #if defined(PARALLEL_INFERENCE) && defined(ENABLE_MINDRT)
956 ParallelThreadPoolManager::GetInstance()->Init(enable_shared_pool, runner_id_, workers_num, remaining_thread_num,
957 thread_num_limit);
958 #endif
959 return RET_OK;
960 }
961
962 int LiteSession::InitContext(const std::shared_ptr<InnerContext> &context) {
963 if (context == nullptr) {
964 MS_LOG(ERROR) << "context is nullptr";
965 return RET_NULL_PTR;
966 }
967 this->context_ = context;
968 context_->SetBindRunnerId(runner_id_);
969 auto ret = this->context_->Init();
970 if (ret != RET_OK) {
971 MS_LOG(ERROR) << "Init Context failed";
972 return ret;
973 }
974
975 ms_context_ = MSContextFromContext(context);
976 if (ms_context_ == nullptr) {
977 MS_LOG(ERROR) << "transfer context to ms context failed.";
978 return RET_NULL_PTR;
979 }
980
981 #ifdef MS_COMPILE_IOS
982 context_->thread_pool_->SetMaxSpinCount(kDefaulLiteIosSpinCount);
983 context_->thread_pool_->SetMinSpinCount(kDefaulLiteIosSpinCount);
984 #endif
985
986 #if defined(PARALLEL_INFERENCE) && defined(ENABLE_MINDRT)
987 if (context_->inter_op_parallel_num_ > 1 && !runner_id_.empty() &&
988 ParallelThreadPoolManager::GetInstance()->GetEnableSharedThreadPool(runner_id_)) {
989 MS_LOG(INFO) << "Enable subgraph parallelism and enable thread pool sharing";
990 ParallelThreadPoolManager::GetInstance()->BindPoolToRunner(context_->thread_pool_, config_info_);
991 }
992 #endif
993
994 return RET_OK;
995 }
996
997 int LiteSession::InitAscend(const std::shared_ptr<InnerContext> &context) {
998 #if !defined(__ANDROID__) && !defined(MS_COMPILE_OHOS)
999 if (!context->IsDeviceTypeEnabled(DT_ASCEND)) {
1000 MS_LOG(INFO) << "There is no Ascend device type.";
1001 return RET_OK;
1002 }
1003 return mindspore::AscendKernelPlugin::GetInstance().Register();
1004 #else
1005 return RET_OK;
1006 #endif
1007 }
1008
1009 int LiteSession::CreateTensorRTDelegate() {
1010 #ifdef GPU_TENSORRT
1011 std::string cache_model_path;
1012 std::string serialize_path;
1013 size_t vocab_size = 0;
1014 size_t device_cache_size = 0;
1015 std::map<std::string, std::string> input_ranges;
1016 if (config_info_ != nullptr) {
1017 auto input_ranges_iter = config_info_->find(kGPUContextSection);
1018 if (input_ranges_iter != config_info_->end()) {
1019 input_ranges = input_ranges_iter->second;
1020 }
1021 auto ms_cache_iter = config_info_->find(kMSCacheSection);
1022 if (ms_cache_iter != config_info_->end()) {
1023 auto ms_cache = ms_cache_iter->second;
1024 auto model_path_iter = ms_cache.find(kMSCacheModelPathKey);
1025 if (model_path_iter != ms_cache.end()) {
1026 cache_model_path = model_path_iter->second;
1027 }
1028
1029 auto vocab_size_iter = ms_cache.find(kMSCacheVocabSizeKey);
1030 if (vocab_size_iter != ms_cache.end()) {
1031 auto vocab_size_opt = GenericParseValue<size_t>(vocab_size_iter->second);
1032 if (!vocab_size_opt.IsNone()) {
1033 vocab_size = vocab_size_opt.Get();
1034 }
1035 }
1036
1037 auto device_cache_size_iter = ms_cache.find(kMSCacheDeviceSizeKey);
1038 if (device_cache_size_iter != ms_cache.end()) {
1039 auto device_cache_size_opt = GenericParseValue<size_t>(device_cache_size_iter->second);
1040 if (!device_cache_size_opt.IsNone()) {
1041 device_cache_size = device_cache_size_opt.Get();
1042 }
1043 }
1044
1045 auto serialize_path_iter = ms_cache.find(kMSCacheSerializePathKey);
1046 if (serialize_path_iter != ms_cache.end()) {
1047 serialize_path = serialize_path_iter->second;
1048 }
1049 }
1050 }
1051
1052 delegate_ = std::make_shared<TensorRTDelegate>(ms_context_, cache_model_path, vocab_size, device_cache_size,
1053 serialize_path, input_ranges);
1054 if (delegate_ == nullptr) {
1055 MS_LOG(ERROR) << "New tensorrt delegate_ failed";
1056 return RET_ERROR;
1057 }
1058 delegate_device_type_ = DT_GPU;
1059 this->context_->delegate = delegate_;
1060 #endif
1061 return RET_OK;
1062 }
1063
1064 int LiteSession::CreateNPUDelegate() {
1065 #ifdef SUPPORT_NPU
1066 std::string model_cache_dir;
1067 if (config_info_ != nullptr) {
1068 auto common_context_iter = config_info_->find(kCommonContextSection);
1069 if (common_context_iter != config_info_->end()) {
1070 auto common_context = common_context_iter->second;
1071 auto model_cache_dir_iter = common_context.find(kGraphCompilerCacheDirKey);
1072 if (model_cache_dir_iter != common_context.end()) {
1073 model_cache_dir = model_cache_dir_iter->second;
1074 }
1075 }
1076 }
1077 delegate_ = std::make_shared<NPUDelegate>(context_->GetDeviceInfo(DT_NPU).npu_device_info_, model_cache_dir);
1078 if (delegate_ == nullptr) {
1079 MS_LOG(ERROR) << "New delegate_ failed";
1080 return RET_ERROR;
1081 }
1082 delegate_device_type_ = DT_NPU;
1083 this->context_->delegate = delegate_;
1084 #endif
1085 return RET_OK;
1086 }
1087
1088 int LiteSession::CreateNNAPIDelegate() {
1089 #ifdef SUPPORT_NNAPI
1090 bool enable_fp16 =
1091 context_->IsCpuFloat16Enabled() || context_->IsGpuFloat16Enabled() || context_->IsNpuFloat16Enabled();
1092 bool only_acc_device = !context_->IsDeviceTypeEnabled(DT_CPU) && !context_->IsDeviceTypeEnabled(DT_GPU) &&
1093 context_->IsDeviceTypeEnabled(DT_NPU);
1094 bool disable_cpu = !context_->IsDeviceTypeEnabled(DT_CPU);
1095 auto providers = context_->GetProviders();
1096 std::vector<std::string> specified_devices(providers.begin(), providers.end());
1097 delegate_ = std::make_shared<NNAPIDelegate>(enable_fp16, only_acc_device, disable_cpu, specified_devices);
1098 if (delegate_ == nullptr) {
1099 MS_LOG(ERROR) << "New delegate_ failed";
1100 return RET_ERROR;
1101 }
1102 this->context_->delegate = delegate_;
1103 #endif
1104 return RET_OK;
1105 }
1106
1107 int LiteSession::CreateCoreMLDelegate() {
1108 #ifdef ENABLE_COREML
1109 delegate_ = std::make_shared<CoreMLDelegate>();
1110 if (delegate_ == nullptr) {
1111 MS_LOG(ERROR) << "New delegate_ failed";
1112 return RET_ERROR;
1113 }
1114 this->context_->delegate = delegate_;
1115 #endif
1116 return RET_OK;
1117 }
1118
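// Delegate selection order: a delegate already set on the context wins; otherwise an explicitly
// requested built-in delegate (NNAPI/CoreML) is created; otherwise the enabled device type picks
// NPU, TensorRT (GPU) or NNRT. A delegate whose Init() reports kLiteNotSupport is silently dropped.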
1119 int LiteSession::InitDelegate() {
1120 #ifndef DELEGATE_CLIP
1121 int ret = RET_OK;
1122 if (context_->delegate != nullptr) {
1123 delegate_ = context_->delegate;
1124 delegate_device_type_ = -1;
1125 } else if (context_->delegate_mode_ != kNoDelegate) {
1126 switch (context_->delegate_mode_) {
1127 case kNNAPI:
1128 ret = CreateNNAPIDelegate();
1129 break;
1130 case kCoreML:
1131 ret = CreateCoreMLDelegate();
1132 break;
1133 default:
1134 MS_LOG(ERROR) << "Unsupported built-in delegate mode: " << context_->delegate_mode_;
1135 return RET_ERROR;
1136 }
1137 } else {
1138 if (context_->IsDeviceTypeEnabled(DT_NPU)) {
1139 ret = CreateNPUDelegate();
1140 } else if (context_->IsDeviceTypeEnabled(DT_GPU)) {
1141 ret = CreateTensorRTDelegate();
1142 } else if (context_->IsDeviceTypeEnabled(DT_NNRT)) {
1143 ret = CreateNNRTDelegate();
1144 }
1145 }
1146
1147 if (ret != RET_OK) {
1148 return ret;
1149 }
1150 if (delegate_ != nullptr) {
1151 auto delegate_ret = delegate_->Init();
1152 if (delegate_ret == mindspore::kLiteNotSupport) {
1153 MS_LOG(DEBUG) << "Delegate is unsupported";
1154 delegate_.reset();
1155 delegate_ = nullptr;
1156 } else if (delegate_ret == mindspore::kSuccess) {
1157 MS_LOG(INFO) << "Delegate init successfully";
1158 } else {
1159 MS_LOG(ERROR) << "Delegate init failed";
1160 return RET_ERROR;
1161 }
1162 }
1163 #endif
1164 return RET_OK;
1165 }
1166
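// Session initialization order: instruction-set check -> shared thread pool -> context ->
// Ascend kernel plugin -> delegate -> GPU (OpenCL) runtime. Any failure aborts initialization
// and clears the is_running_ guard.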
1167 int LiteSession::Init(const std::shared_ptr<InnerContext> &context) {
1168 bool expected = false;
1169 if (!is_running_.compare_exchange_strong(expected, true)) {
1170 MS_LOG(ERROR) << "Not support multi-threading";
1171 return RET_ERROR;
1172 }
1173
1174 if (!PlatformInstructionSetSupportCheck()) {
1175 MS_LOG(ERROR) << "Device not support isa";
1176 is_running_.store(false);
1177 return RET_NOT_SUPPORT;
1178 }
1179
1180 auto status = InitSharedThreadPool();
1181 if (status != RET_OK) {
1182 MS_LOG(ERROR) << "init Shared thread pool failed";
1183 is_running_.store(false);
1184 return status;
1185 }
1186 auto ret = InitContext(context);
1187 if (ret != RET_OK) {
1188 MS_LOG(ERROR) << "Init Context failed";
1189 is_running_.store(false);
1190 return ret;
1191 }
1192
1193 ret = InitAscend(context);
1194 if (ret != RET_OK) {
1195 MS_LOG(ERROR) << "Open Ascend kernel plugin failed";
1196 is_running_.store(false);
1197 return ret;
1198 }
1199
1200 ret = InitDelegate();
1201 if (ret != RET_OK) {
1202 MS_LOG(ERROR) << "Init delegate failed.";
1203 is_running_.store(false);
1204 return ret;
1205 }
1206
1207 ret = InitGPURuntime();
1208 if (ret != RET_OK) {
1209 MS_LOG(ERROR) << "Init GPU runtime failed.";
1210 is_running_.store(false);
1211 return ret;
1212 }
1213
1214 is_running_.store(false);
1215 return RET_OK;
1216 }
1217
1218 void LiteSession::BindThread(bool if_bind) {
1219 // Abandoned code
1220 // Bind thread in executor
1221 return;
1222 }
1223
1224 LiteSession::~LiteSession() {
1225 delegate_.reset();
1226 bool expected = false;
1227 if (!is_running_.compare_exchange_strong(expected, true)) {
1228 MS_LOG(ERROR) << "Not support multi-threading";
1229 return;
1230 }
1231 for (auto *kernel : kernels_) {
1232 delete kernel;
1233 kernel = nullptr;
1234 }
1235 for (auto tensor : tensors_) {
1236 if (tensor == nullptr) {
1237 continue;
1238 }
1239     // Data of a const tensor which doesn't own its data will not be freed here,
1240     // e.g. const data from the meta_graph, which is freed when the meta_graph is freed.
1241 if (tensor->IsConst() && !tensor->own_data()) {
1242 tensor->set_data(nullptr);
1243 }
1244
1245 /* situation : user set graph-output-tensor data */
1246 if (tensor->IsGraphOutput() && tensor->allocator() == nullptr) {
1247 tensor->set_data(nullptr);
1248 }
1249 delete tensor;
1250 tensor = nullptr;
1251 }
1252
1253 for (auto item : isolate_graph_output_map_) {
1254 auto isolate_output_tensor = item.first;
1255 isolate_output_tensor->set_data(nullptr);
1256 delete isolate_output_tensor;
1257 isolate_output_tensor = nullptr;
1258 }
1259
1260 for (auto map : isolate_input_map_) {
1261 auto isolate_input_tensor = map.first;
1262 isolate_input_tensor->set_data(nullptr);
1263 delete isolate_input_tensor;
1264 }
1265
1266   // Tensor pointers held in input_map_ and the output maps are freed via tensors_
1267 input_map_.clear();
1268 input_shape_map_.clear();
1269 output_node_map_.clear();
1270 output_tensor_map_.clear();
1271 input_vec_.clear();
1272 isolate_graph_output_map_.clear();
1273
1274 delete this->executor_;
1275 this->executor_ = nullptr;
1276 #ifdef GPU_OPENCL
1277 delete opencl_runtime_wrapper_;
1278 opencl_runtime_wrapper_ = nullptr;
1279 #endif
1280 delete ms_context_;
1281 ms_context_ = nullptr;
1282 #if defined(PARALLEL_INFERENCE) && defined(ENABLE_MINDRT)
1283 ParallelThreadPoolManager::GetInstance()->ResetParallelThreadPoolManager(runner_id_);
1284 #endif
1285 lite::PackWeightManager::GetInstance()->FreePackWeight(runner_id_, model_id_);
1286 if (model_ != nullptr && is_shared_weight_) {
1287 model_->buf = nullptr;
1288 }
1289 delete (model_);
1290 model_ = nullptr;
1291 #ifdef SUPPORT_NNRT
1292 NNRTAllocator::GetInstance()->ClearFreeList();
1293 #endif
1294 is_running_.store(false);
1295 }
1296
1297 mindspore::lite::Tensor *LiteSession::GetInputsByTensorName(const std::string &name) const {
1298 auto ret = input_map_.find(name);
1299 if (ret == input_map_.end()) {
1300 MS_LOG(WARNING) << "Tensor " << name << " is not exist";
1301 return nullptr;
1302 }
1303 return ret->second;
1304 }
1305
1306 std::vector<mindspore::lite::Tensor *> LiteSession::GetOutputsByNodeName(const std::string &node_name) const {
1307 auto ret = output_node_map_.find(node_name);
1308 if (ret == output_node_map_.end()) {
1309 MS_LOG(WARNING) << "Node " << node_name << " is not an output node";
1310 std::vector<mindspore::lite::Tensor *> empty_ret;
1311 return empty_ret;
1312 }
1313 return ret->second;
1314 }
1315
1316 std::vector<std::string> LiteSession::GetOutputTensorNames() const { return this->output_tensor_names_; }
1317
1318 mindspore::lite::Tensor *LiteSession::GetOutputByTensorName(const std::string &tensor_name) const {
1319 auto ret = output_tensor_map_.find(tensor_name);
1320 if (ret == output_tensor_map_.end()) {
1321 MS_LOG(WARNING) << "Tensor " << tensor_name << " is not an output node";
1322 return nullptr;
1323 }
1324 return ret->second;
1325 }
1326
1327 std::unordered_map<std::string, mindspore::lite::Tensor *> LiteSession::GetOutputs() const {
1328 return this->output_tensor_map_;
1329 }
1330
1331 int LiteSession::UpdateInputShapeMap() {
1332 for (auto input : inputs_) {
1333 MS_CHECK_TRUE_MSG(input != nullptr, RET_ERROR, "graph input tensor is nullptr.");
1334 if (input_shape_map_.find(input) != input_shape_map_.end()) {
1335 input_shape_map_.at(input) = input->shape();
1336 } else {
1337 MS_LOG(ERROR) << "can't find " << input->tensor_name() << " in input_shape_map";
1338 return RET_ERROR;
1339 }
1340 }
1341 return RET_OK;
1342 }
1343
1344 int LiteSession::ResizeInputs(const std::vector<mindspore::lite::Tensor *> &inputs,
1345 const std::vector<std::vector<int>> &dims) {
1346 if (inputs.size() != inputs_.size()) {
1347 MS_LOG(ERROR) << "Inputs size " << inputs.size() << " is not equal to " << inputs_.size();
1348 return RET_PARAM_INVALID;
1349 }
1350
1351 if (dims.size() != inputs.size()) {
1352 MS_LOG(ERROR) << "Input dims size " << dims.size() << " is not equal to the inputs size " << inputs.size();
1353 return RET_PARAM_INVALID;
1354 }
1355
1356 for (size_t i = 0; i < inputs.size(); ++i) {
1357 if (inputs[i] != inputs_[i]) {
1358 MS_LOG(ERROR) << "Input[" << i << "] tensor is not equal to the inputs have been saved!";
1359 return RET_PARAM_INVALID;
1360 }
1361 inputs_[i]->FreeData();
1362 if (infer_along_running_ && !inputs_[i]->get_shape_changed()) {
1363 inputs_[i]->set_shape_changed(dims[i] != inputs_[i]->shape());
1364 }
1365 inputs_[i]->set_shape(dims[i]);
1366 }
1367 if (!is_train_session_) {
1368 executor_->Resize(inputs, dims);
1369 }
1370 return RET_OK;
1371 }
1372
1373 void LiteSession::ResetInputsShape(const std::vector<std::vector<int>> &dims) {
1374 for (size_t i = 0; i < inputs_.size(); ++i) {
1375 inputs_[i]->FreeData();
1376 inputs_[i]->set_shape(dims[i]);
1377 inputs_[i]->set_shape_changed(false);
1378 }
1379 }
1380
1381 int LiteSession::ReSizeKernels(const std::vector<kernel::KernelExec *> &kernels,
1382 const std::unordered_map<Tensor *, Tensor *> &isolate_input_map) {
1383 for (auto kernel : kernels) {
1384 if (kernel == nullptr) {
1385 MS_LOG(ERROR) << "input kernel is nullptr!";
1386 return RET_ERROR;
1387 }
1388 auto ret = RET_OK;
1389 if (kernel->desc().arch == kernel::kDelegate) {
1390 ret = kernel->ReSize();
1391 } else {
1392 // resize subgraph inputs
1393 auto sub_graph_kernel = reinterpret_cast<kernel::SubGraphKernel *>(kernel);
1394 for (auto input : sub_graph_kernel->in_tensors()) {
1395 if (isolate_input_map.find(input) != isolate_input_map.end()) {
1396 input->set_shape(isolate_input_map.at(input)->shape());
1397 }
1398 }
1399 if (kernel->subgraph_type() == kernel::kGpuFp16SubGraph || kernel->subgraph_type() == kernel::kGpuFp32SubGraph) {
1400 #ifdef GPU_OPENCL
1401 auto sub_graph = reinterpret_cast<kernel::OpenCLSubGraph *>(kernel);
1402 ret = sub_graph->ReSize();
1403 #endif
1404 } else {
1405 auto sub_graph = reinterpret_cast<kernel::SubGraphKernel *>(kernel);
1406 ret = sub_graph->ReSize();
1407 }
1408 }
1409 if (ret == RET_INFER_INVALID) {
1410 MS_LOG(INFO) << "InferShape is interrupted";
1411 continue;
1412 }
1413 if (ret != RET_OK) {
1414 MS_LOG(ERROR) << "ReSize node " << kernel->name() << " failed";
1415 return RET_ERROR;
1416 }
1417 }
1418 return RET_OK;
1419 }
1420
1421 void LiteSession::SynIsolateInOutputDataType() {
1422 for (auto &tensor_map : isolate_input_map_) {
1423 auto dst_tensor = tensor_map.second;
1424 auto src_tensor = tensor_map.first;
1425
1426 src_tensor->set_data_type(dst_tensor->data_type());
1427 }
1428
1429 for (auto &tensor_map : isolate_graph_output_map_) {
1430 auto dst_tensor = tensor_map.second;
1431 auto src_tensor = tensor_map.first;
1432
1433 src_tensor->set_data_type(dst_tensor->data_type());
1434 }
1435 }
1436
1437 int LiteSession::BindGLTexture2DMemory(const std::map<std::string, unsigned int> &inputGLTexture,
1438 std::map<std::string, unsigned int> *outputGLTexture) {
1439 #ifdef GPU_OPENCL
1440 if (!this->context_->GetDeviceInfo(DT_GPU).gpu_device_info_.enable_gl_texture_) {
1441 MS_LOG(ERROR) << "the context isn't set to support OpenGL texture";
1442 return RET_ERROR;
1443 }
1444 for (const auto &[name, GLTexture_id] : inputGLTexture) {
1445 auto iter = input_map_.find(name);
1446 if (iter == input_map_.end()) {
1447 MS_LOG(ERROR) << "the in tensor name " << name << "is not match any model input name";
1448 return RET_ERROR;
1449 }
1450 auto in_data = iter->second->MutableData();
1451 if (in_data == nullptr) {
1452 std::cout << "MallocData for input Tensor failed" << std::endl;
1453 return RET_ERROR;
1454 }
1455 memcpy(in_data, &GLTexture_id, sizeof(cl_GLuint));
1456 iter->second->set_data_type(kNumberTypeGLUInt);
1457 }
1458 for (auto [name, GLTexture_id] : *outputGLTexture) {
1459 auto iter = output_tensor_map_.find(name);
1460 if (iter == output_tensor_map_.end()) {
1461 MS_LOG(ERROR) << "the out tensor name " << name << "is not match any model output name";
1462 return RET_ERROR;
1463 }
1464 auto out_data = iter->second->MutableData();
1465 if (out_data == nullptr) {
1466 std::cout << "MallocData for input Tensor failed" << std::endl;
1467 return RET_ERROR;
1468 }
1469 memcpy(out_data, &GLTexture_id, sizeof(cl_GLuint));
1470 iter->second->set_data_type(kNumberTypeGLUInt);
1471 }
1472
1473 #ifdef ENABLE_MINDRT
1474 SynIsolateInOutputDataType(); // Synchronized input/output with isolate input/output data types
1475 #endif
1476
1477 if (this->kernels_.size() != 1) {
1478 MS_LOG(ERROR) << "Now only support one opencl subgraph if you want to input opengl texture";
1479 return RET_ERROR;
1480 }
1481 auto opencl_subgraph = reinterpret_cast<kernel::OpenCLSubGraph *>(kernels_.front());
1482 for (size_t i = 0; i < outputs_.size(); i++) {
1483 (opencl_subgraph)->set_out_tensor(outputs_[i], i);
1484 }
1485 for (auto node : opencl_subgraph->out_nodes()) {
1486 node->set_out_tensors(opencl_subgraph->out_tensors());
1487 }
1488 #endif
1489 return RET_OK;
1490 }
1491
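// Resizes the graph inputs to new shapes and re-infers the kernels (unless shape inference is
// deferred to run time); on failure the previous shapes are restored.
//
// Illustrative usage (a sketch; the single input and the 1x224x224x3 shape are assumptions):
//   auto inputs = session->GetInputs();
//   std::vector<std::vector<int>> dims = {{1, 224, 224, 3}};
//   if (session->Resize(inputs, dims) != RET_OK) {
//     MS_LOG(ERROR) << "resize failed";
//   }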
1492 int LiteSession::Resize(const std::vector<mindspore::lite::Tensor *> &inputs,
1493 const std::vector<std::vector<int>> &dims) {
1494 bool expected = false;
1495 if (!is_running_.compare_exchange_strong(expected, true)) {
1496 MS_LOG(ERROR) << "Not support multi-threading";
1497 return RET_ERROR;
1498 }
1499 std::vector<std::vector<int>> old_dims;
1500 for (size_t i = 0; i < inputs_.size(); ++i) {
1501 old_dims.push_back(inputs_[i]->shape());
1502 }
1503 auto ret = ResizeInputs(inputs, dims);
1504 if (ret != RET_OK) {
1505 ResetInputsShape(old_dims);
1506 is_running_.store(false);
1507 return ret;
1508 }
1509 ret = UpdateInputShapeMap();
1510 if (ret != RET_OK) {
1511 MS_LOG(ERROR) << "update input shape map failed.";
1512 return RET_ERROR;
1513 }
1514 if (infer_along_running_) {
1515 is_running_.store(false);
1516 return ret;
1517 }
1518
1519 ret = ReSizeKernels(kernels_, isolate_input_map_);
1520 if (ret != RET_OK) {
1521 ResetInputsShape(old_dims);
1522 auto resize_ret = ReSizeKernels(kernels_);
1523 if (resize_ret != RET_OK) {
1524 MS_LOG(ERROR) << "restore kernel size fail!ret: " << resize_ret;
1525 }
1526 is_running_.store(false);
1527 return ret;
1528 }
1529
1530 auto status = GraphOptimizePass(&kernels_);
1531 if (status != RET_OK) {
1532 MS_LOG(ERROR) << "GraphOptimizePass failed.";
1533 return RET_ERROR;
1534 }
1535
1536 is_running_.store(false);
1537 return RET_OK;
1538 }
1539
1540 int LiteSession::PreCheck(Model *model) {
1541 bool expected = false;
1542 if (!is_running_.compare_exchange_strong(expected, true)) {
1543 MS_LOG(ERROR) << "Not support multi-threading";
1544 return RET_ERROR;
1545 }
1546 if (model == nullptr) {
1547 MS_LOG(ERROR) << "The input model is nullptr.";
1548 return RET_PARAM_INVALID;
1549 }
1550 if (model->buf == nullptr) {
1551 MS_LOG(ERROR) << "The input model buf is nullptr.";
1552 return RET_PARAM_INVALID;
1553 }
1554 if (model->model_type_ != ModelType_MSLite) {
1555 // non-MSLite model: verify through the abstract base model interface
1556 if (!reinterpret_cast<AbstractBaseModel *>(model)->ModelVerify()) {
1557 MS_LOG(ERROR) << "Model verification failed, please check the model input";
1558 return RET_ERROR;
1559 }
1560 } else {
1561 // MSLite model: verify through the LiteModel routine
1562 if (!reinterpret_cast<LiteModel *>(model)->ModelVerify()) {
1563 MS_LOG(ERROR) << "Model verification failed, please check the model input";
1564 return RET_ERROR;
1565 }
1566 }
1567
1568 #ifndef ENABLE_FP16
1569 if (context_->GetDeviceInfo(DT_CPU).cpu_device_info_.enable_float16_) {
1570 MS_LOG(WARNING) << unsupport_fp16_log;
1571 }
1572 #endif
1573 return RET_OK;
1574 }
1575
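// Executor selection: with a single thread, CPU fp16 disabled and no control flow, a plain Executor
// is enough; otherwise the graph outputs are isolated first and the actor-based MindrtExecutor is
// used (only when ENABLE_MINDRT is compiled in).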
1576 int LiteSession::InitExecutor() {
1577 int ret;
1578 #ifdef ENABLE_MINDRT
1579 if (ms_context_->GetThreadNum() == 1 && !context_->IsCpuFloat16Enabled() && !is_control_flow_) {
1580 executor_ = new (std::nothrow) Executor();
1581 } else {
1582 ret = IsolateOutputTensor();
1583 if (ret != RET_OK) {
1584 MS_LOG(ERROR) << "Isolate output tensor failed.";
1585 return ret;
1586 }
1587 executor_ = new (std::nothrow) MindrtExecutor(&isolate_graph_output_map_, &isolate_input_map_);
1588 }
1589 #else
1590 executor_ = new (std::nothrow) Executor();
1591 #endif
1592 if (executor_ == nullptr) {
1593 MS_LOG(ERROR) << "New Executor failed";
1594 return RET_ERROR;
1595 }
1596
1597 ret = executor_->Prepare(kernels_, inputs_, outputs_, context_.get());
1598 if (ret != RET_OK) {
1599 MS_LOG(ERROR) << "Prepare executor failed: " << ret;
1600 return ret;
1601 }
1602 return RET_OK;
1603 }
1604
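// The runtime allocator is opt-in and heavily gated: it is disabled on arm32, without MindRT, with
// BFC_MEMORY, with subgraph parallelism, in train sessions, with runtime infershape, and when the
// graph was split into more than one subgraph; currently it is enabled only on arm64 builds.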
1605 int LiteSession::RuntimeAllocatorValid() {
1606 #ifdef ENABLE_ARM32
1607 MS_LOG(DEBUG) << "Not support runtime allocator in arm32.";
1608 return RET_ERROR;
1609 #endif
1610
1611 #ifndef ENABLE_MINDRT
1612 MS_LOG(DEBUG) << "Not support runtime allocator in converter.";
1613 return RET_ERROR;
1614 #endif
1615
1616 #ifdef BFC_MEMORY
1617 MS_LOG(DEBUG) << "Not support runtime allocator when BFC_MEMORY on.";
1618 return RET_ERROR;
1619 #endif
1620
1621 if ((context_->enable_parallel_ == true) || (context_->inter_op_parallel_num_ > 1)) {
1622 MS_LOG(DEBUG) << "Not support runtime allocator in subgraph parallel.";
1623 return RET_ERROR;
1624 }
1625 if (is_train_session_ == true) {
1626 MS_LOG(DEBUG) << "Not support runtime allocator in train session.";
1627 return RET_ERROR;
1628 }
1629 if (is_infershape_ != RET_OK) {
1630 MS_LOG(DEBUG) << "Not support runtime allocator in runtime-infershape.";
1631 return RET_ERROR;
1632 }
1633 #ifdef ENABLE_MINDRT
1634 if (kernels_.size() != 1) {
1635 MS_LOG(DEBUG) << "Not support runtime allocator in random subgraph sort";
1636 return RET_ERROR;
1637 }
1638 #endif
1639 #ifdef ENABLE_ARM64
1640 MS_LOG(DEBUG) << "support runtime allocator.";
1641 return RET_OK;
1642 #endif
1643 return RET_ERROR;
1644 }
1645
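// If an isolated graph-output tensor already lives in the runtime allocator, the user-visible output
// tensor is switched to the same allocator; a separate block is planned only when the data types differ.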
1646 void LiteSession::RuntimeAllocatorInitGraphOutput() {
1647 AllocatorPtr default_allocator = context_->allocator;
1648 for (auto graph_out : isolate_graph_output_map_) {
1649 auto cal_t = graph_out.first;
1650 auto out_t = graph_out.second;
1651 if (cal_t->allocator() != runtime_allocator_ || out_t->allocator() != default_allocator) {
1652 continue;
1653 }
1654 out_t->set_allocator(runtime_allocator_);
1655 if (cal_t->data_type() != out_t->data_type()) {
1656 runtime_allocator_->MallocTensorData(out_t);
1657 }
1658 }
1659 return;
1660 }
1661
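// Memory-planning bookkeeping used below: `tensor_ref_count` counts the remaining consumers of each
// tensor, while `data_ref_count` counts consumers per planned data offset, so a block is released
// only after every tensor aliasing that offset has been consumed.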
1662 void RuntimeAllocatorInitSubgraphInputs(const kernel::KernelExec *subgraph, const AllocatorPtr &default_allocator,
1663 const RuntimeAllocatorPtr &runtime_allocator,
1664 const std::unordered_map<Tensor *, Tensor *> &isolate_input_map,
1665 std::unordered_map<Tensor *, int> *tensor_ref_count,
1666 std::unordered_map<size_t, int> *data_ref_count) {
1667 MS_ASSERT(subgraph != nullptr && tensor_ref_count != nullptr && data_ref_count != nullptr);
1668 for (auto in_tensor : subgraph->in_tensors()) {
1669 auto iter = isolate_input_map.find(in_tensor);
1670 if (isolate_input_map.end() == iter) break;
1671 auto src_t = iter->second;
1672
1673 if (src_t->data_type() == in_tensor->data_type()) {
1674 in_tensor->set_allocator(src_t->allocator());
1675 if (src_t->allocator() == runtime_allocator) {
1676 (*tensor_ref_count)[in_tensor] = in_tensor->init_ref_count();
1677 (*data_ref_count)[runtime_allocator->GetOffsetMap().at(src_t)] += in_tensor->init_ref_count();
1678 runtime_allocator->SetDataOffset(in_tensor, runtime_allocator->GetOffsetMap().at(src_t));
1679 }
1680 } else {
1681 if (in_tensor->allocator() == default_allocator) {
1682 in_tensor->set_allocator(runtime_allocator);
1683 runtime_allocator->MallocTensorData(in_tensor);
1684 (*tensor_ref_count)[in_tensor] = in_tensor->init_ref_count();
1685 (*data_ref_count)[runtime_allocator->GetOffsetMap().at(in_tensor)] = in_tensor->init_ref_count();
1686 }
1687 }
1688
1689 if (src_t->allocator() != runtime_allocator) {
1690 continue;
1691 }
1692
1693 (*tensor_ref_count)[src_t]--;
1694 (*data_ref_count)[runtime_allocator->GetOffsetMap().at(src_t)]--;
1695
1696 if ((*tensor_ref_count)[src_t] <= 0) {
1697 if ((*data_ref_count)[runtime_allocator->GetOffsetMap().at(src_t)] <= 0) {
1698 runtime_allocator->FreeTensorData(src_t);
1699 }
1700 }
1701 }
1702 }
1703
1704 void LiteSession::RuntimeAllocatorInitSubgraph() {
1705 AllocatorPtr default_allocator = context_->allocator;
1706 std::unordered_map<lite::Tensor *, int> tensor_ref_count;
1707 std::unordered_map<size_t, int> data_ref_count;
1708
1709 for (auto subgraph : kernels_) {
1710 if (subgraph->desc().arch != kernel::KERNEL_ARCH::kCPU) {
1711 continue;
1712 }
1713
1714 RuntimeAllocatorInitSubgraphInputs(subgraph, default_allocator, runtime_allocator_, isolate_input_map_,
1715 &tensor_ref_count, &data_ref_count);
1716
1717 auto kernel_list = reinterpret_cast<kernel::SubGraphKernel *>(subgraph)->nodes();
1718 for (auto kernel : kernel_list) {
1719 /* malloc for output */
1720 for (auto tensor : kernel->out_tensors()) {
1721 if (tensor->allocator() != default_allocator || tensor->IsConst()) {
1722 continue;
1723 }
1724 tensor->set_allocator(runtime_allocator_);
1725 runtime_allocator_->MallocTensorData(tensor);
1726 tensor_ref_count[tensor] = tensor->init_ref_count();
1727 data_ref_count[runtime_allocator_->GetOffsetMap().at(tensor)] = tensor->init_ref_count();
1728 }
1729
1730 /* free input after run */
1731 for (auto tensor : kernel->in_tensors()) {
1732 if (tensor->allocator() != runtime_allocator_) {
1733 continue;
1734 }
1735 tensor_ref_count[tensor]--;
1736 data_ref_count[runtime_allocator_->GetOffsetMap().at(tensor)]--;
1737
1738 if (tensor_ref_count[tensor] <= 0 && tensor->allocator() == runtime_allocator_) {
1739 if (data_ref_count[runtime_allocator_->GetOffsetMap().at(tensor)] <= 0) {
1740 runtime_allocator_->FreeTensorData(tensor);
1741 }
1742 }
1743 }
1744 }
1745 }
1746 return;
1747 }
1748
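// Runtime-allocator pipeline: bail out when the allocator is unsupported or a custom CPU kernel is
// registered, plan the subgraph tensors, attach the graph outputs, and finally back the whole plan
// with one contiguous block in RuntimeAllocatorSetData().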
1749 int LiteSession::InitRuntimeAllocator() {
1750 if (RuntimeAllocatorValid() != RET_OK) {
1751 return RET_OK;
1752 }
1753 if (ExistCustomCpuKernel()) {
1754 return RET_OK;
1755 }
1756 if (runtime_allocator_ == nullptr) {
1757 runtime_allocator_ = std::shared_ptr<RuntimeAllocator>(new (std::nothrow) RuntimeAllocator());
1758 } else {
1759 runtime_allocator_->Clear(context_->allocator);
1760 }
1761 if (runtime_allocator_ == nullptr) {
1762 MS_LOG(ERROR) << "RuntimeAllocator is null.";
1763 return RET_ERROR;
1764 }
1765
1766 RuntimeAllocatorInitSubgraph();
1767
1768 RuntimeAllocatorInitGraphOutput();
1769
1770 auto ret = RuntimeAllocatorSetData();
1771 if (ret != RET_OK) {
1772 MS_LOG(ERROR) << "using optimize allocator failed.";
1773 return ret;
1774 }
1775 return RET_OK;
1776 }
1777
1778 int LiteSession::RuntimeAllocatorSetData() {
1779 void *data = runtime_allocator_->MallocOptData();
1780 if (data == nullptr) {
1781 MS_LOG(ERROR) << "malloc optimize data failed.";
1782 return RET_ERROR;
1783 }
1784 int8_t *int8_data = reinterpret_cast<int8_t *>(data);
1785 auto offset_map = runtime_allocator_->GetOffsetMap();
1786
1787 for (auto &iter : offset_map) {
1788 auto tensor = iter.first;
1789 if (tensor->allocator() != runtime_allocator_) {
1790 return RET_ERROR;
1791 }
1792 tensor->set_data(int8_data + iter.second);
1793 }
1794 return RET_OK;
1795 }
1796
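// OpenCL initialization is sensitive to CPU affinity: the thread pool is bound according to the CPU
// device's bind mode first, and the affinity is reset to NO_BIND afterwards so core binding does not
// interfere with the OpenCL driver's scheduling.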
1797 int LiteSession::InitGPURuntime() {
1798 if (context_->IsDeviceTypeEnabled(DT_CPU)) {
1799 CpuBindMode cpu_bind_mode = context_->GetDeviceInfo(DT_CPU).cpu_device_info_.cpu_bind_mode_;
1800 ThreadPool *thread_pool = this->context_->thread_pool_;
1801 if (thread_pool != nullptr) {
1802 thread_pool->SetProcessAffinity(static_cast<BindMode>(cpu_bind_mode));
1803 }
1804 }
1805 #ifdef GPU_OPENCL
1806 if (this->context_->IsDeviceTypeEnabled(DT_GPU)) {
1807 opencl_runtime_wrapper_ = new (std::nothrow) opencl::OpenCLRuntimeInnerWrapper();
1808 if (opencl_runtime_wrapper_ == nullptr) {
1809 MS_LOG(ERROR) << "create OpenCLRuntimeInnerWrapper failed";
1810 return RET_ERROR;
1811 }
1812 const auto &gpu_device_info = this->context_->GetDeviceInfo(DT_GPU).gpu_device_info_;
1813 auto opencl_runtime = opencl_runtime_wrapper_->GetInstance();
1814 opencl_runtime->SetGLTextureEnable(gpu_device_info.enable_gl_texture_);
1815 opencl_runtime->SetGLContext(gpu_device_info.gl_context_);
1816 opencl_runtime->SetGLDisplay(gpu_device_info.gl_display_);
1817 if (opencl_runtime->Init() != RET_OK) {
1818 if (gpu_device_info.enable_gl_texture_) {
1819 MS_LOG(ERROR) << "Init OpenCL runtime failed, enable_gl_texture set true, only support GPU mode.";
1820 return RET_ERROR;
1821 }
1822 this->context_->device_list_ = {{DT_CPU, {gpu_device_info.enable_float16_, MID_CPU}}};
1823 MS_LOG(WARNING) << "Init OpenCL runtime failed, change to CPU mode.";
1824 } else {
1825 MS_LOG(INFO) << "Init OpenCL runtime success.";
1826 }
1827
1828 opencl_runtime->SetFp16Enable(gpu_device_info.enable_float16_);
1829
1830 /* check chip support shared memory */
1831 auto enable_arm_import_memory = opencl_runtime->isExtensionEnable(EXT_ARM_IMPORT_MEMORY_HOST);
1832 if (!enable_arm_import_memory) {
1833 MS_LOG(WARNING) << "GPU do not support shared memory!";
1834 }
1835 }
1836 #endif
1837 // Binding CPU cores affects the OpenCL driver's scheduling, so reset the affinity to NO_BIND here.
1838 if (context_->IsDeviceTypeEnabled(DT_CPU)) {
1839 ThreadPool *thread_pool = this->context_->thread_pool_;
1840 if (thread_pool != nullptr) {
1841 thread_pool->SetProcessAffinity(static_cast<BindMode>(NO_BIND));
1842 }
1843 }
1844 return RET_OK;
1845 }
1846 } // namespace lite
1847
1848 lite::LiteSession *lite::LiteSession::CreateSession(const std::shared_ptr<InnerContext> &context) {
1849 auto session = new (std::nothrow) lite::LiteSession();
1850 if (session == nullptr) {
1851 MS_LOG(ERROR) << "create session failed";
1852 return nullptr;
1853 }
1854 auto ret = session->Init(context);
1855 if (ret != mindspore::lite::RET_OK) {
1856 MS_LOG(ERROR) << "init session failed";
1857 delete session;
1858 return nullptr;
1859 }
1860 return session;
1861 }
1862
1863 lite::LiteSession *lite::LiteSession::CreateSession(const char *model_buf, size_t size,
1864 const std::shared_ptr<InnerContext> &context) {
1865 auto *session = lite::LiteSession::CreateSession(context);
1866 if (session == nullptr) {
1867 MS_LOG(ERROR) << "Create session failed";
1868 return nullptr;
1869 }
1870 auto ret = reinterpret_cast<lite::LiteSession *>(session)->LoadModelAndCompileByBuf(
1871 model_buf, mindspore::ModelType::kMindIR_Lite, size);
1872 if (ret != RET_OK) {
1873 MS_LOG(ERROR) << "Init session failed";
1874 delete session;
1875 return nullptr;
1876 }
1877 return session;
1878 }
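// Usage sketch for the factories above (illustrative only; the context setup and file name are
// assumptions, not a prescribed recipe):
//   auto context = std::make_shared<lite::InnerContext>();
//   size_t size = 0;
//   char *buf = lite::ReadFile("model.ms", &size);
//   auto *session = lite::LiteSession::CreateSession(buf, size, context);
//   if (session == nullptr) { /* creation or compilation failed */ }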
1879
1880 mindspore::ModelType lite::LiteSession::LoadModelByBuff(const char *model_buf, const size_t &buf_size, char **lite_buf,
1881 size_t *size, mindspore::ModelType model_type) {
1882 if (model_type == mindspore::ModelType::kMindIR_Lite) {
1883 *size = buf_size;
1884 *lite_buf = const_cast<char *>(model_buf);
1885 return mindspore::ModelType::kMindIR_Lite;
1886 }
1887
1888 if (model_type != mindspore::ModelType::kMindIR) {
1889 return mindspore::ModelType::kUnknownType;
1890 }
1891
1892 flatbuffers::Verifier verify((const uint8_t *)model_buf, buf_size, INT32_MAX, INT32_MAX);
1893 auto version_verify = lite::LiteModel::VersionVerify(&verify);
1894 if (version_verify != SCHEMA_INVALID) {
1895 MS_LOG(DEBUG) << "The kMindIR type model buffer is valid mslite model buffer";
1896 *size = buf_size;
1897 *lite_buf = const_cast<char *>(model_buf);
1898 return mindspore::ModelType::kMindIR_Lite;
1899 }
1900 MS_LOG(WARNING) << "Invalid mslite model.";
1901
1902 #ifdef RUNTIME_CONVERT
1903 *lite_buf = RuntimeConvert(model_buf, buf_size, size, ms_context_);
1904 #else
1905 MS_LOG(WARNING) << "Please enable runtime convert.";
1906 #endif
1907 #ifdef ENABLE_CLOUD_FUSION_INFERENCE
1908 *size = buf_size;
1909 *lite_buf = const_cast<char *>(model_buf);
1910 #endif
1911 return mindspore::ModelType::kMindIR;
1912 }
1913
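// Reads the model file either via mmap or into a heap buffer; if the buffer cannot be recognized the
// mapping/allocation is released here, otherwise ownership of the returned buffer passes to the caller.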
1914 const char *lite::LiteSession::LoadModelByPath(const std::string &file, mindspore::ModelType model_type, size_t *size,
1915 bool use_mmap) {
1916 size_t buf_size;
1917 char *model_buf;
1918 if (use_mmap) {
1919 model_buf = reinterpret_cast<char *>(lite::ReadFileByMmap(file.c_str(), &buf_size));
1920 } else {
1921 model_buf = lite::ReadFile(file.c_str(), &buf_size);
1922 }
1923 if (model_buf == nullptr) {
1924 MS_LOG(ERROR) << "The model path is invalid";
1925 return model_buf;
1926 }
1927
1928 char *lite_buf = nullptr;
1929 auto buf_model_type = LoadModelByBuff(model_buf, buf_size, &lite_buf, size, model_type);
1930 if (buf_model_type == mindspore::ModelType::kUnknownType || lite_buf == nullptr) {
1931 if (use_mmap) {
1932 lite::UnmapMmapBuffer(const_cast<void *>(static_cast<const void *>(model_buf)), buf_size);
1933 } else {
1934 delete[] model_buf;
1935 }
1936 model_buf = nullptr;
1937 return nullptr;
1938 }
1939
1940 return lite_buf;
1941 }
1942
1943 std::string lite::LiteSession::ParseWeightPath() {
1944 std::string weight_path = "";
1945 if (config_info_ != nullptr) {
1946 auto ms_weight = config_info_->find(kConfigModelFileSection);
1947 if (ms_weight != config_info_->end()) {
1948 auto ms_weight_iter = ms_weight->second;
1949 if (ms_weight_iter.find(kConfigMindIRPathKey) != ms_weight_iter.end()) {
1950 weight_path = ms_weight_iter[kConfigMindIRPathKey];
1951 }
1952 }
1953 }
1954 return weight_path;
1955 }
1956
1957 int lite::LiteSession::ReshapeWeightTensor(lite::Tensor *orig_tensor, lite::Tensor *new_tensor) {
1958 if (orig_tensor->data_type() != new_tensor->data_type()) {
1959 MS_LOG(ERROR) << "Cannot reshape tensor of different type: " << new_tensor->tensor_name();
1960 return RET_PARAM_INVALID;
1961 }
1962
1963 if (orig_tensor->category() != lite::Category::CONST_TENSOR) {
1964 MS_LOG(ERROR) << "Cannot reshape non const tensor: " << new_tensor->tensor_name();
1965 return RET_ERROR;
1966 }
1967
1968 auto orig_size = orig_tensor->Size();
1969 uint8_t *new_data = reinterpret_cast<uint8_t *>(new_tensor->data());
1970 if (new_data == nullptr) {
1971 // Copy original data into new_tensor
1972 new_data = reinterpret_cast<uint8_t *>(new_tensor->MutableData());
1973 if (new_data == nullptr) {
1974 MS_LOG(ERROR) << "Allocation of Data Failed" << new_tensor->tensor_name();
1975 return RET_ERROR;
1976 }
1977 if (orig_size == 0) {
1978 MS_LOG(ERROR) << "Operation failed: Both new tensors and original one have no data";
1979 return RET_ERROR;
1980 }
1981 uint8_t *orig_data = reinterpret_cast<uint8_t *>(orig_tensor->data());
1982 for (unsigned int loc = 0; loc < new_tensor->Size(); loc++) {
1983 new_data[loc] = orig_data[loc % orig_size];
1984 }
1985 }
1986
1987 if (orig_tensor->shape() != new_tensor->shape()) {
1988 orig_tensor->FreeData();
1989 orig_tensor->set_data(nullptr);
1990 orig_tensor->set_shape(new_tensor->shape());
1991 }
1992
1993 uint8_t *dst_data = reinterpret_cast<uint8_t *>(orig_tensor->MutableData());
1994 if (dst_data == nullptr) {
1995 MS_LOG(ERROR) << "Allocation of Data Failed";
1996 return RET_ERROR;
1997 }
1998 std::copy(new_data, new_data + orig_tensor->Size(), dst_data);
1999 return RET_OK;
2000 }
2001
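// Usage sketch for UpdateWeights() below (illustrative only; the tensor name, type and shape are
// hypothetical and must match a constant tensor that already exists in the loaded model):
//   lite::Tensor new_weight(kNumberTypeFloat32, {1, 3, 3, 1});
//   new_weight.set_tensor_name("conv1.weight");
//   /* fill new_weight.MutableData() with the updated values */
//   auto ret = session->UpdateWeights({&new_weight});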
2002 int lite::LiteSession::UpdateWeights(std::vector<lite::Tensor *> modify_tensors) {
2003 unsigned int num_of_found_tensors = 0;
2004 for (auto modify : modify_tensors) {
2005 if (modify == nullptr) {
2006 MS_LOG(ERROR) << "Tensor is nullptr";
2007 return RET_PARAM_INVALID;
2008 }
2009 for (auto tensor : tensors_) {
2010 if (modify->tensor_name() == tensor->tensor_name()) {
2011 if (tensor->Size() != modify->Size()) {
2012 model_buff_changed_ = true;
2013 }
2014 auto ret = ReshapeWeightTensor(tensor, modify);
2015 num_of_found_tensors++;
2016 if (ret != RET_OK) {
2017 model_buff_changed_ = false;
2018 return ret;
2019 }
2020 break;
2021 }
2022 }
2023 }
2024 if (num_of_found_tensors != modify_tensors.size()) {
2025 MS_LOG(ERROR) << "Did not find all the given tensors in the model";
2026 return RET_ERROR;
2027 }
2028 auto ret = ReSizeKernels(kernels_);
2029 if (ret != RET_OK) {
2030 MS_LOG(ERROR) << "Resize kernels fail!";
2031 model_buff_changed_ = false;
2032 return ret;
2033 }
2034
2035 bool is_eval = IsEval();
2036 if (is_eval) {
2037 ret = Eval();
2038 }
2039 return ret;
2040 }
2041
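// Buffer-based load path: the raw buffer is first registered with PackWeightManager so packed weights
// can be shared across sessions, then verified (and possibly converted) by LoadModelByBuff, imported
// into a LiteModel and compiled; model->buf is cleared afterwards because the caller keeps ownership
// of the original buffer.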
2042 #ifdef ENABLE_LITE_HELPER
2043 int lite::LiteSession::LoadModelAndCompileByBuf(const char *model_buf, mindspore::ModelType model_type,
2044 const size_t &buf_size,
2045 mindspore::infer::helper::InferHelpers *infer_helpers) {
2046 #else
2047 int lite::LiteSession::LoadModelAndCompileByBuf(const char *model_buf, mindspore::ModelType model_type,
2048 const size_t &buf_size) {
2049 #endif
2050 auto status = lite::PackWeightManager::GetInstance()->InitPackWeightManager(model_buf, buf_size, &model_id_,
2051 &runner_id_, config_info_);
2052 if (status != RET_OK) {
2053 MS_LOG(ERROR) << "InitPackWeightByBuf failed.";
2054 return RET_ERROR;
2055 }
2056 auto new_model_buf =
2057 lite::PackWeightManager::GetInstance()->GetSharedModelBuf(model_buf, model_id_, config_info_, &is_shared_weight_);
2058 if (new_model_buf == nullptr) {
2059 MS_LOG(ERROR) << "get shared model buf is nullptr.";
2060 return RET_ERROR;
2061 }
2062 size_t lite_buf_size = 0;
2063 char *lite_buf = nullptr;
2064 auto buf_model_type = LoadModelByBuff(new_model_buf, buf_size, &lite_buf, &lite_buf_size, model_type);
2065 if (buf_model_type == mindspore::ModelType::kUnknownType || lite_buf == nullptr) {
2066 MS_LOG(ERROR) << "Invalid model_buf";
2067 return RET_ERROR;
2068 }
2069 auto weight_path = ParseWeightPath();
2070 #ifdef ENABLE_LITE_HELPER
2071 auto *model = lite::ImportFromBuffer(lite_buf, lite_buf_size, true, model_type, weight_path, infer_helpers);
2072 #else
2073 auto *model = lite::ImportFromBuffer(lite_buf, lite_buf_size, true, model_type, weight_path);
2074 #endif
2075 if (model == nullptr) {
2076 MS_LOG(ERROR) << "Import model failed";
2077 return RET_ERROR;
2078 }
2079 (reinterpret_cast<lite::LiteModel *>(model))->set_keep_model_buf(keep_model_buf_);
2080 auto ret = CompileGraph(model);
2081 model->buf = nullptr;
2082 if (ret != lite::RET_OK) {
2083 MS_LOG(ERROR) << "Compile model failed";
2084 delete model;
2085 return RET_ERROR;
2086 }
2087 set_model(model);
2088 return RET_OK;
2089 }
2090
2091 int lite::LiteSession::LoadModelAndCompileByPath(const std::string &model_path, mindspore::ModelType model_type) {
2092 size_t model_size;
2093 bool use_mmap = IsMmapEnable();
2094 auto model_buf = LoadModelByPath(model_path, model_type, &model_size, use_mmap);
2095 if (model_buf == nullptr) {
2096 MS_LOG(ERROR) << "Read model file failed";
2097 return RET_ERROR;
2098 }
2099 auto status = lite::PackWeightManager::GetInstance()->InitPackWeightManager(model_buf, model_size, &model_id_,
2100 &runner_id_, config_info_);
2101 if (status != RET_OK) {
2102 MS_LOG(ERROR) << "InitPackWeightByBuf failed.";
2103 return RET_ERROR;
2104 }
2105 auto new_model_buf =
2106 lite::PackWeightManager::GetInstance()->GetSharedModelBuf(model_buf, model_id_, config_info_, &is_shared_weight_);
2107 if (new_model_buf == nullptr) {
2108 MS_LOG(ERROR) << "get shared model buf is nullptr.";
2109 return RET_ERROR;
2110 }
2111 if (is_shared_weight_) {
2112 if (use_mmap) {
2113 lite::UnmapMmapBuffer(const_cast<void *>(static_cast<const void *>(model_buf)), model_size);
2114 } else {
2115 delete[] model_buf;
2116 }
2117 model_buf = nullptr;
2118 }
2119 auto *model = lite::ImportFromBuffer(new_model_buf, model_size, true, model_type, model_path);
2120 if (model == nullptr) {
2121 MS_LOG(ERROR) << "Import model failed";
2122 return RET_ERROR;
2123 }
2124 if (use_mmap && new_model_buf == model_buf) {
2125 reinterpret_cast<lite::LiteModel *>(model)->model_buf_by_mmap_ = true;
2126 }
2127 (reinterpret_cast<lite::LiteModel *>(model))->set_keep_model_buf(true);
2128 auto ret = CompileGraph(model);
2129 if (ret != lite::RET_OK) {
2130 MS_LOG(ERROR) << "Compile model failed";
2131 model->buf = nullptr;
2132 delete model;
2133 return RET_ERROR;
2134 }
2135 set_model(model);
2136 return RET_OK;
2137 }
2138
2139 bool lite::LiteSession::IsMmapEnable() const {
2140 #if !defined(_WIN32) && !defined(_WIN64) && !defined(MS_COMPILE_IOS)
2141 if (delegate_device_type_ == static_cast<int>(DT_NPU)) {
2142 return false;
2143 }
2144 return true;
2145 #else
2146 return false;
2147 #endif
2148 }
2149 } // namespace mindspore
2150