1 /**
2  * Copyright 2020-2023 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/litert/lite_session.h"
18 #include <set>
19 #include <vector>
20 #include <utility>
21 #include <fstream>
22 #include <algorithm>
23 #include "src/litert/pack_weight_manager.h"
24 #include "src/litert/runtime_pass.h"
25 #include "include/errorcode.h"
26 #include "src/common/log_adapter.h"
27 #include "src/litert/scheduler.h"
28 #include "src/litert/inner_allocator.h"
29 #include "src/litert/executor.h"
30 #include "src/common/context_util.h"
31 #include "src/common/utils.h"
32 #include "src/common/graph_util.h"
33 #include "src/common/tensor_util.h"
34 #include "src/common/file_utils.h"
35 #include "src/common/mmap_utils.h"
36 #include "src/litert/lite_model.h"
37 #include "src/litert/weight_decoder.h"
38 #include "src/litert/runtime_allocator.h"
39 #include "src/litert/kernel_exec_util.h"
40 #include "src/litert/cpu_info.h"
41 #ifndef CUSTOM_KERNEL_REGISTRY_CLIP
42 #include "src/registry/register_kernel_impl.h"
43 #endif
44 #ifdef ENABLE_MINDRT
45 #include "src/litert/mindrt_executor.h"
46 #endif
47 #ifdef SUPPORT_NPU
48 #include "src/litert/delegate/npu/npu_delegate.h"
49 #endif
50 #ifdef GPU_OPENCL
51 #include "src/litert/kernel/opencl/opencl_subgraph.h"
52 #endif
53 #ifdef GPU_TENSORRT
54 #include "src/litert/delegate/tensorrt/tensorrt_delegate.h"
55 #endif
56 #ifdef SUPPORT_NNAPI
57 #include "src/litert/delegate/nnapi/nnapi_delegate.h"
58 #endif
59 #ifdef ENABLE_COREML
60 #include "src/litert/delegate/coreml/coreml_delegate.h"
61 #endif
62 #include "src/litert/runtime_convert.h"
63 #include "extendrt/mindir_loader/model_loader.h"
64 #ifndef __ANDROID__
65 #include "kernel/ascend/plugin/ascend_kernel_plugin.h"
66 #endif
67 #if defined(PARALLEL_INFERENCE) && defined(ENABLE_MINDRT)
68 #include "thread/parallel_thread_pool_manager.h"
69 #endif
70 #include "src/litert/runtime_packed_node_pass.h"
71 #ifdef SUPPORT_NNRT
72 #include "src/litert/delegate/nnrt/nnrt_delegate.h"
73 #include "src/litert/delegate/nnrt/nnrt_allocator.h"
74 #endif
75 
76 using AbstractBaseModel = mindspore::infer::AbstractBaseModel;
77 
78 namespace mindspore {
79 #ifdef USE_GLOG
80 extern "C" {
81 extern void mindspore_log_init();
82 }
83 #endif
84 namespace lite {
85 namespace {
86 bool ExistCustomCpuKernel() {
87 #ifndef CUSTOM_KERNEL_REGISTRY_CLIP
88   const std::string kArchCPU = "CPU";
89   auto custom_kernel_creators = registry::RegistryKernelImpl::GetInstance()->GetCustomKernelCreators();
90   for (const auto &custom_kernel_creator : custom_kernel_creators) {  // <provider, <arch, <type, CreateKernel*>>>
91     if (custom_kernel_creator.second.empty()) {
92       continue;
93     }
94     if (std::any_of(
95           custom_kernel_creator.second.begin(), custom_kernel_creator.second.end(),
96           [kArchCPU](const std::pair<std::string, std::unordered_map<std::string, registry::CreateKernel *>> &pair) {
97             return pair.first == kArchCPU && !pair.second.empty();
98           })) {
99       return true;
100     }
101   }
102 #endif
103   return false;
104 }
105 }  // namespace
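// Illustrative layout of the creator map scanned by ExistCustomCpuKernel() above (provider, op and
// function names are hypothetical; only the <provider, <arch, <type, CreateKernel*>>> nesting matters):
//   { "MyProvider" : { "CPU" : { "Custom_Add" : &CreateMyCustomAddKernel } } }
// Any provider that has at least one creator registered under the "CPU" arch makes the check return true.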
106 
107 LiteSession::LiteSession() {
108 #ifdef USE_GLOG
109   mindspore::mindspore_log_init();
110 #endif
111   this->is_running_.store(false);
112 }
113 
114 int LiteSession::CheckTensorValid(lite::Tensor *dst_tensor) {
115   MS_ASSERT(dst_tensor != nullptr);
116   if (dst_tensor->data_type() == kObjectTypeTensorType) {
117     return RET_OK;
118   }
119   if (dst_tensor->IsGraphInput() || dst_tensor->IsGraphOutput()) {
120     return RET_OK;
121   }
122   if (dst_tensor->IsConst() == false && dst_tensor->data() != nullptr) {
123     return RET_ERROR;
124   }
125   return RET_OK;
126 }
127 
128 void LiteSession::ConvertTensorsQuantParam(const schema::Tensor *src_tensor, lite::Tensor *dst_tensor) {
129   MS_ASSERT(src_tensor != nullptr);
130   MS_ASSERT(dst_tensor != nullptr);
131   auto quant_params = src_tensor->quantParams();
132   if (quant_params != nullptr) {
133     for (size_t j = 0; j < quant_params->size(); j++) {
134       auto quant_param = quant_params->Get(j);
135       LiteQuantParam quant_arg{};
136       if (quant_param == nullptr) {
137         quant_arg.inited = false;
138       } else {
139         quant_arg.inited = true;
140         quant_arg.bitNum = quant_param->numBits();
141         quant_arg.scale = quant_param->scale();
142         quant_arg.zeroPoint = quant_param->zeroPoint();
143         quant_arg.var_corr = quant_param->varCorr();
144         quant_arg.mean_corr = quant_param->meanCorr();
145         quant_arg.roundType = quant_param->roundType();
146         quant_arg.multiplier = quant_param->multiplier();
147         quant_arg.dstDtype = quant_param->dstDtype();
148         quant_arg.min = quant_param->min();
149         quant_arg.max = quant_param->max();
150       }
151       dst_tensor->AddQuantParam(quant_arg);
152     }
153   }
154   auto quant_clusters = src_tensor->quantClusters();
155   if (quant_clusters != nullptr) {
156     std::vector<float> clusters;
157     for (size_t j = 0; j < quant_clusters->size(); j++) {
158       clusters.push_back(quant_clusters->Get(j));
159     }
160     dst_tensor->set_quant_clusters(clusters);
161   }
162 }
163 
164 int LiteSession::ConvertTensorsData(const lite::LiteModel *model, size_t tensor_index, lite::Tensor *dst_tensor) {
165   MS_ASSERT(model != nullptr);
166   MS_ASSERT(dst_tensor != nullptr);
167   auto src_tensor = model->GetSchemaTensor(tensor_index);
168   if (src_tensor == nullptr || src_tensor->handler() == nullptr || src_tensor->data() == nullptr ||
169       src_tensor->length() == 0) {
170     MS_LOG(DEBUG) << "No valid data converted.";
171     return RET_OK;
172   }
173 
174   /* tensor list convert */
175   if (dst_tensor->data_type() == kObjectTypeTensorType) {
176     const int *src_data = reinterpret_cast<const int *>(src_tensor->data());
177     return DecodeTensorLsit(dst_tensor, src_data, src_tensor->length());
178   }
179 
180   /* normal tensor check */
181   auto shape_info = dst_tensor->shape();
182   if (shape_info.end() !=
183       std::find_if(shape_info.begin(), shape_info.end(), [](const int shape) { return shape <= 0; })) {
184     MS_LOG(ERROR) << "Invalid shape size, tensor name: " << src_tensor->handler()->name();
185     return RET_ERROR;
186   }
187 
188   int compress_type = src_tensor->handler()->weightQuantCompressType();
189   int ret = RET_NO_CHANGE;
190   if (compress_type != kFSEInfer) {
191     ret = WeightDecoder::DecompressTensor(*src_tensor, dst_tensor);
192   }
193   if (ret == RET_NO_CHANGE) {
194     if (dst_tensor->Size() == 0 || src_tensor->length() < dst_tensor->Size()) {
195       MS_LOG(ERROR) << "Tensor data shape invalid";
196       return RET_ERROR;
197     }
198     auto data_pair = src_tensor->ReleaseData();
199     dst_tensor->set_data(data_pair.second);
200     dst_tensor->set_own_data(data_pair.first);
201   } else if (ret != RET_OK) {
202     MS_LOG(ERROR) << "Decompress tensor data failed: " << ret;
203     return ret;
204   }
205   return RET_OK;
206 }
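// ConvertTensorsData() in short: tensor-list tensors are decoded from the raw int buffer; other tensors
// first go through WeightDecoder::DecompressTensor() (skipped for kFSEInfer-compressed weights); when no
// decompression applies (RET_NO_CHANGE) the schema tensor's buffer is taken over directly via
// ReleaseData(), so no copy is made.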
207 
208 lite::Tensor *LiteSession::ConvertTensor(const schema::Tensor &src_tensor) {
209   int32_t data_type = src_tensor.dataType();
210   if (data_type <= kTypeUnknown || data_type >= kMonadTypeEnd) {
211     MS_LOG(ERROR) << "invalid data type. " << data_type;
212     return nullptr;
213   }
214   auto src_category = TensorCategory(src_tensor);
215   std::vector<int> shape;
216   if (src_tensor.dims() == nullptr) {
217     MS_LOG(DEBUG) << "Dims of src_tensor is nullptr";
218   }
219   if (src_tensor.dims() != nullptr) {
220     if (src_tensor.dataType() == kObjectTypeString && src_tensor.data() != nullptr) {
221       shape.push_back(src_tensor.data()->size());
222     } else {
223       for (size_t j = 0; j < src_tensor.dims()->size(); j++) {
224         shape.push_back(src_tensor.dims()->data()[j]);
225       }
226     }
227     if (std::any_of(shape.begin(), shape.end(), [](const int &element) { return element < 0 && element != -1; })) {
228       MS_LOG(ERROR) << "Dims of src_tensor is unsupported";
229       return nullptr;
230     }
231   }
232   lite::Tensor *dst_tensor = nullptr;
233   if (TypeId(data_type) == kObjectTypeTensorType) {
234     MS_CHECK_TRUE_RET(src_tensor.data() != nullptr, nullptr);
235     MS_CHECK_TRUE_RET(src_tensor.data()->size() > 0, nullptr);
236     auto src_data = src_tensor.data()->data();
237     dst_tensor = CreateTensorList(shape, src_category, src_data);
238   } else {
239     dst_tensor = new (std::nothrow)
240       Tensor(TypeId(data_type), shape, static_cast<mindspore::Format>(src_tensor.format()), src_category);
241   }
242   if (dst_tensor == nullptr) {
243     MS_LOG(ERROR) << "create dst_tensor failed, dst_tensor is nullptr.";
244     return nullptr;
245   }
246   if (src_tensor.name() != nullptr) {
247     dst_tensor->set_tensor_name(src_tensor.name()->str());
248   }
249   auto compress_type = static_cast<CompressType>(src_tensor.weightQuantCompressType());
250   if (compress_type == kFSEInfer) {
251     dst_tensor->set_compress_type(static_cast<CompressType>(compress_type));
252     dst_tensor->set_compressed_size(src_tensor.data()->size());
253   }
254   return dst_tensor;
255 }
256 
257 int LiteSession::ConvertTensors(const lite::Model *model) {
258   MS_ASSERT(model != nullptr);
259   auto lite_model = reinterpret_cast<const lite::LiteModel *>(model);
260   uint32_t tensor_count = model->graph_.all_tensors_.size();
261   auto model_input_indices = model->graph_.input_indices_;
262   auto model_output_indices = model->graph_.output_indices_;
263 
264   for (uint32_t i = 0; i < tensor_count; ++i) {
265     auto *src_tensor = model->graph_.all_tensors_[i];
266     if (src_tensor == nullptr) {
267       MS_LOG(ERROR) << i << "th tensor in model is nullptr";
268       return RET_NULL_PTR;
269     }
270     auto *dst_tensor = ConvertTensor(*src_tensor);
271     if (dst_tensor == nullptr) {
272       MS_LOG(ERROR) << "Convert new " << i << "th tensor failed!";
273       return RET_NULL_PTR;
274     }
275     auto ret = ConvertTensorsData(lite_model, i, dst_tensor);
276     if (ret != RET_OK) {
277       MS_LOG(ERROR) << "Convert data of " << i << "th tensor failed";
278       delete dst_tensor;
279       return ret;
280     }
281     ConvertTensorsQuantParam(src_tensor, dst_tensor);
282     if (IsContain(model_input_indices, i)) {
283       dst_tensor->set_category(Category::GRAPH_INPUT);
284     }
285     if (IsContain(model_output_indices, i)) {
286       // A tensor that is both a graph input and a graph output is treated as an input.
287       if (!dst_tensor->IsGraphInput()) {
288         dst_tensor->set_category(Category::GRAPH_OUTPUT);
289       }
290     }
291 
292     ret = CheckTensorValid(dst_tensor);
293     if (ret != RET_OK) {
294       MS_LOG(ERROR) << "Check " << i << "th tensor failed";
295       delete dst_tensor;
296       return ret;
297     }
298 
299     this->tensors_.emplace_back(dst_tensor);
300   }
301   return RET_OK;
302 }
303 
304 void LiteSession::InitGraphInputTensors(const lite::Model *model) {
305   MS_ASSERT(model != nullptr);
306   auto graph_in_size = model->graph_.input_indices_.size();
307   for (size_t i = 0; i < graph_in_size; ++i) {
308     auto in_tensor_idx = model->graph_.input_indices_[i];
309     MS_ASSERT(in_tensor_idx < this->tensors_.size());
310     auto *in_tensor = this->tensors_.at(in_tensor_idx);
311     MS_ASSERT(in_tensor != nullptr);
312     this->inputs_.emplace_back(in_tensor);
313   }
314 }
315 
316 void LiteSession::InitGraphInputMSTensors() {
317   MS_ASSERT(this->input_vec_.empty());
318   for (auto &input_tensor : this->inputs_) {
319     MS_ASSERT(input_tensor != nullptr);
320     this->input_vec_.emplace_back(input_tensor);
321   }
322 }
323 
324 void LiteSession::InitGraphOutputTensors(const lite::Model *model) {
325   MS_ASSERT(model != nullptr);
326   MS_ASSERT(this->outputs_.empty());
327   auto graph_out_size = model->graph_.output_indices_.size();
328   for (size_t i = 0; i < graph_out_size; ++i) {
329     auto out_tensor_idx = model->graph_.output_indices_[i];
330     MS_ASSERT(out_tensor_idx < this->tensors_.size());
331     auto *out_tensor = this->tensors_.at(out_tensor_idx);
332     MS_ASSERT(out_tensor != nullptr);
333     this->outputs_.emplace_back(out_tensor);
334   }
335 }
336 
337 void LiteSession::InitGraphInputMap(const lite::Model *model) {
338   MS_ASSERT(model != nullptr);
339   MS_ASSERT(this->input_map_.empty());
340   MS_ASSERT(this->input_shape_map_.empty());
341   auto graph_input_node_indexes = GetGraphInputNodes(model);
342   auto graph_in_size = model->graph_.input_indices_.size();
343   for (auto in_node_index : graph_input_node_indexes) {
344     auto in_node = model->graph_.all_nodes_[in_node_index];
345     MS_ASSERT(in_node != nullptr);
346     auto in_size = in_node->input_indices_.size();
347     for (size_t i = 0; i < in_size; ++i) {
348       if (this->input_map_.find(in_node->name_ + std::to_string(i)) != this->input_map_.end()) {
349         MS_LOG(ERROR) << "Duplicate input " << in_node->name_ + std::to_string(i) << " in input_map_";
350         return;
351       }
352       auto in_tensor_index = size_t(in_node->input_indices_[i]);
353       bool is_graph_input = false;
354       for (size_t j = 0; j < graph_in_size; ++j) {
355         if (in_tensor_index == model->graph_.input_indices_[j]) {
356           is_graph_input = true;
357           break;
358         }
359       }
360       if (!is_graph_input) {
361         continue;
362       }
363       MS_ASSERT(in_tensor_index < this->tensors_.size());
364       auto *in_tensor = this->tensors_.at(in_tensor_index);
365       if (in_tensor == nullptr) {
366         MS_LOG(ERROR) << "in_tensor is null!";
367         return;
368       }
369       auto tensor_name = in_node->name_ + std::to_string(i);
370       this->input_map_[tensor_name] = in_tensor;
371       this->input_shape_map_[in_tensor] = in_tensor->shape();
372       if (!in_tensor->tensor_name().empty()) {
373         this->input_map_[in_tensor->tensor_name()] = in_tensor;
374       }
375     }
376   }
377 
378   for (auto input_tensor : this->inputs_) {
379     MS_ASSERT(input_tensor != nullptr);
380     if (this->input_map_.find(input_tensor->tensor_name()) == this->input_map_.end()) {
381       this->input_map_[input_tensor->tensor_name()] = input_tensor;
382     }
383     if (this->input_shape_map_.find(input_tensor) == this->input_shape_map_.end()) {
384       this->input_shape_map_[input_tensor] = input_tensor->shape();
385     }
386   }
387 }
388 
389 void LiteSession::InitGraphOutputNodeMap(const lite::Model *model) {
390   MS_ASSERT(model != nullptr);
391   auto graph_output_node_indexes = GetGraphOutputNodes(model);
392   auto graph_out_size = model->graph_.output_indices_.size();
393   for (auto out_node_index : graph_output_node_indexes) {
394     auto out_node = model->graph_.all_nodes_[out_node_index];
395     MS_ASSERT(out_node != nullptr);
396     auto out_size = out_node->output_indices_.size();
397     for (size_t i = 0; i < out_size; ++i) {
398       auto out_tensor_index = out_node->output_indices_[i];
399       bool is_graph_output = false;
400       for (size_t j = 0; j < graph_out_size; ++j) {
401         if (out_tensor_index == model->graph_.output_indices_[j]) {
402           is_graph_output = true;
403           break;
404         }
405       }
406       if (!is_graph_output) {
407         continue;
408       }
409       MS_ASSERT(out_tensor_index < this->tensors_.size());
410       auto *out_tensor = this->tensors_.at(out_tensor_index);
411       if (out_tensor == nullptr) {
412         MS_LOG(ERROR) << "out_tensor is null!";
413         return;
414       }
415       this->output_node_map_[out_node->name_].emplace_back(out_tensor);
416     }
417   }
418 }
419 
420 void LiteSession::InitGraphOutputTensorMap(const lite::Model *model) {
421   MS_ASSERT(model != nullptr);
422   MS_ASSERT(this->output_tensor_map_.empty());
423   auto graph_out_size = model->graph_.output_indices_.size();
424   for (size_t i = 0; i < graph_out_size; ++i) {
425     size_t graph_out_index = model->graph_.output_indices_[i];
426     MS_ASSERT(graph_out_index < this->tensors_.size());
427     auto *out_tensor = this->tensors_.at(graph_out_index);
428     if (out_tensor == nullptr) {
429       MS_LOG(ERROR) << "out_tensor is null!";
430       return;
431     }
432     if (!out_tensor->tensor_name().empty()) {
433       this->output_tensor_map_.insert(std::make_pair(out_tensor->tensor_name(), out_tensor));
434       this->output_tensor_names_.emplace_back(out_tensor->tensor_name());
435     } else {
436       this->output_tensor_map_.insert(std::make_pair(std::to_string(graph_out_index), out_tensor));
437       this->output_tensor_names_.emplace_back(std::to_string(graph_out_index));
438     }
439   }
440 }
441 
442 void LiteSession::InitGraphInOutTensorsMap(const lite::Model *model) {
443   InitGraphInputMSTensors();
444   InitGraphInputMap(model);
445   InitGraphOutputNodeMap(model);
446   InitGraphOutputTensorMap(model);
447 }
448 
449 int LiteSession::IsolateOutputTensor() {
450   for (Tensor *src_tensor : outputs_) {
451     if (src_tensor->IsGraphInput()) {
452       continue;
453     }
454     Tensor *new_tensor = new (std::nothrow)
455       Tensor(src_tensor->data_type(), src_tensor->shape(), src_tensor->format(), Category::GRAPH_OUTPUT);
456     if (MS_UNLIKELY(new_tensor == nullptr)) {
457       MS_LOG(ERROR) << "duplicating graph output tensor failed.";
458       return RET_NULL_PTR;
459     }
460     new_tensor->set_allocator(src_tensor->allocator()); /* GPU use opencl allocator */
461     new_tensor->set_tensor_name(src_tensor->tensor_name() + "_duplicate");
462     for (LiteQuantParam quant : src_tensor->quant_params()) {
463       new_tensor->AddQuantParam(quant);
464     }
465     new_tensor->set_init_ref_count(src_tensor->init_ref_count());
466 
467     /* src tensor set for graph calculate */
468     if (src_tensor->data_type() == kNumberTypeFloat16) {
469       src_tensor->set_data_type(kNumberTypeFloat32);
470     }
471     src_tensor->set_ref_count(1);
472 
473     isolate_graph_output_map_.insert(std::make_pair(new_tensor, src_tensor));
474 
475     /* set new tensor for calculate */
476     for (auto subgraph : kernels_) {
477       /* subgraph input and output */
478       auto in_size = subgraph->in_tensors().size();
479       for (size_t i = 0; i < in_size; ++i) {
480         if (subgraph->in_tensors()[i] == src_tensor) {
481           subgraph->set_in_tensor(new_tensor, i);
482         }
483       }
484       auto out_size = subgraph->out_tensors().size();
485       for (size_t i = 0; i < out_size; ++i) {
486         if (subgraph->out_tensors()[i] == src_tensor) {
487           subgraph->set_out_tensor(new_tensor, i);
488         }
489       }
490       if (subgraph->desc().arch == kernel::kDelegate) {
491         continue;
492       }
493       /* node input and output */
494       auto nodes = reinterpret_cast<kernel::SubGraphKernel *>(subgraph)->nodes();
495       auto nodes_size = nodes.size();
496       for (size_t i = 0; i < nodes_size; ++i) {
497         auto node = nodes[i];
498         out_size = node->out_tensors().size();
499         for (size_t j = 0; j < out_size; ++j) {
500           if (node->out_tensors()[j] == src_tensor) {
501             node->set_out_tensor(new_tensor, j);
502             break;
503           }
504         }
505         in_size = node->in_tensors().size();
506         for (size_t j = 0; j < in_size; ++j) {
507           if (node->in_tensors()[j] == src_tensor) {
508             node->set_in_tensor(new_tensor, j);
509           }
510         }
511       }
512     }
513   }
514 
515   UpdateLinkInfoForIsolateOutput();
516   return RET_OK;
517 }
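// Note on IsolateOutputTensor(): every graph output tensor is duplicated ("_duplicate") and the duplicate
// replaces the original in all subgraph/node tensor lists, so internal kernels write to the duplicate
// while the original tensor in outputs_ stays user-facing. isolate_graph_output_map_ keeps the
// <duplicate, original> pairs used by UpdateLinkInfoForIsolateOutput() and by the destructor.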
518 
519 void LiteSession::UpdateLinkInfoForIsolateOutput() {
520   for (auto &item : isolate_graph_output_map_) {
521     context_->ReplaceLinkInfoReceiverWithNewOne(item.first, item.second);
522   }
523   return;
524 }
525 
526 void LiteSession::FreePackOpWeight(const std::vector<kernel::KernelExec *> &kernels) {
527   // To reduce runtime RAM:
528   // free pack-op weights, because packed ops never access the origin weight again at runtime.
529   for (auto *kernel : kernels) {
530     MS_ASSERT(kernel != nullptr);
531     if (kernel->subgraph_type() == kernel::kNotSubGraph) {
532       if (!IsPackedOp(static_cast<int>(kernel::SchemaType(kernel->type())))) {
533         continue;
534       }
535     } else {
536       auto subgraph = reinterpret_cast<kernel::SubGraphKernel *>(kernel);
537       FreePackOpWeight(subgraph->nodes());
538     }
539     auto inputs = kernel->in_tensors();
540     for (auto *tensor : inputs) {
541       MS_ASSERT(tensor != nullptr);
542       if (!tensor->IsConst() || tensor->ref_count() >= 1) {
543         continue;
544       }
545       tensor->FreeData();
546     }
547   }
548 }
549 
550 void LiteSession::MarkSharedWeight(const std::vector<kernel::KernelExec *> &kernels) {
551   // Raise the ref count of const inputs of non-packed kernels,
552   // so FreePackOpWeight() will not release weights that are still shared with un-packed kernels.
553   for (auto *kernel : kernels) {
554     MS_ASSERT(kernel != nullptr);
555     if (kernel->subgraph_type() == kernel::kNotSubGraph) {
556       if (IsPackedOp(static_cast<int>(kernel::SchemaType(kernel->type())))) {
557         continue;
558       }
559     } else {
560       auto subgraph = reinterpret_cast<kernel::SubGraphKernel *>(kernel);
561       MarkSharedWeight(subgraph->nodes());
562     }
563     auto inputs = kernel->in_tensors();
564     for (auto *tensor : inputs) {
565       MS_ASSERT(tensor != nullptr);
566       if (tensor->IsConst()) {
567         tensor->IncRefCount();
568       }
569     }
570   }
571 }
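// MarkSharedWeight() and FreePackOpWeight() work as a pair in CompileGraph(): MarkSharedWeight() first
// raises the ref count of const inputs of non-packed kernels, then FreePackOpWeight() releases const
// inputs of packed ops whose ref count stayed below 1, because packed ops never read the origin weight
// again at runtime. A weight shared with a non-packed kernel therefore survives the free pass.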
572 
573 int LiteSession::CompileGraph(Model *model) {
574   auto ret = PreCheck(model);
575   if (ret != RET_OK) {
576     MS_LOG(ERROR) << "schedule check failed: " << ret;
577     is_running_.store(false);
578     return ret;
579   }
580 
581   if (model->model_type_ != ModelType_MSLite) {
582     ret = reinterpret_cast<AbstractBaseModel *>(model)->ConvertTensors(&this->tensors_);
583   } else {
584     // Convert to abstract base model interface
585     ret = ConvertTensors(model);
586     context_->set_schema_version(reinterpret_cast<LiteModel *>(model)->GetSchemaVersion());
587   }
588   if (ret != RET_OK) {
589     MS_LOG(ERROR) << "ConvertTensors failed: " << ret;
590     is_running_.store(false);
591     return ret;
592   }
593   ret = lite::PackWeightManager::GetInstance()->StoreOriginTensorData(model, &tensors_);
594   if (ret != RET_OK) {
595     MS_LOG(ERROR) << "StoreOriginTensorData failed.";
596     is_running_.store(false);
597     return RET_ERROR;
598   }
599   InitGraphInputTensors(model);
600   InitGraphOutputTensors(model);
601 
602   PackedNodePass::GetInstance().Run(model, tensors_);
603 
604   // schedule kernels
605   Scheduler scheduler(context_.get(), ms_context_, model, &tensors_, &inputs_, &outputs_, is_train_session_,
606                       &is_infershape_, &is_control_flow_, &infer_along_running_, execution_plan_, delegate_,
607                       delegate_device_type_);
608   scheduler.SetupSchedulerCb(std::move(sched_cb_));
609   scheduler.SetConfig(config_info_);
610   ret = scheduler.Schedule(&kernels_);
611   if (ret != RET_OK) {
612     MS_LOG(ERROR) << "Schedule kernels failed: " << ret;
613     is_running_.store(false);
614     return ret;
615   }
616   if (ms_context_->GetThreadNum() == 1 && !context_->IsCpuFloat16Enabled() && is_control_flow_) {
617     context_->DeleteThreadPool();
618     (void)context_->CreateThreadPool(is_control_flow_);
619   }
620 
621   infer_along_running_ = infer_along_running_ && !is_control_flow_ && !is_train_session_ && (is_infershape_ != RET_OK);
622   InitGraphInOutTensorsMap(model);
623 
624   non_tail_call_kernels_ = scheduler.NonTailCallNodes();
625 
626   ret = PrepareKernels(model);
627   if (ret != RET_OK) {
628     MS_LOG(ERROR) << "Prepare kernels failed: " << ret;
629     is_running_.store(false);
630     return ret;
631   }
632 
633   if (is_train_session_ || is_prepare_session_) {
634     is_running_.store(false);
635     return RET_OK;
636   }
637 
638   ret = InitExecutor();
639   if (ret != RET_OK) {
640     MS_LOG(ERROR) << "InitExecutor failed: " << ret;
641     is_running_.store(false);
642     return ret;
643   }
644 
645   MarkSharedWeight(kernels_);
646   FreePackOpWeight(kernels_);
647 
648   infer_along_running_ = infer_along_running_ && (runtime_allocator_ == nullptr);
649   if (infer_along_running_) {
650     this->context_->set_infer_checker(InferCheckerAll);
651   }
652   is_running_.store(false);
653   return RET_OK;
654 }
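// CompileGraph() pipeline at a glance: PreCheck -> ConvertTensors (or the AbstractBaseModel path) ->
// PackWeightManager::StoreOriginTensorData -> InitGraphInputTensors/InitGraphOutputTensors ->
// PackedNodePass -> Scheduler::Schedule -> InitGraphInOutTensorsMap -> PrepareKernels -> InitExecutor ->
// MarkSharedWeight + FreePackOpWeight. Train and prepare-only sessions return right after PrepareKernels.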
655 
656 bool LiteSession::IsIsolatedSubGraph(const kernel::KernelExec *kernel) {
657   auto cur_in_tensors = kernel->in_tensors();
658   for (auto cur_kernel : this->kernels_) {
659     if (cur_kernel == kernel) {
660       continue;
661     }
662     auto out_tensors = cur_kernel->out_tensors();
663     for (auto tensor : cur_in_tensors) {
664       if (IsContain(out_tensors, tensor)) {
665         return false;
666       }
667     }
668   }
669   return true;
670 }
671 
672 int LiteSession::SetAllocatorForDelegateKernels(const kernel::KernelExec *kernel) {
673   if (kernel == nullptr) {
674     return RET_NULL_PTR;
675   }
676   for (auto input : kernel->in_tensors()) {
677     CHECK_NULL_RETURN(input);
678     input->set_allocator(this->context_->allocator);
679   }
680   for (auto output : kernel->out_tensors()) {
681     CHECK_NULL_RETURN(output);
682     output->set_allocator(this->context_->allocator);
683   }
684   return RET_OK;
685 }
686 
687 int LiteSession::CreateNNRTDelegate() {
688 #ifdef SUPPORT_NNRT
689   auto iter = std::find_if(context_->device_list_.begin(), context_->device_list_.end(),
690                            [](DeviceContext &device) { return device.device_type_ == lite::DT_NNRT; });
691   if (iter == context_->device_list_.end()) {
692     MS_LOG(ERROR) << "NNRT device info not found in device list";
693     return RET_ERROR;
694   }
695 
696   delegate_ = std::make_shared<NNRTDelegate>(iter->device_info_.nnrt_device_info_);
697   if (delegate_ == nullptr) {
698     MS_LOG(ERROR) << "New NNRT delegate failed";
699     return RET_ERROR;
700   }
701   delegate_device_type_ = DT_NNRT;
702   this->context_->delegate = delegate_;
703 #endif
704   return RET_OK;
705 }
706 
707 int LiteSession::DrawGraph(kernel::SubGraphKernel *graph) {
708   if (graph == nullptr) {
709     return RET_NULL_PTR;
710   }
711   // create and open .dot file
712   std::ofstream dotfile;
713   dotfile.open("./graph.dot", std::ios::out | std::ios::trunc);
714   if (!dotfile.is_open()) {
715     MS_LOG(ERROR) << "create or open dotfile failed.";
716     return RET_ERROR;
717   }
718   // write data to .dot file
719   dotfile << "digraph " << graph->name() << " {\n";
720   for (auto node : graph->nodes()) {
721     std::replace(node->name().begin(), node->name().end(), '/', '-');
722     // first node
723     if (node->in_kernels().empty()) {
724       dotfile << "\tinput->" << node->name();
725       dotfile << "[label=\"";
726       std::vector<int> input_shapes = node->in_tensors().front()->shape();
727       for (auto iter = input_shapes.begin(); iter != input_shapes.end(); iter++) {
728         if (iter == input_shapes.end() - 1) {
729           dotfile << *iter;
730         } else {
731           dotfile << *iter << "*";
732         }
733       }
734       dotfile << "\"]\n";
735       continue;
736     }
737 
738     for (size_t i = 0; i < node->in_kernels().size(); ++i) {
739       dotfile << "\t" << node->in_kernels()[i]->name() << "->" << node->name() << "[label=\"";
740       std::vector<int32_t> in_kernel_shapes = node->in_tensors()[i]->shape();
741 
742       for (auto iter = in_kernel_shapes.begin(); iter != in_kernel_shapes.end(); iter++) {
743         if (iter == in_kernel_shapes.end() - 1) {
744           dotfile << *iter;
745         } else {
746           dotfile << *iter << "*";
747         }
748       }
749       dotfile << "\"]\n";
750     }
751     // last node
752     if (node->out_kernels().empty()) {
753       dotfile << "\t" << node->name() << "->output";
754       dotfile << "[label=\"";
755       std::vector<int32_t> out_shapes = node->out_tensors().front()->shape();
756       for (auto iter = out_shapes.begin(); iter != out_shapes.end(); iter++) {
757         if (iter == out_shapes.end() - 1) {
758           dotfile << *iter;
759         } else {
760           dotfile << *iter << "*";
761         }
762       }
763       dotfile << "\"]\n";
764     }
765   }
766   dotfile << "}\n"; dotfile.close();  // write the digraph's closing brace before closing the file
767   return RET_OK;
768 }
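// Illustrative ./graph.dot output of DrawGraph() (kernel names and shapes are made up):
//   digraph MainSubGraph {
//     input->Conv2D-op1[label="1*224*224*3"]
//     Conv2D-op1->Relu-op2[label="1*112*112*32"]
//     Relu-op2->output[label="1*112*112*32"]
//   }
// Edge labels are tensor shapes joined with '*'; '/' in kernel names is replaced by '-'.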
769 
770 void LiteSession::SetInitRefCountOfPartialSubgraphInputs(const Model *model) {
771   if (model == nullptr) {
772     return;
773   }
774   constexpr size_t kFirstPartialSubgraphIndex = 1U;
775   const auto &sub_graphs = model->graph_.sub_graphs_;
776   // Find out partial subgraph's inputs and set their 'init_ref_count' to INT_MAX to avoid trigger 'FreeData()'.
777   // Here start with index:1 to skip main subgraph.
778   for (size_t i = kFirstPartialSubgraphIndex; i < sub_graphs.size(); i++) {
779     for (auto index : sub_graphs[i]->input_indices_) {
780       tensors_[index]->set_init_ref_count(INT_MAX);
781     }
782   }
783 }
784 
785 int LiteSession::PrepareKernels(const Model *model) {
786   // find kernel's in_kernels and out_kernels in every subgraph
787   kernel::KernelExecUtil::FindAllInoutKernelsInSubgraphKernel(this->kernels_);
788   // find in_kernels and out_kernels between subgraph kernels
789   kernel::KernelExecUtil::FindAllInoutKernels(this->kernels_);
790 
791   // init init_ref_count for subgraphs and kernels
792   auto ret = SetTensorInitRefCount();
793   if (ret != RET_OK) {
794     MS_LOG(ERROR) << "SetTensorInitRefCount failed.";
795     return ret;
796   }
797   // When running a control-flow model, if a partial subgraph's input is also its output,
798   // 'init_ref_count' is not correctly initialized in 'SetTensorInitRefCount()'. That can cause the input
799   // tensor's data pointer to be referenced after it has been reset to NULL once its ref_count drops to 0.
800   // Setting the partial-subgraph input tensors' 'init_ref_count' to INT_MAX avoids that case.
801   SetInitRefCountOfPartialSubgraphInputs(model);
802 
803   for (auto kernel : this->kernels_) {
804     if (kernel->desc().arch == kernel::kDelegate) {
805       ret = SetAllocatorForDelegateKernels(kernel);
806       if (ret != RET_OK) {
807         MS_LOG(ERROR) << "Prepare kernel " << kernel->name() << " failed: " << ret;
808         return ret;
809       }
810     }
811 
812     if (!is_train_session_ && kernel->desc().arch != kernel::kDelegate && kernel->desc().arch != kernel::kGPU) {
813       auto subgraph_kernel = static_cast<kernel::SubGraphKernel *>(kernel);
814       if (subgraph_kernel == nullptr) {
815         MS_LOG(ERROR) << "kernel: " << kernel->name() << " is not a subgraph kernel.";
816         return RET_ERROR;
817       }
818       for (auto &node : subgraph_kernel->nodes()) {
819         ret = PackKernelExec(node, tensors_);
820         if (ret != RET_OK) {
821           MS_LOG(ERROR) << "Pack KernelExec failed.";
822           return ret;
823         }
824         ret = node->Prepare();
825         if (ret != RET_OK) {
826           MS_LOG(ERROR) << "node: " << node->name() << " prepare failed.";
827           return ret;
828         }
829       }
830     }
831 
832 #if (defined DEBUG) && (defined MSLITE_EXPORT_COMPUTE_IR)
833     auto subgraph_kernel = static_cast<kernel::SubGraphKernel *>(kernel);
834     ret = DrawGraph(subgraph_kernel);
835     if (ret != RET_OK) {
836       MS_LOG(ERROR) << "graph: " << kernel->name() << " draw failed.";
837     }
838 #endif
839 
840     ret = kernel->Prepare();
841     if (ret != RET_OK) {
842       MS_LOG(ERROR) << "Prepare kernel " << kernel->name() << " failed: " << ret;
843       return ret;
844     }
845   }
846   return RET_OK;
847 }
848 
849 int LiteSession::SetTensorInitRefCount() {
850   for (auto *kernel : this->kernels_) {
851     kernel->InitOutTensorInitRefCount();
852     if (kernel->desc().arch == kernel::kDelegate) {
853       continue;
854     }
855     if (IsIsolatedSubGraph(kernel)) {
856       static_cast<kernel::SubGraphKernel *>(kernel)->InitInputTensorInitRefCount();
857     }
858   }
859 
860   if (!non_tail_call_kernels_.empty()) {
861     return SetNonTaiCallSubgraphOutputInitRefCount();
862   }
863   return RET_OK;
864 }
865 
866 int LiteSession::SetNonTaiCallSubgraphOutputInitRefCount() {
867   for (auto call_kernel : non_tail_call_kernels_) {
868     auto call_output = call_kernel->out_tensors();
869     auto all_out_subgraphs = kernel::KernelExecUtil::GetCallInputPartialsCorrespondingOutputSubgraph(call_kernel);
870     for (auto subgraph : all_out_subgraphs) {
871       MS_CHECK_TRUE_MSG(subgraph->out_tensors().size() == call_output.size(), RET_ERROR,
872                         "non tail call output size is not same as subgraph output.");
873       std::set<Tensor *> subgraph_outputs_set{};
874       for (size_t i = 0; i < subgraph->out_tensors().size(); ++i) {
875         auto output = subgraph->out_tensors()[i];
876         if (subgraph_outputs_set.find(output) == subgraph_outputs_set.end()) {
877           output->set_init_ref_count(1);
878           (void)subgraph_outputs_set.insert(output);
879         } else {
880           output->set_init_ref_count(output->init_ref_count() + 1);
881         }
882       }
883     }
884   }
885   return RET_OK;
886 }
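// For non-tail calls, each output tensor of every candidate producing subgraph gets an init_ref_count
// equal to the number of times it appears in that subgraph's outputs, so its data is not released before
// the call kernel has consumed every duplicated entry.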
887 
888 std::vector<mindspore::lite::Tensor *> LiteSession::GetInputs() const { return this->input_vec_; }
889 
890 int LiteSession::RunGraph(const KernelCallBack &before, const KernelCallBack &after) {
891   bool expected = false;
892   if (!is_running_.compare_exchange_strong(expected, true)) {
893     MS_LOG(ERROR) << "Not support multi-threading";
894     return RET_ERROR;
895   }
896 #if defined(PARALLEL_INFERENCE) && defined(ENABLE_MINDRT)
897   ParallelThreadPoolManager::GetInstance()->ActivatePool(runner_id_, worker_id_);
898 #endif
899   STATUS ret = CheckTensorsInvalid(inputs_);
900   if (MS_UNLIKELY(ret != RET_OK)) {
901     is_running_.store(false);
902     MS_LOG(ERROR) << "CheckInputs failed.";
903     return ret;
904   }
905   ret = CheckGraphInputShapes(inputs_, input_shape_map_);
906   if (MS_UNLIKELY(ret != RET_OK)) {
907     is_running_.store(false);
908     MS_LOG(ERROR) << "Check graph input shapes failed.";
909     return ret;
910   }
911   MS_ASSERT(this->context_ != nullptr);
912   ret = executor_->Run(this->inputs_, this->outputs_, this->kernels_, before, after);
913   if (MS_UNLIKELY(ret != RET_OK)) {
914     MS_LOG(ERROR) << "RunGraph failed : " << ret;
915   }
916   if (infer_along_running_) {
917     this->context_->set_infer_checker(InferCheckerInput);
918     for (auto input : inputs_) {
919       input->set_shape_changed(false);
920     }
921   }
922 #if defined(PARALLEL_INFERENCE) && defined(ENABLE_MINDRT)
923   ParallelThreadPoolManager::GetInstance()->SetFreePool(runner_id_, worker_id_);
924 #endif
925   is_running_.store(false);
926   return ret;
927 }
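// Minimal usage sketch of LiteSession (illustrative only; error handling omitted and the model is
// assumed to be loaded elsewhere):
//   auto context = std::make_shared<mindspore::lite::InnerContext>();
//   auto *session = new (std::nothrow) mindspore::lite::LiteSession();
//   session->Init(context);
//   session->CompileGraph(model);
//   for (auto *in : session->GetInputs()) { /* fill in->MutableData() */ }
//   session->RunGraph(nullptr, nullptr);  // optional before/after callbacks
//   auto outputs = session->GetOutputs();
//   delete session;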
928 
929 int LiteSession::InitSharedThreadPool() {
930   int workers_num = -1;
931   int remaining_thread_num = -1;
932   int thread_num_limit = -1;
933   bool enable_shared_pool = false;
934   if (config_info_ != nullptr) {
935     auto runner_info_item = config_info_->find(kInnerModelParallelRunnerSection);
936     if (runner_info_item != config_info_->end()) {
937       auto item_runner = runner_info_item->second.find(kInnerRunnerIDKey);
938       if (item_runner != runner_info_item->second.end()) {
939         runner_id_ = runner_info_item->second.at(kInnerRunnerIDKey);
940       }
941       auto shared_pool_item = runner_info_item->second.find(kEnableSharedThreadPoolKey);
942       if (shared_pool_item != runner_info_item->second.end() &&
943           runner_info_item->second.at(kEnableSharedThreadPoolKey) == "true") {
944         workers_num = std::atoi(runner_info_item->second.at(kInnerWorkerNumKey).c_str());
945         remaining_thread_num = std::atoi(runner_info_item->second.at(kThreadNumRemainingPerWorkerKey).c_str());
946         thread_num_limit = std::atoi(runner_info_item->second.at(kThreadNumLimitPerWorkerKey).c_str());
947         worker_id_ = std::atoi(runner_info_item->second.at(kInnerModelIDKey).c_str());
948         enable_shared_pool = true;
949       }
950     }
951   }
952   MS_LOG(INFO) << "runner id: " << runner_id_ << "  enable_shared_pool: " << enable_shared_pool
953                << "  workers_num: " << workers_num << "  thread_num_limit: " << thread_num_limit
954                << "  remaining_thread_num: " << remaining_thread_num;
955 #if defined(PARALLEL_INFERENCE) && defined(ENABLE_MINDRT)
956   ParallelThreadPoolManager::GetInstance()->Init(enable_shared_pool, runner_id_, workers_num, remaining_thread_num,
957                                                  thread_num_limit);
958 #endif
959   return RET_OK;
960 }
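// Illustrative config_info_ content read by InitSharedThreadPool() (section/key constants come from the
// runtime; the values shown here are made up):
//   [kInnerModelParallelRunnerSection]
//     kInnerRunnerIDKey               = "runner_0"
//     kEnableSharedThreadPoolKey      = "true"
//     kInnerWorkerNumKey              = "4"
//     kThreadNumLimitPerWorkerKey     = "2"
//     kThreadNumRemainingPerWorkerKey = "1"
//     kInnerModelIDKey                = "0"
// If kEnableSharedThreadPoolKey is not "true", the worker/thread numbers stay at -1 and sharing is off.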
961 
962 int LiteSession::InitContext(const std::shared_ptr<InnerContext> &context) {
963   if (context == nullptr) {
964     MS_LOG(ERROR) << "context is nullptr";
965     return RET_NULL_PTR;
966   }
967   this->context_ = context;
968   context_->SetBindRunnerId(runner_id_);
969   auto ret = this->context_->Init();
970   if (ret != RET_OK) {
971     MS_LOG(ERROR) << "Init Context failed";
972     return ret;
973   }
974 
975   ms_context_ = MSContextFromContext(context);
976   if (ms_context_ == nullptr) {
977     MS_LOG(ERROR) << "transfer context to ms context failed.";
978     return RET_NULL_PTR;
979   }
980 
981 #ifdef MS_COMPILE_IOS
982   context_->thread_pool_->SetMaxSpinCount(kDefaulLiteIosSpinCount);
983   context_->thread_pool_->SetMinSpinCount(kDefaulLiteIosSpinCount);
984 #endif
985 
986 #if defined(PARALLEL_INFERENCE) && defined(ENABLE_MINDRT)
987   if (context_->inter_op_parallel_num_ > 1 && !runner_id_.empty() &&
988       ParallelThreadPoolManager::GetInstance()->GetEnableSharedThreadPool(runner_id_)) {
989     MS_LOG(INFO) << "Enable subgraph parallelism and enable thread pool sharing";
990     ParallelThreadPoolManager::GetInstance()->BindPoolToRunner(context_->thread_pool_, config_info_);
991   }
992 #endif
993 
994   return RET_OK;
995 }
996 
997 int LiteSession::InitAscend(const std::shared_ptr<InnerContext> &context) {
998 #if !defined(__ANDROID__) && !defined(MS_COMPILE_OHOS)
999   if (!context->IsDeviceTypeEnabled(DT_ASCEND)) {
1000     MS_LOG(INFO) << "There is no Ascend device type.";
1001     return RET_OK;
1002   }
1003   return mindspore::AscendKernelPlugin::GetInstance().Register();
1004 #else
1005   return RET_OK;
1006 #endif
1007 }
1008 
1009 int LiteSession::CreateTensorRTDelegate() {
1010 #ifdef GPU_TENSORRT
1011   std::string cache_model_path;
1012   std::string serialize_path;
1013   size_t vocab_size = 0;
1014   size_t device_cache_size = 0;
1015   std::map<std::string, std::string> input_ranges;
1016   if (config_info_ != nullptr) {
1017     auto input_ranges_iter = config_info_->find(kGPUContextSection);
1018     if (input_ranges_iter != config_info_->end()) {
1019       input_ranges = input_ranges_iter->second;
1020     }
1021     auto ms_cache_iter = config_info_->find(kMSCacheSection);
1022     if (ms_cache_iter != config_info_->end()) {
1023       auto ms_cache = ms_cache_iter->second;
1024       auto model_path_iter = ms_cache.find(kMSCacheModelPathKey);
1025       if (model_path_iter != ms_cache.end()) {
1026         cache_model_path = model_path_iter->second;
1027       }
1028 
1029       auto vocab_size_iter = ms_cache.find(kMSCacheVocabSizeKey);
1030       if (vocab_size_iter != ms_cache.end()) {
1031         auto vocab_size_opt = GenericParseValue<size_t>(vocab_size_iter->second);
1032         if (!vocab_size_opt.IsNone()) {
1033           vocab_size = vocab_size_opt.Get();
1034         }
1035       }
1036 
1037       auto device_cache_size_iter = ms_cache.find(kMSCacheDeviceSizeKey);
1038       if (device_cache_size_iter != ms_cache.end()) {
1039         auto device_cache_size_opt = GenericParseValue<size_t>(device_cache_size_iter->second);
1040         if (!device_cache_size_opt.IsNone()) {
1041           device_cache_size = device_cache_size_opt.Get();
1042         }
1043       }
1044 
1045       auto serialize_path_iter = ms_cache.find(kMSCacheSerializePathKey);
1046       if (serialize_path_iter != ms_cache.end()) {
1047         serialize_path = serialize_path_iter->second;
1048       }
1049     }
1050   }
1051 
1052   delegate_ = std::make_shared<TensorRTDelegate>(ms_context_, cache_model_path, vocab_size, device_cache_size,
1053                                                  serialize_path, input_ranges);
1054   if (delegate_ == nullptr) {
1055     MS_LOG(ERROR) << "New tensorrt delegate_ failed";
1056     return RET_ERROR;
1057   }
1058   delegate_device_type_ = DT_GPU;
1059   this->context_->delegate = delegate_;
1060 #endif
1061   return RET_OK;
1062 }
1063 
1064 int LiteSession::CreateNPUDelegate() {
1065 #ifdef SUPPORT_NPU
1066   std::string model_cache_dir;
1067   if (config_info_ != nullptr) {
1068     auto common_context_iter = config_info_->find(kCommonContextSection);
1069     if (common_context_iter != config_info_->end()) {
1070       auto common_context = common_context_iter->second;
1071       auto model_cache_dir_iter = common_context.find(kGraphCompilerCacheDirKey);
1072       if (model_cache_dir_iter != common_context.end()) {
1073         model_cache_dir = model_cache_dir_iter->second;
1074       }
1075     }
1076   }
1077   delegate_ = std::make_shared<NPUDelegate>(context_->GetDeviceInfo(DT_NPU).npu_device_info_, model_cache_dir);
1078   if (delegate_ == nullptr) {
1079     MS_LOG(ERROR) << "New delegate_ failed";
1080     return RET_ERROR;
1081   }
1082   delegate_device_type_ = DT_NPU;
1083   this->context_->delegate = delegate_;
1084 #endif
1085   return RET_OK;
1086 }
1087 
1088 int LiteSession::CreateNNAPIDelegate() {
1089 #ifdef SUPPORT_NNAPI
1090   bool enable_fp16 =
1091     context_->IsCpuFloat16Enabled() || context_->IsGpuFloat16Enabled() || context_->IsNpuFloat16Enabled();
1092   bool only_acc_device = !context_->IsDeviceTypeEnabled(DT_CPU) && !context_->IsDeviceTypeEnabled(DT_GPU) &&
1093                          context_->IsDeviceTypeEnabled(DT_NPU);
1094   bool disable_cpu = !context_->IsDeviceTypeEnabled(DT_CPU);
1095   auto providers = context_->GetProviders();
1096   std::vector<std::string> specified_devices(providers.begin(), providers.end());
1097   delegate_ = std::make_shared<NNAPIDelegate>(enable_fp16, only_acc_device, disable_cpu, specified_devices);
1098   if (delegate_ == nullptr) {
1099     MS_LOG(ERROR) << "New delegate_ failed";
1100     return RET_ERROR;
1101   }
1102   this->context_->delegate = delegate_;
1103 #endif
1104   return RET_OK;
1105 }
1106 
1107 int LiteSession::CreateCoreMLDelegate() {
1108 #ifdef ENABLE_COREML
1109   delegate_ = std::make_shared<CoreMLDelegate>();
1110   if (delegate_ == nullptr) {
1111     MS_LOG(ERROR) << "New delegate_ failed";
1112     return RET_ERROR;
1113   }
1114   this->context_->delegate = delegate_;
1115 #endif
1116   return RET_OK;
1117 }
1118 
1119 int LiteSession::InitDelegate() {
1120 #ifndef DELEGATE_CLIP
1121   int ret = RET_OK;
1122   if (context_->delegate != nullptr) {
1123     delegate_ = context_->delegate;
1124     delegate_device_type_ = -1;
1125   } else if (context_->delegate_mode_ != kNoDelegate) {
1126     switch (context_->delegate_mode_) {
1127       case kNNAPI:
1128         ret = CreateNNAPIDelegate();
1129         break;
1130       case kCoreML:
1131         ret = CreateCoreMLDelegate();
1132         break;
1133       default:
1134         MS_LOG(ERROR) << "Unsupported built-in delegate mode: " << context_->delegate_mode_;
1135         return RET_ERROR;
1136     }
1137   } else {
1138     if (context_->IsDeviceTypeEnabled(DT_NPU)) {
1139       ret = CreateNPUDelegate();
1140     } else if (context_->IsDeviceTypeEnabled(DT_GPU)) {
1141       ret = CreateTensorRTDelegate();
1142     } else if (context_->IsDeviceTypeEnabled(DT_NNRT)) {
1143       ret = CreateNNRTDelegate();
1144     }
1145   }
1146 
1147   if (ret != RET_OK) {
1148     return ret;
1149   }
1150   if (delegate_ != nullptr) {
1151     auto delegate_ret = delegate_->Init();
1152     if (delegate_ret == mindspore::kLiteNotSupport) {
1153       MS_LOG(DEBUG) << "Delegate is unsupported";
1154       delegate_.reset();
1155       delegate_ = nullptr;
1156     } else if (delegate_ret == mindspore::kSuccess) {
1157       MS_LOG(INFO) << "Delegate init successfully";
1158     } else {
1159       MS_LOG(ERROR) << "Delegate init failed";
1160       return RET_ERROR;
1161     }
1162   }
1163 #endif
1164   return RET_OK;
1165 }
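// Delegate selection order in InitDelegate(): an externally supplied context_->delegate wins; otherwise
// an explicit delegate_mode_ (kNNAPI / kCoreML) is honored; otherwise the enabled device type decides
// (NPU, then GPU/TensorRT, then NNRT). A delegate whose Init() returns kLiteNotSupport is dropped and
// the session falls back to the built-in kernels.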
1166 
1167 int LiteSession::Init(const std::shared_ptr<InnerContext> &context) {
1168   bool expected = false;
1169   if (!is_running_.compare_exchange_strong(expected, true)) {
1170     MS_LOG(ERROR) << "Not support multi-threading";
1171     return RET_ERROR;
1172   }
1173 
1174   if (!PlatformInstructionSetSupportCheck()) {
1175     MS_LOG(ERROR) << "Device does not support the required instruction set";
1176     is_running_.store(false);
1177     return RET_NOT_SUPPORT;
1178   }
1179 
1180   auto status = InitSharedThreadPool();
1181   if (status != RET_OK) {
1182     MS_LOG(ERROR) << "init Shared thread pool failed";
1183     is_running_.store(false);
1184     return status;
1185   }
1186   auto ret = InitContext(context);
1187   if (ret != RET_OK) {
1188     MS_LOG(ERROR) << "Init Context failed";
1189     is_running_.store(false);
1190     return ret;
1191   }
1192 
1193   ret = InitAscend(context);
1194   if (ret != RET_OK) {
1195     MS_LOG(ERROR) << "Open Ascend kernel plugin failed";
1196     is_running_.store(false);
1197     return ret;
1198   }
1199 
1200   ret = InitDelegate();
1201   if (ret != RET_OK) {
1202     MS_LOG(ERROR) << "Init delegate failed.";
1203     is_running_.store(false);
1204     return ret;
1205   }
1206 
1207   ret = InitGPURuntime();
1208   if (ret != RET_OK) {
1209     MS_LOG(ERROR) << "Init GPU runtime failed.";
1210     is_running_.store(false);
1211     return ret;
1212   }
1213 
1214   is_running_.store(false);
1215   return RET_OK;
1216 }
1217 
1218 void LiteSession::BindThread(bool if_bind) {
1219   // Abandoned code
1220   // Bind thread in executor
1221   return;
1222 }
1223 
1224 LiteSession::~LiteSession() {
1225   delegate_.reset();
1226   bool expected = false;
1227   if (!is_running_.compare_exchange_strong(expected, true)) {
1228     MS_LOG(ERROR) << "Not support multi-threading";
1229     return;
1230   }
1231   for (auto *kernel : kernels_) {
1232     delete kernel;
1233     kernel = nullptr;
1234   }
1235   for (auto tensor : tensors_) {
1236     if (tensor == nullptr) {
1237       continue;
1238     }
1239     // Data of a const tensor that does not own its data will not be freed here,
1240     // e.g. const data from the meta_graph, which is released when the meta_graph itself is freed.
1241     if (tensor->IsConst() && !tensor->own_data()) {
1242       tensor->set_data(nullptr);
1243     }
1244 
1245     /* situation : user set graph-output-tensor data */
1246     if (tensor->IsGraphOutput() && tensor->allocator() == nullptr) {
1247       tensor->set_data(nullptr);
1248     }
1249     delete tensor;
1250     tensor = nullptr;
1251   }
1252 
1253   for (auto item : isolate_graph_output_map_) {
1254     auto isolate_output_tensor = item.first;
1255     isolate_output_tensor->set_data(nullptr);
1256     delete isolate_output_tensor;
1257     isolate_output_tensor = nullptr;
1258   }
1259 
1260   for (auto map : isolate_input_map_) {
1261     auto isolate_input_tensor = map.first;
1262     isolate_input_tensor->set_data(nullptr);
1263     delete isolate_input_tensor;
1264   }
1265 
1266   // Tensor pointers held by the input/output maps are owned and freed via tensors_
1267   input_map_.clear();
1268   input_shape_map_.clear();
1269   output_node_map_.clear();
1270   output_tensor_map_.clear();
1271   input_vec_.clear();
1272   isolate_graph_output_map_.clear();
1273 
1274   delete this->executor_;
1275   this->executor_ = nullptr;
1276 #ifdef GPU_OPENCL
1277   delete opencl_runtime_wrapper_;
1278   opencl_runtime_wrapper_ = nullptr;
1279 #endif
1280   delete ms_context_;
1281   ms_context_ = nullptr;
1282 #if defined(PARALLEL_INFERENCE) && defined(ENABLE_MINDRT)
1283   ParallelThreadPoolManager::GetInstance()->ResetParallelThreadPoolManager(runner_id_);
1284 #endif
1285   lite::PackWeightManager::GetInstance()->FreePackWeight(runner_id_, model_id_);
1286   if (model_ != nullptr && is_shared_weight_) {
1287     model_->buf = nullptr;
1288   }
1289   delete (model_);
1290   model_ = nullptr;
1291 #ifdef SUPPORT_NNRT
1292   NNRTAllocator::GetInstance()->ClearFreeList();
1293 #endif
1294   is_running_.store(false);
1295 }
1296 
1297 mindspore::lite::Tensor *LiteSession::GetInputsByTensorName(const std::string &name) const {
1298   auto ret = input_map_.find(name);
1299   if (ret == input_map_.end()) {
1300     MS_LOG(WARNING) << "Tensor " << name << " does not exist";
1301     return nullptr;
1302   }
1303   return ret->second;
1304 }
1305 
1306 std::vector<mindspore::lite::Tensor *> LiteSession::GetOutputsByNodeName(const std::string &node_name) const {
1307   auto ret = output_node_map_.find(node_name);
1308   if (ret == output_node_map_.end()) {
1309     MS_LOG(WARNING) << "Node " << node_name << " is not an output node";
1310     std::vector<mindspore::lite::Tensor *> empty_ret;
1311     return empty_ret;
1312   }
1313   return ret->second;
1314 }
1315 
1316 std::vector<std::string> LiteSession::GetOutputTensorNames() const { return this->output_tensor_names_; }
1317 
1318 mindspore::lite::Tensor *LiteSession::GetOutputByTensorName(const std::string &tensor_name) const {
1319   auto ret = output_tensor_map_.find(tensor_name);
1320   if (ret == output_tensor_map_.end()) {
1321     MS_LOG(WARNING) << "Tensor " << tensor_name << " is not an output tensor";
1322     return nullptr;
1323   }
1324   return ret->second;
1325 }
1326 
1327 std::unordered_map<std::string, mindspore::lite::Tensor *> LiteSession::GetOutputs() const {
1328   return this->output_tensor_map_;
1329 }
1330 
1331 int LiteSession::UpdateInputShapeMap() {
1332   for (auto input : inputs_) {
1333     MS_CHECK_TRUE_MSG(input != nullptr, RET_ERROR, "graph input tensor is nullptr.");
1334     if (input_shape_map_.find(input) != input_shape_map_.end()) {
1335       input_shape_map_.at(input) = input->shape();
1336     } else {
1337       MS_LOG(ERROR) << "can't find " << input->tensor_name() << " in input_shape_map";
1338       return RET_ERROR;
1339     }
1340   }
1341   return RET_OK;
1342 }
1343 
1344 int LiteSession::ResizeInputs(const std::vector<mindspore::lite::Tensor *> &inputs,
1345                               const std::vector<std::vector<int>> &dims) {
1346   if (inputs.size() != inputs_.size()) {
1347     MS_LOG(ERROR) << "Inputs size " << inputs.size() << " is not equal to " << inputs_.size();
1348     return RET_PARAM_INVALID;
1349   }
1350 
1351   if (dims.size() != inputs.size()) {
1352     MS_LOG(ERROR) << "Input dims size " << dims.size() << " is not equal to the inputs size " << inputs.size();
1353     return RET_PARAM_INVALID;
1354   }
1355 
1356   for (size_t i = 0; i < inputs.size(); ++i) {
1357     if (inputs[i] != inputs_[i]) {
1358       MS_LOG(ERROR) << "Input[" << i << "] tensor does not match the input saved in the session!";
1359       return RET_PARAM_INVALID;
1360     }
1361     inputs_[i]->FreeData();
1362     if (infer_along_running_ && !inputs_[i]->get_shape_changed()) {
1363       inputs_[i]->set_shape_changed(dims[i] != inputs_[i]->shape());
1364     }
1365     inputs_[i]->set_shape(dims[i]);
1366   }
1367   if (!is_train_session_) {
1368     executor_->Resize(inputs, dims);
1369   }
1370   return RET_OK;
1371 }
1372 
1373 void LiteSession::ResetInputsShape(const std::vector<std::vector<int>> &dims) {
1374   for (size_t i = 0; i < inputs_.size(); ++i) {
1375     inputs_[i]->FreeData();
1376     inputs_[i]->set_shape(dims[i]);
1377     inputs_[i]->set_shape_changed(false);
1378   }
1379 }
1380 
1381 int LiteSession::ReSizeKernels(const std::vector<kernel::KernelExec *> &kernels,
1382                                const std::unordered_map<Tensor *, Tensor *> &isolate_input_map) {
1383   for (auto kernel : kernels) {
1384     if (kernel == nullptr) {
1385       MS_LOG(ERROR) << "input kernel is nullptr!";
1386       return RET_ERROR;
1387     }
1388     auto ret = RET_OK;
1389     if (kernel->desc().arch == kernel::kDelegate) {
1390       ret = kernel->ReSize();
1391     } else {
1392       // resize subgraph inputs
1393       auto sub_graph_kernel = reinterpret_cast<kernel::SubGraphKernel *>(kernel);
1394       for (auto input : sub_graph_kernel->in_tensors()) {
1395         if (isolate_input_map.find(input) != isolate_input_map.end()) {
1396           input->set_shape(isolate_input_map.at(input)->shape());
1397         }
1398       }
1399       if (kernel->subgraph_type() == kernel::kGpuFp16SubGraph || kernel->subgraph_type() == kernel::kGpuFp32SubGraph) {
1400 #ifdef GPU_OPENCL
1401         auto sub_graph = reinterpret_cast<kernel::OpenCLSubGraph *>(kernel);
1402         ret = sub_graph->ReSize();
1403 #endif
1404       } else {
1405         auto sub_graph = reinterpret_cast<kernel::SubGraphKernel *>(kernel);
1406         ret = sub_graph->ReSize();
1407       }
1408     }
1409     if (ret == RET_INFER_INVALID) {
1410       MS_LOG(INFO) << "InferShape is interrupted";
1411       continue;
1412     }
1413     if (ret != RET_OK) {
1414       MS_LOG(ERROR) << "ReSize node " << kernel->name() << " failed";
1415       return RET_ERROR;
1416     }
1417   }
1418   return RET_OK;
1419 }
1420 
1421 void LiteSession::SynIsolateInOutputDataType() {
1422   for (auto &tensor_map : isolate_input_map_) {
1423     auto dst_tensor = tensor_map.second;
1424     auto src_tensor = tensor_map.first;
1425 
1426     src_tensor->set_data_type(dst_tensor->data_type());
1427   }
1428 
1429   for (auto &tensor_map : isolate_graph_output_map_) {
1430     auto dst_tensor = tensor_map.second;
1431     auto src_tensor = tensor_map.first;
1432 
1433     src_tensor->set_data_type(dst_tensor->data_type());
1434   }
1435 }
1436 
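// Binds OpenGL texture IDs to the model's input and output tensors (GPU_OPENCL builds only):
// each GL texture id is written into the tensor's data and the tensor type is set to
// kNumberTypeGLUInt. This path requires enable_gl_texture_ in the GPU device info and a model
// that compiled into exactly one OpenCL subgraph.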
1437 int LiteSession::BindGLTexture2DMemory(const std::map<std::string, unsigned int> &inputGLTexture,
1438                                        std::map<std::string, unsigned int> *outputGLTexture) {
1439 #ifdef GPU_OPENCL
1440   if (!this->context_->GetDeviceInfo(DT_GPU).gpu_device_info_.enable_gl_texture_) {
1441     MS_LOG(ERROR) << "the context isn't set to support OpenGL texture";
1442     return RET_ERROR;
1443   }
1444   for (const auto &[name, GLTexture_id] : inputGLTexture) {
1445     auto iter = input_map_.find(name);
1446     if (iter == input_map_.end()) {
1447       MS_LOG(ERROR) << "the input tensor name " << name << " does not match any model input name";
1448       return RET_ERROR;
1449     }
1450     auto in_data = iter->second->MutableData();
1451     if (in_data == nullptr) {
1452       MS_LOG(ERROR) << "MallocData for input Tensor failed";
1453       return RET_ERROR;
1454     }
1455     memcpy(in_data, &GLTexture_id, sizeof(cl_GLuint));
1456     iter->second->set_data_type(kNumberTypeGLUInt);
1457   }
1458   for (auto [name, GLTexture_id] : *outputGLTexture) {
1459     auto iter = output_tensor_map_.find(name);
1460     if (iter == output_tensor_map_.end()) {
1461       MS_LOG(ERROR) << "the output tensor name " << name << " does not match any model output name";
1462       return RET_ERROR;
1463     }
1464     auto out_data = iter->second->MutableData();
1465     if (out_data == nullptr) {
1466       MS_LOG(ERROR) << "MallocData for output Tensor failed";
1467       return RET_ERROR;
1468     }
1469     memcpy(out_data, &GLTexture_id, sizeof(cl_GLuint));
1470     iter->second->set_data_type(kNumberTypeGLUInt);
1471   }
1472 
1473 #ifdef ENABLE_MINDRT
1474   SynIsolateInOutputDataType();  // Synchronize the input/output data types with their isolated counterparts
1475 #endif
1476 
1477   if (this->kernels_.size() != 1) {
1478     MS_LOG(ERROR) << "Only a single OpenCL subgraph is supported when inputting an OpenGL texture";
1479     return RET_ERROR;
1480   }
1481   auto opencl_subgraph = reinterpret_cast<kernel::OpenCLSubGraph *>(kernels_.front());
1482   for (size_t i = 0; i < outputs_.size(); i++) {
1483     (opencl_subgraph)->set_out_tensor(outputs_[i], i);
1484   }
1485   for (auto node : opencl_subgraph->out_nodes()) {
1486     node->set_out_tensors(opencl_subgraph->out_tensors());
1487   }
1488 #endif
1489   return RET_OK;
1490 }
1491 
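// Resize changes the input shapes of a compiled graph: it rejects concurrent calls via
// is_running_, snapshots the old dims, resizes the inputs and re-infers the kernels, and
// rolls the input shapes back if resizing the inputs or re-inferring the kernels fails.
// A minimal usage sketch (assuming the session exposes its input tensors, e.g. through
// GetInputs(), and the model has a single NHWC input):
//   std::vector<std::vector<int>> new_dims = {{1, 224, 224, 3}};
//   if (session->Resize(session->GetInputs(), new_dims) != RET_OK) {
//     // shapes were restored to their previous values
//   }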
1492 int LiteSession::Resize(const std::vector<mindspore::lite::Tensor *> &inputs,
1493                         const std::vector<std::vector<int>> &dims) {
1494   bool expected = false;
1495   if (!is_running_.compare_exchange_strong(expected, true)) {
1496     MS_LOG(ERROR) << "Multi-threading is not supported";
1497     return RET_ERROR;
1498   }
1499   std::vector<std::vector<int>> old_dims;
1500   for (size_t i = 0; i < inputs_.size(); ++i) {
1501     old_dims.push_back(inputs_[i]->shape());
1502   }
1503   auto ret = ResizeInputs(inputs, dims);
1504   if (ret != RET_OK) {
1505     ResetInputsShape(old_dims);
1506     is_running_.store(false);
1507     return ret;
1508   }
1509   ret = UpdateInputShapeMap();
1510   if (ret != RET_OK) {
1511     MS_LOG(ERROR) << "update input shape map failed.";
1512     return RET_ERROR;
1513   }
1514   if (infer_along_running_) {
1515     is_running_.store(false);
1516     return ret;
1517   }
1518 
1519   ret = ReSizeKernels(kernels_, isolate_input_map_);
1520   if (ret != RET_OK) {
1521     ResetInputsShape(old_dims);
1522     auto resize_ret = ReSizeKernels(kernels_);
1523     if (resize_ret != RET_OK) {
1524       MS_LOG(ERROR) << "restore kernel size failed! ret: " << resize_ret;
1525     }
1526     is_running_.store(false);
1527     return ret;
1528   }
1529 
1530   auto status = GraphOptimizePass(&kernels_);
1531   if (status != RET_OK) {
1532     MS_LOG(ERROR) << "GraphOptimizePass failed.";
1533     return RET_ERROR;
1534   }
1535 
1536   is_running_.store(false);
1537   return RET_OK;
1538 }
1539 
1540 int LiteSession::PreCheck(Model *model) {
1541   bool expected = false;
1542   if (!is_running_.compare_exchange_strong(expected, true)) {
1543     MS_LOG(ERROR) << "Multi-threading is not supported";
1544     return RET_ERROR;
1545   }
1546   if (model == nullptr) {
1547     MS_LOG(ERROR) << "The input model is nullptr.";
1548     return RET_PARAM_INVALID;
1549   }
1550   if (model->buf == nullptr) {
1551     MS_LOG(ERROR) << "The input model buf is nullptr.";
1552     return RET_PARAM_INVALID;
1553   }
1554   if (model->model_type_ != ModelType_MSLite) {
1555     // abstract base model
1556     if (!reinterpret_cast<AbstractBaseModel *>(model)->ModelVerify()) {
1557       MS_LOG(ERROR) << "wrong model input, please check";
1558       return RET_ERROR;
1559     }
1560   } else {
1561     // old routine, convert to abstract base model
1562     if (!reinterpret_cast<LiteModel *>(model)->ModelVerify()) {
1563       MS_LOG(ERROR) << "wrong model input, please check";
1564       return RET_ERROR;
1565     }
1566   }
1567 
1568 #ifndef ENABLE_FP16
1569   if (context_->GetDeviceInfo(DT_CPU).cpu_device_info_.enable_float16_) {
1570     MS_LOG(WARNING) << unsupport_fp16_log;
1571   }
1572 #endif
1573   return RET_OK;
1574 }
1575 
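// Chooses the executor implementation: with MindRT enabled, a plain Executor is used only for
// single-threaded, non-fp16, non-control-flow graphs; otherwise the graph outputs are isolated
// first and an actor-based MindrtExecutor is created.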
1576 int LiteSession::InitExecutor() {
1577   int ret;
1578 #ifdef ENABLE_MINDRT
1579   if (ms_context_->GetThreadNum() == 1 && !context_->IsCpuFloat16Enabled() && !is_control_flow_) {
1580     executor_ = new (std::nothrow) Executor();
1581   } else {
1582     ret = IsolateOutputTensor();
1583     if (ret != RET_OK) {
1584       MS_LOG(ERROR) << "Isolate output tensor failed.";
1585       return ret;
1586     }
1587     executor_ = new (std::nothrow) MindrtExecutor(&isolate_graph_output_map_, &isolate_input_map_);
1588   }
1589 #else
1590   executor_ = new (std::nothrow) Executor();
1591 #endif
1592   if (executor_ == nullptr) {
1593     MS_LOG(ERROR) << "New Executor failed";
1594     return RET_ERROR;
1595   }
1596 
1597   ret = executor_->Prepare(kernels_, inputs_, outputs_, context_.get());
1598   if (ret != RET_OK) {
1599     MS_LOG(ERROR) << "Prepare executor failed: " << ret;
1600     return ret;
1601   }
1602   return RET_OK;
1603 }
1604 
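// Decides whether the static runtime allocator (ahead-of-time memory plan) can be used.
// Returning RET_ERROR here simply means "fall back to the default allocator"; the plan is
// only enabled for ARM64 MindRT builds with a single subgraph, no parallelism, no training,
// and successful compile-time shape inference.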
1605 int LiteSession::RuntimeAllocatorValid() {
1606 #ifdef ENABLE_ARM32
1607   MS_LOG(DEBUG) << "Not support runtime allocator in arm32.";
1608   return RET_ERROR;
1609 #endif
1610 
1611 #ifndef ENABLE_MINDRT
1612   MS_LOG(DEBUG) << "Not support runtime allocator in converter.";
1613   return RET_ERROR;
1614 #endif
1615 
1616 #ifdef BFC_MEMORY
1617   MS_LOG(DEBUG) << "Not support runtime allocator when BFC_MEMORY on.";
1618   return RET_ERROR;
1619 #endif
1620 
1621   if ((context_->enable_parallel_ == true) || (context_->inter_op_parallel_num_ > 1)) {
1622     MS_LOG(DEBUG) << "Not support runtime allocator in subgraph parallel.";
1623     return RET_ERROR;
1624   }
1625   if (is_train_session_ == true) {
1626     MS_LOG(DEBUG) << "Not support runtime allocator in train session.";
1627     return RET_ERROR;
1628   }
1629   if (is_infershape_ != RET_OK) {
1630     MS_LOG(DEBUG) << "Not support runtime allocator in runtime-infershape.";
1631     return RET_ERROR;
1632   }
1633 #ifdef ENABLE_MINDRT
1634   if (kernels_.size() != 1) {
1635     MS_LOG(DEBUG) << "Not support runtime allocator in random subgraph sort";
1636     return RET_ERROR;
1637   }
1638 #endif
1639 #ifdef ENABLE_ARM64
1640   MS_LOG(DEBUG) << "support runtime allocator.";
1641   return RET_OK;
1642 #endif
1643   return RET_ERROR;
1644 }
1645 
1646 void LiteSession::RuntimeAllocatorInitGraphOutput() {
1647   AllocatorPtr default_allocator = context_->allocator;
1648   for (auto graph_out : isolate_graph_output_map_) {
1649     auto cal_t = graph_out.first;
1650     auto out_t = graph_out.second;
1651     if (cal_t->allocator() != runtime_allocator_ || out_t->allocator() != default_allocator) {
1652       continue;
1653     }
1654     out_t->set_allocator(runtime_allocator_);
1655     if (cal_t->data_type() != out_t->data_type()) {
1656       runtime_allocator_->MallocTensorData(out_t);
1657     }
1658   }
1659   return;
1660 }
1661 
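// Plans a subgraph's input tensors against the runtime allocator: tensor_ref_count tracks how
// many consumers of each tensor remain to be planned, and data_ref_count tracks the same per
// planned offset (so tensors aliasing one offset share its lifetime). When both counts reach
// zero, the offset is freed back to the plan.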
1662 void RuntimeAllocatorInitSubgraphInputs(const kernel::KernelExec *subgraph, const AllocatorPtr &default_allocator,
1663                                         const RuntimeAllocatorPtr &runtime_allocator,
1664                                         const std::unordered_map<Tensor *, Tensor *> &isolate_input_map,
1665                                         std::unordered_map<Tensor *, int> *tensor_ref_count,
1666                                         std::unordered_map<size_t, int> *data_ref_count) {
1667   MS_ASSERT(subgraph != nullptr && tensor_ref_count != nullptr && data_ref_count != nullptr);
1668   for (auto in_tensor : subgraph->in_tensors()) {
1669     auto iter = isolate_input_map.find(in_tensor);
1670     if (isolate_input_map.end() == iter) break;
1671     auto src_t = iter->second;
1672 
1673     if (src_t->data_type() == in_tensor->data_type()) {
1674       in_tensor->set_allocator(src_t->allocator());
1675       if (src_t->allocator() == runtime_allocator) {
1676         (*tensor_ref_count)[in_tensor] = in_tensor->init_ref_count();
1677         (*data_ref_count)[runtime_allocator->GetOffsetMap().at(src_t)] += in_tensor->init_ref_count();
1678         runtime_allocator->SetDataOffset(in_tensor, runtime_allocator->GetOffsetMap().at(src_t));
1679       }
1680     } else {
1681       if (in_tensor->allocator() == default_allocator) {
1682         in_tensor->set_allocator(runtime_allocator);
1683         runtime_allocator->MallocTensorData(in_tensor);
1684         (*tensor_ref_count)[in_tensor] = in_tensor->init_ref_count();
1685         (*data_ref_count)[runtime_allocator->GetOffsetMap().at(in_tensor)] = in_tensor->init_ref_count();
1686       }
1687     }
1688 
1689     if (src_t->allocator() != runtime_allocator) {
1690       continue;
1691     }
1692 
1693     (*tensor_ref_count)[src_t]--;
1694     (*data_ref_count)[runtime_allocator->GetOffsetMap().at(src_t)]--;
1695 
1696     if ((*tensor_ref_count)[src_t] <= 0) {
1697       if ((*data_ref_count)[runtime_allocator->GetOffsetMap().at(src_t)] <= 0) {
1698         runtime_allocator->FreeTensorData(src_t);
1699       }
1700     }
1701   }
1702 }
1703 
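// Walks the CPU subgraphs in execution order and builds the memory plan: each kernel's
// non-const outputs are "allocated" in the plan, and its inputs are "freed" once their
// reference counts drop to zero, letting the allocator pack intermediate tensors into one
// reusable arena.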
1704 void LiteSession::RuntimeAllocatorInitSubgraph() {
1705   AllocatorPtr default_allocator = context_->allocator;
1706   std::unordered_map<lite::Tensor *, int> tensor_ref_count;
1707   std::unordered_map<size_t, int> data_ref_count;
1708 
1709   for (auto subgraph : kernels_) {
1710     if (subgraph->desc().arch != kernel::KERNEL_ARCH::kCPU) {
1711       continue;
1712     }
1713 
1714     RuntimeAllocatorInitSubgraphInputs(subgraph, default_allocator, runtime_allocator_, isolate_input_map_,
1715                                        &tensor_ref_count, &data_ref_count);
1716 
1717     auto kernel_list = reinterpret_cast<kernel::SubGraphKernel *>(subgraph)->nodes();
1718     for (auto kernel : kernel_list) {
1719       /* malloc for output */
1720       for (auto tensor : kernel->out_tensors()) {
1721         if (tensor->allocator() != default_allocator || tensor->IsConst()) {
1722           continue;
1723         }
1724         tensor->set_allocator(runtime_allocator_);
1725         runtime_allocator_->MallocTensorData(tensor);
1726         tensor_ref_count[tensor] = tensor->init_ref_count();
1727         data_ref_count[runtime_allocator_->GetOffsetMap().at(tensor)] = tensor->init_ref_count();
1728       }
1729 
1730       /* free input after run */
1731       for (auto tensor : kernel->in_tensors()) {
1732         if (tensor->allocator() != runtime_allocator_) {
1733           continue;
1734         }
1735         tensor_ref_count[tensor]--;
1736         data_ref_count[runtime_allocator_->GetOffsetMap().at(tensor)]--;
1737 
1738         if (tensor_ref_count[tensor] <= 0 && tensor->allocator() == runtime_allocator_) {
1739           if (data_ref_count[runtime_allocator_->GetOffsetMap().at(tensor)] <= 0) {
1740             runtime_allocator_->FreeTensorData(tensor);
1741           }
1742         }
1743       }
1744     }
1745   }
1746   return;
1747 }
1748 
1749 int LiteSession::InitRuntimeAllocator() {
1750   if (RuntimeAllocatorValid() != RET_OK) {
1751     return RET_OK;
1752   }
1753   if (ExistCustomCpuKernel()) {
1754     return RET_OK;
1755   }
1756   if (runtime_allocator_ == nullptr) {
1757     runtime_allocator_ = std::shared_ptr<RuntimeAllocator>(new (std::nothrow) RuntimeAllocator());
1758   } else {
1759     runtime_allocator_->Clear(context_->allocator);
1760   }
1761   if (runtime_allocator_ == nullptr) {
1762     MS_LOG(ERROR) << "RuntimeAllocator is null.";
1763     return RET_ERROR;
1764   }
1765 
1766   RuntimeAllocatorInitSubgraph();
1767 
1768   RuntimeAllocatorInitGraphOutput();
1769 
1770   auto ret = RuntimeAllocatorSetData();
1771   if (ret != RET_OK) {
1772     MS_LOG(ERROR) << "using optimize allocator failed.";
1773     return ret;
1774   }
1775   return RET_OK;
1776 }
1777 
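// Materializes the memory plan: one contiguous block is allocated and every planned tensor's
// data pointer is set to the block base plus its planned offset.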
1778 int LiteSession::RuntimeAllocatorSetData() {
1779   void *data = runtime_allocator_->MallocOptData();
1780   if (data == nullptr) {
1781     MS_LOG(ERROR) << "malloc optimize data failed.";
1782     return RET_ERROR;
1783   }
1784   int8_t *int8_data = reinterpret_cast<int8_t *>(data);
1785   auto offset_map = runtime_allocator_->GetOffsetMap();
1786 
1787   for (auto &iter : offset_map) {
1788     auto tensor = iter.first;
1789     if (tensor->allocator() != runtime_allocator_) {
1790       return RET_ERROR;
1791     }
1792     tensor->set_data(int8_data + iter.second);
1793   }
1794   return RET_OK;
1795 }
1796 
1797 int LiteSession::InitGPURuntime() {
1798   if (context_->IsDeviceTypeEnabled(DT_CPU)) {
1799     CpuBindMode cpu_bind_mode = context_->GetDeviceInfo(DT_CPU).cpu_device_info_.cpu_bind_mode_;
1800     ThreadPool *thread_pool = this->context_->thread_pool_;
1801     if (thread_pool != nullptr) {
1802       thread_pool->SetProcessAffinity(static_cast<BindMode>(cpu_bind_mode));
1803     }
1804   }
1805 #ifdef GPU_OPENCL
1806   if (this->context_->IsDeviceTypeEnabled(DT_GPU)) {
1807     opencl_runtime_wrapper_ = new (std::nothrow) opencl::OpenCLRuntimeInnerWrapper();
1808     if (opencl_runtime_wrapper_ == nullptr) {
1809       MS_LOG(ERROR) << "create OpenCLRuntimeInnerWrapper failed";
1810       return RET_ERROR;
1811     }
1812     const auto &gpu_device_info = this->context_->GetDeviceInfo(DT_GPU).gpu_device_info_;
1813     auto opencl_runtime = opencl_runtime_wrapper_->GetInstance();
1814     opencl_runtime->SetGLTextureEnable(gpu_device_info.enable_gl_texture_);
1815     opencl_runtime->SetGLContext(gpu_device_info.gl_context_);
1816     opencl_runtime->SetGLDisplay(gpu_device_info.gl_display_);
1817     if (opencl_runtime->Init() != RET_OK) {
1818       if (gpu_device_info.enable_gl_texture_) {
1819         MS_LOG(ERROR) << "Init OpenCL runtime failed; enable_gl_texture is set to true, which only supports GPU mode.";
1820         return RET_ERROR;
1821       }
1822       this->context_->device_list_ = {{DT_CPU, {gpu_device_info.enable_float16_, MID_CPU}}};
1823       MS_LOG(WARNING) << "Init OpenCL runtime failed, change to CPU mode.";
1824     } else {
1825       MS_LOG(INFO) << "Init OpenCL runtime success.";
1826     }
1827 
1828     opencl_runtime->SetFp16Enable(gpu_device_info.enable_float16_);
1829 
1830     /* check whether the chip supports shared memory */
1831     auto enable_arm_import_memory = opencl_runtime->isExtensionEnable(EXT_ARM_IMPORT_MEMORY_HOST);
1832     if (!enable_arm_import_memory) {
1833       MS_LOG(WARNING) << "GPU does not support shared memory!";
1834     }
1835   }
1836 #endif
1837   // Binding cores would interfere with the OpenCL driver's scheduling, so restore NO_BIND here.
1838   if (context_->IsDeviceTypeEnabled(DT_CPU)) {
1839     ThreadPool *thread_pool = this->context_->thread_pool_;
1840     if (thread_pool != nullptr) {
1841       thread_pool->SetProcessAffinity(static_cast<BindMode>(NO_BIND));
1842     }
1843   }
1844   return RET_OK;
1845 }
1846 }  // namespace lite
1847 
1848 lite::LiteSession *lite::LiteSession::CreateSession(const std::shared_ptr<InnerContext> &context) {
1849   auto session = new (std::nothrow) lite::LiteSession();
1850   if (session == nullptr) {
1851     MS_LOG(ERROR) << "create session failed";
1852     return nullptr;
1853   }
1854   auto ret = session->Init(context);
1855   if (ret != mindspore::lite::RET_OK) {
1856     MS_LOG(ERROR) << "init session failed";
1857     delete session;
1858     return nullptr;
1859   }
1860   return session;
1861 }
1862 
1863 lite::LiteSession *lite::LiteSession::CreateSession(const char *model_buf, size_t size,
1864                                                     const std::shared_ptr<InnerContext> &context) {
1865   auto *session = lite::LiteSession::CreateSession(context);
1866   if (session == nullptr) {
1867     MS_LOG(ERROR) << "Create session failed";
1868     return nullptr;
1869   }
1870   auto ret = reinterpret_cast<lite::LiteSession *>(session)->LoadModelAndCompileByBuf(
1871     model_buf, mindspore::ModelType::kMindIR_Lite, size);
1872   if (ret != RET_OK) {
1873     MS_LOG(ERROR) << "Init session failed";
1874     delete session;
1875     return nullptr;
1876   }
1877   return session;
1878 }
1879 
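// Determines whether the incoming buffer can be used as a MindSpore Lite flatbuffer.
// kMindIR_Lite buffers are passed through unchanged; kMindIR buffers are accepted if the lite
// schema verifier recognizes them, otherwise they are converted at runtime (RUNTIME_CONVERT)
// or passed through for cloud fusion inference builds.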
1880 mindspore::ModelType lite::LiteSession::LoadModelByBuff(const char *model_buf, const size_t &buf_size, char **lite_buf,
1881                                                         size_t *size, mindspore::ModelType model_type) {
1882   if (model_type == mindspore::ModelType::kMindIR_Lite) {
1883     *size = buf_size;
1884     *lite_buf = const_cast<char *>(model_buf);
1885     return mindspore::ModelType::kMindIR_Lite;
1886   }
1887 
1888   if (model_type != mindspore::ModelType::kMindIR) {
1889     return mindspore::ModelType::kUnknownType;
1890   }
1891 
1892   flatbuffers::Verifier verify((const uint8_t *)model_buf, buf_size, INT32_MAX, INT32_MAX);
1893   auto version_verify = lite::LiteModel::VersionVerify(&verify);
1894   if (version_verify != SCHEMA_INVALID) {
1895     MS_LOG(DEBUG) << "The kMindIR type model buffer is a valid mslite model buffer";
1896     *size = buf_size;
1897     *lite_buf = const_cast<char *>(model_buf);
1898     return mindspore::ModelType::kMindIR_Lite;
1899   }
1900   MS_LOG(WARNING) << "Invalid mslite model.";
1901 
1902 #ifdef RUNTIME_CONVERT
1903   *lite_buf = RuntimeConvert(model_buf, buf_size, size, ms_context_);
1904 #else
1905   MS_LOG(WARNING) << "Please enable runtime convert.";
1906 #endif
1907 #ifdef ENABLE_CLOUD_FUSION_INFERENCE
1908   *size = buf_size;
1909   *lite_buf = const_cast<char *>(model_buf);
1910 #endif
1911   return mindspore::ModelType::kMindIR;
1912 }
1913 
1914 const char *lite::LiteSession::LoadModelByPath(const std::string &file, mindspore::ModelType model_type, size_t *size,
1915                                                bool use_mmap) {
1916   size_t buf_size;
1917   char *model_buf;
1918   if (use_mmap) {
1919     model_buf = reinterpret_cast<char *>(lite::ReadFileByMmap(file.c_str(), &buf_size));
1920   } else {
1921     model_buf = lite::ReadFile(file.c_str(), &buf_size);
1922   }
1923   if (model_buf == nullptr) {
1924     MS_LOG(ERROR) << "The model path is invalid";
1925     return model_buf;
1926   }
1927 
1928   char *lite_buf = nullptr;
1929   auto buf_model_type = LoadModelByBuff(model_buf, buf_size, &lite_buf, size, model_type);
1930   if (buf_model_type == mindspore::ModelType::kUnknownType || lite_buf == nullptr) {
1931     if (use_mmap) {
1932       lite::UnmapMmapBuffer(const_cast<void *>(static_cast<const void *>(model_buf)), buf_size);
1933     } else {
1934       delete[] model_buf;
1935     }
1936     model_buf = nullptr;
1937     return nullptr;
1938   }
1939 
1940   return lite_buf;
1941 }
1942 
1943 std::string lite::LiteSession::ParseWeightPath() {
1944   std::string weight_path = "";
1945   if (config_info_ != nullptr) {
1946     auto ms_weight = config_info_->find(kConfigModelFileSection);
1947     if (ms_weight != config_info_->end()) {
1948       auto ms_weight_iter = ms_weight->second;
1949       if (ms_weight_iter.find(kConfigMindIRPathKey) != ms_weight_iter.end()) {
1950         weight_path = ms_weight_iter[kConfigMindIRPathKey];
1951       }
1952     }
1953   }
1954   return weight_path;
1955 }
1956 
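// Copies new_tensor's data into the const weight tensor orig_tensor, reshaping it if the
// shapes differ. If new_tensor carries no data yet, it is first filled by cyclically repeating
// orig_tensor's data up to the new size.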
1957 int lite::LiteSession::ReshapeWeightTensor(lite::Tensor *orig_tensor, lite::Tensor *new_tensor) {
1958   if (orig_tensor->data_type() != new_tensor->data_type()) {
1959     MS_LOG(ERROR) << "Cannot reshape tensor of different type: " << new_tensor->tensor_name();
1960     return RET_PARAM_INVALID;
1961   }
1962 
1963   if (orig_tensor->category() != lite::Category::CONST_TENSOR) {
1964     MS_LOG(ERROR) << "Cannot reshape non const tensor: " << new_tensor->tensor_name();
1965     return RET_ERROR;
1966   }
1967 
1968   auto orig_size = orig_tensor->Size();
1969   uint8_t *new_data = reinterpret_cast<uint8_t *>(new_tensor->data());
1970   if (new_data == nullptr) {
1971     // Copy original data into new_tensor
1972     new_data = reinterpret_cast<uint8_t *>(new_tensor->MutableData());
1973     if (new_data == nullptr) {
1974       MS_LOG(ERROR) << "Allocation of data failed for tensor " << new_tensor->tensor_name();
1975       return RET_ERROR;
1976     }
1977     if (orig_size == 0) {
1978       MS_LOG(ERROR) << "Operation failed: both the new tensor and the original one have no data";
1979       return RET_ERROR;
1980     }
1981     uint8_t *orig_data = reinterpret_cast<uint8_t *>(orig_tensor->data());
1982     for (unsigned int loc = 0; loc < new_tensor->Size(); loc++) {
1983       new_data[loc] = orig_data[loc % orig_size];
1984     }
1985   }
1986 
1987   if (orig_tensor->shape() != new_tensor->shape()) {
1988     orig_tensor->FreeData();
1989     orig_tensor->set_data(nullptr);
1990     orig_tensor->set_shape(new_tensor->shape());
1991   }
1992 
1993   uint8_t *dst_data = reinterpret_cast<uint8_t *>(orig_tensor->MutableData());
1994   if (dst_data == nullptr) {
1995     MS_LOG(ERROR) << "Allocation of Data Failed";
1996     return RET_ERROR;
1997   }
1998   std::copy(new_data, new_data + orig_tensor->Size(), dst_data);
1999   return RET_OK;
2000 }
2001 
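// Replaces weight tensors by name with the provided tensors, then re-runs kernel resize; if
// the session was in eval mode it is switched back to eval afterwards. A minimal sketch
// (assuming `new_weight` is a CONST_TENSOR whose tensor_name() matches a weight in the model):
//   std::vector<lite::Tensor *> to_update = {new_weight};
//   if (session->UpdateWeights(to_update) != RET_OK) {
//     // either a tensor name was not found or the kernel resize failed
//   }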
2002 int lite::LiteSession::UpdateWeights(std::vector<lite::Tensor *> modify_tensors) {
2003   unsigned int num_of_found_tensors = 0;
2004   for (auto modify : modify_tensors) {
2005     if (modify == nullptr) {
2006       MS_LOG(ERROR) << "Tensor is nullptr";
2007       return RET_PARAM_INVALID;
2008     }
2009     for (auto tensor : tensors_) {
2010       if (modify->tensor_name() == tensor->tensor_name()) {
2011         if (tensor->Size() != modify->Size()) {
2012           model_buff_changed_ = true;
2013         }
2014         auto ret = ReshapeWeightTensor(tensor, modify);
2015         num_of_found_tensors++;
2016         if (ret != RET_OK) {
2017           model_buff_changed_ = false;
2018           return ret;
2019         }
2020         break;
2021       }
2022     }
2023   }
2024   if (num_of_found_tensors != modify_tensors.size()) {
2025     MS_LOG(ERROR) << "Did not find all the given tensors in the model";
2026     return RET_ERROR;
2027   }
2028   auto ret = ReSizeKernels(kernels_);
2029   if (ret != RET_OK) {
2030     MS_LOG(ERROR) << "Resize kernels fail!";
2031     model_buff_changed_ = false;
2032     return ret;
2033   }
2034 
2035   bool is_eval = IsEval();
2036   if (is_eval) {
2037     ret = Eval();
2038   }
2039   return ret;
2040 }
2041 
2042 #ifdef ENABLE_LITE_HELPER
2043 int lite::LiteSession::LoadModelAndCompileByBuf(const char *model_buf, mindspore::ModelType model_type,
2044                                                 const size_t &buf_size,
2045                                                 mindspore::infer::helper::InferHelpers *infer_helpers) {
2046 #else
2047 int lite::LiteSession::LoadModelAndCompileByBuf(const char *model_buf, mindspore::ModelType model_type,
2048                                                 const size_t &buf_size) {
2049 #endif
2050   auto status = lite::PackWeightManager::GetInstance()->InitPackWeightManager(model_buf, buf_size, &model_id_,
2051                                                                               &runner_id_, config_info_);
2052   if (status != RET_OK) {
2053     MS_LOG(ERROR) << "InitPackWeightByBuf failed.";
2054     return RET_ERROR;
2055   }
2056   auto new_model_buf =
2057     lite::PackWeightManager::GetInstance()->GetSharedModelBuf(model_buf, model_id_, config_info_, &is_shared_weight_);
2058   if (new_model_buf == nullptr) {
2059     MS_LOG(ERROR) << "get shared model buf is nullptr.";
2060     return RET_ERROR;
2061   }
2062   size_t lite_buf_size = 0;
2063   char *lite_buf = nullptr;
2064   auto buf_model_type = LoadModelByBuff(new_model_buf, buf_size, &lite_buf, &lite_buf_size, model_type);
2065   if (buf_model_type == mindspore::ModelType::kUnknownType || lite_buf == nullptr) {
2066     MS_LOG(ERROR) << "Invalid model_buf";
2067     return RET_ERROR;
2068   }
2069   auto weight_path = ParseWeightPath();
2070 #ifdef ENABLE_LITE_HELPER
2071   auto *model = lite::ImportFromBuffer(lite_buf, lite_buf_size, true, model_type, weight_path, infer_helpers);
2072 #else
2073   auto *model = lite::ImportFromBuffer(lite_buf, lite_buf_size, true, model_type, weight_path);
2074 #endif
2075   if (model == nullptr) {
2076     MS_LOG(ERROR) << "Import model failed";
2077     return RET_ERROR;
2078   }
2079   (reinterpret_cast<lite::LiteModel *>(model))->set_keep_model_buf(keep_model_buf_);
2080   auto ret = CompileGraph(model);
2081   model->buf = nullptr;
2082   if (ret != lite::RET_OK) {
2083     MS_LOG(ERROR) << "Compile model failed";
2084     delete model;
2085     return RET_ERROR;
2086   }
2087   set_model(model);
2088   return RET_OK;
2089 }
2090 
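// Loads a model file (optionally via mmap), registers it with the pack-weight manager so
// identical weights can be shared across sessions, imports it, and compiles the graph. When
// the weights are shared, the freshly read buffer is released immediately and the shared copy
// is used instead.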
2091 int lite::LiteSession::LoadModelAndCompileByPath(const std::string &model_path, mindspore::ModelType model_type) {
2092   size_t model_size;
2093   bool use_mmap = IsMmapEnable();
2094   auto model_buf = LoadModelByPath(model_path, model_type, &model_size, use_mmap);
2095   if (model_buf == nullptr) {
2096     MS_LOG(ERROR) << "Read model file failed";
2097     return RET_ERROR;
2098   }
2099   auto status = lite::PackWeightManager::GetInstance()->InitPackWeightManager(model_buf, model_size, &model_id_,
2100                                                                               &runner_id_, config_info_);
2101   if (status != RET_OK) {
2102     MS_LOG(ERROR) << "InitPackWeightByBuf failed.";
2103     return RET_ERROR;
2104   }
2105   auto new_model_buf =
2106     lite::PackWeightManager::GetInstance()->GetSharedModelBuf(model_buf, model_id_, config_info_, &is_shared_weight_);
2107   if (new_model_buf == nullptr) {
2108     MS_LOG(ERROR) << "get shared model buf is nullptr.";
2109     return RET_ERROR;
2110   }
2111   if (is_shared_weight_) {
2112     if (use_mmap) {
2113       lite::UnmapMmapBuffer(const_cast<void *>(static_cast<const void *>(model_buf)), model_size);
2114     } else {
2115       delete[] model_buf;
2116     }
2117     model_buf = nullptr;
2118   }
2119   auto *model = lite::ImportFromBuffer(new_model_buf, model_size, true, model_type, model_path);
2120   if (model == nullptr) {
2121     MS_LOG(ERROR) << "Import model failed";
2122     return RET_ERROR;
2123   }
2124   if (use_mmap && new_model_buf == model_buf) {
2125     reinterpret_cast<lite::LiteModel *>(model)->model_buf_by_mmap_ = true;
2126   }
2127   (reinterpret_cast<lite::LiteModel *>(model))->set_keep_model_buf(true);
2128   auto ret = CompileGraph(model);
2129   if (ret != lite::RET_OK) {
2130     MS_LOG(ERROR) << "Compile model failed";
2131     model->buf = nullptr;
2132     delete model;
2133     return RET_ERROR;
2134   }
2135   set_model(model);
2136   return RET_OK;
2137 }
2138 
2139 bool lite::LiteSession::IsMmapEnable() const {
2140 #if !defined(_WIN32) && !defined(_WIN64) && !defined(MS_COMPILE_IOS)
2141   if (delegate_device_type_ == static_cast<int>(DT_NPU)) {
2142     return false;
2143   }
2144   return true;
2145 #else
2146   return false;
2147 #endif
2148 }
2149 }  // namespace mindspore
2150