1 /**
2  * Copyright 2020-2023 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/litert/lite_session.h"
18 #include <set>
19 #include <vector>
20 #include <utility>
21 #include <fstream>
22 #include <algorithm>
23 #include "src/litert/pack_weight_manager.h"
24 #include "src/litert/runtime_pass.h"
25 #include "include/errorcode.h"
26 #include "src/common/log_adapter.h"
27 #include "src/litert/scheduler.h"
28 #include "src/litert/inner_allocator.h"
29 #include "src/litert/executor.h"
30 #include "src/common/context_util.h"
31 #include "src/common/utils.h"
32 #include "src/common/graph_util.h"
33 #include "src/common/tensor_util.h"
34 #include "src/common/file_utils.h"
35 #include "src/common/mmap_utils.h"
36 #include "src/litert/lite_model.h"
37 #include "src/litert/weight_decoder.h"
38 #include "src/litert/runtime_allocator.h"
39 #include "src/litert/kernel_exec_util.h"
40 #include "src/litert/cpu_info.h"
41 #ifndef CUSTOM_KERNEL_REGISTRY_CLIP
42 #include "src/registry/register_kernel_impl.h"
43 #endif
44 #ifdef ENABLE_MINDRT
45 #include "src/litert/mindrt_executor.h"
46 #endif
47 #ifdef SUPPORT_NPU
48 #include "src/litert/delegate/npu/npu_delegate.h"
49 #endif
50 #ifdef GPU_OPENCL
51 #include "src/litert/kernel/opencl/opencl_subgraph.h"
52 #endif
53 #ifdef GPU_TENSORRT
54 #include "src/litert/delegate/tensorrt/tensorrt_delegate.h"
55 #endif
56 #ifdef SUPPORT_NNAPI
57 #include "src/litert/delegate/nnapi/nnapi_delegate.h"
58 #endif
59 #ifdef ENABLE_COREML
60 #include "src/litert/delegate/coreml/coreml_delegate.h"
61 #endif
62 #include "src/litert/runtime_convert.h"
63 #include "extendrt/mindir_loader/model_loader.h"
64 #ifndef __ANDROID__
65 #include "kernel/ascend/plugin/ascend_kernel_plugin.h"
66 #endif
67 #if defined(PARALLEL_INFERENCE) && defined(ENABLE_MINDRT)
68 #include "thread/parallel_thread_pool_manager.h"
69 #endif
70 #include "src/litert/runtime_packed_node_pass.h"
71 #ifdef SUPPORT_NNRT
72 #include "src/litert/delegate/nnrt/nnrt_delegate.h"
73 #include "src/litert/delegate/nnrt/nnrt_allocator.h"
74 #endif
75 
76 using AbstractBaseModel = mindspore::infer::AbstractBaseModel;
77 
78 namespace mindspore {
79 #ifdef USE_GLOG
80 extern "C" {
81 extern void mindspore_log_init();
82 }
83 #endif
84 namespace lite {
85 namespace {
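// Checks whether any registered custom kernel creator targets the CPU arch
// (only meaningful when custom kernel registry support is compiled in).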
86 bool ExistCustomCpuKernel() {
87 #ifndef CUSTOM_KERNEL_REGISTRY_CLIP
88   const std::string kArchCPU = "CPU";
89   auto custom_kernel_creators = registry::RegistryKernelImpl::GetInstance()->GetCustomKernelCreators();
90   for (const auto &custom_kernel_creator : custom_kernel_creators) {  // <provider, <arch, <type, CreateKernel*>>>
91     if (custom_kernel_creator.second.empty()) {
92       continue;
93     }
94     if (std::any_of(
95           custom_kernel_creator.second.begin(), custom_kernel_creator.second.end(),
96           [kArchCPU](const std::pair<std::string, std::unordered_map<std::string, registry::CreateKernel *>> &pair) {
97             return pair.first == kArchCPU && !pair.second.empty();
98           })) {
99       return true;
100     }
101   }
102 #endif
103   return false;
104 }
105 }  // namespace
106 
107 LiteSession::LiteSession() {
108 #ifdef USE_GLOG
109   mindspore::mindspore_log_init();
110 #endif
111   this->is_running_.store(false);
112 }
113 
114 int LiteSession::CheckTensorValid(lite::Tensor *dst_tensor) {
115   MS_ASSERT(dst_tensor != nullptr);
116   if (dst_tensor->data_type() == kObjectTypeTensorType) {
117     return RET_OK;
118   }
119   if (dst_tensor->IsGraphInput() || dst_tensor->IsGraphOutput()) {
120     return RET_OK;
121   }
122   if (dst_tensor->IsConst() == false && dst_tensor->data() != nullptr) {
123     return RET_ERROR;
124   }
125   return RET_OK;
126 }
127 
128 void LiteSession::ConvertTensorsQuantParam(const schema::Tensor *src_tensor, lite::Tensor *dst_tensor) {
129   MS_ASSERT(src_tensor != nullptr);
130   MS_ASSERT(dst_tensor != nullptr);
131   auto quant_params = src_tensor->quantParams();
132   if (quant_params != nullptr) {
133     for (size_t j = 0; j < quant_params->size(); j++) {
134       auto quant_param = quant_params->Get(j);
135       LiteQuantParam quant_arg{};
136       if (quant_param == nullptr) {
137         quant_arg.inited = false;
138       } else {
139         quant_arg.inited = true;
140         quant_arg.bitNum = quant_param->numBits();
141         quant_arg.scale = quant_param->scale();
142         quant_arg.zeroPoint = quant_param->zeroPoint();
143         quant_arg.var_corr = quant_param->varCorr();
144         quant_arg.mean_corr = quant_param->meanCorr();
145         quant_arg.roundType = quant_param->roundType();
146         quant_arg.multiplier = quant_param->multiplier();
147         quant_arg.dstDtype = quant_param->dstDtype();
148         quant_arg.min = quant_param->min();
149         quant_arg.max = quant_param->max();
150       }
151       dst_tensor->AddQuantParam(quant_arg);
152     }
153   }
154   auto quant_clusters = src_tensor->quantClusters();
155   if (quant_clusters != nullptr) {
156     std::vector<float> clusters;
157     for (size_t j = 0; j < quant_clusters->size(); j++) {
158       clusters.push_back(quant_clusters->Get(j));
159     }
160     dst_tensor->set_quant_clusters(clusters);
161   }
162 }
163 
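// Fills dst_tensor with the data of the schema tensor at tensor_index:
// tensor lists are decoded, compressed weights are decompressed through WeightDecoder,
// and ordinary tensors take over the released schema data buffer.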
164 int LiteSession::ConvertTensorsData(const lite::LiteModel *model, size_t tensor_index, lite::Tensor *dst_tensor) {
165   MS_ASSERT(model != nullptr);
166   MS_ASSERT(dst_tensor != nullptr);
167   auto src_tensor = model->GetSchemaTensor(tensor_index);
168   if (src_tensor == nullptr || src_tensor->handler() == nullptr || src_tensor->data() == nullptr ||
169       src_tensor->length() == 0) {
170     MS_LOG(DEBUG) << "No valid data converted.";
171     return RET_OK;
172   }
173 
174   /* tensor list convert */
175   if (dst_tensor->data_type() == kObjectTypeTensorType) {
176     const int *src_data = reinterpret_cast<const int *>(src_tensor->data());
177     return DecodeTensorLsit(dst_tensor, src_data, src_tensor->length());
178   }
179 
180   /* normal tensor check */
181   auto shape_info = dst_tensor->shape();
182   if (shape_info.end() !=
183       std::find_if(shape_info.begin(), shape_info.end(), [](const int shape) { return shape <= 0; })) {
184     MS_LOG(ERROR) << "Invalid shape size, tensor name: " << src_tensor->handler()->name();
185     return RET_ERROR;
186   }
187 
188   int compress_type = src_tensor->handler()->weightQuantCompressType();
189   int ret = RET_NO_CHANGE;
190   if (compress_type != kFSEInfer) {
191     ret = WeightDecoder::DecompressTensor(*src_tensor, dst_tensor);
192   }
193   if (ret == RET_NO_CHANGE) {
194     if (dst_tensor->Size() == 0 || src_tensor->length() < dst_tensor->Size()) {
195       MS_LOG(ERROR) << "Tensor data shape invalid";
196       return RET_ERROR;
197     }
198     auto data_pair = src_tensor->ReleaseData();
199     dst_tensor->set_data(data_pair.second);
200     dst_tensor->set_own_data(data_pair.first);
201   } else if (ret != RET_OK) {
202     MS_LOG(ERROR) << "Decompress tensor data failed: " << ret;
203     return ret;
204   }
205   return RET_OK;
206 }
207 
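// Creates a lite::Tensor (or a TensorList) from a schema tensor, validating the data type
// and shape and carrying over the tensor name and weight-compression information.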
208 lite::Tensor *LiteSession::ConvertTensor(const schema::Tensor &src_tensor) {
209   int32_t data_type = src_tensor.dataType();
210   if (data_type <= kTypeUnknown || data_type >= kMonadTypeEnd) {
211     MS_LOG(ERROR) << "invalid data type. " << data_type;
212     return nullptr;
213   }
214   auto src_category = TensorCategory(src_tensor);
215   std::vector<int> shape;
216   if (src_tensor.dims() == nullptr) {
217     MS_LOG(DEBUG) << "Dims of src_tensor is nullptr";
218   }
219   if (src_tensor.dims() != nullptr) {
220     if (src_tensor.dataType() == kObjectTypeString && src_tensor.data() != nullptr) {
221       shape.push_back(src_tensor.data()->size());
222     } else {
223       for (size_t j = 0; j < src_tensor.dims()->size(); j++) {
224         shape.push_back(src_tensor.dims()->data()[j]);
225       }
226     }
227     if (std::any_of(shape.begin(), shape.end(), [](const int &element) { return element < 0 && element != -1; })) {
228       MS_LOG(ERROR) << "Dims of src_tensor is unsupported";
229       return nullptr;
230     }
231   }
232   lite::Tensor *dst_tensor = nullptr;
233   if (TypeId(data_type) == kObjectTypeTensorType) {
234     MS_CHECK_TRUE_RET(src_tensor.data() != nullptr, nullptr);
235     MS_CHECK_TRUE_RET(src_tensor.data()->size() > 0, nullptr);
236     auto src_data = src_tensor.data()->data();
237     dst_tensor = CreateTensorList(shape, src_category, src_data);
238   } else {
239     dst_tensor = new (std::nothrow)
240       Tensor(TypeId(data_type), shape, static_cast<mindspore::Format>(src_tensor.format()), src_category);
241   }
242   if (dst_tensor == nullptr) {
243     MS_LOG(ERROR) << "create dst_tensor is nullptr.";
244     return nullptr;
245   }
246   if (src_tensor.name() != nullptr) {
247     dst_tensor->set_tensor_name(src_tensor.name()->str());
248   }
249   auto compress_type = static_cast<CompressType>(src_tensor.weightQuantCompressType());
250   if (compress_type == kFSEInfer) {
251     dst_tensor->set_compress_type(static_cast<CompressType>(compress_type));
252     dst_tensor->set_compressed_size(src_tensor.data()->size());
253   }
254   return dst_tensor;
255 }
256 
257 int LiteSession::ConvertTensors(const lite::Model *model) {
258   MS_ASSERT(model != nullptr);
259   auto lite_model = reinterpret_cast<const lite::LiteModel *>(model);
260   uint32_t tensor_count = model->graph_.all_tensors_.size();
261   auto model_input_indices = model->graph_.input_indices_;
262   auto model_output_indices = model->graph_.output_indices_;
263 
264   for (uint32_t i = 0; i < tensor_count; ++i) {
265     auto *src_tensor = model->graph_.all_tensors_[i];
266     if (src_tensor == nullptr) {
267       MS_LOG(ERROR) << i << "th tensor in model is nullptr";
268       return RET_NULL_PTR;
269     }
270     auto *dst_tensor = ConvertTensor(*src_tensor);
271     if (dst_tensor == nullptr) {
272       MS_LOG(ERROR) << "Convert new " << i << "th tensor failed!";
273       return RET_NULL_PTR;
274     }
275     auto ret = ConvertTensorsData(lite_model, i, dst_tensor);
276     if (ret != RET_OK) {
277       MS_LOG(ERROR) << "Convert data of " << i << "th tensor failed";
278       delete dst_tensor;
279       return ret;
280     }
281     ConvertTensorsQuantParam(src_tensor, dst_tensor);
282     if (IsContain(model_input_indices, i)) {
283       dst_tensor->set_category(Category::GRAPH_INPUT);
284     }
285     if (IsContain(model_output_indices, i)) {
286       // a tensor that is both an input and an output is treated as an input.
287       if (!dst_tensor->IsGraphInput()) {
288         dst_tensor->set_category(Category::GRAPH_OUTPUT);
289       }
290     }
291 
292     ret = CheckTensorValid(dst_tensor);
293     if (ret != RET_OK) {
294       MS_LOG(ERROR) << "Check " << i << "th tensor failed";
295       delete dst_tensor;
296       return ret;
297     }
298 
299     this->tensors_.emplace_back(dst_tensor);
300   }
301   return RET_OK;
302 }
303 
304 void LiteSession::InitGraphInputTensors(const lite::Model *model) {
305   MS_ASSERT(model != nullptr);
306   auto graph_in_size = model->graph_.input_indices_.size();
307   for (size_t i = 0; i < graph_in_size; ++i) {
308     auto in_tensor_idx = model->graph_.input_indices_[i];
309     MS_ASSERT(in_tensor_idx < this->tensors_.size());
310     auto *in_tensor = this->tensors_.at(in_tensor_idx);
311     MS_ASSERT(in_tensor != nullptr);
312     this->inputs_.emplace_back(in_tensor);
313   }
314 }
315 
316 void LiteSession::InitGraphInputMSTensors() {
317   MS_ASSERT(this->input_vec_.empty());
318   for (auto &input_tensor : this->inputs_) {
319     MS_ASSERT(input_tensor != nullptr);
320     this->input_vec_.emplace_back(input_tensor);
321   }
322 }
323 
324 void LiteSession::InitGraphOutputTensors(const lite::Model *model) {
325   MS_ASSERT(model != nullptr);
326   MS_ASSERT(this->outputs_.empty());
327   auto graph_out_size = model->graph_.output_indices_.size();
328   for (size_t i = 0; i < graph_out_size; ++i) {
329     auto out_tensor_idx = model->graph_.output_indices_[i];
330     MS_ASSERT(out_tensor_idx < this->tensors_.size());
331     auto *out_tensor = this->tensors_.at(out_tensor_idx);
332     MS_ASSERT(out_tensor != nullptr);
333     this->outputs_.emplace_back(out_tensor);
334   }
335 }
336 
337 void LiteSession::InitGraphInputMap(const lite::Model *model) {
338   MS_ASSERT(model != nullptr);
339   MS_ASSERT(this->input_map_.empty());
340   MS_ASSERT(this->input_shape_map_.empty());
341   auto graph_input_node_indexes = GetGraphInputNodes(model);
342   auto graph_in_size = model->graph_.input_indices_.size();
343   for (auto in_node_index : graph_input_node_indexes) {
344     auto in_node = model->graph_.all_nodes_[in_node_index];
345     MS_ASSERT(in_node != nullptr);
346     auto in_size = in_node->input_indices_.size();
347     for (size_t i = 0; i < in_size; ++i) {
348       if (this->input_map_.find(in_node->name_ + std::to_string(i)) != this->input_map_.end()) {
349         MS_LOG(ERROR) << "duplicate input " << in_node->name_ + std::to_string(i) << " in input_map_";
350         return;
351       }
352       auto in_tensor_index = size_t(in_node->input_indices_[i]);
353       bool is_graph_input = false;
354       for (size_t j = 0; j < graph_in_size; ++j) {
355         if (in_tensor_index == model->graph_.input_indices_[j]) {
356           is_graph_input = true;
357           break;
358         }
359       }
360       if (!is_graph_input) {
361         continue;
362       }
363       MS_ASSERT(in_tensor_index < this->tensors_.size());
364       auto *in_tensor = this->tensors_.at(in_tensor_index);
365       if (in_tensor == nullptr) {
366         MS_LOG(ERROR) << "in_tensor is null!";
367         return;
368       }
369       auto tensor_name = in_node->name_ + std::to_string(i);
370       this->input_map_[tensor_name] = in_tensor;
371       this->input_shape_map_[in_tensor] = in_tensor->shape();
372       if (!in_tensor->tensor_name().empty()) {
373         this->input_map_[in_tensor->tensor_name()] = in_tensor;
374       }
375     }
376   }
377 
378   for (auto input_tensor : this->inputs_) {
379     MS_ASSERT(input_tensor != nullptr);
380     if (this->input_map_.find(input_tensor->tensor_name()) == this->input_map_.end()) {
381       this->input_map_[input_tensor->tensor_name()] = input_tensor;
382     }
383     if (this->input_shape_map_.find(input_tensor) == this->input_shape_map_.end()) {
384       this->input_shape_map_[input_tensor] = input_tensor->shape();
385     }
386   }
387 }
388 
389 void LiteSession::InitGraphOutputNodeMap(const lite::Model *model) {
390   MS_ASSERT(model != nullptr);
391   auto graph_output_node_indexes = GetGraphOutputNodes(model);
392   auto graph_out_size = model->graph_.output_indices_.size();
393   for (auto out_node_index : graph_output_node_indexes) {
394     auto out_node = model->graph_.all_nodes_[out_node_index];
395     MS_ASSERT(out_node != nullptr);
396     auto out_size = out_node->output_indices_.size();
397     for (size_t i = 0; i < out_size; ++i) {
398       auto out_tensor_index = out_node->output_indices_[i];
399       bool is_graph_output = false;
400       for (size_t j = 0; j < graph_out_size; ++j) {
401         if (out_tensor_index == model->graph_.output_indices_[j]) {
402           is_graph_output = true;
403           break;
404         }
405       }
406       if (!is_graph_output) {
407         continue;
408       }
409       MS_ASSERT(out_tensor_index < this->tensors_.size());
410       auto *out_tensor = this->tensors_.at(out_tensor_index);
411       if (out_tensor == nullptr) {
412         MS_LOG(ERROR) << "out_tensor is null!";
413         return;
414       }
415       this->output_node_map_[out_node->name_].emplace_back(out_tensor);
416     }
417   }
418 }
419 
420 void LiteSession::InitGraphOutputTensorMap(const lite::Model *model) {
421   MS_ASSERT(model != nullptr);
422   MS_ASSERT(this->output_tensor_map_.empty());
423   auto graph_out_size = model->graph_.output_indices_.size();
424   for (size_t i = 0; i < graph_out_size; ++i) {
425     size_t graph_out_index = model->graph_.output_indices_[i];
426     MS_ASSERT(graph_out_index < this->tensors_.size());
427     auto *out_tensor = this->tensors_.at(graph_out_index);
428     if (out_tensor == nullptr) {
429       MS_LOG(ERROR) << "out_tensor is null!";
430       return;
431     }
432     if (!out_tensor->tensor_name().empty()) {
433       this->output_tensor_map_.insert(std::make_pair(out_tensor->tensor_name(), out_tensor));
434       this->output_tensor_names_.emplace_back(out_tensor->tensor_name());
435     } else {
436       this->output_tensor_map_.insert(std::make_pair(std::to_string(graph_out_index), out_tensor));
437       this->output_tensor_names_.emplace_back(std::to_string(graph_out_index));
438     }
439   }
440 }
441 
442 void LiteSession::InitGraphInOutTensorsMap(const lite::Model *model) {
443   InitGraphInputMSTensors();
444   InitGraphInputMap(model);
445   InitGraphOutputNodeMap(model);
446   InitGraphOutputTensorMap(model);
447 }
448 
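// Creates a "_duplicate" tensor for every graph output and rewires all subgraphs and nodes
// to use the duplicate, so the user-visible output tensors in outputs_ are decoupled from
// the tensors written during graph execution.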
449 int LiteSession::IsolateOutputTensor() {
450   for (Tensor *src_tensor : outputs_) {
451     if (src_tensor->IsGraphInput()) {
452       continue;
453     }
454     Tensor *new_tensor = new (std::nothrow)
455       Tensor(src_tensor->data_type(), src_tensor->shape(), src_tensor->format(), Category::GRAPH_OUTPUT);
456     if (MS_UNLIKELY(new_tensor == nullptr)) {
457       MS_LOG(ERROR) << "duplicate new output failed.";
458       return RET_NULL_PTR;
459     }
460     new_tensor->set_allocator(src_tensor->allocator()); /* GPU use opencl allocator */
461     new_tensor->set_tensor_name(src_tensor->tensor_name() + "_duplicate");
462     for (LiteQuantParam quant : src_tensor->quant_params()) {
463       new_tensor->AddQuantParam(quant);
464     }
465     new_tensor->set_init_ref_count(src_tensor->init_ref_count());
466 
467     /* src tensor set for graph calculate */
468     if (src_tensor->data_type() == kNumberTypeFloat16) {
469       src_tensor->set_data_type(kNumberTypeFloat32);
470     }
471     src_tensor->set_ref_count(1);
472 
473     isolate_graph_output_map_.insert(std::make_pair(new_tensor, src_tensor));
474 
475     /* set new tensor for calculate */
476     for (auto subgraph : kernels_) {
477       /* subgraph input and output */
478       auto in_size = subgraph->in_tensors().size();
479       for (size_t i = 0; i < in_size; ++i) {
480         if (subgraph->in_tensors()[i] == src_tensor) {
481           subgraph->set_in_tensor(new_tensor, i);
482         }
483       }
484       auto out_size = subgraph->out_tensors().size();
485       for (size_t i = 0; i < out_size; ++i) {
486         if (subgraph->out_tensors()[i] == src_tensor) {
487           subgraph->set_out_tensor(new_tensor, i);
488         }
489       }
490       if (subgraph->desc().arch == kernel::kDelegate) {
491         continue;
492       }
493       /* node input and output */
494       auto nodes = reinterpret_cast<kernel::SubGraphKernel *>(subgraph)->nodes();
495       auto nodes_size = nodes.size();
496       for (size_t i = 0; i < nodes_size; ++i) {
497         auto node = nodes[i];
498         out_size = node->out_tensors().size();
499         for (size_t j = 0; j < out_size; ++j) {
500           if (node->out_tensors()[j] == src_tensor) {
501             node->set_out_tensor(new_tensor, j);
502             break;
503           }
504         }
505         in_size = node->in_tensors().size();
506         for (size_t j = 0; j < in_size; ++j) {
507           if (node->in_tensors()[j] == src_tensor) {
508             node->set_in_tensor(new_tensor, j);
509           }
510         }
511       }
512     }
513   }
514 
515   UpdateLinkInfoForIsolateOutput();
516   return RET_OK;
517 }
518 
519 void LiteSession::UpdateLinkInfoForIsolateOutput() {
520   for (auto &item : isolate_graph_output_map_) {
521     context_->ReplaceLinkInfoReceiverWithNewOne(item.first, item.second);
522   }
523   return;
524 }
525 
526 void LiteSession::FreePackOpWeight(const std::vector<kernel::KernelExec *> &kernels) {
527   // To reduce runtime RAM usage,
528   // free the pack-op weights, because packed ops will not access the origin weights at runtime.
529   for (auto *kernel : kernels) {
530     MS_ASSERT(kernel != nullptr);
531     if (kernel->subgraph_type() == kernel::kNotSubGraph) {
532       if (!IsPackedOp(static_cast<int>(kernel::SchemaType(kernel->type())))) {
533         continue;
534       }
535     } else {
536       auto subgraph = reinterpret_cast<kernel::SubGraphKernel *>(kernel);
537       FreePackOpWeight(subgraph->nodes());
538     }
539     auto inputs = kernel->in_tensors();
540     for (auto *tensor : inputs) {
541       MS_ASSERT(tensor != nullptr);
542       if (!tensor->IsConst() || tensor->ref_count() >= 1) {
543         continue;
544       }
545       tensor->FreeData();
546     }
547   }
548 }
549 
550 void LiteSession::MarkSharedWeight(const std::vector<kernel::KernelExec *> &kernels) {
551   // Protect weights that are also used by non-packed ops:
552   // increase their ref count so that FreePackOpWeight will not release them.
553   for (auto *kernel : kernels) {
554     MS_ASSERT(kernel != nullptr);
555     if (kernel->subgraph_type() == kernel::kNotSubGraph) {
556       if (IsPackedOp(static_cast<int>(kernel::SchemaType(kernel->type())))) {
557         continue;
558       }
559     } else {
560       auto subgraph = reinterpret_cast<kernel::SubGraphKernel *>(kernel);
561       MarkSharedWeight(subgraph->nodes());
562     }
563     auto inputs = kernel->in_tensors();
564     for (auto *tensor : inputs) {
565       MS_ASSERT(tensor != nullptr);
566       if (tensor->IsConst()) {
567         tensor->IncRefCount();
568       }
569     }
570   }
571 }
572 
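// Compilation pipeline: convert schema tensors, register origin weights with PackWeightManager,
// collect graph inputs/outputs, schedule kernels into subgraphs, prepare the kernels,
// initialize the executor, then mark shared weights and free packed-op weights.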
573 int LiteSession::CompileGraph(Model *model) {
574   auto ret = PreCheck(model);
575   if (ret != RET_OK) {
576     MS_LOG(ERROR) << "schedule check failed: " << ret;
577     is_running_.store(false);
578     return ret;
579   }
580 
581   if (model->model_type_ != ModelType_MSLite) {
582     ret = reinterpret_cast<AbstractBaseModel *>(model)->ConvertTensors(&this->tensors_);
583   } else {
584     // Convert to abstract base model interface
585     ret = ConvertTensors(model);
586     context_->set_schema_version(reinterpret_cast<LiteModel *>(model)->GetSchemaVersion());
587   }
588   if (ret != RET_OK) {
589     MS_LOG(ERROR) << "ConvertTensors failed: " << ret;
590     is_running_.store(false);
591     return ret;
592   }
593   ret = lite::PackWeightManager::GetInstance()->StoreOriginTensorData(model, &tensors_);
594   if (ret != RET_OK) {
595     MS_LOG(ERROR) << "StoreOriginTensorData failed.";
596     is_running_.store(false);
597     return RET_ERROR;
598   }
599   InitGraphInputTensors(model);
600   InitGraphOutputTensors(model);
601 
602   PackedNodePass::GetInstance().Run(model, tensors_);
603 
604   // schedule kernels
605   Scheduler scheduler(context_.get(), ms_context_, model, &tensors_, &inputs_, &outputs_, is_train_session_,
606                       &is_infershape_, &is_control_flow_, &infer_along_running_, execution_plan_, delegate_,
607                       delegate_device_type_);
608   scheduler.SetupSchedulerCb(std::move(sched_cb_));
609   scheduler.SetConfig(config_info_);
610   ret = scheduler.Schedule(&kernels_);
611   if (ret != RET_OK) {
612     MS_LOG(ERROR) << "Schedule kernels failed: " << ret;
613     is_running_.store(false);
614     return ret;
615   }
616   if (ms_context_->GetThreadNum() == 1 && !context_->IsCpuFloat16Enabled() && is_control_flow_) {
617     context_->DeleteThreadPool();
618     (void)context_->CreateThreadPool(is_control_flow_);
619   }
620 
621   infer_along_running_ = infer_along_running_ && !is_control_flow_ && !is_train_session_ && (is_infershape_ != RET_OK);
622   InitGraphInOutTensorsMap(model);
623 
624   non_tail_call_kernels_ = scheduler.NonTailCallNodes();
625 
626   ret = PrepareKernels(model);
627   if (ret != RET_OK) {
628     MS_LOG(ERROR) << "Prepare kernels failed: " << ret;
629     is_running_.store(false);
630     return ret;
631   }
632 
633   if (is_train_session_ || is_prepare_session_) {
634     is_running_.store(false);
635     return RET_OK;
636   }
637 
638   ret = InitExecutor();
639   if (ret != RET_OK) {
640     MS_LOG(ERROR) << "InitExecutor failed: " << ret;
641     is_running_.store(false);
642     return ret;
643   }
644 
645   MarkSharedWeight(kernels_);
646   FreePackOpWeight(kernels_);
647 
648   infer_along_running_ = infer_along_running_ && (runtime_allocator_ == nullptr);
649   if (infer_along_running_) {
650     this->context_->set_infer_checker(InferCheckerAll);
651   }
652   is_running_.store(false);
653   return RET_OK;
654 }
655 
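// A subgraph kernel is "isolated" when none of its input tensors is produced
// as an output by any other scheduled kernel.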
656 bool LiteSession::IsIsolatedSubGraph(const kernel::KernelExec *kernel) {
657   auto cur_in_tensors = kernel->in_tensors();
658   for (auto cur_kernel : this->kernels_) {
659     if (cur_kernel == kernel) {
660       continue;
661     }
662     auto out_tensors = cur_kernel->out_tensors();
663     for (auto tensor : cur_in_tensors) {
664       if (IsContain(out_tensors, tensor)) {
665         return false;
666       }
667     }
668   }
669   return true;
670 }
671 
672 int LiteSession::SetAllocatorForDelegateKernels(const kernel::KernelExec *kernel) {
673   if (kernel == nullptr) {
674     return RET_NULL_PTR;
675   }
676   for (auto input : kernel->in_tensors()) {
677     CHECK_NULL_RETURN(input);
678     input->set_allocator(this->context_->allocator);
679   }
680   for (auto output : kernel->out_tensors()) {
681     CHECK_NULL_RETURN(output);
682     output->set_allocator(this->context_->allocator);
683   }
684   return RET_OK;
685 }
686 
687 int LiteSession::CreateNNRTDelegate() {
688 #if SUPPORT_NNRT
689   auto iter = std::find_if(context_->device_list_.begin(), context_->device_list_.end(),
690                            [](DeviceContext &device) { return device.device_type_ == lite::DT_NNRT; });
691   if (iter == context_->device_list_.end()) {
692     MS_LOG(ERROR) << "NNRT device info not found";
693     return RET_ERROR;
694   }
695 
696   delegate_ = std::make_shared<NNRTDelegate>(iter->device_info_.nnrt_device_info_);
697   if (delegate_ == nullptr) {
698     MS_LOG(ERROR) << "New NNRT delegate failed";
699     return RET_ERROR;
700   }
701   delegate_device_type_ = DT_NNRT;
702   this->context_->delegate = delegate_;
703 #endif
704   return RET_OK;
705 };
706 
707 int LiteSession::DrawGraph(kernel::SubGraphKernel *graph) {
708   if (graph == nullptr) {
709     return RET_NULL_PTR;
710   }
711   // create and open .dot file
712   std::ofstream dotfile;
713   dotfile.open("./graph.dot", std::ios::out | std::ios::trunc);
714   if (!dotfile.is_open()) {
715     MS_LOG(ERROR) << "create or open dotfile failed.";
716     return RET_ERROR;
717   }
718   // write data to .dot file
719   dotfile << "digraph " << graph->name() << " {\n";
720   for (auto node : graph->nodes()) {
721     std::replace(node->name().begin(), node->name().end(), '/', '-');
722     // first node
723     if (node->in_kernels().empty()) {
724       dotfile << "\tinput->" << node->name();
725       dotfile << "[label=\"";
726       std::vector<int> input_shapes = node->in_tensors().front()->shape();
727       for (auto iter = input_shapes.begin(); iter != input_shapes.end(); iter++) {
728         if (iter == input_shapes.end() - 1) {
729           dotfile << *iter;
730         } else {
731           dotfile << *iter << "*";
732         }
733       }
734       dotfile << "\"]\n";
735       continue;
736     }
737 
738     for (size_t i = 0; i < node->in_kernels().size(); ++i) {
739       dotfile << "\t" << node->in_kernels()[i]->name() << "->" << node->name() << "[label=\"";
740       std::vector<int32_t> in_kernel_shapes = node->in_tensors()[i]->shape();
741 
742       for (auto iter = in_kernel_shapes.begin(); iter != in_kernel_shapes.end(); iter++) {
743         if (iter == in_kernel_shapes.end() - 1) {
744           dotfile << *iter;
745         } else {
746           dotfile << *iter << "*";
747         }
748       }
749       dotfile << "\"]\n";
750     }
751     // last node
752     if (node->out_kernels().empty()) {
753       dotfile << "\t" << node->name() << "->output";
754       dotfile << "[label=\"";
755       std::vector<int32_t> out_shapes = node->out_tensors().front()->shape();
756       for (auto iter = out_shapes.begin(); iter != out_shapes.end(); iter++) {
757         if (iter == out_shapes.end() - 1) {
758           dotfile << *iter;
759         } else {
760           dotfile << *iter << "*";
761         }
762       }
763       dotfile << "\"]\n";
764     }
765   }
766   dotfile.close();
767   return RET_OK;
768 }
769 
770 void LiteSession::SetInitRefCountOfPartialSubgraphInputs(const Model *model) {
771   if (model == nullptr) {
772     return;
773   }
774   constexpr size_t kFirstPartialSubgraphIndex = 1U;
775   const auto &sub_graphs = model->graph_.sub_graphs_;
776   // Find out partial subgraph's inputs and set their 'init_ref_count' to INT_MAX to avoid trigger 'FreeData()'.
777   // Here start with index:1 to skip main subgraph.
778   for (size_t i = kFirstPartialSubgraphIndex; i < sub_graphs.size(); i++) {
779     for (auto index : sub_graphs[i]->input_indices_) {
780       tensors_[index]->set_init_ref_count(INT_MAX);
781     }
782   }
783 }
784 
785 int LiteSession::PrepareKernels(const Model *model) {
786   // find kernel's in_kernels and out_kernels in every subgraph
787   kernel::KernelExecUtil::FindAllInoutKernelsInSubgraphKernel(this->kernels_);
788   // find in_kernels and out_kernels between subgraph kernels
789   kernel::KernelExecUtil::FindAllInoutKernels(this->kernels_);
790 
791   // init init_ref_count for subgraphs and kernels
792   auto ret = SetTensorInitRefCount();
793   if (ret != RET_OK) {
794     MS_LOG(ERROR) << "SetTensorInitRefCount failed.";
795     return ret;
796   }
797   // When running a control flow model, if a partial subgraph's input is also its output,
798   // 'init_ref_count' is not correctly initialized in 'SetTensorInitRefCount()', which would cause an error:
799   // the input tensor's data_ptr would be referenced after it is reset to NULL when ref_count drops to 0.
800   // Here we set the partial input tensors' 'init_ref_count' to INT_MAX to avoid that null access.
801   SetInitRefCountOfPartialSubgraphInputs(model);
802 
803   for (auto kernel : this->kernels_) {
804     if (kernel->desc().arch == kernel::kDelegate) {
805       ret = SetAllocatorForDelegateKernels(kernel);
806       if (ret != RET_OK) {
807         MS_LOG(ERROR) << "Prepare kernel " << kernel->name() << " failed: " << ret;
808         return ret;
809       }
810     }
811 
812     if (!is_train_session_ && kernel->desc().arch != kernel::kDelegate && kernel->desc().arch != kernel::kGPU) {
813       auto subgraph_kernel = static_cast<kernel::SubGraphKernel *>(kernel);
814       if (subgraph_kernel == nullptr) {
815         MS_LOG(ERROR) << "kernel: " << kernel->name() << " is not a subgraph kernel.";
816         return RET_ERROR;
817       }
818       for (auto &node : subgraph_kernel->nodes()) {
819         ret = PackKernelExec(node, tensors_);
820         if (ret != RET_OK) {
821           MS_LOG(ERROR) << "Pack KernelExec failed.";
822           return ret;
823         }
824         ret = node->Prepare();
825         if (ret != RET_OK) {
826           MS_LOG(ERROR) << "node: " << node->name() << " prepare failed.";
827           return ret;
828         }
829       }
830     }
831 
832 #if (defined DEBUG) && (defined MSLITE_EXPORT_COMPUTE_IR)
833     auto subgraph_kernel = static_cast<kernel::SubGraphKernel *>(kernel);
834     ret = DrawGraph(subgraph_kernel);
835     if (ret != RET_OK) {
836       MS_LOG(ERROR) << "graph: " << kernel->name() << " draw failed.";
837     }
838 #endif
839 
840     ret = kernel->Prepare();
841     if (ret != RET_OK) {
842       MS_LOG(ERROR) << "Prepare kernel " << kernel->name() << " failed: " << ret;
843       return ret;
844     }
845   }
846   return RET_OK;
847 }
848 
849 int LiteSession::SetTensorInitRefCount() {
850   for (auto *kernel : this->kernels_) {
851     kernel->InitOutTensorInitRefCount();
852     if (kernel->desc().arch == kernel::kDelegate) {
853       continue;
854     }
855     if (IsIsolatedSubGraph(kernel)) {
856       static_cast<kernel::SubGraphKernel *>(kernel)->InitInputTensorInitRefCount();
857     }
858   }
859 
860   if (!non_tail_call_kernels_.empty()) {
861     return SetNonTaiCallSubgraphOutputInitRefCount();
862   }
863   return RET_OK;
864 }
865 
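// For subgraphs feeding non-tail calls, resets each output tensor's init_ref_count
// to the number of times the tensor appears among that subgraph's outputs.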
866 int LiteSession::SetNonTaiCallSubgraphOutputInitRefCount() {
867   for (auto call_kernel : non_tail_call_kernels_) {
868     auto call_output = call_kernel->out_tensors();
869     auto all_out_subgraphs = kernel::KernelExecUtil::GetCallInputPartialsCorrespondingOutputSubgraph(call_kernel);
870     for (auto subgraph : all_out_subgraphs) {
871       MS_CHECK_TRUE_MSG(subgraph->out_tensors().size() == call_output.size(), RET_ERROR,
872                         "non tail call output size is not same as subgraph output.");
873       std::set<Tensor *> subgraph_outputs_set{};
874       for (size_t i = 0; i < subgraph->out_tensors().size(); ++i) {
875         auto output = subgraph->out_tensors()[i];
876         if (subgraph_outputs_set.find(output) == subgraph_outputs_set.end()) {
877           output->set_init_ref_count(1);
878           (void)subgraph_outputs_set.insert(output);
879         } else {
880           output->set_init_ref_count(output->init_ref_count() + 1);
881         }
882       }
883     }
884   }
885   return RET_OK;
886 }
887 
888 std::vector<mindspore::lite::Tensor *> LiteSession::GetInputs() const { return this->input_vec_; }
889 
890 int LiteSession::RunGraph(const KernelCallBack &before, const KernelCallBack &after) {
891   bool expected = false;
892   if (!is_running_.compare_exchange_strong(expected, true)) {
893     MS_LOG(ERROR) << "Not support multi-threading";
894     return RET_ERROR;
895   }
896 #if defined(PARALLEL_INFERENCE) && defined(ENABLE_MINDRT)
897   ParallelThreadPoolManager::GetInstance()->ActivatePool(runner_id_, worker_id_);
898 #endif
899   STATUS ret = CheckTensorsInvalid(inputs_);
900   if (MS_UNLIKELY(ret != RET_OK)) {
901     is_running_.store(false);
902     MS_LOG(ERROR) << "CheckInputs failed.";
903     return ret;
904   }
905   ret = CheckGraphInputShapes(inputs_, input_shape_map_);
906   if (MS_UNLIKELY(ret != RET_OK)) {
907     is_running_.store(false);
908     MS_LOG(ERROR) << "Check graph input shapes failed.";
909     return ret;
910   }
911   MS_ASSERT(this->context_ != nullptr);
912   ret = executor_->Run(this->inputs_, this->outputs_, this->kernels_, before, after);
913   if (MS_UNLIKELY(ret != RET_OK)) {
914     MS_LOG(ERROR) << "RunGraph failed : " << ret;
915   }
916   if (infer_along_running_) {
917     this->context_->set_infer_checker(InferCheckerInput);
918     for (auto input : inputs_) {
919       input->set_shape_changed(false);
920     }
921   }
922 #if defined(PARALLEL_INFERENCE) && defined(ENABLE_MINDRT)
923   ParallelThreadPoolManager::GetInstance()->SetFreePool(runner_id_, worker_id_);
924 #endif
925   is_running_.store(false);
926   return ret;
927 }
928 
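// Reads the model-parallel-runner section of config_info_ (runner id, worker number, per-worker
// thread limits) and initializes the ParallelThreadPoolManager when thread pool sharing is enabled.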
929 int LiteSession::InitSharedThreadPool() {
930   int workers_num = -1;
931   int remaining_thread_num = -1;
932   int thread_num_limit = -1;
933   bool enable_shared_pool = false;
934   if (config_info_ != nullptr) {
935     auto runner_info_item = config_info_->find(kInnerModelParallelRunnerSection);
936     if (runner_info_item != config_info_->end()) {
937       auto item_runner = runner_info_item->second.find(kInnerRunnerIDKey);
938       if (item_runner != runner_info_item->second.end()) {
939         runner_id_ = runner_info_item->second.at(kInnerRunnerIDKey);
940       }
941       auto shared_pool_item = runner_info_item->second.find(kEnableSharedThreadPoolKey);
942       if (shared_pool_item != runner_info_item->second.end() &&
943           runner_info_item->second.at(kEnableSharedThreadPoolKey) == "true") {
944         workers_num = std::atoi(runner_info_item->second.at(kInnerWorkerNumKey).c_str());
945         remaining_thread_num = std::atoi(runner_info_item->second.at(kThreadNumRemainingPerWorkerKey).c_str());
946         thread_num_limit = std::atoi(runner_info_item->second.at(kThreadNumLimitPerWorkerKey).c_str());
947         worker_id_ = std::atoi(runner_info_item->second.at(kInnerModelIDKey).c_str());
948         enable_shared_pool = true;
949       }
950     }
951   }
952   MS_LOG(INFO) << "runner id: " << runner_id_ << "  enable_shared_pool: " << enable_shared_pool
953                << "  workers_num: " << workers_num << "  thread_num_limit: " << thread_num_limit
954                << "  remaining_thread_num: " << remaining_thread_num;
955 #if defined(PARALLEL_INFERENCE) && defined(ENABLE_MINDRT)
956   ParallelThreadPoolManager::GetInstance()->Init(enable_shared_pool, runner_id_, workers_num, remaining_thread_num,
957                                                  thread_num_limit);
958 #endif
959   return RET_OK;
960 }
961 
962 int LiteSession::InitContext(const std::shared_ptr<InnerContext> &context) {
963   if (context == nullptr) {
964     MS_LOG(ERROR) << "context is nullptr";
965     return RET_NULL_PTR;
966   }
967   this->context_ = context;
968   context_->SetBindRunnerId(runner_id_);
969   auto ret = this->context_->Init();
970   if (ret != RET_OK) {
971     MS_LOG(ERROR) << "Init Context failed";
972     return ret;
973   }
974 
975   ms_context_ = MSContextFromContext(context);
976   if (ms_context_ == nullptr) {
977     MS_LOG(ERROR) << "transfer context to ms context failed.";
978     return RET_NULL_PTR;
979   }
980 
981 #ifdef MS_COMPILE_IOS
982   context_->thread_pool_->SetMaxSpinCount(kDefaulLiteIosSpinCount);
983   context_->thread_pool_->SetMinSpinCount(kDefaulLiteIosSpinCount);
984 #endif
985 
986 #if defined(PARALLEL_INFERENCE) && defined(ENABLE_MINDRT)
987   if (context_->inter_op_parallel_num_ > 1 && !runner_id_.empty() &&
988       ParallelThreadPoolManager::GetInstance()->GetEnableSharedThreadPool(runner_id_)) {
989     MS_LOG(INFO) << "Enable subgraph parallelism and enable thread pool sharing";
990     ParallelThreadPoolManager::GetInstance()->BindPoolToRunner(context_->thread_pool_, config_info_);
991   }
992 #endif
993 
994   return RET_OK;
995 }
996 
997 int LiteSession::InitAscend(const std::shared_ptr<InnerContext> &context) {
998 #if !defined(__ANDROID__) && !defined(MS_COMPILE_OHOS)
999   if (!context->IsDeviceTypeEnabled(DT_ASCEND)) {
1000     MS_LOG(INFO) << "There is no Ascend device type.";
1001     return RET_OK;
1002   }
1003   return mindspore::AscendKernelPlugin::GetInstance().Register();
1004 #else
1005   return RET_OK;
1006 #endif
1007 }
1008 
1009 int LiteSession::CreateTensorRTDelegate() {
1010 #ifdef GPU_TENSORRT
1011   std::string cache_model_path;
1012   std::string serialize_path;
1013   size_t vocab_size = 0;
1014   size_t device_cache_size = 0;
1015   std::map<std::string, std::string> input_ranges;
1016   if (config_info_ != nullptr) {
1017     auto input_ranges_iter = config_info_->find(kGPUContextSection);
1018     if (input_ranges_iter != config_info_->end()) {
1019       input_ranges = input_ranges_iter->second;
1020     }
1021     auto ms_cache_iter = config_info_->find(kMSCacheSection);
1022     if (ms_cache_iter != config_info_->end()) {
1023       auto ms_cache = ms_cache_iter->second;
1024       auto model_path_iter = ms_cache.find(kMSCacheModelPathKey);
1025       if (model_path_iter != ms_cache.end()) {
1026         cache_model_path = model_path_iter->second;
1027       }
1028 
1029       auto vocab_size_iter = ms_cache.find(kMSCacheVocabSizeKey);
1030       if (vocab_size_iter != ms_cache.end()) {
1031         auto vocab_size_opt = GenericParseValue<size_t>(vocab_size_iter->second);
1032         if (!vocab_size_opt.IsNone()) {
1033           vocab_size = vocab_size_opt.Get();
1034         }
1035       }
1036 
1037       auto device_cache_size_iter = ms_cache.find(kMSCacheDeviceSizeKey);
1038       if (device_cache_size_iter != ms_cache.end()) {
1039         auto device_cache_size_opt = GenericParseValue<size_t>(device_cache_size_iter->second);
1040         if (!device_cache_size_opt.IsNone()) {
1041           device_cache_size = device_cache_size_opt.Get();
1042         }
1043       }
1044 
1045       auto serialize_path_iter = ms_cache.find(kMSCacheSerializePathKey);
1046       if (serialize_path_iter != ms_cache.end()) {
1047         serialize_path = serialize_path_iter->second;
1048       }
1049     }
1050   }
1051 
1052   delegate_ = std::make_shared<TensorRTDelegate>(ms_context_, cache_model_path, vocab_size, device_cache_size,
1053                                                  serialize_path, input_ranges);
1054   if (delegate_ == nullptr) {
1055     MS_LOG(ERROR) << "New tensorrt delegate_ failed";
1056     return RET_ERROR;
1057   }
1058   delegate_device_type_ = DT_GPU;
1059   this->context_->delegate = delegate_;
1060 #endif
1061   return RET_OK;
1062 }
1063 
1064 int LiteSession::CreateNPUDelegate() {
1065 #ifdef SUPPORT_NPU
1066   std::string model_cache_dir;
1067   if (config_info_ != nullptr) {
1068     auto common_context_iter = config_info_->find(kCommonContextSection);
1069     if (common_context_iter != config_info_->end()) {
1070       auto common_context = common_context_iter->second;
1071       auto model_cache_dir_iter = common_context.find(kGraphCompilerCacheDirKey);
1072       if (model_cache_dir_iter != common_context.end()) {
1073         model_cache_dir = model_cache_dir_iter->second;
1074       }
1075     }
1076   }
1077   delegate_ = std::make_shared<NPUDelegate>(context_->GetDeviceInfo(DT_NPU).npu_device_info_, model_cache_dir);
1078   if (delegate_ == nullptr) {
1079     MS_LOG(ERROR) << "New delegate_ failed";
1080     return RET_ERROR;
1081   }
1082   delegate_device_type_ = DT_NPU;
1083   this->context_->delegate = delegate_;
1084 #endif
1085   return RET_OK;
1086 }
1087 
1088 int LiteSession::CreateNNAPIDelegate() {
1089 #ifdef SUPPORT_NNAPI
1090   bool enable_fp16 =
1091     context_->IsCpuFloat16Enabled() || context_->IsGpuFloat16Enabled() || context_->IsNpuFloat16Enabled();
1092   bool only_acc_device = !context_->IsDeviceTypeEnabled(DT_CPU) && !context_->IsDeviceTypeEnabled(DT_GPU) &&
1093                          context_->IsDeviceTypeEnabled(DT_NPU);
1094   bool disable_cpu = !context_->IsDeviceTypeEnabled(DT_CPU);
1095   auto providers = context_->GetProviders();
1096   std::vector<std::string> specified_devices(providers.begin(), providers.end());
1097   delegate_ = std::make_shared<NNAPIDelegate>(enable_fp16, only_acc_device, disable_cpu, specified_devices);
1098   if (delegate_ == nullptr) {
1099     MS_LOG(ERROR) << "New delegate_ failed";
1100     return RET_ERROR;
1101   }
1102   this->context_->delegate = delegate_;
1103 #endif
1104   return RET_OK;
1105 }
1106 
1107 int LiteSession::CreateCoreMLDelegate() {
1108 #ifdef ENABLE_COREML
1109   delegate_ = std::make_shared<CoreMLDelegate>();
1110   if (delegate_ == nullptr) {
1111     MS_LOG(ERROR) << "New delegate_ failed";
1112     return RET_ERROR;
1113   }
1114   this->context_->delegate = delegate_;
1115 #endif
1116   return RET_OK;
1117 }
1118 
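// Delegate selection: a delegate supplied through the context takes priority, then the built-in
// delegate mode (NNAPI / CoreML), otherwise a device-type based delegate (NPU, TensorRT for GPU,
// NNRT). The chosen delegate is then initialized and discarded if it reports kLiteNotSupport.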
1119 int LiteSession::InitDelegate() {
1120 #ifndef DELEGATE_CLIP
1121   int ret = RET_OK;
1122   if (context_->delegate != nullptr) {
1123     delegate_ = context_->delegate;
1124     delegate_device_type_ = -1;
1125   } else if (context_->delegate_mode_ != kNoDelegate) {
1126     switch (context_->delegate_mode_) {
1127       case kNNAPI:
1128         ret = CreateNNAPIDelegate();
1129         break;
1130       case kCoreML:
1131         ret = CreateCoreMLDelegate();
1132         break;
1133       default:
1134         MS_LOG(ERROR) << "Unsupported built-in delegate mode: " << context_->delegate_mode_;
1135         return RET_ERROR;
1136     }
1137   } else {
1138     if (context_->IsDeviceTypeEnabled(DT_NPU)) {
1139       ret = CreateNPUDelegate();
1140     } else if (context_->IsDeviceTypeEnabled(DT_GPU)) {
1141       ret = CreateTensorRTDelegate();
1142     } else if (context_->IsDeviceTypeEnabled(DT_NNRT)) {
1143       ret = CreateNNRTDelegate();
1144     }
1145   }
1146 
1147   if (ret != RET_OK) {
1148     return ret;
1149   }
1150   if (delegate_ != nullptr) {
1151     auto delegate_ret = delegate_->Init();
1152     if (delegate_ret == mindspore::kLiteNotSupport) {
1153       MS_LOG(DEBUG) << "Delegate is unsupported";
1154       delegate_.reset();
1155       delegate_ = nullptr;
1156     } else if (delegate_ret == mindspore::kSuccess) {
1157       MS_LOG(INFO) << "Delegate init successfully";
1158     } else {
1159       MS_LOG(ERROR) << "Delegate init failed";
1160       return RET_ERROR;
1161     }
1162   }
1163 #endif
1164   return RET_OK;
1165 }
1166 
1167 int LiteSession::Init(const std::shared_ptr<InnerContext> &context) {
1168   bool expected = false;
1169   if (!is_running_.compare_exchange_strong(expected, true)) {
1170     MS_LOG(ERROR) << "Not support multi-threading";
1171     return RET_ERROR;
1172   }
1173 
1174   if (!PlatformInstructionSetSupportCheck()) {
1175     MS_LOG(ERROR) << "Device not support isa";
1176     is_running_.store(false);
1177     return RET_NOT_SUPPORT;
1178   }
1179 
1180   auto status = InitSharedThreadPool();
1181   if (status != RET_OK) {
1182     MS_LOG(ERROR) << "init Shared thread pool failed";
1183     is_running_.store(false);
1184     return status;
1185   }
1186   auto ret = InitContext(context);
1187   if (ret != RET_OK) {
1188     MS_LOG(ERROR) << "Init Context failed";
1189     is_running_.store(false);
1190     return ret;
1191   }
1192 
1193   ret = InitAscend(context);
1194   if (ret != RET_OK) {
1195     MS_LOG(ERROR) << "Open Ascend kernel plugin failed";
1196     is_running_.store(false);
1197     return ret;
1198   }
1199 
1200   ret = InitDelegate();
1201   if (ret != RET_OK) {
1202     MS_LOG(ERROR) << "Init delegate failed.";
1203     is_running_.store(false);
1204     return ret;
1205   }
1206 
1207   ret = InitGPURuntime();
1208   if (ret != RET_OK) {
1209     MS_LOG(ERROR) << "Init GPU runtime failed.";
1210     is_running_.store(false);
1211     return ret;
1212   }
1213 
1214   is_running_.store(false);
1215   return RET_OK;
1216 }
1217 
1218 void LiteSession::BindThread(bool if_bind) {
1219   // Abandoned code
1220   // Bind thread in executor
1221   return;
1222 }
1223 
1224 LiteSession::~LiteSession() {
1225   delegate_.reset();
1226   bool expected = false;
1227   if (!is_running_.compare_exchange_strong(expected, true)) {
1228     MS_LOG(ERROR) << "Not support multi-threading";
1229     return;
1230   }
1231   for (auto *kernel : kernels_) {
1232     delete kernel;
1233     kernel = nullptr;
1234   }
1235   for (auto tensor : tensors_) {
1236     if (tensor == nullptr) {
1237       continue;
1238     }
1239     // Data of a const tensor which doesn't own its data will not be freed here,
1240     // e.g. const data from the meta_graph, which is freed when the meta_graph is freed.
1241     if (tensor->IsConst() && !tensor->own_data()) {
1242       tensor->set_data(nullptr);
1243     }
1244 
1245     /* situation : user set graph-output-tensor data */
1246     if (tensor->IsGraphOutput() && tensor->allocator() == nullptr) {
1247       tensor->set_data(nullptr);
1248     }
1249     delete tensor;
1250     tensor = nullptr;
1251   }
1252 
1253   for (auto item : isolate_graph_output_map_) {
1254     auto isolate_output_tensor = item.first;
1255     isolate_output_tensor->set_data(nullptr);
1256     delete isolate_output_tensor;
1257     isolate_output_tensor = nullptr;
1258   }
1259 
1260   for (auto map : isolate_input_map_) {
1261     auto isolate_input_tensor = map.first;
1262     isolate_input_tensor->set_data(nullptr);
1263     delete isolate_input_tensor;
1264   }
1265 
1266   // Tensor * in input_map output_map are freed in tensors
1267   input_map_.clear();
1268   input_shape_map_.clear();
1269   output_node_map_.clear();
1270   output_tensor_map_.clear();
1271   input_vec_.clear();
1272   isolate_graph_output_map_.clear();
1273 
1274   delete this->executor_;
1275   this->executor_ = nullptr;
1276 #ifdef GPU_OPENCL
1277   delete opencl_runtime_wrapper_;
1278   opencl_runtime_wrapper_ = nullptr;
1279 #endif
1280   delete ms_context_;
1281   ms_context_ = nullptr;
1282 #if defined(PARALLEL_INFERENCE) && defined(ENABLE_MINDRT)
1283   ParallelThreadPoolManager::GetInstance()->ResetParallelThreadPoolManager(runner_id_);
1284 #endif
1285   lite::PackWeightManager::GetInstance()->FreePackWeight(runner_id_, model_id_);
1286   if (model_ != nullptr && is_shared_weight_) {
1287     model_->buf = nullptr;
1288   }
1289   delete (model_);
1290   model_ = nullptr;
1291 #ifdef SUPPORT_NNRT
1292   NNRTAllocator::GetInstance()->ClearFreeList();
1293 #endif
1294   is_running_.store(false);
1295 }
1296 
1297 mindspore::lite::Tensor *LiteSession::GetInputsByTensorName(const std::string &name) const {
1298   auto ret = input_map_.find(name);
1299   if (ret == input_map_.end()) {
1300     MS_LOG(WARNING) << "Tensor " << name << " does not exist";
1301     return nullptr;
1302   }
1303   return ret->second;
1304 }
1305 
1306 std::vector<mindspore::lite::Tensor *> LiteSession::GetOutputsByNodeName(const std::string &node_name) const {
1307   auto ret = output_node_map_.find(node_name);
1308   if (ret == output_node_map_.end()) {
1309     MS_LOG(WARNING) << "Node " << node_name << " is not an output node";
1310     std::vector<mindspore::lite::Tensor *> empty_ret;
1311     return empty_ret;
1312   }
1313   return ret->second;
1314 }
1315 
1316 std::vector<std::string> LiteSession::GetOutputTensorNames() const { return this->output_tensor_names_; }
1317 
1318 mindspore::lite::Tensor *LiteSession::GetOutputByTensorName(const std::string &tensor_name) const {
1319   auto ret = output_tensor_map_.find(tensor_name);
1320   if (ret == output_tensor_map_.end()) {
1321     MS_LOG(WARNING) << "Tensor " << tensor_name << " is not an output tensor";
1322     return nullptr;
1323   }
1324   return ret->second;
1325 }
1326 
1327 std::unordered_map<std::string, mindspore::lite::Tensor *> LiteSession::GetOutputs() const {
1328   return this->output_tensor_map_;
1329 }
1330 
1331 int LiteSession::UpdateInputShapeMap() {
1332   for (auto input : inputs_) {
1333     MS_CHECK_TRUE_MSG(input != nullptr, RET_ERROR, "graph input tensor is nullptr.");
1334     if (input_shape_map_.find(input) != input_shape_map_.end()) {
1335       input_shape_map_.at(input) = input->shape();
1336     } else {
1337       MS_LOG(ERROR) << "can't find " << input->tensor_name() << " in input_shape_map";
1338       return RET_ERROR;
1339     }
1340   }
1341   return RET_OK;
1342 }
1343 
1344 int LiteSession::ResizeInputs(const std::vector<mindspore::lite::Tensor *> &inputs,
1345                               const std::vector<std::vector<int>> &dims) {
1346   if (inputs.size() != inputs_.size()) {
1347     MS_LOG(ERROR) << "Inputs size " << inputs.size() << " is not equal to " << inputs_.size();
1348     return RET_PARAM_INVALID;
1349   }
1350 
1351   if (dims.size() != inputs.size()) {
1352     MS_LOG(ERROR) << "Input dims size " << dims.size() << " is not equal to the inputs size " << inputs.size();
1353     return RET_PARAM_INVALID;
1354   }
1355 
1356   for (size_t i = 0; i < inputs.size(); ++i) {
1357     if (inputs[i] != inputs_[i]) {
1358       MS_LOG(ERROR) << "Input[" << i << "] tensor is not equal to the inputs have been saved!";
1359       return RET_PARAM_INVALID;
1360     }
1361     inputs_[i]->FreeData();
1362     if (infer_along_running_ && !inputs_[i]->get_shape_changed()) {
1363       inputs_[i]->set_shape_changed(dims[i] != inputs_[i]->shape());
1364     }
1365     inputs_[i]->set_shape(dims[i]);
1366   }
1367   if (!is_train_session_) {
1368     executor_->Resize(inputs, dims);
1369   }
1370   return RET_OK;
1371 }
1372 
1373 void LiteSession::ResetInputsShape(const std::vector<std::vector<int>> &dims) {
1374   for (size_t i = 0; i < inputs_.size(); ++i) {
1375     inputs_[i]->FreeData();
1376     inputs_[i]->set_shape(dims[i]);
1377     inputs_[i]->set_shape_changed(false);
1378   }
1379 }
1380 
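// Resizes all scheduled kernels: delegate kernels call ReSize() directly; subgraph kernels first
// copy shapes from isolate_input_map for any mapped inputs, OpenCL subgraphs use their own ReSize,
// and RET_INFER_INVALID is tolerated so shape inference can be deferred to runtime.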
1381 int LiteSession::ReSizeKernels(const std::vector<kernel::KernelExec *> &kernels,
1382                                const std::unordered_map<Tensor *, Tensor *> &isolate_input_map) {
1383   for (auto kernel : kernels) {
1384     if (kernel == nullptr) {
1385       MS_LOG(ERROR) << "input kernel is nullptr!";
1386       return RET_ERROR;
1387     }
1388     auto ret = RET_OK;
1389     if (kernel->desc().arch == kernel::kDelegate) {
1390       ret = kernel->ReSize();
1391     } else {
1392       // resize subgraph inputs
1393       auto sub_graph_kernel = reinterpret_cast<kernel::SubGraphKernel *>(kernel);
1394       for (auto input : sub_graph_kernel->in_tensors()) {
1395         if (isolate_input_map.find(input) != isolate_input_map.end()) {
1396           input->set_shape(isolate_input_map.at(input)->shape());
1397         }
1398       }
1399       if (kernel->subgraph_type() == kernel::kGpuFp16SubGraph || kernel->subgraph_type() == kernel::kGpuFp32SubGraph) {
1400 #ifdef GPU_OPENCL
1401         auto sub_graph = reinterpret_cast<kernel::OpenCLSubGraph *>(kernel);
1402         ret = sub_graph->ReSize();
1403 #endif
1404       } else {
1405         auto sub_graph = reinterpret_cast<kernel::SubGraphKernel *>(kernel);
1406         ret = sub_graph->ReSize();
1407       }
1408     }
1409     if (ret == RET_INFER_INVALID) {
1410       MS_LOG(INFO) << "InferShape is interrupted";
1411       continue;
1412     }
1413     if (ret != RET_OK) {
1414       MS_LOG(ERROR) << "ReSize node " << kernel->name() << " failed";
1415       return RET_ERROR;
1416     }
1417   }
1418   return RET_OK;
1419 }
1420 
1421 void LiteSession::SynIsolateInOutputDataType() {
1422   for (auto &tensor_map : isolate_input_map_) {
1423     auto dst_tensor = tensor_map.second;
1424     auto src_tensor = tensor_map.first;
1425 
1426     src_tensor->set_data_type(dst_tensor->data_type());
1427   }
1428 
1429   for (auto &tensor_map : isolate_graph_output_map_) {
1430     auto dst_tensor = tensor_map.second;
1431     auto src_tensor = tensor_map.first;
1432 
1433     src_tensor->set_data_type(dst_tensor->data_type());
1434   }
1435 }
1436 
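// Binds caller-provided OpenGL texture ids to model inputs/outputs (GPU_OPENCL builds only):
// each texture id is copied into the matching tensor's data and the tensor is re-typed to
// kNumberTypeGLUInt; requires enable_gl_texture_ in the GPU device info and exactly one
// OpenCL subgraph.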
1437 int LiteSession::BindGLTexture2DMemory(const std::map<std::string, unsigned int> &inputGLTexture,
1438                                        std::map<std::string, unsigned int> *outputGLTexture) {
1439 #ifdef GPU_OPENCL
1440   if (!this->context_->GetDeviceInfo(DT_GPU).gpu_device_info_.enable_gl_texture_) {
1441     MS_LOG(ERROR) << "the context isn't set to support OpenGL texture";
1442     return RET_ERROR;
1443   }
1444   for (const auto &[name, GLTexture_id] : inputGLTexture) {
1445     auto iter = input_map_.find(name);
1446     if (iter == input_map_.end()) {
1447       MS_LOG(ERROR) << "the input tensor name " << name << " does not match any model input name";
1448       return RET_ERROR;
1449     }
1450     auto in_data = iter->second->MutableData();
1451     if (in_data == nullptr) {
1452       MS_LOG(ERROR) << "MallocData for input tensor failed";
1453       return RET_ERROR;
1454     }
1455     memcpy(in_data, &GLTexture_id, sizeof(cl_GLuint));
1456     iter->second->set_data_type(kNumberTypeGLUInt);
1457   }
1458   for (auto [name, GLTexture_id] : *outputGLTexture) {
1459     auto iter = output_tensor_map_.find(name);
1460     if (iter == output_tensor_map_.end()) {
1461       MS_LOG(ERROR) << "the output tensor name " << name << " does not match any model output name";
1462       return RET_ERROR;
1463     }
1464     auto out_data = iter->second->MutableData();
1465     if (out_data == nullptr) {
1466       MS_LOG(ERROR) << "MallocData for output tensor failed";
1467       return RET_ERROR;
1468     }
1469     memcpy(out_data, &GLTexture_id, sizeof(cl_GLuint));
1470     iter->second->set_data_type(kNumberTypeGLUInt);
1471   }
1472 
1473 #ifdef ENABLE_MINDRT
1474   SynIsolateInOutputDataType();  // Synchronized input/output with isolate input/output data types
1475 #endif
1476 
1477   if (this->kernels_.size() != 1) {
1478     MS_LOG(ERROR) << "Only a single OpenCL subgraph is supported when OpenGL texture input is used";
1479     return RET_ERROR;
1480   }
1481   auto opencl_subgraph = reinterpret_cast<kernel::OpenCLSubGraph *>(kernels_.front());
1482   for (size_t i = 0; i < outputs_.size(); i++) {
1483     (opencl_subgraph)->set_out_tensor(outputs_[i], i);
1484   }
1485   for (auto node : opencl_subgraph->out_nodes()) {
1486     node->set_out_tensors(opencl_subgraph->out_tensors());
1487   }
1488 #endif
1489   return RET_OK;
1490 }
1491 
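// Public resize entry point: rejects concurrent calls, checks that every new shape keeps the
// model input rank, resizes the inputs, updates the input-shape map and re-infers kernels,
// restoring the previous shapes if anything fails.
// A minimal caller-side sketch (hypothetical code, assuming a compiled session with one
// NHWC image input):
//   auto inputs = session->GetInputs();
//   std::vector<std::vector<int>> new_dims = {{1, 224, 224, 3}};
//   if (session->Resize(inputs, new_dims) != mindspore::lite::RET_OK) {
//     MS_LOG(ERROR) << "resize failed";
//   }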
1492 int LiteSession::Resize(const std::vector<mindspore::lite::Tensor *> &inputs,
1493                         const std::vector<std::vector<int>> &dims) {
1494   bool expected = false;
1495   if (!is_running_.compare_exchange_strong(expected, true)) {
1496     MS_LOG(ERROR) << "Not support multi-threading";
1497     return RET_ERROR;
1498   }
1499   for (size_t i = 0; i < dims.size(); i++) {
1500     auto model_input_rank = inputs[i]->ConvertToTensorC()->shape_size_;
1501     if (dims[i].size() != model_input_rank && model_input_rank != 0) {
1502       MS_LOG(ERROR) << "Tensor " << i << "'s rank: " << dims[i].size() << " must match the rank: " << model_input_rank
1503                     << " of model input " << i;
1504       return RET_ERROR;
1505     }
1506   }
1507   std::vector<std::vector<int>> old_dims;
1508   for (size_t i = 0; i < inputs_.size(); ++i) {
1509     old_dims.push_back(inputs_[i]->shape());
1510   }
1511   auto ret = ResizeInputs(inputs, dims);
1512   if (ret != RET_OK) {
1513     ResetInputsShape(old_dims);
1514     is_running_.store(false);
1515     return ret;
1516   }
1517   ret = UpdateInputShapeMap();
1518   if (ret != RET_OK) {
1519     MS_LOG(ERROR) << "update input shape map failed.";
    is_running_.store(false);
1520     return RET_ERROR;
1521   }
1522   if (infer_along_running_) {
1523     is_running_.store(false);
1524     return ret;
1525   }
1526 
1527   ret = ReSizeKernels(kernels_, isolate_input_map_);
1528   if (ret != RET_OK) {
1529     ResetInputsShape(old_dims);
1530     auto resize_ret = ReSizeKernels(kernels_);
1531     if (resize_ret != RET_OK) {
1532       MS_LOG(ERROR) << "restore kernel size failed! ret: " << resize_ret;
1533     }
1534     is_running_.store(false);
1535     return ret;
1536   }
1537 
1538   auto status = GraphOptimizePass(&kernels_);
1539   if (status != RET_OK) {
1540     MS_LOG(ERROR) << "GraphOptimizePass failed.";
    is_running_.store(false);
1541     return RET_ERROR;
1542   }
1543 
1544   is_running_.store(false);
1545   return RET_OK;
1546 }
1547 
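// Sanity checks run before CompileGraph: guards against concurrent use and a null model or
// model buffer, verifies the model through the LiteModel or AbstractBaseModel route, and
// warns when float16 is requested on a build without FP16 support.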
1548 int LiteSession::PreCheck(Model *model) {
1549   bool expected = false;
1550   if (!is_running_.compare_exchange_strong(expected, true)) {
1551     MS_LOG(ERROR) << "Not support multi-threading";
1552     return RET_ERROR;
1553   }
1554   if (model == nullptr) {
1555     MS_LOG(ERROR) << "The input model is nullptr.";
1556     return RET_PARAM_INVALID;
1557   }
1558   if (model->buf == nullptr) {
1559     MS_LOG(ERROR) << "The input model buf is nullptr.";
1560     return RET_PARAM_INVALID;
1561   }
1562   if (model->model_type_ != ModelType_MSLite) {
1563     // abstract base model
1564     if (!reinterpret_cast<AbstractBaseModel *>(model)->ModelVerify()) {
1565       MS_LOG(ERROR) << "wrong model input, please check";
1566       return RET_ERROR;
1567     }
1568   } else {
1569     // old routine, convert to abstract base model
1570     if (!reinterpret_cast<LiteModel *>(model)->ModelVerify()) {
1571       MS_LOG(ERROR) << "wrong model input, please check";
1572       return RET_ERROR;
1573     }
1574   }
1575 
1576 #ifndef ENABLE_FP16
1577   if (context_->GetDeviceInfo(DT_CPU).cpu_device_info_.enable_float16_) {
1578     MS_LOG(WARNING) << unsupport_fp16_log;
1579   }
1580 #endif
1581   return RET_OK;
1582 }
1583 
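// Chooses the executor implementation: the single-thread, non-fp16, non-control-flow case
// uses the plain Executor; otherwise (with ENABLE_MINDRT) graph outputs are isolated first
// and a MindrtExecutor is created. The chosen executor is then prepared with the scheduled
// kernels and session tensors.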
1584 int LiteSession::InitExecutor() {
1585   int ret;
1586 #ifdef ENABLE_MINDRT
1587   if (ms_context_->GetThreadNum() == 1 && !context_->IsCpuFloat16Enabled() && !is_control_flow_) {
1588     executor_ = new (std::nothrow) Executor();
1589   } else {
1590     ret = IsolateOutputTensor();
1591     if (ret != RET_OK) {
1592       MS_LOG(ERROR) << "Isolate output tensor failed.";
1593       return ret;
1594     }
1595     executor_ = new (std::nothrow) MindrtExecutor(&isolate_graph_output_map_, &isolate_input_map_);
1596   }
1597 #else
1598   executor_ = new (std::nothrow) Executor();
1599 #endif
1600   if (executor_ == nullptr) {
1601     MS_LOG(ERROR) << "New Executor failed";
1602     return RET_ERROR;
1603   }
1604 
1605   ret = executor_->Prepare(kernels_, inputs_, outputs_, context_.get());
1606   if (ret != RET_OK) {
1607     MS_LOG(ERROR) << "Prepare executor failed: " << ret;
1608     return ret;
1609   }
1610   return RET_OK;
1611 }
1612 
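// Returns RET_OK only when the shared runtime allocator may be used: effectively ARM64 with
// MindRT, a single scheduled subgraph, no subgraph or inter-op parallelism, not a train
// session, and a successful compile-time infershape; every other configuration opts out.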
1613 int LiteSession::RuntimeAllocatorValid() {
1614 #ifdef ENABLE_ARM32
1615   MS_LOG(DEBUG) << "Not support runtime allocator in arm32.";
1616   return RET_ERROR;
1617 #endif
1618 
1619 #ifndef ENABLE_MINDRT
1620   MS_LOG(DEBUG) << "Not support runtime allocator in converter.";
1621   return RET_ERROR;
1622 #endif
1623 
1624 #ifdef BFC_MEMORY
1625   MS_LOG(DEBUG) << "Not support runtime allocator when BFC_MEMORY on.";
1626   return RET_ERROR;
1627 #endif
1628 
1629   if ((context_->enable_parallel_ == true) || (context_->inter_op_parallel_num_ > 1)) {
1630     MS_LOG(DEBUG) << "Not support runtime allocator in subgraph parallel.";
1631     return RET_ERROR;
1632   }
1633   if (is_train_session_ == true) {
1634     MS_LOG(DEBUG) << "Not support runtime allocator in train session.";
1635     return RET_ERROR;
1636   }
1637   if (is_infershape_ != RET_OK) {
1638     MS_LOG(DEBUG) << "Not support runtime allocator in runtime-infershape.";
1639     return RET_ERROR;
1640   }
1641 #ifdef ENABLE_MINDRT
1642   if (kernels_.size() != 1) {
1643     MS_LOG(DEBUG) << "Not support runtime allocator in random subgraph sort";
1644     return RET_ERROR;
1645   }
1646 #endif
1647 #ifdef ENABLE_ARM64
1648   MS_LOG(DEBUG) << "support runtime allocator.";
1649   return RET_OK;
1650 #endif
1651   return RET_ERROR;
1652 }
1653 
1654 void LiteSession::RuntimeAllocatorInitGraphOutput() {
1655   AllocatorPtr default_allocator = context_->allocator;
1656   for (auto graph_out : isolate_graph_output_map_) {
1657     auto cal_t = graph_out.first;
1658     auto out_t = graph_out.second;
1659     if (cal_t->allocator() != runtime_allocator_ || out_t->allocator() != default_allocator) {
1660       continue;
1661     }
1662     out_t->set_allocator(runtime_allocator_);
1663     if (cal_t->data_type() != out_t->data_type()) {
1664       runtime_allocator_->MallocTensorData(out_t);
1665     }
1666   }
1667   return;
1668 }
1669 
1670 void RuntimeAllocatorInitSubgraphInputs(const kernel::KernelExec *subgraph, const AllocatorPtr &default_allocator,
1671                                         const RuntimeAllocatorPtr &runtime_allocator,
1672                                         const std::unordered_map<Tensor *, Tensor *> &isolate_input_map,
1673                                         std::unordered_map<Tensor *, int> *tensor_ref_count,
1674                                         std::unordered_map<size_t, int> *data_ref_count) {
1675   MS_ASSERT(subgraph != nullptr && tensor_ref_count != nullptr && data_ref_count != nullptr);
1676   for (auto in_tensor : subgraph->in_tensors()) {
1677     auto iter = isolate_input_map.find(in_tensor);
1678     if (isolate_input_map.end() == iter) break;
1679     auto src_t = iter->second;
1680 
1681     if (src_t->data_type() == in_tensor->data_type()) {
1682       in_tensor->set_allocator(src_t->allocator());
1683       if (src_t->allocator() == runtime_allocator) {
1684         (*tensor_ref_count)[in_tensor] = in_tensor->init_ref_count();
1685         (*data_ref_count)[runtime_allocator->GetOffsetMap().at(src_t)] += in_tensor->init_ref_count();
1686         runtime_allocator->SetDataOffset(in_tensor, runtime_allocator->GetOffsetMap().at(src_t));
1687       }
1688     } else {
1689       if (in_tensor->allocator() == default_allocator) {
1690         in_tensor->set_allocator(runtime_allocator);
1691         runtime_allocator->MallocTensorData(in_tensor);
1692         (*tensor_ref_count)[in_tensor] = in_tensor->init_ref_count();
1693         (*data_ref_count)[runtime_allocator->GetOffsetMap().at(in_tensor)] = in_tensor->init_ref_count();
1694       }
1695     }
1696 
1697     if (src_t->allocator() != runtime_allocator) {
1698       continue;
1699     }
1700 
1701     (*tensor_ref_count)[src_t]--;
1702     (*data_ref_count)[runtime_allocator->GetOffsetMap().at(src_t)]--;
1703 
1704     if ((*tensor_ref_count)[src_t] <= 0) {
1705       if ((*data_ref_count)[runtime_allocator->GetOffsetMap().at(src_t)] <= 0) {
1706         runtime_allocator->FreeTensorData(src_t);
1707       }
1708     }
1709   }
1710 }
1711 
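// Plans memory for CPU subgraphs with simple reference counting: each kernel output that
// still uses the default allocator is switched to the runtime allocator and given an offset,
// and each consumed input decrements its tensor/data counters so the slot can be freed for
// reuse once both reach zero.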
1712 void LiteSession::RuntimeAllocatorInitSubgraph() {
1713   AllocatorPtr default_allocator = context_->allocator;
1714   std::unordered_map<lite::Tensor *, int> tensor_ref_count;
1715   std::unordered_map<size_t, int> data_ref_count;
1716 
1717   for (auto subgraph : kernels_) {
1718     if (subgraph->desc().arch != kernel::KERNEL_ARCH::kCPU) {
1719       continue;
1720     }
1721 
1722     RuntimeAllocatorInitSubgraphInputs(subgraph, default_allocator, runtime_allocator_, isolate_input_map_,
1723                                        &tensor_ref_count, &data_ref_count);
1724 
1725     auto kernel_list = reinterpret_cast<kernel::SubGraphKernel *>(subgraph)->nodes();
1726     for (auto kernel : kernel_list) {
1727       /* malloc for output */
1728       for (auto tensor : kernel->out_tensors()) {
1729         if (tensor->allocator() != default_allocator || tensor->IsConst()) {
1730           continue;
1731         }
1732         tensor->set_allocator(runtime_allocator_);
1733         runtime_allocator_->MallocTensorData(tensor);
1734         tensor_ref_count[tensor] = tensor->init_ref_count();
1735         data_ref_count[runtime_allocator_->GetOffsetMap().at(tensor)] = tensor->init_ref_count();
1736       }
1737 
1738       /* free input after run */
1739       for (auto tensor : kernel->in_tensors()) {
1740         if (tensor->allocator() != runtime_allocator_) {
1741           continue;
1742         }
1743         tensor_ref_count[tensor]--;
1744         data_ref_count[runtime_allocator_->GetOffsetMap().at(tensor)]--;
1745 
1746         if (tensor_ref_count[tensor] <= 0 && tensor->allocator() == runtime_allocator_) {
1747           if (data_ref_count[runtime_allocator_->GetOffsetMap().at(tensor)] <= 0) {
1748             runtime_allocator_->FreeTensorData(tensor);
1749           }
1750         }
1751       }
1752     }
1753   }
1754   return;
1755 }
1756 
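// Sets up the shared runtime allocator when the configuration allows it and no custom CPU
// kernels are registered: plans subgraph and graph-output tensors, then backs all planned
// offsets with a single optimized buffer via RuntimeAllocatorSetData().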
1757 int LiteSession::InitRuntimeAllocator() {
1758   if (RuntimeAllocatorValid() != RET_OK) {
1759     return RET_OK;
1760   }
1761   if (ExistCustomCpuKernel()) {
1762     return RET_OK;
1763   }
1764   if (runtime_allocator_ == nullptr) {
1765     runtime_allocator_ = std::shared_ptr<RuntimeAllocator>(new (std::nothrow) RuntimeAllocator());
1766   } else {
1767     runtime_allocator_->Clear(context_->allocator);
1768   }
1769   if (runtime_allocator_ == nullptr) {
1770     MS_LOG(ERROR) << "RuntimeAllocator is null.";
1771     return RET_ERROR;
1772   }
1773 
1774   RuntimeAllocatorInitSubgraph();
1775 
1776   RuntimeAllocatorInitGraphOutput();
1777 
1778   auto ret = RuntimeAllocatorSetData();
1779   if (ret != RET_OK) {
1780     MS_LOG(ERROR) << "using optimize allocator failed.";
1781     return ret;
1782   }
1783   return RET_OK;
1784 }
1785 
1786 int LiteSession::RuntimeAllocatorSetData() {
1787   void *data = runtime_allocator_->MallocOptData();
1788   if (data == nullptr) {
1789     MS_LOG(ERROR) << "malloc optimize data failed.";
1790     return RET_ERROR;
1791   }
1792   int8_t *int8_data = reinterpret_cast<int8_t *>(data);
1793   auto offset_map = runtime_allocator_->GetOffsetMap();
1794 
1795   for (auto &iter : offset_map) {
1796     auto tensor = iter.first;
1797     if (tensor->allocator() != runtime_allocator_) {
1798       return RET_ERROR;
1799     }
1800     tensor->set_data(int8_data + iter.second);
1801   }
1802   return RET_OK;
1803 }
1804 
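// Initializes the OpenCL runtime when a GPU device is configured: temporarily applies the
// configured CPU core binding, wires GL texture/context/display options, falls back to a
// CPU-only device list if OpenCL init fails (unless GL texture was requested), and finally
// unbinds the cores because core binding interferes with OpenCL driver scheduling.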
1805 int LiteSession::InitGPURuntime() {
1806   if (context_->IsDeviceTypeEnabled(DT_CPU)) {
1807     CpuBindMode cpu_bind_mode = context_->GetDeviceInfo(DT_CPU).cpu_device_info_.cpu_bind_mode_;
1808     ThreadPool *thread_pool = this->context_->thread_pool_;
1809     if (thread_pool != nullptr) {
1810       thread_pool->SetProcessAffinity(static_cast<BindMode>(cpu_bind_mode));
1811     }
1812   }
1813 #ifdef GPU_OPENCL
1814   if (this->context_->IsDeviceTypeEnabled(DT_GPU)) {
1815     opencl_runtime_wrapper_ = new (std::nothrow) opencl::OpenCLRuntimeInnerWrapper();
1816     if (opencl_runtime_wrapper_ == nullptr) {
1817       MS_LOG(ERROR) << "create OpenCLRuntimeInnerWrapper failed";
1818       return RET_ERROR;
1819     }
1820     const auto &gpu_device_info = this->context_->GetDeviceInfo(DT_GPU).gpu_device_info_;
1821     auto opencl_runtime = opencl_runtime_wrapper_->GetInstance();
1822     opencl_runtime->SetGLTextureEnable(gpu_device_info.enable_gl_texture_);
1823     opencl_runtime->SetGLContext(gpu_device_info.gl_context_);
1824     opencl_runtime->SetGLDisplay(gpu_device_info.gl_display_);
1825     if (opencl_runtime->Init() != RET_OK) {
1826       if (gpu_device_info.enable_gl_texture_) {
1827         MS_LOG(ERROR) << "Init OpenCL runtime failed; enable_gl_texture is set, which requires GPU mode.";
1828         return RET_ERROR;
1829       }
1830       this->context_->device_list_ = {{DT_CPU, {gpu_device_info.enable_float16_, MID_CPU}}};
1831       MS_LOG(WARNING) << "Init OpenCL runtime failed, change to CPU mode.";
1832     } else {
1833       MS_LOG(INFO) << "Init OpenCL runtime success.";
1834     }
1835 
1836     opencl_runtime->SetFp16Enable(gpu_device_info.enable_float16_);
1837 
1838     /* check chip support shared memory */
1839     auto enable_arm_import_memory = opencl_runtime->isExtensionEnable(EXT_ARM_IMPORT_MEMORY_HOST);
1840     if (!enable_arm_import_memory) {
1841       MS_LOG(WARNING) << "GPU does not support shared memory!";
1842     }
1843   }
1844 #endif
1845   // Core binding affects the OpenCL driver scheduling, so unbind the cores after GPU init.
1846   if (context_->IsDeviceTypeEnabled(DT_CPU)) {
1847     ThreadPool *thread_pool = this->context_->thread_pool_;
1848     if (thread_pool != nullptr) {
1849       thread_pool->SetProcessAffinity(static_cast<BindMode>(NO_BIND));
1850     }
1851   }
1852   return RET_OK;
1853 }
1854 }  // namespace lite
1855 
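// A minimal caller-side sketch (hypothetical code, not part of this file), assuming a
// flatbuffer model already read into model_buf/size:
//   auto context = std::make_shared<mindspore::lite::InnerContext>();
//   auto *session = mindspore::lite::LiteSession::CreateSession(model_buf, size, context);
//   if (session != nullptr) {
//     session->RunGraph();
//     delete session;
//   }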
1856 lite::LiteSession *lite::LiteSession::CreateSession(const std::shared_ptr<InnerContext> &context) {
1857   auto session = new (std::nothrow) lite::LiteSession();
1858   if (session == nullptr) {
1859     MS_LOG(ERROR) << "create session failed";
1860     return nullptr;
1861   }
1862   auto ret = session->Init(context);
1863   if (ret != mindspore::lite::RET_OK) {
1864     MS_LOG(ERROR) << "init session failed";
1865     delete session;
1866     return nullptr;
1867   }
1868   return session;
1869 }
1870 
1871 lite::LiteSession *lite::LiteSession::CreateSession(const char *model_buf, size_t size,
1872                                                     const std::shared_ptr<InnerContext> &context) {
1873   auto *session = lite::LiteSession::CreateSession(context);
1874   if (session == nullptr) {
1875     MS_LOG(ERROR) << "Create session failed";
1876     return nullptr;
1877   }
1878   auto ret = reinterpret_cast<lite::LiteSession *>(session)->LoadModelAndCompileByBuf(
1879     model_buf, mindspore::ModelType::kMindIR_Lite, size);
1880   if (ret != RET_OK) {
1881     MS_LOG(ERROR) << "Init session failed";
1882     delete session;
1883     return nullptr;
1884   }
1885   return session;
1886 }
1887 
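// Decides how a raw buffer should be treated: kMindIR_Lite buffers are used as-is, while
// kMindIR buffers are first verified against the MSLite schema and otherwise converted at
// runtime (RUNTIME_CONVERT) or passed through for cloud fusion inference.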
1888 mindspore::ModelType lite::LiteSession::LoadModelByBuff(const char *model_buf, const size_t &buf_size, char **lite_buf,
1889                                                         size_t *size, mindspore::ModelType model_type) {
1890   if (model_type == mindspore::ModelType::kMindIR_Lite) {
1891     *size = buf_size;
1892     *lite_buf = const_cast<char *>(model_buf);
1893     return mindspore::ModelType::kMindIR_Lite;
1894   }
1895 
1896   if (model_type != mindspore::ModelType::kMindIR) {
1897     return mindspore::ModelType::kUnknownType;
1898   }
1899 
1900   flatbuffers::Verifier verify((const uint8_t *)model_buf, buf_size, INT32_MAX, INT32_MAX);
1901   auto version_verify = lite::LiteModel::VersionVerify(&verify);
1902   if (version_verify != SCHEMA_INVALID) {
1903     MS_LOG(DEBUG) << "The kMindIR type model buffer is valid mslite model buffer";
1904     *size = buf_size;
1905     *lite_buf = const_cast<char *>(model_buf);
1906     return mindspore::ModelType::kMindIR_Lite;
1907   }
1908   MS_LOG(WARNING) << "Invalid mslite model.";
1909 
1910 #ifdef RUNTIME_CONVERT
1911   *lite_buf = RuntimeConvert(model_buf, buf_size, size, ms_context_);
1912 #else
1913   MS_LOG(WARNING) << "Please enable runtime convert.";
1914 #endif
1915 #ifdef ENABLE_CLOUD_FUSION_INFERENCE
1916   *size = buf_size;
1917   *lite_buf = const_cast<char *>(model_buf);
1918 #endif
1919   return mindspore::ModelType::kMindIR;
1920 }
1921 
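// Reads the model file into memory (optionally via mmap) and normalizes it through
// LoadModelByBuff; on failure the buffer is unmapped or deleted and nullptr is returned.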
1922 const char *lite::LiteSession::LoadModelByPath(const std::string &file, mindspore::ModelType model_type, size_t *size,
1923                                                bool use_mmap) {
1924   size_t buf_size;
1925   char *model_buf;
1926   if (use_mmap) {
1927     model_buf = reinterpret_cast<char *>(lite::ReadFileByMmap(file.c_str(), &buf_size));
1928   } else {
1929     model_buf = lite::ReadFile(file.c_str(), &buf_size);
1930   }
1931   if (model_buf == nullptr) {
1932     MS_LOG(ERROR) << "The model path is invalid";
1933     return model_buf;
1934   }
1935 
1936   char *lite_buf = nullptr;
1937   auto buf_model_type = LoadModelByBuff(model_buf, buf_size, &lite_buf, size, model_type);
1938   if (buf_model_type == mindspore::ModelType::kUnknownType || lite_buf == nullptr) {
1939     if (use_mmap) {
1940       lite::UnmapMmapBuffer(const_cast<void *>(static_cast<const void *>(model_buf)), buf_size);
1941     } else {
1942       delete[] model_buf;
1943     }
1944     model_buf = nullptr;
1945     return nullptr;
1946   }
1947 
1948   return lite_buf;
1949 }
1950 
1951 std::string lite::LiteSession::ParseWeightPath() {
1952   std::string weight_path = "";
1953   if (config_info_ != nullptr) {
1954     auto ms_weight = config_info_->find(kConfigModelFileSection);
1955     if (ms_weight != config_info_->end()) {
1956       auto ms_weight_iter = ms_weight->second;
1957       if (ms_weight_iter.find(kConfigMindIRPathKey) != ms_weight_iter.end()) {
1958         weight_path = ms_weight_iter[kConfigMindIRPathKey];
1959       }
1960     }
1961   }
1962   return weight_path;
1963 }
1964 
1965 int lite::LiteSession::ReshapeWeightTensor(lite::Tensor *orig_tensor, lite::Tensor *new_tensor) {
1966   if (orig_tensor->data_type() != new_tensor->data_type()) {
1967     MS_LOG(ERROR) << "Cannot reshape tensor of different type: " << new_tensor->tensor_name();
1968     return RET_PARAM_INVALID;
1969   }
1970 
1971   if (orig_tensor->category() != lite::Category::CONST_TENSOR) {
1972     MS_LOG(ERROR) << "Cannot reshape non const tensor: " << new_tensor->tensor_name();
1973     return RET_ERROR;
1974   }
1975 
1976   auto orig_size = orig_tensor->Size();
1977   uint8_t *new_data = reinterpret_cast<uint8_t *>(new_tensor->data());
1978   if (new_data == nullptr) {
1979     // Copy original data into new_tensor
1980     new_data = reinterpret_cast<uint8_t *>(new_tensor->MutableData());
1981     if (new_data == nullptr) {
1982       MS_LOG(ERROR) << "Allocation of data failed for " << new_tensor->tensor_name();
1983       return RET_ERROR;
1984     }
1985     if (orig_size == 0) {
1986       MS_LOG(ERROR) << "Operation failed: both the new tensor and the original one have no data";
1987       return RET_ERROR;
1988     }
1989     uint8_t *orig_data = reinterpret_cast<uint8_t *>(orig_tensor->data());
1990     for (unsigned int loc = 0; loc < new_tensor->Size(); loc++) {
1991       new_data[loc] = orig_data[loc % orig_size];
1992     }
1993   }
1994 
1995   if (orig_tensor->shape() != new_tensor->shape()) {
1996     orig_tensor->FreeData();
1997     orig_tensor->set_data(nullptr);
1998     orig_tensor->set_shape(new_tensor->shape());
1999   }
2000 
2001   uint8_t *dst_data = reinterpret_cast<uint8_t *>(orig_tensor->MutableData());
2002   if (dst_data == nullptr) {
2003     MS_LOG(ERROR) << "Allocation of data failed";
2004     return RET_ERROR;
2005   }
2006   std::copy(new_data, new_data + orig_tensor->Size(), dst_data);
2007   return RET_OK;
2008 }
2009 
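// Replaces constant weight tensors by name with the caller-supplied tensors, reshaping the
// originals when the shapes differ, then re-infers kernel shapes and re-applies Eval() if
// the session was in eval mode.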
2010 int lite::LiteSession::UpdateWeights(std::vector<lite::Tensor *> modify_tensors) {
2011   unsigned int num_of_found_tensors = 0;
2012   for (auto modify : modify_tensors) {
2013     if (modify == nullptr) {
2014       MS_LOG(ERROR) << "Tensor is nullptr";
2015       return RET_PARAM_INVALID;
2016     }
2017     for (auto tensor : tensors_) {
2018       if (modify->tensor_name() == tensor->tensor_name()) {
2019         if (tensor->Size() != modify->Size()) {
2020           model_buff_changed_ = true;
2021         }
2022         auto ret = ReshapeWeightTensor(tensor, modify);
2023         num_of_found_tensors++;
2024         if (ret != RET_OK) {
2025           model_buff_changed_ = false;
2026           return ret;
2027         }
2028         break;
2029       }
2030     }
2031   }
2032   if (num_of_found_tensors != modify_tensors.size()) {
2033     MS_LOG(ERROR) << "Did not find all the given tensors in the model";
2034     return RET_ERROR;
2035   }
2036   auto ret = ReSizeKernels(kernels_);
2037   if (ret != RET_OK) {
2038     MS_LOG(ERROR) << "Resize kernels fail!";
2039     model_buff_changed_ = false;
2040     return ret;
2041   }
2042 
2043   bool is_eval = IsEval();
2044   if (is_eval) {
2045     ret = Eval();
2046   }
2047   return ret;
2048 }
2049 
2050 #ifdef ENABLE_LITE_HELPER
2051 int lite::LiteSession::LoadModelAndCompileByBuf(const char *model_buf, mindspore::ModelType model_type,
2052                                                 const size_t &buf_size,
2053                                                 mindspore::infer::helper::InferHelpers *infer_helpers) {
2054 #else
2055 int lite::LiteSession::LoadModelAndCompileByBuf(const char *model_buf, mindspore::ModelType model_type,
2056                                                 const size_t &buf_size) {
2057 #endif
2058   auto status = lite::PackWeightManager::GetInstance()->InitPackWeightManager(model_buf, buf_size, &model_id_,
2059                                                                               &runner_id_, config_info_);
2060   if (status != RET_OK) {
2061     MS_LOG(ERROR) << "InitPackWeightByBuf failed.";
2062     return RET_ERROR;
2063   }
2064   auto new_model_buf =
2065     lite::PackWeightManager::GetInstance()->GetSharedModelBuf(model_buf, model_id_, config_info_, &is_shared_weight_);
2066   if (new_model_buf == nullptr) {
2067     MS_LOG(ERROR) << "get shared model buf is nullptr.";
2068     return RET_ERROR;
2069   }
2070   size_t lite_buf_size = 0;
2071   char *lite_buf = nullptr;
2072   auto buf_model_type = LoadModelByBuff(new_model_buf, buf_size, &lite_buf, &lite_buf_size, model_type);
2073   if (buf_model_type == mindspore::ModelType::kUnknownType || lite_buf == nullptr) {
2074     MS_LOG(ERROR) << "Invalid model_buf";
2075     return RET_ERROR;
2076   }
2077   auto weight_path = ParseWeightPath();
2078 #ifdef ENABLE_LITE_HELPER
2079   auto *model = lite::ImportFromBuffer(lite_buf, lite_buf_size, true, model_type, weight_path, infer_helpers);
2080 #else
2081   auto *model = lite::ImportFromBuffer(lite_buf, lite_buf_size, true, model_type, weight_path);
2082 #endif
2083   if (model == nullptr) {
2084     MS_LOG(ERROR) << "Import model failed";
2085     return RET_ERROR;
2086   }
2087   (reinterpret_cast<lite::LiteModel *>(model))->set_keep_model_buf(keep_model_buf_);
2088   auto ret = CompileGraph(model);
2089   model->buf = nullptr;
2090   if (ret != lite::RET_OK) {
2091     MS_LOG(ERROR) << "Compile model failed";
2092     delete model;
2093     return RET_ERROR;
2094   }
2095   set_model(model);
2096   return RET_OK;
2097 }
2098 
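// Reads a model from disk, registers it with the PackWeightManager so weights can be shared
// across sessions, imports it into a LiteModel and compiles the graph; the mmapped buffer is
// kept alive by the model unless the weights are shared from another session.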
2099 int lite::LiteSession::LoadModelAndCompileByPath(const std::string &model_path, mindspore::ModelType model_type) {
2100   size_t model_size;
2101   bool use_mmap = IsMmapEnable();
2102   auto model_buf = LoadModelByPath(model_path, model_type, &model_size, use_mmap);
2103   if (model_buf == nullptr) {
2104     MS_LOG(ERROR) << "Read model file failed";
2105     return RET_ERROR;
2106   }
2107   auto status = lite::PackWeightManager::GetInstance()->InitPackWeightManager(model_buf, model_size, &model_id_,
2108                                                                               &runner_id_, config_info_);
2109   if (status != RET_OK) {
2110     MS_LOG(ERROR) << "InitPackWeightByBuf failed.";
2111     return RET_ERROR;
2112   }
2113   auto new_model_buf =
2114     lite::PackWeightManager::GetInstance()->GetSharedModelBuf(model_buf, model_id_, config_info_, &is_shared_weight_);
2115   if (new_model_buf == nullptr) {
2116     MS_LOG(ERROR) << "get shared model buf is nullptr.";
2117     return RET_ERROR;
2118   }
2119   if (is_shared_weight_) {
2120     if (use_mmap) {
2121       lite::UnmapMmapBuffer(const_cast<void *>(static_cast<const void *>(model_buf)), model_size);
2122     } else {
2123       delete[] model_buf;
2124     }
2125     model_buf = nullptr;
2126   }
2127   auto *model = lite::ImportFromBuffer(new_model_buf, model_size, true, model_type, model_path);
2128   if (model == nullptr) {
2129     MS_LOG(ERROR) << "Import model failed";
2130     return RET_ERROR;
2131   }
2132   if (use_mmap && new_model_buf == model_buf) {
2133     reinterpret_cast<lite::LiteModel *>(model)->model_buf_by_mmap_ = true;
2134   }
2135   (reinterpret_cast<lite::LiteModel *>(model))->set_keep_model_buf(true);
2136   auto ret = CompileGraph(model);
2137   if (ret != lite::RET_OK) {
2138     MS_LOG(ERROR) << "Compile model failed";
2139     model->buf = nullptr;
2140     delete model;
2141     return RET_ERROR;
2142   }
2143   set_model(model);
2144   return RET_OK;
2145 }
2146 
2147 bool lite::LiteSession::IsMmapEnable() const {
2148 #if !defined(_WIN32) && !defined(_WIN64) && !defined(MS_COMPILE_IOS)
2149   if (delegate_device_type_ == static_cast<int>(DT_NPU)) {
2150     return false;
2151   }
2152   return true;
2153 #else
2154   return false;
2155 #endif
2156 }
2157 }  // namespace mindspore
2158