/**
 * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "src/lite_session.h"
#include <vector>
#include <utility>
#include "include/errorcode.h"
#include "src/common/log_adapter.h"
#include "src/scheduler.h"
#include "src/runtime/inner_allocator.h"
#include "src/executor.h"
#include "src/common/context_util.h"
#include "src/common/utils.h"
#include "src/common/prim_util.h"
#include "src/common/graph_util.h"
#include "src/common/tensor_util.h"
#include "src/common/file_utils.h"
#include "src/kernel_registry.h"
#include "src/lite_model.h"
#include "src/weight_decoder.h"
#include "src/lite_kernel_util.h"
#ifdef ENABLE_MINDRT
#include "src/mindrt_executor.h"
#endif
#if SUPPORT_NPU
#include "src/delegate/npu/npu_delegate.h"
#endif
#if GPU_OPENCL
#include "src/runtime/kernel/opencl/opencl_subgraph.h"
#endif
#if GPU_TENSORRT
#include "src/delegate/tensorrt/tensorrt_delegate.h"
#endif
#ifdef SUPPORT_NNRT
#include "src/delegate/nnrt/nnrt_delegate.h"
#endif
#ifndef WEIGHT_DECODE_CLIP
#include "tools/converter/quantizer/fse_decoder.h"
#endif
namespace mindspore {
namespace lite {
namespace {
bool NeedBitUppackCheck(const schema::Tensor &src_tensor) {
  if (src_tensor.enableHuffmanCode()) {
    return true;
  }
  bool need_bit_unpack = src_tensor.quantParams() != nullptr && src_tensor.quantParams()->size() > 0 &&
                         src_tensor.quantParams()->Get(0) != nullptr;
  if (need_bit_unpack) {
    auto num_bits = src_tensor.quantParams()->Get(0)->numBits();
    need_bit_unpack = ((num_bits >= kBitNum1 && num_bits < kBitNum8) || (num_bits > kBitNum8 && num_bits < kBitNum16));
  }

  return need_bit_unpack;
}

int DecompressTensor(const schema::Tensor &src_tensor, Tensor *dst_tensor) {
  MS_ASSERT(dst_tensor != nullptr);
#ifndef WEIGHT_DECODE_CLIP
  if (src_tensor.weightQunatCompressType() == schema::WeightQunatCompressType_FSE) {
    return quant::FSEDecoder::DeCompress(src_tensor, dst_tensor);
  } else if (src_tensor.weightQunatCompressType() == schema::WeightQunatCompressType_INDEXING) {
    return IndexingDecompress(src_tensor, dst_tensor);
  } else if (src_tensor.weightQunatCompressType() == schema::WeightQunatCompressType_SPARSE) {
    return SparseDecompress(src_tensor, dst_tensor);
  }
#else
  if (src_tensor.weightQunatCompressType() != schema::WeightQunatCompressType_NONE) {
    MS_LOG(ERROR) << unsupport_weight_decode_log;
    return RET_ERROR;
  }
#endif
  if (!NeedBitUppackCheck(src_tensor)) {
    return RET_NO_CHANGE;
  } else {
#ifndef WEIGHT_DECODE_CLIP
    return WeightDecoder::UnPack(src_tensor, dst_tensor);
#else
    MS_LOG(ERROR) << unsupport_weight_decode_log;
    return RET_ERROR;
#endif
  }
}
}  // namespace

LiteSession::LiteSession() { this->is_running_.store(false); }

void LiteSession::ConvertTensorsQuantParam(const schema::Tensor *src_tensor, lite::Tensor *dst_tensor) {
  MS_ASSERT(src_tensor != nullptr);
  MS_ASSERT(dst_tensor != nullptr);
  auto quant_params = src_tensor->quantParams();
  if (quant_params != nullptr) {
    for (size_t j = 0; j < quant_params->size(); j++) {
      auto quant_param = quant_params->Get(j);
      LiteQuantParam quant_arg{};
      if (quant_param == nullptr) {
        quant_arg.inited = false;
      } else {
        quant_arg.inited = true;
        quant_arg.bitNum = quant_param->numBits();
        quant_arg.scale = quant_param->scale();
        quant_arg.zeroPoint = quant_param->zeroPoint();
        quant_arg.var_corr = quant_param->varCorr();
        quant_arg.mean_corr = quant_param->meanCorr();
        quant_arg.roundType = quant_param->roundType();
        quant_arg.multiplier = quant_param->multiplier();
        quant_arg.dstDtype = quant_param->dstDtype();
      }
      dst_tensor->AddQuantParam(quant_arg);
    }
  }
  auto quant_clusters = src_tensor->quantClusters();
  if (quant_clusters != nullptr) {
    std::vector<float> clusters;
    for (size_t j = 0; j < quant_clusters->size(); j++) {
      clusters.push_back(quant_clusters->Get(j));
    }
    dst_tensor->set_quant_clusters(clusters);
  }
}

int LiteSession::ConvertTensorsData(const lite::Model *model, size_t tensor_index, const schema::Tensor *src_tensor,
                                    lite::Tensor *dst_tensor) {
  MS_ASSERT(src_tensor != nullptr);
  MS_ASSERT(dst_tensor != nullptr);
  if (src_tensor->data() == nullptr || src_tensor->data()->size() <= 0) {
    MS_LOG(DEBUG) << "No valid data converted.";
    return RET_OK;
  }

  /* tensor list convert */
  if (dst_tensor->data_type() == kObjectTypeTensorType) {
#ifndef CONTROLFLOW_TENSORLIST_CLIP
    auto tensor_list = reinterpret_cast<TensorList *>(dst_tensor);
    if (tensor_list->Decode(reinterpret_cast<const int *>(src_tensor->data()->data())) != RET_OK) {
      MS_LOG(ERROR) << "Decode tensorlist data failed";
      return RET_ERROR;
    }
    return RET_OK;
#else
    MS_LOG(ERROR) << unsupport_controlflow_tensorlist_log;
    return RET_NOT_SUPPORT;
#endif
  }

  /* normal tensor check */
  auto shape_info = dst_tensor->shape();
  if (shape_info.end() !=
      std::find_if(shape_info.begin(), shape_info.end(), [](const int shape) { return shape <= 0; })) {
    MS_LOG(ERROR) << "Invalid shape size." << src_tensor->name()->c_str();
    return RET_ERROR;
  }

  auto ret = DecompressTensor(*src_tensor, dst_tensor);
  if (ret == RET_NO_CHANGE) {
    dst_tensor->set_data(const_cast<unsigned char *>(src_tensor->data()->data()));
    dst_tensor->set_own_data(false);
  } else if (ret != RET_OK) {
    MS_LOG(ERROR) << "Decompress tensor data failed: " << ret;
    return ret;
  }
  return RET_OK;
}

lite::Tensor *LiteSession::ConvertTensor(const schema::Tensor &src_tensor) {
  int32_t data_type = src_tensor.dataType();
  if (data_type <= kTypeUnknown || data_type >= kMonadTypeEnd) {
    MS_LOG(ERROR) << "invalid data type. " << data_type;
    return nullptr;
  }
  auto src_category = TensorCategory(&src_tensor);
  std::vector<int> shape;
  if (src_tensor.dims() == nullptr) {
    MS_LOG(DEBUG) << "Dims of src_tensor is nullptr";
  }
  if (src_tensor.dims() != nullptr) {
    if (src_tensor.dataType() == kObjectTypeString && src_tensor.data() != nullptr) {
      shape.push_back(src_tensor.data()->size());
    } else {
      for (size_t j = 0; j < src_tensor.dims()->size(); j++) {
        shape.push_back(src_tensor.dims()->data()[j]);
      }
    }
  }
  lite::Tensor *dst_tensor = nullptr;
  if (TypeId(data_type) == kObjectTypeTensorType) {
#ifndef CONTROLFLOW_TENSORLIST_CLIP
    dst_tensor = new (std::nothrow) TensorList(shape, std::vector<int>(), src_category);
    // set tensor list datatype
    auto tensor_list = reinterpret_cast<TensorList *>(dst_tensor);
    if (src_tensor.data() != nullptr) {
      auto tensor_data_type = TypeId(reinterpret_cast<const int *>(src_tensor.data()->data())[0]);
      tensor_list->set_tensors_data_type(tensor_data_type);
    }
#else
    MS_LOG(ERROR) << unsupport_controlflow_tensorlist_log;
#endif
  } else {
    dst_tensor = new (std::nothrow)
      Tensor(TypeId(data_type), shape, static_cast<mindspore::Format>(src_tensor.format()), src_category);
  }
  return dst_tensor;
}

int LiteSession::ConvertTensors(const lite::Model *model) {
  MS_ASSERT(model != nullptr);
  uint32_t tensor_count = model->graph_.all_tensors_.size();
  auto model_input_indices = model->graph_.input_indices_;
  auto model_output_indices = model->graph_.output_indices_;
  for (uint32_t i = 0; i < tensor_count; ++i) {
    auto *src_tensor = static_cast<schema::Tensor *>(model->graph_.all_tensors_[i]);
    if (src_tensor == nullptr) {
      MS_LOG(ERROR) << i << "th tensor in model is nullptr";
      return RET_NULL_PTR;
    }
    auto *dst_tensor = ConvertTensor(*src_tensor);
    if (dst_tensor == nullptr) {
      MS_LOG(ERROR) << "Convert new " << i << "th tensor failed!";
      return RET_NULL_PTR;
    }
    auto ret = ConvertTensorsData(model, i, src_tensor, dst_tensor);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "Convert data of " << i << "th tensor failed";
      delete dst_tensor;
      return ret;
    }
    ConvertTensorsQuantParam(src_tensor, dst_tensor);
    if (IsContain(model_input_indices, i)) {
      if (dst_tensor->data() != nullptr) {
        MS_LOG(ERROR) << "Graph input shouldn't have data";
        delete dst_tensor;
        return RET_ERROR;
      }
      dst_tensor->set_category(Tensor::GRAPH_INPUT);
    }
    if (IsContain(model_output_indices, i)) {
      if (dst_tensor->data() != nullptr) {
        MS_LOG(ERROR) << "Graph output shouldn't have data";
        delete dst_tensor;
        return RET_ERROR;
      }
      // a tensor that is both an input and an output is treated as an input.
      if (!dst_tensor->IsGraphInput()) {
        dst_tensor->set_category(Tensor::GRAPH_OUTPUT);
      }
    }
    if (src_tensor->name() != nullptr) {
      dst_tensor->set_tensor_name(src_tensor->name()->str());
    }
    this->tensors_.emplace_back(dst_tensor);
  }
  return RET_OK;
}

void LiteSession::InitGraphInputTensors(const lite::Model *model) {
  MS_ASSERT(model != nullptr);
  auto graph_in_size = model->graph_.input_indices_.size();
  for (size_t i = 0; i < graph_in_size; ++i) {
    auto in_tensor_idx = model->graph_.input_indices_[i];
    MS_ASSERT(in_tensor_idx < this->tensors_.size());
    auto *in_tensor = this->tensors_.at(in_tensor_idx);
    MS_ASSERT(in_tensor != nullptr);
    this->inputs_.emplace_back(in_tensor);
  }
}

void LiteSession::InitGraphInputMSTensors() {
  MS_ASSERT(this->input_vec_.empty());
  for (auto &input_tensor : this->inputs_) {
    MS_ASSERT(input_tensor != nullptr);
    this->input_vec_.emplace_back(input_tensor);
  }
}

void LiteSession::InitGraphOutputTensors(const lite::Model *model) {
  MS_ASSERT(model != nullptr);
  MS_ASSERT(this->outputs_.empty());
  auto graph_out_size = model->graph_.output_indices_.size();
  for (size_t i = 0; i < graph_out_size; ++i) {
    auto out_tensor_idx = model->graph_.output_indices_[i];
    MS_ASSERT(out_tensor_idx < this->tensors_.size());
    auto *out_tensor = this->tensors_.at(out_tensor_idx);
    MS_ASSERT(out_tensor != nullptr);
    this->outputs_.emplace_back(out_tensor);
  }
}

void LiteSession::InitGraphInputMap(const lite::Model *model) {
  MS_ASSERT(model != nullptr);
  MS_ASSERT(this->input_map_.empty());
  auto graph_input_node_indexes = GetGraphInputNodes(model);
  auto graph_in_size = model->graph_.input_indices_.size();
  for (auto in_node_index : graph_input_node_indexes) {
    auto in_node = model->graph_.all_nodes_[in_node_index];
    MS_ASSERT(in_node != nullptr);
    auto in_size = in_node->input_indices_.size();
    for (size_t i = 0; i < in_size; ++i) {
      MS_ASSERT(this->input_map_.find(in_node->name_ + std::to_string(i)) == this->input_map_.end());
      auto in_tensor_index = size_t(in_node->input_indices_[i]);
      bool is_graph_input = false;
      for (size_t j = 0; j < graph_in_size; ++j) {
        if (in_tensor_index == model->graph_.input_indices_[j]) {
          is_graph_input = true;
          break;
        }
      }
      if (!is_graph_input) {
        continue;
      }
      MS_ASSERT(in_tensor_index < this->tensors_.size());
      auto *in_tensor = this->tensors_.at(in_tensor_index);
      if (in_tensor == nullptr) {
        MS_LOG(ERROR) << "in_tensor is null!";
        return;
      }
      auto tensor_name = in_node->name_ + std::to_string(i);
      this->input_map_[tensor_name] = in_tensor;
      if (!in_tensor->tensor_name().empty()) {
        this->input_map_[in_tensor->tensor_name()] = in_tensor;
      }
    }
  }
}

void LiteSession::InitGraphOutputNodeMap(const lite::Model *model) {
  MS_ASSERT(model != nullptr);
  auto graph_output_node_indexes = GetGraphOutputNodes(model);
  auto graph_out_size = model->graph_.output_indices_.size();
  for (auto out_node_index : graph_output_node_indexes) {
    auto out_node = model->graph_.all_nodes_[out_node_index];
    MS_ASSERT(out_node != nullptr);
    auto out_size = out_node->output_indices_.size();
    for (size_t i = 0; i < out_size; ++i) {
      auto out_tensor_index = out_node->output_indices_[i];
      bool is_graph_output = false;
      for (size_t j = 0; j < graph_out_size; ++j) {
        if (out_tensor_index == model->graph_.output_indices_[j]) {
          is_graph_output = true;
          break;
        }
      }
      if (!is_graph_output) {
        continue;
      }
      MS_ASSERT(out_tensor_index < this->tensors_.size());
      auto *out_tensor = this->tensors_.at(out_tensor_index);
      if (out_tensor == nullptr) {
        MS_LOG(ERROR) << "out_tensor is null!";
        return;
      }
      this->output_node_map_[out_node->name_].emplace_back(out_tensor);
    }
  }
}

void LiteSession::InitGraphOutputTensorMap(const lite::Model *model) {
  MS_ASSERT(model != nullptr);
  MS_ASSERT(this->output_tensor_map_.empty());
  auto graph_out_size = model->graph_.output_indices_.size();
  for (size_t i = 0; i < graph_out_size; ++i) {
    size_t graph_out_index = model->graph_.output_indices_[i];
    MS_ASSERT(graph_out_index < this->tensors_.size());
    auto *out_tensor = this->tensors_.at(graph_out_index);
    if (out_tensor == nullptr) {
      MS_LOG(ERROR) << "out_tensor is null!";
      return;
    }
    if (!out_tensor->tensor_name().empty()) {
      this->output_tensor_map_.insert(std::make_pair(out_tensor->tensor_name(), out_tensor));
      this->output_tensor_names_.emplace_back(out_tensor->tensor_name());
    } else {
      this->output_tensor_map_.insert(std::make_pair(std::to_string(graph_out_index), out_tensor));
      this->output_tensor_names_.emplace_back(std::to_string(graph_out_index));
    }
  }
}

void LiteSession::AdjustModelOutputTensorInitRefCount(const lite::Model *model) {
  MS_ASSERT(model != nullptr);
  auto graph_out_size = model->graph_.output_indices_.size();
  for (size_t i = 0; i < graph_out_size; ++i) {
    size_t graph_out_index = model->graph_.output_indices_[i];
    MS_ASSERT(graph_out_index < this->tensors_.size());
    auto *out_tensor = this->tensors_.at(graph_out_index);
    if (out_tensor == nullptr) {
      MS_LOG(ERROR) << "out_tensor is null!";
      return;
    }
    out_tensor->set_init_ref_count(out_tensor->init_ref_count() + 1);
  }
}

void LiteSession::InitGraphInOutTensorsMap(const lite::Model *model) {
  InitGraphInputMSTensors();
  InitGraphInputMap(model);
  InitGraphOutputNodeMap(model);
  InitGraphOutputTensorMap(model);
}

int LiteSession::IsolateOutputTensor() {
  for (Tensor *src_tensor : outputs_) {
    if (src_tensor->IsGraphInput()) {
      continue;
    }
    Tensor *new_tensor = new (std::nothrow)
      Tensor(src_tensor->data_type(), src_tensor->shape(), src_tensor->format(), Tensor::GRAPH_OUTPUT);
    if (new_tensor == nullptr) {
      MS_LOG(ERROR) << "duplicate new output failed.";
      return RET_NULL_PTR;
    }
    new_tensor->set_allocator(src_tensor->allocator()); /* GPU use opencl allocator */
    new_tensor->set_tensor_name(src_tensor->tensor_name() + "_duplicate");
    for (LiteQuantParam quant : src_tensor->quant_params()) {
      new_tensor->AddQuantParam(quant);
    }
    new_tensor->set_init_ref_count(src_tensor->init_ref_count());

    /* src tensor set for graph calculate */
    if (src_tensor->data_type() == kNumberTypeFloat16) {
      src_tensor->set_data_type(kNumberTypeFloat32);
    }
    src_tensor->set_ref_count(1);

    graph_output_map_.insert(std::make_pair(new_tensor, src_tensor));

    /* set new tensor for calculate */
    for (auto subgraph : kernels_) {
      /* subgraph input and output */
      for (size_t i = 0; i < subgraph->in_tensors().size(); i++) {
        if (subgraph->in_tensors()[i] == src_tensor) {
          subgraph->set_in_tensor(new_tensor, i);
        }
      }
      for (size_t i = 0; i < subgraph->out_tensors().size(); i++) {
        if (subgraph->out_tensors()[i] == src_tensor) {
          subgraph->set_out_tensor(new_tensor, i);
        }
      }
#ifndef DELEGATE_CLIP
      if (subgraph->desc().arch == kernel::kDelegate) {
        continue;
      }
#endif
      /* node input and output */
      auto nodes = reinterpret_cast<kernel::SubGraphKernel *>(subgraph)->nodes();
      for (size_t i = 0; i < nodes.size(); i++) {
        auto node = nodes[i];
        for (size_t j = 0; j < node->out_tensors().size(); j++) {
          if (node->out_tensors()[j] == src_tensor) {
            node->set_out_tensor(new_tensor, j);
          }
        }
        for (size_t j = 0; j < node->in_tensors().size(); j++) {
          if (node->in_tensors()[j] == src_tensor) {
            node->set_in_tensor(new_tensor, j);
          }
        }
      }
    }
  }
  return RET_OK;
}

void LiteSession::FreePackOpWeight(const std::vector<kernel::LiteKernel *> &kernels) {
  for (auto *kernel : kernels) {
    MS_ASSERT(kernel != nullptr);
    if (kernel->subgraph_type() == kernel::kNotSubGraph) {
      if (!IsPackedOp(kernel->type())) {
        continue;
      }
    } else {
      auto subgraph = reinterpret_cast<kernel::SubGraphKernel *>(kernel);
      FreePackOpWeight(subgraph->nodes());
    }
    auto inputs = kernel->in_tensors();
    for (auto *tensor : inputs) {
      MS_ASSERT(tensor != nullptr);
      if (!tensor->IsConst()) {
        continue;
      }
      tensor->FreeData();
    }
  }
}

int LiteSession::CompileGraph(Model *model) {
  bool expected = false;
  if (!is_running_.compare_exchange_strong(expected, true)) {
    MS_LOG(ERROR) << "Not support multi-threading";
    return RET_ERROR;
  }
  // model.MetaGraph ==> kernels
  if (model == nullptr) {
    MS_LOG(ERROR) << "The input model is nullptr.";
    is_running_.store(false);
    return RET_PARAM_INVALID;
  }
  if (model->buf == nullptr) {
    MS_LOG(ERROR) << "The input model buf is nullptr.";
    is_running_.store(false);
    return RET_PARAM_INVALID;
  }
  if (!reinterpret_cast<LiteModel *>(model)->ModelVerify()) {
    MS_LOG(ERROR) << "wrong model input, please check";
    is_running_.store(false);
    return RET_ERROR;
  }

  auto ret = ConvertTensors(model);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "ConvertTensors failed: " << ret;
    is_running_.store(false);
    return ret;
  }
  InitGraphInputTensors(model);
  InitGraphOutputTensors(model);
#ifndef ENABLE_FP16
  if (context_->GetCpuInfo().enable_float16_) {
    MS_LOG(WARNING) << unsupport_fp16_log;
  }
#endif
  // schedule kernels
  Scheduler scheduler(context_, ms_context_, model, &tensors_, inputs_, outputs_, is_train_session_, execution_plan_,
                      delegate_, delegate_device_type_);
  scheduler.SetupSchedulerCb(std::move(sched_cb_));
  ret = scheduler.Schedule(&kernels_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Schedule kernels failed: " << ret;
    is_running_.store(false);
    return ret;
  }
  InitGraphInOutTensorsMap(model);

  ret = PrepareKernels(model);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Prepare kernels failed: " << ret;
    is_running_.store(false);
    return ret;
  }

  if (is_train_session_) {
    is_running_.store(false);
    return RET_OK;
  }

#ifdef ENABLE_MINDRT
  ret = IsolateOutputTensor();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Isolate output tensor failed.";
    is_running_.store(false);
    return ret;
  }
  executor_ = new (std::nothrow) MindrtExecutor(&graph_output_map_);
#else
  executor_ = new (std::nothrow) Executor();
#endif
  if (executor_ == nullptr) {
    MS_LOG(ERROR) << "New Executor failed";
    is_running_.store(false);
    return RET_ERROR;
  }

  ret = executor_->Prepare(this->kernels_, this->inputs_, this->outputs_, context_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Prepare executor failed: " << ret;
    is_running_.store(false);
    return ret;
  }

  // To reduce runtime RAM, free the original weights of packed ops: a packed op repacks its
  // weight and will not access the original data again.
  FreePackOpWeight(kernels_);

  is_running_.store(false);
  return RET_OK;
}

bool LiteSession::IsIsolatedSubGraph(kernel::LiteKernel *kernel) {
  auto cur_in_tensors = kernel->in_tensors();
  for (auto cur_kernel : this->kernels_) {
    if (cur_kernel == kernel) {
      continue;
    }
    auto out_tensors = cur_kernel->out_tensors();
    for (auto tensor : cur_in_tensors) {
      if (IsContain(out_tensors, tensor)) {
        return false;
      }
    }
  }
  return true;
}

int LiteSession::SetAllocatorForDelegateKernels(const kernel::LiteKernel *kernel) {
  if (kernel == nullptr) {
    return RET_NULL_PTR;
  }
  for (auto input : kernel->in_tensors()) {
    CHECK_NULL_RETURN(input);
    input->set_allocator(this->context_->allocator);
  }
  for (auto output : kernel->out_tensors()) {
    CHECK_NULL_RETURN(output);
    output->set_allocator(this->context_->allocator);
  }
  return RET_OK;
}

int LiteSession::PrepareKernels(Model *model) {
  std::vector<kernel::LiteKernel *> all_kernels;
  for (auto kernel : this->kernels_) {
#ifndef DELEGATE_CLIP
    if (kernel->desc().arch == kernel::kDelegate) {
      all_kernels.push_back(kernel);
      continue;
    }
#endif
    auto sub_graph = reinterpret_cast<kernel::SubGraphKernel *>(kernel);
    MS_ASSERT(sub_graph != nullptr);
    auto kernel_in_subgraph = sub_graph->nodes();
    all_kernels.insert(all_kernels.end(), kernel_in_subgraph.begin(), kernel_in_subgraph.end());
  }

  // find in_kernels and out_kernels for kernels
  kernel::LiteKernelUtil::FindAllInoutKernels(all_kernels);

  // find in_sub and out_sub for subgraph
  kernel::LiteKernelUtil::FindAllInoutKernels(this->kernels_);

  // init init_ref_count for subgraphs and kernels
  for (auto *kernel : this->kernels_) {
    kernel->InitOutTensorInitRefCount();
#ifndef DELEGATE_CLIP
    if (kernel->desc().arch == kernel::kDelegate) {
      continue;
    }
#endif
    if (IsIsolatedSubGraph(kernel)) {
      static_cast<kernel::SubGraphKernel *>(kernel)->InitInputTensorInitRefCount();
    }
  }
  AdjustModelOutputTensorInitRefCount(model);
  for (auto kernel : this->kernels_) {
    if (kernel->desc().arch == kernel::kDelegate) {
      auto ret = SetAllocatorForDelegateKernels(kernel);
      if (ret != RET_OK) {
        MS_LOG(ERROR) << "Prepare kernel " << kernel->name() << " failed: " << ret;
        return ret;
      }
    }
    auto ret = kernel->Prepare();
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "Prepare kernel " << kernel->name() << " failed: " << ret;
      return ret;
    }
  }
  return RET_OK;
}

std::vector<mindspore::tensor::MSTensor *> LiteSession::GetInputs() const { return this->input_vec_; }

int LiteSession::RunGraph(const KernelCallBack &before, const KernelCallBack &after) {
  bool expected = false;
  if (!is_running_.compare_exchange_strong(expected, true)) {
    MS_LOG(ERROR) << "Not support multi-threading";
    return RET_ERROR;
  }
  STATUS ret = CheckTensorsInvalid(inputs_);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "CheckInputs failed.";
    is_running_.store(false);
    return ret;
  }
  MS_ASSERT(this->context_ != nullptr);
  if (before == nullptr && after == nullptr) {
    ret = executor_->Run(this->inputs_, this->outputs_, this->kernels_);
  } else {
    ret = executor_->Run(this->inputs_, this->outputs_, this->kernels_, before, after);
  }
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "RunGraph failed : " << ret;
  }
  is_running_.store(false);
  return ret;
}

int LiteSession::Init(InnerContext *context) {
  bool expected = false;
  if (!is_running_.compare_exchange_strong(expected, true)) {
    MS_LOG(ERROR) << "Not support multi-threading";
    return RET_ERROR;
  }
  if (context == nullptr) {
    MS_LOG(ERROR) << "context is nullptr";
    is_running_.store(false);
    return RET_NULL_PTR;
  }
  this->context_ = context;

  auto ret = this->context_->Init();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Init Context failed";
    is_running_.store(false);
    return ret;
  }

#ifdef MS_COMPILE_IOS
  context_->thread_pool()->SetMaxSpinCount(kDefaulLiteIosSpinCount);
  context_->thread_pool()->SetMinSpinCount(kDefaulLiteIosSpinCount);
#endif

  if (context->delegate != nullptr) {
#ifndef DELEGATE_CLIP
    delegate_ = context->delegate;
    delegate_device_type_ = -1;
#else
    MS_LOG(ERROR) << unsupport_delegate_log;
    is_running_.store(false);
    return RET_NOT_SUPPORT;
#endif
  }
  ms_context_ = MSContextFromContext(context);
  if (ms_context_ == nullptr) {
    MS_LOG(ERROR) << "transfer context to ms context failed.";
    is_running_.store(false);
    return RET_NULL_PTR;
  }
#ifndef DELEGATE_CLIP
#if SUPPORT_NPU
  if (delegate_ == nullptr && context_->IsNpuEnabled()) {
    delegate_ = std::make_shared<NPUDelegate>(context_->GetNpuInfo());
    if (delegate_ == nullptr) {
      MS_LOG(ERROR) << "New delegate_ failed";
      return RET_ERROR;
    }
    delegate_device_type_ = DT_NPU;
    this->context_->delegate = delegate_;
  }
#endif
#if GPU_TENSORRT
  if (delegate_ == nullptr && context_->IsGpuEnabled()) {
    delegate_ = std::make_shared<TensorRTDelegate>(ms_context_);
    if (delegate_ == nullptr) {
      MS_LOG(ERROR) << "New tensorrt delegate_ failed";
      return RET_ERROR;
    }
    delegate_device_type_ = DT_GPU;
    this->context_->delegate = delegate_;
  }
#endif
#ifdef SUPPORT_NNRT
  if (delegate_ == nullptr && context_->IsNNRtEnabled()) {
    delegate_ = std::make_shared<NNRTDelegate>();
    if (delegate_ == nullptr) {
      MS_LOG(ERROR) << "New NNRt delegate failed";
      return RET_ERROR;
    }
    delegate_device_type_ = DT_NNRT;
    this->context_->delegate = delegate_;
  }
#endif
  if (delegate_ != nullptr) {
    auto delegate_ret = delegate_->Init();
    if (delegate_ret == mindspore::kLiteNotSupport) {
      MS_LOG(DEBUG) << "Delegate is unsupported";
      delegate_.reset();
      delegate_ = nullptr;
    } else if (delegate_ret == mindspore::kSuccess) {
      MS_LOG(INFO) << "Delegate init successfully";
    } else {
      MS_LOG(ERROR) << "Delegate init failed";
      return RET_ERROR;
    }
  }
#endif
  ret = InitGPURuntime();
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Init GPU runtime failed.";
    is_running_.store(false);
    return ret;
  }
  is_running_.store(false);
  return RET_OK;
}

void LiteSession::BindThread(bool if_bind) {
  // Abandoned code
  // Bind thread in executor
  return;
}

LiteSession::~LiteSession() {
  delegate_.reset();
  bool expected = false;
  if (!is_running_.compare_exchange_strong(expected, true)) {
    MS_LOG(ERROR) << "Not support multi-threading";
    return;
  }
  for (auto *kernel : kernels_) {
    delete kernel;
    kernel = nullptr;
  }
  for (auto tensor : tensors_) {
    if (tensor == nullptr) {
      continue;
    }
    // Data of a const tensor which doesn't own its data will not be freed here.
    // Such data comes from the meta_graph and is freed when the meta_graph is freed.
    if (tensor->IsConst() && !tensor->own_data()) {
      tensor->set_data(nullptr);
    }

    /* situation : user set graph-output-tensor data */
    if (tensor->IsGraphOutput() && tensor->allocator() == nullptr) {
      tensor->set_data(nullptr);
    }
    delete tensor;
    tensor = nullptr;
  }

  for (auto item : graph_output_map_) {
    auto isolate_output_tensor = item.first;
    isolate_output_tensor->set_data(nullptr);
    delete isolate_output_tensor;
    isolate_output_tensor = nullptr;
  }

  // Tensor pointers in input_map_ and the output maps were freed above with tensors_.
  input_map_.clear();
  output_node_map_.clear();
  output_tensor_map_.clear();
  input_vec_.clear();
  graph_output_map_.clear();

  delete this->executor_;
  this->executor_ = nullptr;
#if GPU_OPENCL
  delete opencl_runtime_wrapper_;
#endif
  delete ms_context_;
  ms_context_ = nullptr;
  delete this->context_;
  this->context_ = nullptr;
  delete (model_);
  is_running_.store(false);
}

mindspore::tensor::MSTensor *LiteSession::GetInputsByTensorName(const std::string &name) const {
  auto ret = input_map_.find(name);
  if (ret == input_map_.end()) {
    MS_LOG(WARNING) << "Tensor " << name << " does not exist";
    return nullptr;
  }
  return ret->second;
}

std::vector<mindspore::tensor::MSTensor *> LiteSession::GetOutputsByNodeName(const std::string &node_name) const {
  auto ret = output_node_map_.find(node_name);
  if (ret == output_node_map_.end()) {
    MS_LOG(WARNING) << "Node " << node_name << " is not an output node";
    std::vector<mindspore::tensor::MSTensor *> empty_ret;
    return empty_ret;
  }
  return ret->second;
}

std::vector<std::string> LiteSession::GetOutputTensorNames() const { return this->output_tensor_names_; }

mindspore::tensor::MSTensor *LiteSession::GetOutputByTensorName(const std::string &tensor_name) const {
  auto ret = output_tensor_map_.find(tensor_name);
  if (ret == output_tensor_map_.end()) {
    MS_LOG(WARNING) << "Tensor " << tensor_name << " is not an output tensor";
    return nullptr;
  }
  return ret->second;
}

std::unordered_map<std::string, mindspore::tensor::MSTensor *> LiteSession::GetOutputs() const {
  return this->output_tensor_map_;
}

int LiteSession::ResizeInputs(const std::vector<mindspore::tensor::MSTensor *> &inputs,
                              const std::vector<std::vector<int>> &dims) {
  if (inputs.size() != inputs_.size()) {
    MS_LOG(ERROR) << "Inputs size " << inputs.size() << " is not equal to " << inputs_.size();
    return RET_PARAM_INVALID;
  }

  if (dims.size() != inputs.size()) {
    MS_LOG(ERROR) << "Input dims size " << dims.size() << " is not equal to the inputs size " << inputs.size();
    return RET_PARAM_INVALID;
  }

  for (size_t i = 0; i < inputs.size(); ++i) {
    if (inputs[i] != inputs_[i]) {
      MS_LOG(ERROR) << "Input[" << i << "] tensor is not the same as the saved input tensor!";
      return RET_PARAM_INVALID;
    }
    inputs_[i]->FreeData();
    inputs_[i]->set_shape(dims[i]);
  }
  if (!is_train_session_) {
    executor_->Resize(inputs, dims);
  }
  return RET_OK;
}

void LiteSession::ResetInputsShape(const std::vector<std::vector<int>> &dims) {
  for (size_t i = 0; i < inputs_.size(); ++i) {
    inputs_[i]->FreeData();
    inputs_[i]->set_shape(dims[i]);
  }
}

int LiteSession::ReSizeKernels(const std::vector<kernel::LiteKernel *> &kernels) {
  for (auto kernel : kernels) {
    if (kernel == nullptr) {
      MS_LOG(ERROR) << "input kernel is nullptr!";
      return RET_ERROR;
    }
    auto ret = RET_OK;
#ifndef DELEGATE_CLIP
    if (kernel->desc().arch == kernel::kDelegate) {
      ret = kernel->ReSize();
    } else {
#endif
      if (kernel->subgraph_type() == kernel::kGpuFp16SubGraph || kernel->subgraph_type() == kernel::kGpuFp32SubGraph) {
#if GPU_OPENCL
        auto sub_graph = reinterpret_cast<kernel::OpenCLSubGraph *>(kernel);
        ret = sub_graph->ReSize(false);
#endif
      } else {
        auto sub_graph = reinterpret_cast<kernel::SubGraphKernel *>(kernel);
        ret = sub_graph->ReSize();
      }
#ifndef DELEGATE_CLIP
    }
#endif
    if (ret == RET_INFER_INVALID) {
      MS_LOG(INFO) << "InferShape is interrupted";
      continue;
    }
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "ReSize node " << kernel->name() << " failed";
      return RET_ERROR;
    }
  }
  return RET_OK;
}

int LiteSession::Resize(const std::vector<mindspore::tensor::MSTensor *> &inputs,
                        const std::vector<std::vector<int>> &dims) {
  bool expected = false;
  if (!is_running_.compare_exchange_strong(expected, true)) {
    MS_LOG(ERROR) << "Not support multi-threading";
    return RET_ERROR;
  }
  std::vector<std::vector<int>> old_dims;
  for (size_t i = 0; i < inputs_.size(); ++i) {
    old_dims.push_back(inputs_[i]->shape());
  }
  auto ret = ResizeInputs(inputs, dims);
  if (ret != RET_OK) {
    ResetInputsShape(old_dims);
    is_running_.store(false);
    return ret;
  }

  ret = ReSizeKernels(kernels_);
  if (ret != RET_OK) {
    ResetInputsShape(old_dims);
    auto resize_ret = ReSizeKernels(kernels_);
    if (resize_ret != RET_OK) {
      MS_LOG(ERROR) << "restore kernel size failed, ret: " << resize_ret;
    }
    is_running_.store(false);
    return ret;
  }
  is_running_.store(false);
  return RET_OK;
}

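// Illustrative sketch (not part of the original file): how a caller might resize the graph
// inputs before the next RunGraph call, using only the Resize/GetInputs API defined above.
// The 1x224x224x3 shape is just an example value. As the implementation above shows, the
// previous shapes are rolled back automatically when resizing fails.
//
//   std::vector<std::vector<int>> new_dims = {{1, 224, 224, 3}};
//   auto inputs = session->GetInputs();
//   if (session->Resize(inputs, new_dims) != mindspore::lite::RET_OK) {
//     // handle error; input shapes have been restored to their previous values
//   }
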
int LiteSession::InitGPURuntime() {
  if (context_->IsCpuEnabled()) {
    CpuBindMode cpu_bind_mode = context_->GetCpuDeviceInfo()->cpu_bind_mode_;
    ThreadPool *thread_pool = this->context_->thread_pool();
    if (thread_pool == nullptr) {
      MS_LOG(ERROR) << "thread pool is nullptr";
      is_running_.store(false);
      return RET_NULL_PTR;
    }
    thread_pool->SetProcessAffinity(static_cast<BindMode>(cpu_bind_mode));
  }
#if GPU_OPENCL
  if (this->context_->IsGpuEnabled()) {
    opencl_runtime_wrapper_ = new (std::nothrow) opencl::OpenCLRuntimeInnerWrapper();
    if (opencl_runtime_wrapper_ == nullptr) {
      MS_LOG(ERROR) << "create OpenCLRuntimeInnerWrapper failed";
      return RET_ERROR;
    }
    auto gpu_device_info = this->context_->GetGpuInfo();
    auto opencl_runtime = opencl_runtime_wrapper_->GetInstance();
    opencl_runtime->SetFp16Enable(gpu_device_info.enable_float16_);
    if (opencl_runtime->Init() != RET_OK) {
      this->context_->device_list_ = {{DT_CPU, {gpu_device_info.enable_float16_, MID_CPU}}};
      MS_LOG(WARNING) << "Init OpenCL runtime failed, change to CPU mode.";
    } else {
      MS_LOG(INFO) << "Init OpenCL runtime success.";
    }

    /* check whether the chip supports shared memory */
    auto enable_arm_import_memory = opencl_runtime->isExtensionEnable(EXT_ARM_IMPORT_MEMORY_HOST);
    if (!enable_arm_import_memory) {
      MS_LOG(WARNING) << "GPU does not support shared memory!";
    }
  }
#endif
  // Keeping cores bound would interfere with the OpenCL driver's scheduling, so unbind here.
  if (context_->IsCpuEnabled()) {
    ThreadPool *thread_pool = this->context_->thread_pool();
    thread_pool->SetProcessAffinity(static_cast<BindMode>(NO_BIND));
  }
  return RET_OK;
}
}  // namespace lite

session::LiteSession *session::LiteSession::CreateSession(const lite::Context *context) {
  if (context == nullptr) {
    return nullptr;
  }

  auto session = new (std::nothrow) lite::LiteSession();
  if (session == nullptr) {
    MS_LOG(ERROR) << "create session failed";
    return nullptr;
  }

  mindspore::lite::InnerContext *inner_context = new (std::nothrow) mindspore::lite::InnerContext(context);
  if (inner_context == nullptr) {
    MS_LOG(ERROR) << "new inner context failed";
    delete session;
    return nullptr;
  }

  auto ret = session->Init(inner_context);
  if (ret != mindspore::lite::RET_OK) {
    MS_LOG(ERROR) << "init session failed";
    delete session;
    return nullptr;
  }
  return session;
}

session::LiteSession *session::LiteSession::CreateSession(const char *model_buf, size_t size,
                                                          const lite::Context *context) {
  auto *session = LiteSession::CreateSession(context);
  if (session == nullptr) {
    MS_LOG(ERROR) << "Create session failed";
    return nullptr;
  }
  auto ret = reinterpret_cast<lite::LiteSession *>(session)->LoadModelAndCompileByBuf(model_buf, size);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Init session failed";
    delete session;
    return nullptr;
  }
  return session;
}

session::LiteSession *lite::LiteSession::CreateSession(const std::string &model_path, const lite::Context *context) {
  auto *session = session::LiteSession::CreateSession(context);
  if (session == nullptr) {
    MS_LOG(ERROR) << "Create session failed";
    return nullptr;
  }
  auto ret = reinterpret_cast<lite::LiteSession *>(session)->LoadModelAndCompileByPath(model_path);
  if (ret != RET_OK) {
    MS_LOG(ERROR) << "Init session failed";
    delete session;
    return nullptr;
  }
  return session;
}

int lite::LiteSession::LoadModelAndCompileByBuf(const char *model_buf, size_t buf_size) {
  auto *model = lite::ImportFromBuffer(model_buf, buf_size, true);
  if (model == nullptr) {
    MS_LOG(ERROR) << "Import model failed";
    return RET_ERROR;
  }
  auto ret = CompileGraph(model);
  if (ret != lite::RET_OK) {
    MS_LOG(ERROR) << "Compile model failed";
    model->buf = nullptr;
    delete model;
    return RET_ERROR;
  }
  model->buf = nullptr;
  set_model(model);
  return RET_OK;
}

int lite::LiteSession::LoadModelAndCompileByPath(const std::string &model_path) {
  size_t model_size;
  auto model_buf = lite::ReadFile(model_path.c_str(), &model_size);
  if (model_buf == nullptr) {
    MS_LOG(ERROR) << "Read model file failed";
    return RET_ERROR;
  }
  auto *model = lite::ImportFromBuffer(model_buf, model_size, true);
  if (model == nullptr) {
    MS_LOG(ERROR) << "Import model failed";
    return RET_ERROR;
  }
  (reinterpret_cast<lite::LiteModel *>(model))->set_keep_model_buf(true);
  auto ret = CompileGraph(model);
  if (ret != lite::RET_OK) {
    MS_LOG(ERROR) << "Compile model failed";
    return RET_ERROR;
  }
  set_model(model);
  return RET_OK;
}

}  // namespace mindspore
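
// Illustrative usage sketch (not part of the original file): a typical caller flow for the
// session API defined above. The MutableData() call on MSTensor is an assumption about the
// surrounding MindSpore Lite tensor interface and is not defined in this file.
//
//   mindspore::lite::Context context;
//   auto *session = mindspore::lite::LiteSession::CreateSession("model.ms", &context);
//   if (session == nullptr) { /* handle error */ }
//   for (auto *input : session->GetInputs()) {
//     // assumption: fill input->MutableData() with the caller's input bytes
//   }
//   if (session->RunGraph(nullptr, nullptr) != mindspore::lite::RET_OK) { /* handle error */ }
//   auto outputs = session->GetOutputs();  // tensor name -> MSTensor*
//   delete session;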