1 /**
2 * Copyright 2020-2021 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/lite_session.h"
18 #include <vector>
19 #include <utility>
20 #include "include/errorcode.h"
21 #include "src/common/log_adapter.h"
22 #include "src/scheduler.h"
23 #include "src/runtime/inner_allocator.h"
24 #include "src/executor.h"
25 #include "src/common/context_util.h"
26 #include "src/common/utils.h"
27 #include "src/common/prim_util.h"
28 #include "src/common/graph_util.h"
29 #include "src/common/tensor_util.h"
30 #include "src/common/file_utils.h"
31 #include "src/kernel_registry.h"
32 #include "src/lite_model.h"
33 #include "src/weight_decoder.h"
34 #include "src/lite_kernel_util.h"
35 #ifdef ENABLE_MINDRT
36 #include "src/mindrt_executor.h"
37 #endif
38 #if SUPPORT_NPU
39 #include "src/delegate/npu/npu_delegate.h"
40 #endif
41 #if GPU_OPENCL
42 #include "src/runtime/kernel/opencl/opencl_subgraph.h"
43 #endif
44 #if GPU_TENSORRT
45 #include "src/delegate/tensorrt/tensorrt_delegate.h"
46 #endif
47 #ifdef SUPPORT_NNRT
48 #include "src/delegate/nnrt/nnrt_delegate.h"
49 #endif
50 #ifndef WEIGHT_DECODE_CLIP
51 #include "tools/converter/quantizer/fse_decoder.h"
52 #endif
53 namespace mindspore {
54 namespace lite {
55 namespace {
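// Returns true when the tensor's raw data needs bit unpacking before use:
// either Huffman-coded data, or quantized data stored with a bit width other
// than 8 or 16 bits (see kBitNum1/kBitNum8/kBitNum16 below).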
bool NeedBitUppackCheck(const schema::Tensor &src_tensor) {
57 if (src_tensor.enableHuffmanCode()) {
58 return true;
59 }
60 bool need_bit_unpack = src_tensor.quantParams() != nullptr && src_tensor.quantParams()->size() > 0 &&
61 src_tensor.quantParams()->Get(0) != nullptr;
62 if (need_bit_unpack) {
63 auto num_bits = src_tensor.quantParams()->Get(0)->numBits();
64 need_bit_unpack = ((num_bits >= kBitNum1 && num_bits < kBitNum8) || (num_bits > kBitNum8 && num_bits < kBitNum16));
65 }
66
67 return need_bit_unpack;
68 }
69
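// Dispatches weight decompression according to the compression type recorded in
// the schema tensor (FSE, indexing, sparse), then falls back to bit unpacking
// when needed. Returns RET_NO_CHANGE when the data can be used as-is.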
int DecompressTensor(const schema::Tensor &src_tensor, Tensor *dst_tensor) {
71 MS_ASSERT(dst_tensor != nullptr);
72 #ifndef WEIGHT_DECODE_CLIP
73 if (src_tensor.weightQunatCompressType() == schema::WeightQunatCompressType_FSE) {
74 return quant::FSEDecoder::DeCompress(src_tensor, dst_tensor);
75 } else if (src_tensor.weightQunatCompressType() == schema::WeightQunatCompressType_INDEXING) {
76 return IndexingDecompress(src_tensor, dst_tensor);
77 } else if (src_tensor.weightQunatCompressType() == schema::WeightQunatCompressType_SPARSE) {
78 return SparseDecompress(src_tensor, dst_tensor);
79 }
80 #else
81 if (src_tensor.weightQunatCompressType() != schema::WeightQunatCompressType_NONE) {
82 MS_LOG(ERROR) << unsupport_weight_decode_log;
83 return RET_ERROR;
84 }
85 #endif
86 if (!NeedBitUppackCheck(src_tensor)) {
87 return RET_NO_CHANGE;
88 } else {
89 #ifndef WEIGHT_DECODE_CLIP
90 return WeightDecoder::UnPack(src_tensor, dst_tensor);
91 #else
92 MS_LOG(ERROR) << unsupport_weight_decode_log;
93 return RET_ERROR;
94 #endif
95 }
96 }
97 } // namespace
98
LiteSession::LiteSession() { this->is_running_.store(false); }
100
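// Copies per-channel quantization parameters and quantization clusters from the
// flatbuffer tensor into the runtime tensor.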
void LiteSession::ConvertTensorsQuantParam(const schema::Tensor *src_tensor, lite::Tensor *dst_tensor) {
102 MS_ASSERT(src_tensor != nullptr);
103 MS_ASSERT(dst_tensor != nullptr);
104 auto quant_params = src_tensor->quantParams();
105 if (quant_params != nullptr) {
106 for (size_t j = 0; j < quant_params->size(); j++) {
107 auto quant_param = quant_params->Get(j);
108 LiteQuantParam quant_arg{};
109 if (quant_param == nullptr) {
110 quant_arg.inited = false;
111 } else {
112 quant_arg.inited = true;
113 quant_arg.bitNum = quant_param->numBits();
114 quant_arg.scale = quant_param->scale();
115 quant_arg.zeroPoint = quant_param->zeroPoint();
116 quant_arg.var_corr = quant_param->varCorr();
117 quant_arg.mean_corr = quant_param->meanCorr();
118 quant_arg.roundType = quant_param->roundType();
119 quant_arg.multiplier = quant_param->multiplier();
120 quant_arg.dstDtype = quant_param->dstDtype();
121 }
122 dst_tensor->AddQuantParam(quant_arg);
123 }
124 }
125 auto quant_clusters = src_tensor->quantClusters();
126 if (quant_clusters != nullptr) {
127 std::vector<float> clusters;
128 for (size_t j = 0; j < quant_clusters->size(); j++) {
129 clusters.push_back(quant_clusters->Get(j));
130 }
131 dst_tensor->set_quant_clusters(clusters);
132 }
133 }
134
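// Attaches the constant data of a schema tensor to the runtime tensor. Tensor
// lists are decoded; normal tensors are decompressed if needed. When no
// decompression is required, the runtime tensor references the model buffer
// directly (own_data = false).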
int LiteSession::ConvertTensorsData(const lite::Model *model, size_t tensor_index, const schema::Tensor *src_tensor,
                                    lite::Tensor *dst_tensor) {
137 MS_ASSERT(src_tensor != nullptr);
138 MS_ASSERT(dst_tensor != nullptr);
139 if (src_tensor->data() == nullptr || src_tensor->data()->size() <= 0) {
140 MS_LOG(DEBUG) << "No valid data converted.";
141 return RET_OK;
142 }
143
144 /* tensor list convert */
145 if (dst_tensor->data_type() == kObjectTypeTensorType) {
146 #ifndef CONTROLFLOW_TENSORLIST_CLIP
147 auto tensor_list = reinterpret_cast<TensorList *>(dst_tensor);
148 if (tensor_list->Decode(reinterpret_cast<const int *>(src_tensor->data()->data())) != RET_OK) {
149 MS_LOG(ERROR) << "Decode tensorlist data failed";
150 return RET_ERROR;
151 }
152 return RET_OK;
153 #else
154 MS_LOG(ERROR) << unsupport_controlflow_tensorlist_log;
155 return RET_NOT_SUPPORT;
156 #endif
157 }
158
159 /* normal tensor check */
160 auto shape_info = dst_tensor->shape();
161 if (shape_info.end() !=
162 std::find_if(shape_info.begin(), shape_info.end(), [](const int shape) { return shape <= 0; })) {
    MS_LOG(ERROR) << "Invalid shape size: " << src_tensor->name()->c_str();
164 return RET_ERROR;
165 }
166
167 auto ret = DecompressTensor(*src_tensor, dst_tensor);
168 if (ret == RET_NO_CHANGE) {
169 dst_tensor->set_data(const_cast<unsigned char *>(src_tensor->data()->data()));
170 dst_tensor->set_own_data(false);
171 } else if (ret != RET_OK) {
172 MS_LOG(ERROR) << "Decompress tensor data failed: " << ret;
173 return ret;
174 }
175 return RET_OK;
176 }
177
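// Creates a runtime tensor (Tensor or TensorList) matching the data type, shape,
// format and category of the schema tensor. Returns nullptr on an invalid data
// type or allocation failure.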
lite::Tensor *LiteSession::ConvertTensor(const schema::Tensor &src_tensor) {
179 int32_t data_type = src_tensor.dataType();
180 if (data_type <= kTypeUnknown || data_type >= kMonadTypeEnd) {
181 MS_LOG(ERROR) << "invalid data type. " << data_type;
182 return nullptr;
183 }
184 auto src_category = TensorCategory(&src_tensor);
185 std::vector<int> shape;
186 if (src_tensor.dims() == nullptr) {
187 MS_LOG(DEBUG) << "Dims of src_tensor is nullptr";
188 }
189 if (src_tensor.dims() != nullptr) {
190 if (src_tensor.dataType() == kObjectTypeString && src_tensor.data() != nullptr) {
191 shape.push_back(src_tensor.data()->size());
192 } else {
193 for (size_t j = 0; j < src_tensor.dims()->size(); j++) {
194 shape.push_back(src_tensor.dims()->data()[j]);
195 }
196 }
197 }
198 lite::Tensor *dst_tensor = nullptr;
199 if (TypeId(data_type) == kObjectTypeTensorType) {
200 #ifndef CONTROLFLOW_TENSORLIST_CLIP
201 dst_tensor = new (std::nothrow) TensorList(shape, std::vector<int>(), src_category);
202 // set tensor list datatype
203 auto tensor_list = reinterpret_cast<TensorList *>(dst_tensor);
204 if (src_tensor.data() != nullptr) {
205 auto tensor_data_type = TypeId(reinterpret_cast<const int *>(src_tensor.data()->data())[0]);
206 tensor_list->set_tensors_data_type(tensor_data_type);
207 }
208 #else
209 MS_LOG(ERROR) << unsupport_controlflow_tensorlist_log;
210 #endif
211 } else {
212 dst_tensor = new (std::nothrow)
213 Tensor(TypeId(data_type), shape, static_cast<mindspore::Format>(src_tensor.format()), src_category);
214 }
215 return dst_tensor;
216 }
217
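// Converts every schema tensor of the model into a runtime tensor, attaches its
// data and quantization parameters, and marks graph inputs and outputs. All
// created tensors are owned by tensors_.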
int LiteSession::ConvertTensors(const lite::Model *model) {
219 MS_ASSERT(model != nullptr);
220 uint32_t tensor_count = model->graph_.all_tensors_.size();
221 auto model_input_indices = model->graph_.input_indices_;
222 auto model_output_indices = model->graph_.output_indices_;
223 for (uint32_t i = 0; i < tensor_count; ++i) {
224 auto *src_tensor = static_cast<schema::Tensor *>(model->graph_.all_tensors_[i]);
225 if (src_tensor == nullptr) {
226 MS_LOG(ERROR) << i << "th tensor in model is nullptr";
227 return RET_NULL_PTR;
228 }
229 auto *dst_tensor = ConvertTensor(*src_tensor);
230 if (dst_tensor == nullptr) {
231 MS_LOG(ERROR) << "Convert new " << i << "th tensor failed!";
232 return RET_NULL_PTR;
233 }
234 auto ret = ConvertTensorsData(model, i, src_tensor, dst_tensor);
235 if (ret != RET_OK) {
236 MS_LOG(ERROR) << "Convert data of " << i << "th tensor failed";
237 delete dst_tensor;
238 return ret;
239 }
240 ConvertTensorsQuantParam(src_tensor, dst_tensor);
241 if (IsContain(model_input_indices, i)) {
242 if (dst_tensor->data() != nullptr) {
243 MS_LOG(ERROR) << "Graph input shouldn't have data";
244 delete dst_tensor;
245 return RET_ERROR;
246 }
247 dst_tensor->set_category(Tensor::GRAPH_INPUT);
248 }
249 if (IsContain(model_output_indices, i)) {
250 if (dst_tensor->data() != nullptr) {
251 MS_LOG(ERROR) << "Graph output shouldn't have data";
252 delete dst_tensor;
253 return RET_ERROR;
254 }
      // A tensor that is both a graph input and a graph output is treated as an input.
256 if (!dst_tensor->IsGraphInput()) {
257 dst_tensor->set_category(Tensor::GRAPH_OUTPUT);
258 }
259 }
260 if (src_tensor->name() != nullptr) {
261 dst_tensor->set_tensor_name(src_tensor->name()->str());
262 }
263 this->tensors_.emplace_back(dst_tensor);
264 }
265 return RET_OK;
266 }
267
void LiteSession::InitGraphInputTensors(const lite::Model *model) {
269 MS_ASSERT(model != nullptr);
270 auto graph_in_size = model->graph_.input_indices_.size();
271 for (size_t i = 0; i < graph_in_size; ++i) {
272 auto in_tensor_idx = model->graph_.input_indices_[i];
273 MS_ASSERT(in_tensor_idx < this->tensors_.size());
274 auto *in_tensor = this->tensors_.at(in_tensor_idx);
275 MS_ASSERT(in_tensor != nullptr);
276 this->inputs_.emplace_back(in_tensor);
277 }
278 }
279
void LiteSession::InitGraphInputMSTensors() {
281 MS_ASSERT(this->input_vec_.empty());
282 for (auto &input_tensor : this->inputs_) {
283 MS_ASSERT(input_tensor != nullptr);
284 this->input_vec_.emplace_back(input_tensor);
285 }
286 }
287
void LiteSession::InitGraphOutputTensors(const lite::Model *model) {
289 MS_ASSERT(model != nullptr);
290 MS_ASSERT(this->outputs_.empty());
291 auto graph_out_size = model->graph_.output_indices_.size();
292 for (size_t i = 0; i < graph_out_size; ++i) {
293 auto out_tensor_idx = model->graph_.output_indices_[i];
294 MS_ASSERT(out_tensor_idx < this->tensors_.size());
295 auto *out_tensor = this->tensors_.at(out_tensor_idx);
296 MS_ASSERT(out_tensor != nullptr);
297 this->outputs_.emplace_back(out_tensor);
298 }
299 }
300
void LiteSession::InitGraphInputMap(const lite::Model *model) {
302 MS_ASSERT(model != nullptr);
303 MS_ASSERT(this->input_map_.empty());
304 auto graph_input_node_indexes = GetGraphInputNodes(model);
305 auto graph_in_size = model->graph_.input_indices_.size();
306 for (auto in_node_index : graph_input_node_indexes) {
307 auto in_node = model->graph_.all_nodes_[in_node_index];
308 MS_ASSERT(in_node != nullptr);
309 auto in_size = in_node->input_indices_.size();
310 for (size_t i = 0; i < in_size; ++i) {
311 MS_ASSERT(this->input_map_.find(in_node->name_ + std::to_string(i)) == this->input_map_.end());
312 auto in_tensor_index = size_t(in_node->input_indices_[i]);
313 bool is_graph_input = false;
314 for (size_t j = 0; j < graph_in_size; ++j) {
315 if (in_tensor_index == model->graph_.input_indices_[j]) {
316 is_graph_input = true;
317 break;
318 }
319 }
320 if (!is_graph_input) {
321 continue;
322 }
323 MS_ASSERT(in_tensor_index < this->tensors_.size());
324 auto *in_tensor = this->tensors_.at(in_tensor_index);
325 if (in_tensor == nullptr) {
326 MS_LOG(ERROR) << "in_tensor is null!";
327 return;
328 }
329 auto tensor_name = in_node->name_ + std::to_string(i);
330 this->input_map_[tensor_name] = in_tensor;
331 if (!in_tensor->tensor_name().empty()) {
332 this->input_map_[in_tensor->tensor_name()] = in_tensor;
333 }
334 }
335 }
336 }
337
void LiteSession::InitGraphOutputNodeMap(const lite::Model *model) {
339 MS_ASSERT(model != nullptr);
340 auto graph_output_node_indexes = GetGraphOutputNodes(model);
341 auto graph_out_size = model->graph_.output_indices_.size();
342 for (auto out_node_index : graph_output_node_indexes) {
343 auto out_node = model->graph_.all_nodes_[out_node_index];
344 MS_ASSERT(out_node != nullptr);
345 auto out_size = out_node->output_indices_.size();
346 for (size_t i = 0; i < out_size; ++i) {
347 auto out_tensor_index = out_node->output_indices_[i];
348 bool is_graph_output = false;
349 for (size_t j = 0; j < graph_out_size; ++j) {
350 if (out_tensor_index == model->graph_.output_indices_[j]) {
351 is_graph_output = true;
352 break;
353 }
354 }
355 if (!is_graph_output) {
356 continue;
357 }
358 MS_ASSERT(out_tensor_index < this->tensors_.size());
359 auto *out_tensor = this->tensors_.at(out_tensor_index);
360 if (out_tensor == nullptr) {
361 MS_LOG(ERROR) << "out_tensor is null!";
362 return;
363 }
364 this->output_node_map_[out_node->name_].emplace_back(out_tensor);
365 }
366 }
367 }
368
void LiteSession::InitGraphOutputTensorMap(const lite::Model *model) {
370 MS_ASSERT(model != nullptr);
371 MS_ASSERT(this->output_tensor_map_.empty());
372 auto graph_out_size = model->graph_.output_indices_.size();
373 for (size_t i = 0; i < graph_out_size; ++i) {
374 size_t graph_out_index = model->graph_.output_indices_[i];
375 MS_ASSERT(graph_out_index < this->tensors_.size());
376 auto *out_tensor = this->tensors_.at(graph_out_index);
377 if (out_tensor == nullptr) {
378 MS_LOG(ERROR) << "out_tensor is null!";
379 return;
380 }
381 if (!out_tensor->tensor_name().empty()) {
382 this->output_tensor_map_.insert(std::make_pair(out_tensor->tensor_name(), out_tensor));
383 this->output_tensor_names_.emplace_back(out_tensor->tensor_name());
384 } else {
385 this->output_tensor_map_.insert(std::make_pair(std::to_string(graph_out_index), out_tensor));
386 this->output_tensor_names_.emplace_back(std::to_string(graph_out_index));
387 }
388 }
389 }
390
void LiteSession::AdjustModelOutputTensorInitRefCount(const lite::Model *model) {
392 MS_ASSERT(model != nullptr);
393 auto graph_out_size = model->graph_.output_indices_.size();
394 for (size_t i = 0; i < graph_out_size; ++i) {
395 size_t graph_out_index = model->graph_.output_indices_[i];
396 MS_ASSERT(graph_out_index < this->tensors_.size());
397 auto *out_tensor = this->tensors_.at(graph_out_index);
398 if (out_tensor == nullptr) {
399 MS_LOG(ERROR) << "out_tensor is null!";
400 return;
401 }
402 out_tensor->set_init_ref_count(out_tensor->init_ref_count() + 1);
403 }
404 }
405
void LiteSession::InitGraphInOutTensorsMap(const lite::Model *model) {
407 InitGraphInputMSTensors();
408 InitGraphInputMap(model);
409 InitGraphOutputNodeMap(model);
410 InitGraphOutputTensorMap(model);
411 }
412
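// Duplicates every graph output tensor: the duplicate replaces the original
// inside the scheduled kernels and their nodes, while the original stays
// registered as the session output. graph_output_map_ maps duplicate -> original
// so the executor can bridge the two; used together with the MindRT executor.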
int LiteSession::IsolateOutputTensor() {
414 for (Tensor *src_tensor : outputs_) {
415 if (src_tensor->IsGraphInput()) {
416 continue;
417 }
418 Tensor *new_tensor =
419 new Tensor(src_tensor->data_type(), src_tensor->shape(), src_tensor->format(), Tensor::GRAPH_OUTPUT);
420 if (new_tensor == nullptr) {
      MS_LOG(ERROR) << "duplicate new output tensor failed.";
422 return RET_NULL_PTR;
423 }
424 new_tensor->set_allocator(src_tensor->allocator()); /* GPU use opencl allocator */
425 new_tensor->set_tensor_name(src_tensor->tensor_name() + "_duplicate");
426 for (LiteQuantParam quant : src_tensor->quant_params()) {
427 new_tensor->AddQuantParam(quant);
428 }
429 new_tensor->set_init_ref_count(src_tensor->init_ref_count());
430
431 /* src tensor set for graph calculate */
432 if (src_tensor->data_type() == kNumberTypeFloat16) {
433 src_tensor->set_data_type(kNumberTypeFloat32);
434 }
435 src_tensor->set_ref_count(1);
436
437 graph_output_map_.insert(std::make_pair(new_tensor, src_tensor));
438
439 /* set new tensor for calculate */
440 for (auto subgraph : kernels_) {
441 /* subgraph input and output */
442 for (size_t i = 0; i < subgraph->in_tensors().size(); i++) {
443 if (subgraph->in_tensors()[i] == src_tensor) {
444 subgraph->set_in_tensor(new_tensor, i);
445 }
446 }
447 for (size_t i = 0; i < subgraph->out_tensors().size(); i++) {
448 if (subgraph->out_tensors()[i] == src_tensor) {
449 subgraph->set_out_tensor(new_tensor, i);
450 }
451 }
452 #ifndef DELEGATE_CLIP
453 if (subgraph->desc().arch == kernel::kDelegate) {
454 continue;
455 }
456 #endif
457 /* node input and output */
458 auto nodes = reinterpret_cast<kernel::SubGraphKernel *>(subgraph)->nodes();
459 for (size_t i = 0; i < nodes.size(); i++) {
460 auto node = nodes[i];
461 for (size_t j = 0; j < node->out_tensors().size(); j++) {
462 if (node->out_tensors()[j] == src_tensor) {
463 node->set_out_tensor(new_tensor, j);
464 }
465 }
466 for (size_t j = 0; j < node->in_tensors().size(); j++) {
467 if (node->in_tensors()[j] == src_tensor) {
468 node->set_in_tensor(new_tensor, j);
469 }
470 }
471 }
472 }
473 }
474 return RET_OK;
475 }
476
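// Frees the data of constant input tensors of packed ops, recursing into
// subgraph kernels. Safe after Prepare() because a packed op keeps its own
// repacked copy of the weight and no longer reads the original data.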
void LiteSession::FreePackOpWeight(const std::vector<kernel::LiteKernel *> &kernels) {
478 for (auto *kernel : kernels) {
479 MS_ASSERT(kernel != nullptr);
480 if (kernel->subgraph_type() == kernel::kNotSubGraph) {
481 if (!IsPackedOp(kernel->type())) {
482 continue;
483 }
484 } else {
485 auto subgraph = reinterpret_cast<kernel::SubGraphKernel *>(kernel);
486 FreePackOpWeight(subgraph->nodes());
487 }
488 auto inputs = kernel->in_tensors();
489 for (auto *tensor : inputs) {
490 MS_ASSERT(tensor != nullptr);
491 if (!tensor->IsConst()) {
492 continue;
493 }
494 tensor->FreeData();
495 }
496 }
497 }
498
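// Compilation pipeline: verify the model, convert schema tensors to runtime
// tensors, schedule kernels, build the input/output maps, prepare kernels and
// the executor, and finally release packed-op weights. The session is marked as
// running for the duration of the call to reject concurrent use.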
int LiteSession::CompileGraph(Model *model) {
500 bool expected = false;
501 if (!is_running_.compare_exchange_strong(expected, true)) {
    MS_LOG(ERROR) << "Multi-threading is not supported";
503 return RET_ERROR;
504 }
505 // model.MetaGraph ==> kernels
506 if (model == nullptr) {
507 MS_LOG(ERROR) << "The input model is nullptr.";
508 is_running_.store(false);
509 return RET_PARAM_INVALID;
510 }
511 if (model->buf == nullptr) {
512 MS_LOG(ERROR) << "The input model buf is nullptr.";
513 is_running_.store(false);
514 return RET_PARAM_INVALID;
515 }
516 if (!reinterpret_cast<LiteModel *>(model)->ModelVerify()) {
517 MS_LOG(ERROR) << "wrong model input, please check";
518 is_running_.store(false);
519 return RET_ERROR;
520 }
521
522 auto ret = ConvertTensors(model);
523 if (ret != RET_OK) {
524 MS_LOG(ERROR) << "ConvertTensors failed: " << ret;
525 is_running_.store(false);
526 return ret;
527 }
528 InitGraphInputTensors(model);
529 InitGraphOutputTensors(model);
530 #ifndef ENABLE_FP16
531 if (context_->GetCpuInfo().enable_float16_) {
532 MS_LOG(WARNING) << unsupport_fp16_log;
533 }
534 #endif
535 // scheduler kernels
536 Scheduler scheduler(context_, ms_context_, model, &tensors_, inputs_, outputs_, is_train_session_, execution_plan_,
537 delegate_, delegate_device_type_);
538 scheduler.SetupSchedulerCb(std::move(sched_cb_));
539 ret = scheduler.Schedule(&kernels_);
540 if (ret != RET_OK) {
541 MS_LOG(ERROR) << "Schedule kernels failed: " << ret;
542 is_running_.store(false);
543 return ret;
544 }
545 InitGraphInOutTensorsMap(model);
546
547 ret = PrepareKernels(model);
548 if (ret != RET_OK) {
549 MS_LOG(ERROR) << "Prepare kernels failed: " << ret;
550 is_running_.store(false);
551 return ret;
552 }
553
554 if (is_train_session_) {
555 is_running_.store(false);
556 return RET_OK;
557 }
558
559 #ifdef ENABLE_MINDRT
560 ret = IsolateOutputTensor();
561 if (ret != RET_OK) {
562 MS_LOG(ERROR) << "Isolate output tensor failed.";
563 is_running_.store(false);
564 return ret;
565 }
566 executor_ = new (std::nothrow) MindrtExecutor(&graph_output_map_);
567 #else
568 executor_ = new (std::nothrow) Executor();
569 #endif
570 if (executor_ == nullptr) {
571 MS_LOG(ERROR) << "New Executor failed";
572 is_running_.store(false);
573 return RET_ERROR;
574 }
575
576 ret = executor_->Prepare(this->kernels_, this->inputs_, this->outputs_, context_);
577 if (ret != RET_OK) {
578 MS_LOG(ERROR) << "Prepare executor failed: " << ret;
579 is_running_.store(false);
580 return ret;
581 }
582
  // To reduce runtime RAM, free the original weights of packed ops: a packed op repacks its weight and
  // never accesses the original weight again.
584 FreePackOpWeight(kernels_);
585
586 is_running_.store(false);
587 return RET_OK;
588 }
589
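// A subgraph is considered isolated when none of its input tensors is produced
// as an output of any other scheduled kernel.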
bool LiteSession::IsIsolatedSubGraph(kernel::LiteKernel *kernel) {
591 auto cur_in_tensors = kernel->in_tensors();
592 for (auto cur_kernel : this->kernels_) {
593 if (cur_kernel == kernel) {
594 continue;
595 }
596 auto out_tensors = cur_kernel->out_tensors();
597 for (auto tensor : cur_in_tensors) {
598 if (IsContain(out_tensors, tensor)) {
599 return false;
600 }
601 }
602 }
603 return true;
604 }
605
int LiteSession::SetAllocatorForDelegateKernels(const kernel::LiteKernel *kernel) {
607 if (kernel == nullptr) {
608 return RET_NULL_PTR;
609 }
610 for (auto input : kernel->in_tensors()) {
611 CHECK_NULL_RETURN(input);
612 input->set_allocator(this->context_->allocator);
613 }
614 for (auto output : kernel->out_tensors()) {
615 CHECK_NULL_RETURN(output);
616 output->set_allocator(this->context_->allocator);
617 }
618 return RET_OK;
619 }
620
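// Flattens subgraphs into their node kernels, links producer/consumer kernels,
// initializes reference counts, and calls Prepare() on every scheduled kernel,
// setting the session allocator on delegate kernels first.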
int LiteSession::PrepareKernels(Model *model) {
622 std::vector<kernel::LiteKernel *> all_kernels;
623 for (auto kernel : this->kernels_) {
624 #ifndef DELEGATE_CLIP
625 if (kernel->desc().arch == kernel::kDelegate) {
626 all_kernels.push_back(kernel);
627 continue;
628 }
629 #endif
630 auto sub_graph = reinterpret_cast<kernel::SubGraphKernel *>(kernel);
631 MS_ASSERT(sub_graph != nullptr);
632 auto kernel_in_subgraph = sub_graph->nodes();
633 all_kernels.insert(all_kernels.end(), kernel_in_subgraph.begin(), kernel_in_subgraph.end());
634 }
635
636 // find in_kernels and out_kernels for kernels
637 kernel::LiteKernelUtil::FindAllInoutKernels(all_kernels);
638
639 // find in_sub and out_sub for subgraph
640 kernel::LiteKernelUtil::FindAllInoutKernels(this->kernels_);
641
642 // init init_ref_count for subgraphs and kernels
643 for (auto *kernel : this->kernels_) {
644 kernel->InitOutTensorInitRefCount();
645 #ifndef DELEGATE_CLIP
646 if (kernel->desc().arch == kernel::kDelegate) {
647 continue;
648 }
649 #endif
650 if (IsIsolatedSubGraph(kernel)) {
651 static_cast<kernel::SubGraphKernel *>(kernel)->InitInputTensorInitRefCount();
652 }
653 }
654 AdjustModelOutputTensorInitRefCount(model);
655 for (auto kernel : this->kernels_) {
656 if (kernel->desc().arch == kernel::kDelegate) {
657 auto ret = SetAllocatorForDelegateKernels(kernel);
658 if (ret != RET_OK) {
659 MS_LOG(ERROR) << "Prepare kernel " << kernel->name() << " failed: " << ret;
660 return ret;
661 }
662 }
663 auto ret = kernel->Prepare();
664 if (ret != RET_OK) {
665 MS_LOG(ERROR) << "Prepare kernel " << kernel->name() << " failed: " << ret;
666 return ret;
667 }
668 }
669 return RET_OK;
670 }
671
std::vector<mindspore::tensor::MSTensor *> LiteSession::GetInputs() const { return this->input_vec_; }
673
int LiteSession::RunGraph(const KernelCallBack &before, const KernelCallBack &after) {
675 bool expected = false;
676 if (!is_running_.compare_exchange_strong(expected, true)) {
    MS_LOG(ERROR) << "Multi-threading is not supported";
678 return RET_ERROR;
679 }
680 STATUS ret = CheckTensorsInvalid(inputs_);
681 if (ret != RET_OK) {
682 MS_LOG(ERROR) << "CheckInputs failed.";
683 return ret;
684 }
685 MS_ASSERT(this->context_ != nullptr);
686 if (before == nullptr && after == nullptr) {
687 ret = executor_->Run(this->inputs_, this->outputs_, this->kernels_);
688 } else {
689 ret = executor_->Run(this->inputs_, this->outputs_, this->kernels_, before, after);
690 }
691 if (ret != RET_OK) {
692 MS_LOG(ERROR) << "RunGraph failed : " << ret;
693 }
694 is_running_.store(false);
695 return ret;
696 }
697
int LiteSession::Init(InnerContext *context) {
699 bool expected = false;
700 if (!is_running_.compare_exchange_strong(expected, true)) {
    MS_LOG(ERROR) << "Multi-threading is not supported";
702 return RET_ERROR;
703 }
704 if (context == nullptr) {
705 MS_LOG(ERROR) << "context is nullptr";
706 is_running_.store(false);
707 return RET_NULL_PTR;
708 }
709 this->context_ = context;
710
711 auto ret = this->context_->Init();
712 if (ret != RET_OK) {
713 MS_LOG(ERROR) << "Init Context failed";
714 is_running_.store(false);
715 return ret;
716 }
717
718 #ifdef MS_COMPILE_IOS
719 context_->thread_pool()->SetMaxSpinCount(kDefaulLiteIosSpinCount);
720 context_->thread_pool()->SetMinSpinCount(kDefaulLiteIosSpinCount);
721 #endif
722
723 if (context->delegate != nullptr) {
724 #ifndef DELEGATE_CLIP
725 delegate_ = context->delegate;
726 delegate_device_type_ = -1;
727 #else
728 MS_LOG(ERROR) << unsupport_delegate_log;
729 is_running_.store(false);
730 return RET_NOT_SUPPORT;
731 #endif
732 }
733 ms_context_ = MSContextFromContext(context);
734 if (ms_context_ == nullptr) {
735 MS_LOG(ERROR) << "transfer context to ms context failed.";
736 is_running_.store(false);
737 return RET_NULL_PTR;
738 }
739 #ifndef DELEGATE_CLIP
740 #if SUPPORT_NPU
741 if (delegate_ == nullptr && context_->IsNpuEnabled()) {
742 delegate_ = std::make_shared<NPUDelegate>(context_->GetNpuInfo());
743 if (delegate_ == nullptr) {
744 MS_LOG(ERROR) << "New delegate_ failed";
745 return RET_ERROR;
746 }
747 delegate_device_type_ = DT_NPU;
748 this->context_->delegate = delegate_;
749 }
750 #endif
751 #if GPU_TENSORRT
752 if (delegate_ == nullptr && context_->IsGpuEnabled()) {
753 delegate_ = std::make_shared<TensorRTDelegate>(ms_context_);
754 if (delegate_ == nullptr) {
755 MS_LOG(ERROR) << "New tensorrt delegate_ failed";
756 return RET_ERROR;
757 }
758 delegate_device_type_ = DT_GPU;
759 this->context_->delegate = delegate_;
760 }
761 #endif
762 #ifdef SUPPORT_NNRT
763 if (delegate_ == nullptr && context_->IsNNRtEnabled()) {
764 delegate_ = std::make_shared<NNRTDelegate>();
765 if (delegate_ == nullptr) {
766 MS_LOG(ERROR) << "New NNRt delegate failed";
767 return RET_ERROR;
768 }
769 delegate_device_type_ = DT_NNRT;
770 this->context_->delegate = delegate_;
771 }
772 #endif
773 if (delegate_ != nullptr) {
774 auto delegate_ret = delegate_->Init();
775 if (delegate_ret == mindspore::kLiteNotSupport) {
776 MS_LOG(DEBUG) << "Delegate is unsupported";
777 delegate_.reset();
778 delegate_ = nullptr;
779 } else if (delegate_ret == mindspore::kSuccess) {
780 MS_LOG(INFO) << "Delegate init successfully";
781 } else {
782 MS_LOG(ERROR) << "Delegate init failed";
783 return RET_ERROR;
784 }
785 }
786 #endif
787 ret = InitGPURuntime();
788 if (ret != RET_OK) {
789 MS_LOG(ERROR) << "Init GPU runtime failed.";
790 is_running_.store(false);
791 return ret;
792 }
793 is_running_.store(false);
794 return RET_OK;
795 }
796
void LiteSession::BindThread(bool if_bind) {
798 // Abandoned code
799 // Bind thread in executor
800 return;
801 }
802
LiteSession::~LiteSession() {
804 delegate_.reset();
805 bool expected = false;
806 if (!is_running_.compare_exchange_strong(expected, true)) {
    MS_LOG(ERROR) << "Multi-threading is not supported";
808 return;
809 }
810 for (auto *kernel : kernels_) {
811 delete kernel;
812 kernel = nullptr;
813 }
814 for (auto tensor : tensors_) {
815 if (tensor == nullptr) {
816 continue;
817 }
    // Data of a const tensor that does not own its data will not be freed here,
    // e.g. const data from the meta_graph, which is freed when the meta_graph is freed.
820 if (tensor->IsConst() && !tensor->own_data()) {
821 tensor->set_data(nullptr);
822 }
823
824 /* situation : user set graph-output-tensor data */
825 if (tensor->IsGraphOutput() && tensor->allocator() == nullptr) {
826 tensor->set_data(nullptr);
827 }
828 delete tensor;
829 tensor = nullptr;
830 }
831
832 for (auto item : graph_output_map_) {
833 auto isolate_output_tensor = item.first;
834 isolate_output_tensor->set_data(nullptr);
835 delete isolate_output_tensor;
836 isolate_output_tensor = nullptr;
837 }
838
  // Tensor pointers held in input_map_ and the output maps are freed via tensors_.
840 input_map_.clear();
841 output_node_map_.clear();
842 output_tensor_map_.clear();
843 input_vec_.clear();
844 graph_output_map_.clear();
845
846 delete this->executor_;
847 this->executor_ = nullptr;
848 #if GPU_OPENCL
849 delete opencl_runtime_wrapper_;
850 #endif
851 delete ms_context_;
852 ms_context_ = nullptr;
853 delete this->context_;
854 this->context_ = nullptr;
855 delete (model_);
856 is_running_.store(false);
857 }
858
mindspore::tensor::MSTensor *LiteSession::GetInputsByTensorName(const std::string &name) const {
860 auto ret = input_map_.find(name);
861 if (ret == input_map_.end()) {
    MS_LOG(WARNING) << "Tensor " << name << " does not exist";
863 return nullptr;
864 }
865 return ret->second;
866 }
867
std::vector<mindspore::tensor::MSTensor *> LiteSession::GetOutputsByNodeName(const std::string &node_name) const {
869 auto ret = output_node_map_.find(node_name);
870 if (ret == output_node_map_.end()) {
871 MS_LOG(WARNING) << "Node " << node_name << " is not an output node";
872 std::vector<mindspore::tensor::MSTensor *> empty_ret;
873 return empty_ret;
874 }
875 return ret->second;
876 }
877
std::vector<std::string> LiteSession::GetOutputTensorNames() const { return this->output_tensor_names_; }
879
mindspore::tensor::MSTensor *LiteSession::GetOutputByTensorName(const std::string &tensor_name) const {
881 auto ret = output_tensor_map_.find(tensor_name);
882 if (ret == output_tensor_map_.end()) {
    MS_LOG(WARNING) << "Tensor " << tensor_name << " is not an output tensor";
884 return nullptr;
885 }
886 return ret->second;
887 }
888
std::unordered_map<std::string, mindspore::tensor::MSTensor *> LiteSession::GetOutputs() const {
890 return this->output_tensor_map_;
891 }
892
int LiteSession::ResizeInputs(const std::vector<mindspore::tensor::MSTensor *> &inputs,
                              const std::vector<std::vector<int>> &dims) {
895 if (inputs.size() != inputs_.size()) {
896 MS_LOG(ERROR) << "Inputs size " << inputs.size() << " is not equal to " << inputs_.size();
897 return RET_PARAM_INVALID;
898 }
899
900 if (dims.size() != inputs.size()) {
901 MS_LOG(ERROR) << "Input dims size " << dims.size() << " is not equal to the inputs size " << inputs.size();
902 return RET_PARAM_INVALID;
903 }
904
905 for (size_t i = 0; i < inputs.size(); ++i) {
906 if (inputs[i] != inputs_[i]) {
      MS_LOG(ERROR) << "Input[" << i << "] tensor is not equal to the input saved in the session!";
908 return RET_PARAM_INVALID;
909 }
910 inputs_[i]->FreeData();
911 inputs_[i]->set_shape(dims[i]);
912 }
913 if (!is_train_session_) {
914 executor_->Resize(inputs, dims);
915 }
916 return RET_OK;
917 }
918
void LiteSession::ResetInputsShape(const std::vector<std::vector<int>> &dims) {
920 for (size_t i = 0; i < inputs_.size(); ++i) {
921 inputs_[i]->FreeData();
922 inputs_[i]->set_shape(dims[i]);
923 }
924 }
925
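// Re-runs shape inference / resizing on every scheduled kernel after the input
// shapes change. RET_INFER_INVALID from a kernel is tolerated and that kernel is
// skipped; any other error aborts the resize.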
int LiteSession::ReSizeKernels(const std::vector<kernel::LiteKernel *> &kernels) {
927 for (auto kernel : kernels) {
928 if (kernel == nullptr) {
929 MS_LOG(ERROR) << "input kernel is nullptr!";
930 return RET_ERROR;
931 }
932 auto ret = RET_OK;
933 #ifndef DELEGATE_CLIP
934 if (kernel->desc().arch == kernel::kDelegate) {
935 ret = kernel->ReSize();
936 } else {
937 #endif
938 if (kernel->subgraph_type() == kernel::kGpuFp16SubGraph || kernel->subgraph_type() == kernel::kGpuFp32SubGraph) {
939 #if GPU_OPENCL
940 auto sub_graph = reinterpret_cast<kernel::OpenCLSubGraph *>(kernel);
941 ret = sub_graph->ReSize(false);
942 #endif
943 } else {
944 auto sub_graph = reinterpret_cast<kernel::SubGraphKernel *>(kernel);
945 ret = sub_graph->ReSize();
946 }
947 #ifndef DELEGATE_CLIP
948 }
949 #endif
950 if (ret == RET_INFER_INVALID) {
951 MS_LOG(INFO) << "InferShape is interrupted";
952 continue;
953 }
954 if (ret != RET_OK) {
955 MS_LOG(ERROR) << "ReSize node " << kernel->name() << " failed";
956 return RET_ERROR;
957 }
958 }
959 return RET_OK;
960 }
961
int LiteSession::Resize(const std::vector<mindspore::tensor::MSTensor *> &inputs,
                        const std::vector<std::vector<int>> &dims) {
964 bool expected = false;
965 if (!is_running_.compare_exchange_strong(expected, true)) {
    MS_LOG(ERROR) << "Multi-threading is not supported";
967 return RET_ERROR;
968 }
969 std::vector<std::vector<int>> old_dims;
970 for (size_t i = 0; i < inputs_.size(); ++i) {
971 old_dims.push_back(inputs_[i]->shape());
972 }
973 auto ret = ResizeInputs(inputs, dims);
974 if (ret != RET_OK) {
975 ResetInputsShape(old_dims);
976 is_running_.store(false);
977 return ret;
978 }
979
980 ret = ReSizeKernels(kernels_);
981 if (ret != RET_OK) {
982 ResetInputsShape(old_dims);
983 auto resize_ret = ReSizeKernels(kernels_);
984 if (resize_ret != RET_OK) {
      MS_LOG(ERROR) << "Restore kernel size failed, ret: " << resize_ret;
986 }
987 is_running_.store(false);
988 return ret;
989 }
990 is_running_.store(false);
991 return RET_OK;
992 }
993
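// Applies the configured CPU affinity, initializes the OpenCL runtime when the
// GPU is enabled (falling back to CPU-only mode on failure), then resets the
// affinity to NO_BIND because core binding disturbs OpenCL driver scheduling.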
int LiteSession::InitGPURuntime() {
995 if (context_->IsCpuEnabled()) {
996 CpuBindMode cpu_bind_mode = context_->GetCpuDeviceInfo()->cpu_bind_mode_;
997 ThreadPool *thread_pool = this->context_->thread_pool();
998 if (thread_pool == nullptr) {
999 MS_LOG(ERROR) << "thread pool is nullptr";
1000 is_running_.store(false);
1001 return RET_NULL_PTR;
1002 }
1003 thread_pool->SetProcessAffinity(static_cast<BindMode>(cpu_bind_mode));
1004 }
1005 #if GPU_OPENCL
1006 if (this->context_->IsGpuEnabled()) {
1007 opencl_runtime_wrapper_ = new (std::nothrow) opencl::OpenCLRuntimeInnerWrapper();
1008 if (opencl_runtime_wrapper_ == nullptr) {
1009 MS_LOG(ERROR) << "create OpenCLRuntimeInnerWrapper failed";
1010 return RET_ERROR;
1011 }
1012 auto gpu_device_info = this->context_->GetGpuInfo();
1013 auto opencl_runtime = opencl_runtime_wrapper_->GetInstance();
1014 opencl_runtime->SetFp16Enable(gpu_device_info.enable_float16_);
1015 if (opencl_runtime->Init() != RET_OK) {
1016 this->context_->device_list_ = {{DT_CPU, {gpu_device_info.enable_float16_, MID_CPU}}};
1017 MS_LOG(WARNING) << "Init OpenCL runtime failed, change to CPU mode.";
1018 } else {
1019 MS_LOG(INFO) << "Init OpenCL runtime success.";
1020 }
1021
1022 /* check chip support shared memory */
1023 auto enable_arm_import_memory = opencl_runtime->isExtensionEnable(EXT_ARM_IMPORT_MEMORY_HOST);
1024 if (!enable_arm_import_memory) {
      MS_LOG(WARNING) << "GPU does not support shared memory!";
1026 }
1027 }
1028 #endif
  // Core binding interferes with the OpenCL driver's scheduling, so reset the affinity to NO_BIND.
1030 if (context_->IsCpuEnabled()) {
1031 ThreadPool *thread_pool = this->context_->thread_pool();
1032 thread_pool->SetProcessAffinity(static_cast<BindMode>(NO_BIND));
1033 }
1034 return RET_OK;
1035 }
1036 } // namespace lite
1037
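// Typical usage of the session API defined below (a minimal sketch only; error
// handling and buffer management are left to the caller, and the model buffer is
// assumed to be a valid MindSpore Lite flatbuffer):
//
//   lite::Context context;
//   auto *session = session::LiteSession::CreateSession(model_buf, size, &context);
//   auto inputs = session->GetInputs();
//   /* fill input tensor data here */
//   session->RunGraph();
//   auto outputs = session->GetOutputs();
//   delete session;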
session::LiteSession *session::LiteSession::CreateSession(const lite::Context *context) {
1039 if (context == nullptr) {
1040 return nullptr;
1041 }
1042
1043 auto session = new (std::nothrow) lite::LiteSession();
1044 if (session == nullptr) {
1045 MS_LOG(ERROR) << "create session failed";
1046 return nullptr;
1047 }
1048
1049 mindspore::lite::InnerContext *inner_context = new (std::nothrow) mindspore::lite::InnerContext(context);
1050 if (inner_context == nullptr) {
1051 MS_LOG(ERROR) << "new inner context failed";
1052 delete session;
1053 return nullptr;
1054 }
1055
1056 auto ret = session->Init(inner_context);
1057 if (ret != mindspore::lite::RET_OK) {
1058 MS_LOG(ERROR) << "init session failed";
1059 delete session;
1060 return nullptr;
1061 }
1062 return session;
1063 }
1064
session::LiteSession *session::LiteSession::CreateSession(const char *model_buf, size_t size,
                                                           const lite::Context *context) {
1067 auto *session = LiteSession::CreateSession(context);
1068 if (session == nullptr) {
1069 MS_LOG(ERROR) << "Create session failed";
1070 return nullptr;
1071 }
1072 auto ret = reinterpret_cast<lite::LiteSession *>(session)->LoadModelAndCompileByBuf(model_buf, size);
1073 if (ret != RET_OK) {
1074 MS_LOG(ERROR) << "Init session failed";
1075 delete session;
1076 return nullptr;
1077 }
1078 return session;
1079 }
1080
session::LiteSession *lite::LiteSession::CreateSession(const std::string &model_path, const lite::Context *context) {
1082 auto *session = session::LiteSession::CreateSession(context);
1083 if (session == nullptr) {
1084 MS_LOG(ERROR) << "Create session failed";
1085 return nullptr;
1086 }
1087 auto ret = reinterpret_cast<lite::LiteSession *>(session)->LoadModelAndCompileByPath(model_path);
1088 if (ret != RET_OK) {
1089 MS_LOG(ERROR) << "Init session failed";
1090 delete session;
1091 return nullptr;
1092 }
1093 return session;
1094 }
1095
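// Imports a flatbuffer model from the caller-provided buffer and compiles it.
// model->buf is reset to nullptr before the model is stored or deleted, so the
// buffer itself is never freed by this function.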
int lite::LiteSession::LoadModelAndCompileByBuf(const char *model_buf, size_t buf_size) {
1097 auto *model = lite::ImportFromBuffer(model_buf, buf_size, true);
1098 if (model == nullptr) {
1099 MS_LOG(ERROR) << "Import model failed";
1100 return RET_ERROR;
1101 }
1102 auto ret = CompileGraph(model);
1103 if (ret != lite::RET_OK) {
1104 MS_LOG(ERROR) << "Compile model failed";
1105 model->buf = nullptr;
1106 delete model;
1107 return RET_ERROR;
1108 }
1109 model->buf = nullptr;
1110 set_model(model);
1111 return RET_OK;
1112 }
1113
int lite::LiteSession::LoadModelAndCompileByPath(const std::string &model_path) {
1115 size_t model_size;
1116 auto model_buf = lite::ReadFile(model_path.c_str(), &model_size);
1117 if (model_buf == nullptr) {
1118 MS_LOG(ERROR) << "Read model file failed";
1119 return RET_ERROR;
1120 }
1121 auto *model = lite::ImportFromBuffer(model_buf, model_size, true);
1122 if (model == nullptr) {
1123 MS_LOG(ERROR) << "Import model failed";
1124 return RET_ERROR;
1125 }
1126 (reinterpret_cast<lite::LiteModel *>(model))->set_keep_model_buf(true);
1127 auto ret = CompileGraph(model);
1128 if (ret != lite::RET_OK) {
1129 MS_LOG(ERROR) << "Compile model failed";
1130 return RET_ERROR;
1131 }
1132 set_model(model);
1133 return RET_OK;
1134 }
1135
1136 } // namespace mindspore
1137