/**
 * Copyright 2019-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <vector>
#include <string>
#include <memory>
#include <map>
#include <algorithm>
#include <sstream>
#include <functional>
#include <climits>
#include <cstring>

#include "extendrt/delegate/graph_executor/litert/graph_executor.h"
#include "tools/converter/converter_metagraph.h"
#include "src/litert/lite_model.h"
#include "src/litert/cpu_info.h"
#include "include/errorcode.h"
#include "flatbuffers/flatbuffers.h"
#include "extendrt/mock/lite_runtime/converters.h"
#include "extendrt/delegate/factory.h"

#include "tools/common/meta_graph_serializer.h"
#include "extendrt/utils/tensor_utils.h"
#include "src/common/helper/external_tensor/memory_helper.h"
#include "src/executor/kernel_exec.h"
#include "src/extendrt/delegate/graph_executor/litert/func_graph_reuse_manager.h"

namespace mindspore {
namespace {
// Leave 200 MB of headroom for the model structure to make sure the serialized model will not be larger than 2 GB.
const size_t kOnlineExtractDataSize = 1800 * 1024 * 1024;
const int64_t kBufferSize = 1024;

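// Wrap a lite::Tensor in an MSTensor via LiteTensorImpl; fromSession marks whether the
// underlying tensor is owned by the session.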
Status LiteTensorToMSTensor(lite::Tensor *srcTensor, MSTensor *dstTensor, bool fromSession) {
  auto impl = std::make_shared<LiteTensorImpl>(srcTensor);
  if (impl == nullptr || impl->lite_tensor() == nullptr) {
    MS_LOG(ERROR) << "Create tensor failed.";
    return kLiteError;
  }
  impl->set_from_session(fromSession);
  auto tensor = MSTensor(impl);
  if (tensor == nullptr) {
    MS_LOG(ERROR) << "Create tensor failed.";
    return kLiteError;
  }
  *dstTensor = tensor;
  return kSuccess;
}

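// Convert a vector of lite::Tensor pointers to MSTensors; returns an empty vector on any failure.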
std::vector<MSTensor> LiteTensorsToMSTensors(const std::vector<mindspore::lite::Tensor *> &srcTensors,
                                             bool fromSession) {
  std::vector<MSTensor> dstTensors;
  dstTensors.reserve(srcTensors.size());
  for (auto inTensor : srcTensors) {
    MSTensor tensor;
    auto status = LiteTensorToMSTensor(inTensor, &tensor, fromSession);
    if (status != kSuccess) {
      return {};
    }
    dstTensors.emplace_back(tensor);
  }
  return dstTensors;
}
}  // namespace
const char litert_provider[] = "litert";

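// The underlying LiteSession is created eagerly at construction time from the user Context
// and configuration.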
LiteRTGraphExecutor::LiteRTGraphExecutor(const std::shared_ptr<mindspore::Context> &context,
                                         const ConfigInfos &config_infos)
    : context_(context), config_infos_(config_infos) {
  lite_session_ = CreateLiteSession(ContextUtils::Convert(context_.get()), config_infos_);
}

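// Compile a graph from an already-serialized model buffer (kMindIR_Lite format).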
bool LiteRTGraphExecutor::CompileGraph(const void *model_data, size_t data_size,
                                       const std::map<string, string> &compile_options, uint32_t *graph_id) {
  MS_EXCEPTION_IF_NULL(model_data);
  MS_EXCEPTION_IF_NULL(graph_id);
  *graph_id = 0;

  if (!PlatformInstructionSetSupportCheck()) {
    MS_LOG(ERROR) << "The platform does not support the required instruction set.";
    return false;
  }
  if (lite_session_ == nullptr) {
    MS_LOG(ERROR) << "lite session is nullptr.";
    return false;
  }
  int ret = lite_session_->LoadModelAndCompileByBuf(reinterpret_cast<const char *>(model_data), kMindIR_Lite, data_size,
                                                    helpers_.get());
  if (ret != lite::RET_OK) {
    MS_LOG(ERROR) << "Load and compile model from buffer failed.";
    return false;
  }
  return true;
}

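// Compile a FuncGraph: convert it to a MetaGraph, serialize it to a flatbuffer, then load and
// compile the buffer in the LiteSession. The serialized buffer is cached and reused for
// identical graphs via FuncGraphReuseManager.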
bool LiteRTGraphExecutor::CompileGraph(const FuncGraphPtr &graph, const std::map<string, string> &compile_options,
                                       uint32_t *graph_id) {
  MS_EXCEPTION_IF_NULL(graph);
  MS_EXCEPTION_IF_NULL(graph_id);
  *graph_id = 0;

  if (!PlatformInstructionSetSupportCheck()) {
    MS_LOG(ERROR) << "The platform does not support the required instruction set.";
    return false;
  }
  size_t data_size;
  auto pair_result = FuncGraphReuseManager::GetInstance()->GetFbModelBuf(&data_size, &is_shared_fb_buf_, config_infos_);
  fb_model_buf_ = pair_result.first;
  helpers_ = pair_result.second;
  schema::MetaGraphT *meta_graph = nullptr;
  if (fb_model_buf_ == nullptr) {
    auto param = std::make_shared<ConverterPara>();
    param->fmk_type = converter::kFmkTypeMs;
    param->save_type = kMindIR;
    auto mutable_graph = std::const_pointer_cast<FuncGraph>(graph);
    meta_graph = lite::ConverterToMetaGraph::Build(param, mutable_graph);
    if (meta_graph == nullptr) {
      MS_LOG(ERROR) << "func graph convert to meta graph failed.";
      return false;
    }
    if (this->IsNeedExtractTensorData(meta_graph)) {
      if (!this->ExtractTensorData(meta_graph)) {
        MS_LOG(ERROR) << "Compile Large Graph failed, extract tensor data error.";
        return false;
      }
    }
    flatbuffers::FlatBufferBuilder builder(kBufferSize);
    auto buffer = lite::MetaGraphSerializer::GetMetaGraphPackedBuff(&builder, *meta_graph, &data_size);
    fb_model_buf_ = malloc(data_size);
    if (fb_model_buf_ == nullptr) {
      MS_LOG(ERROR) << "Malloc model buffer failed, size: " << data_size;
      delete meta_graph;
      return false;
    }
    memcpy(fb_model_buf_, buffer, data_size);
    FuncGraphReuseManager::GetInstance()->StoreFbModelBuf(fb_model_buf_, data_size, helpers_, config_infos_);
  } else {
    MS_LOG(INFO) << "The graph is the same as last time; skipping conversion and using the cached model buffer "
                    "directly.";
  }
  if (lite_session_ == nullptr) {
    MS_LOG(ERROR) << "lite session is nullptr.";
    return false;
  }
  int ret = lite_session_->LoadModelAndCompileByBuf(reinterpret_cast<char *>(fb_model_buf_), kMindIR_Lite, data_size,
                                                    helpers_.get());
  delete meta_graph;
  meta_graph = nullptr;
  if (ret != lite::RET_OK) {
    MS_LOG(ERROR) << "Load model by meta graph failed";
    return false;
  }
  return true;
}

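// Bind user input tensors to the session inputs, run inference (with optional before/after
// kernel callbacks), and convert session outputs back to tensor::Tensor. Input data is shared
// by pointer, not copied; the original session data pointers are restored on error paths.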
bool LiteRTGraphExecutor::RunGraph(uint32_t, const std::vector<tensor::Tensor> &inputs,
                                   std::vector<tensor::Tensor> *outputs,
                                   const std::map<string, string> &compile_options) {
  MS_LOG(INFO) << "LiteRTGraphExecutor::RunGraph with input and outputs";
  MS_EXCEPTION_IF_NULL(outputs);
  MS_EXCEPTION_IF_NULL(lite_session_);

  auto input_tensors = lite_session_->GetInputs();
  if (input_tensors.empty()) {
    MS_LOG(EXCEPTION) << "Failed to get input tensor.";
  }
  if (input_tensors.size() != inputs.size()) {
    MS_LOG(EXCEPTION) << "Wrong input size.";
  }

  std::vector<void *> old_data;
  for (size_t i = 0; i < inputs.size(); i++) {
    auto input = input_tensors.at(i);
    auto &user_input = inputs.at(i);
    if (user_input.data_type() != input->data_type()) {
      ResetTensorData(old_data, input_tensors);
      MS_LOG(EXCEPTION) << "Tensor " << user_input.id() << " has a different data type from input "
                        << input->tensor_name() << ".";
    }
    if (user_input.data_c() == nullptr) {
      ResetTensorData(old_data, input_tensors);
      MS_LOG(EXCEPTION) << "Tensor " << user_input.id() << " has no data.";
    }
    old_data.push_back(input->data());
    if (input->data_type() == kObjectTypeString) {
#ifndef STRING_KERNEL_CLIP
      std::vector<int32_t> shape =
        TruncateShape(user_input.shape_c(), input->data_type(), user_input.DataSize(), false);
      if (shape.empty() && !(user_input.shape_c().empty())) {
        ResetTensorData(old_data, input_tensors);
        MS_LOG(EXCEPTION) << "Input dims of tensor " << user_input.id() << " is invalid.";
      }
      input->set_shape(shape);
      input->set_data(user_input.data_c(), false);
#else
      MS_LOG(ERROR) << unsupport_string_tensor_log;
      return false;
#endif
    } else {
      if (user_input.data_c() != input->data()) {
        if (input->Size() != user_input.Size()) {
          ResetTensorData(old_data, input_tensors);
#ifndef ENABLE_LITE_ACL
          MS_LOG(EXCEPTION) << "Tensor " << user_input.id() << " has wrong data size.";
#else
          MS_LOG(WARNING) << "Please check whether the data size of tensor " << user_input.id()
                          << " was modified by the DVPP method.";
          std::vector<int> truncate_shape = {static_cast<int>(user_input.DataSize())};
          input->set_shape(truncate_shape);
#endif
        }
        input->set_data(user_input.data_c(), false);
      }
    }
  }
  lite::KernelCallBack before_call_back = nullptr;
  lite::KernelCallBack after_call_back = nullptr;
  if (before_ != nullptr) {
    before_call_back = [&](const std::vector<mindspore::lite::Tensor *> &before_inputs,
                           const std::vector<mindspore::lite::Tensor *> &before_outputs,
                           const MSCallBackParam &call_param) {
      std::vector<MSTensor> inputs = LiteTensorsToMSTensors(before_inputs, true);
      std::vector<MSTensor> outputs = LiteTensorsToMSTensors(before_outputs, true);
      return before_(inputs, outputs, call_param);
    };
  }

  if (after_ != nullptr) {
    after_call_back = [&](const std::vector<mindspore::lite::Tensor *> &after_inputs,
                          const std::vector<mindspore::lite::Tensor *> &after_outputs,
                          const MSCallBackParam &call_param) {
      std::vector<MSTensor> inputs = LiteTensorsToMSTensors(after_inputs, true);
      std::vector<MSTensor> outputs = LiteTensorsToMSTensors(after_outputs, true);
      return after_(inputs, outputs, call_param);
    };
  }
  auto ret = lite_session_->RunGraph(before_call_back, after_call_back);
  if (ret != kSuccess) {
    MS_LOG(ERROR) << "Run graph failed.";
    return false;
  }
  MS_LOG(DEBUG) << "Run graph success.";
  auto res = GetLiteSessionOutputs();
  if (res.empty()) {
    MS_LOG(DEBUG) << "Empty outputs.";
    return false;
  }
  outputs->clear();
  *outputs = TensorUtils::MSTensorToTensor(res);
  return true;
}

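// Resize session inputs to the given dimensions, narrowing each int64_t dim to int as the
// lite session expects.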
bool LiteRTGraphExecutor::Resize(uint32_t, const std::vector<tensor::Tensor> &inputs,
                                 const std::vector<std::vector<int64_t>> &dims) {
  auto input_tensors = lite_session_->GetInputs();
  if (input_tensors.empty()) {
    MS_LOG(EXCEPTION) << "Failed to get input tensor.";
  }
  if (input_tensors.size() != inputs.size()) {
    MS_LOG(EXCEPTION) << "Wrong input size.";
  }
  std::vector<std::vector<int>> user_shapes;
  std::transform(dims.begin(), dims.end(), std::back_inserter(user_shapes), [](auto &input) {
    std::vector<int> shape;
    std::transform(input.begin(), input.end(), std::back_inserter(shape), [](auto s) { return static_cast<int>(s); });
    return shape;
  });
  auto ret = lite_session_->Resize(input_tensors, user_shapes);
  if (ret != kSuccess) {
    MS_LOG(ERROR) << "lite session resize failed";
    return false;
  }
  return true;
}

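// Build shape/type-only tensor::Tensor descriptors for the session inputs (no data is copied).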
std::vector<tensor::Tensor> LiteRTGraphExecutor::GetInputInfos(uint32_t) {
  if (lite_session_ == nullptr) {
    MS_LOG(ERROR) << "Session is null.";
    return {};
  }
  auto inputs = lite_session_->GetInputs();
  std::vector<tensor::Tensor> input_tensors;
  for (size_t i = 0; i < inputs.size(); ++i) {
    auto type_id = inputs[i]->data_type();
    auto shape = inputs[i]->shape();
    std::vector<int64_t> lite_shape;
    std::transform(shape.begin(), shape.end(), std::back_inserter(lite_shape),
                   [](int c) { return static_cast<int64_t>(c); });
    auto tmp = tensor::Tensor(type_id, lite_shape);
    tmp.set_name(inputs[i]->tensor_name());
    input_tensors.push_back(tmp);
  }
  return input_tensors;
}

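// Build shape/type-only tensor::Tensor descriptors for the session outputs.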
std::vector<tensor::Tensor> LiteRTGraphExecutor::GetOutputInfos(uint32_t) {
  auto outputs = GetLiteSessionOutputs();
  std::vector<tensor::Tensor> output_tensors;
  for (size_t i = 0; i < outputs.size(); ++i) {
    auto type_id = static_cast<enum TypeId>(outputs[i].DataType());
    auto tmp = tensor::Tensor(type_id, outputs[i].Shape());
    tmp.set_name(outputs[i].Name());
    output_tensors.push_back(tmp);
  }
  return output_tensors;
}

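// Restore the data pointers saved in old_data back onto the session input tensors.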
void LiteRTGraphExecutor::ResetTensorData(std::vector<void *> old_data, const std::vector<lite::Tensor *> &tensors) {
  for (size_t j = 0; j < old_data.size(); j++) {
    tensors.at(j)->set_data(old_data.at(j));
  }
}

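// Fetch the session outputs by name and wrap them as MSTensors; returns an empty vector on
// any inconsistency.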
std::vector<MSTensor> LiteRTGraphExecutor::GetLiteSessionOutputs() {
  std::vector<MSTensor> empty;
  if (lite_session_ == nullptr) {
    MS_LOG(ERROR) << "Session is null.";
    return empty;
  }
  std::vector<MSTensor> res;
  auto names = lite_session_->GetOutputTensorNames();
  if (names.empty()) {
    MS_LOG(ERROR) << "The output tensor names of this model are null.";
    return empty;
  }
  auto outputs = lite_session_->GetOutputs();
  if (outputs.empty()) {
    MS_LOG(ERROR) << "The outputs of the model are null.";
    return empty;
  }
  if (names.size() != outputs.size()) {
    MS_LOG(ERROR) << "The size of outputs does not match the size of names.";
    return empty;
  }
  res.resize(names.size());
  for (size_t i = 0; i < names.size(); i++) {
    auto impl = std::make_shared<LiteTensorImpl>(outputs[names[i]]);
    if (impl == nullptr || impl->lite_tensor() == nullptr) {
      MS_LOG(ERROR) << "Create tensor failed.";
      return empty;
    }
    auto tensor = MSTensor(impl);
    if (tensor == nullptr) {
      MS_LOG(ERROR) << "Create tensor failed.";
      return empty;
    }
    res[i] = tensor;
  }
  return res;
}

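// Narrow an int64_t shape to int32_t, guarding against negative dims and byte-size overflow.
// When verify_size is set, the computed byte size must match data_len exactly.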
std::vector<int32_t> LiteRTGraphExecutor::TruncateShape(const std::vector<int64_t> &shape, TypeId type, size_t data_len,
                                                        bool verify_size) {
  std::vector<int32_t> empty;
  if (shape.empty()) {
    return empty;
  }
  std::vector<int32_t> truncated_shape;
  truncated_shape.resize(shape.size());
  size_t element_size = lite::DataTypeSize(type);
  for (size_t i = 0; i < shape.size(); i++) {
    auto dim = shape[i];
    if (dim < 0 || dim > INT_MAX || (dim != 0 && element_size > INT_MAX / static_cast<size_t>(dim))) {
      MS_LOG(ERROR) << "Invalid shape! dim: " << dim << ", element_size: " << element_size;
      return empty;
    } else {
      element_size *= static_cast<size_t>(dim);
      truncated_shape[i] = static_cast<int32_t>(dim);
    }
  }
  if (verify_size) {
    if (element_size != data_len) {
      MS_LOG(ERROR) << "Invalid data size! element_size: " << element_size << ", data_len: " << data_len;
      return empty;
    }
  }
  return truncated_shape;
}

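// Create and initialize a LiteSession; the model buffer is kept alive (SetKeepModelBuf) so
// that compiled tensors can keep referencing it.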
std::shared_ptr<lite::LiteSession> LiteRTGraphExecutor::CreateLiteSession(
  const std::shared_ptr<lite::InnerContext> &context, const ConfigInfos &config_infos) {
  auto session = std::make_shared<lite::LiteSession>();
  if (session == nullptr) {
    MS_LOG(ERROR) << "create session failed";
    return nullptr;
  }
  session->SetConfigInfo(&config_infos);

  session->SetKeepModelBuf(true);
  auto ret = session->Init(context);
  if (ret != mindspore::lite::RET_OK) {
    MS_LOG(ERROR) << "init session failed";
    return nullptr;
  }
  return session;
}

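// Move constant tensor data out of the MetaGraph into an in-memory external-tensor helper,
// replacing each tensor's inline data with an ExternalDataT record (offset, length, checksum).
// This keeps the serialized flatbuffer below the 2 GB limit for large models.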
bool LiteRTGraphExecutor::ExtractTensorData(mindspore::schema::MetaGraphT *meta_graph_t) {
  MS_EXCEPTION_IF_NULL(meta_graph_t);
  helpers_ = std::make_shared<mindspore::infer::helper::InferHelpers>();
  if (helpers_ == nullptr) {
    MS_LOG(ERROR) << "Create InferHelpers failed.";
    return false;
  }
  auto tensor_helper = new (std::nothrow) mindspore::infer::helper::MemoryExternalTensorHelper();
  if (tensor_helper == nullptr) {
    MS_LOG(ERROR) << "Create Memory External TensorHelper failed.";
    return false;
  }
  int64_t cur_offset = 0;
  size_t size = 0;
  uint8_t *data = nullptr;
  for (const auto &tensor : meta_graph_t->allTensors) {
    if (tensor->nodeType == mindspore::lite::NodeType_CNode) {
      continue;
    }
    if (tensor->dataType == kObjectTypeTensorType) {  // control flow is not supported yet
      continue;
    }
    auto *external_data_t = new (std::nothrow) schema::ExternalDataT;
    if (external_data_t == nullptr) {
      MS_LOG(ERROR) << "Create ExternalDataT failed";
      delete tensor_helper;
      return false;
    }
    data = tensor->data.data();
    size = tensor->data.size();
    external_data_t->location = "MEM: " + tensor->name;
    external_data_t->offset = cur_offset;
    external_data_t->length = static_cast<int64_t>(size);
    if (data != nullptr && size > 0) {
      std::stringstream oss;
      oss << std::hash<char>()(data[0]);
      external_data_t->checkSum = oss.str();
      cur_offset += static_cast<int64_t>(size);
      flatbuffers::FlatBufferBuilder builder(kBufferSize);
      auto offset = mindspore::schema::ExternalData::Pack(builder, external_data_t);
      builder.Finish(offset);
      auto external_data = flatbuffers::GetRoot<mindspore::schema::ExternalData>(builder.GetBufferPointer());
      tensor_helper->SetExternalTensorData(external_data, static_cast<void *>(data));
    }
    tensor->data.clear();
    tensor->externalData.emplace_back(external_data_t);
  }
  helpers_->SetExternalTensorHelper(tensor_helper);
  return true;
}

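// A graph needs tensor-data extraction when its total constant data reaches kOnlineExtractDataSize.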
bool LiteRTGraphExecutor::IsNeedExtractTensorData(mindspore::schema::MetaGraphT *meta_graph_t) {
  MS_EXCEPTION_IF_NULL(meta_graph_t);
  size_t size = 0;
  for (auto &tensor : meta_graph_t->allTensors) {
    size += tensor->data.size();
  }
  return size >= kOnlineExtractDataSize;
}

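// Factory hook used by the delegate registration below.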
static std::shared_ptr<device::GraphExecutor> LiteRTGraphExecutorCreator(const std::shared_ptr<Context> &ctx,
                                                                         const ConfigInfos &config_infos) {
  return std::make_shared<LiteRTGraphExecutor>(ctx, config_infos);
}

REG_DELEGATE(kCPU, litert_provider, LiteRTGraphExecutorCreator);
}  // namespace mindspore