/**
 * Copyright 2020-2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_LITE_SESSION_H_
#define MINDSPORE_LITE_SRC_RUNTIME_LITE_SESSION_H_

#include <memory>
#include <vector>
#include <string>
#include <unordered_map>
#include <map>
#include <atomic>
#include "src/executor/kernel_exec.h"
#include "src/litert/lite_model.h"
#include "src/litert/inner_context.h"
#include "src/litert/runtime_allocator.h"
#include "schema/model_generated.h"
#include "src/litert/executor.h"
#include "src/tensor.h"
#include "src/tensorlist.h"
#include "src/common/dynamic_library_loader.h"
#include "include/api/delegate.h"
#if GPU_OPENCL
#include "src/litert/kernel/gpu/opencl/opencl_runtime.h"
#endif
#include "src/litert/scheduler_cb.h"
#include "src/executor/sub_graph_kernel.h"

#ifdef ENABLE_LITE_HELPER
#include "src/common/helper/infer_helpers.h"
#endif

namespace mindspore {
namespace lite {
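// LiteSession is the core runtime session of MindSpore Lite: it loads a model, compiles it into a schedule of
// executable kernels, and runs inference over them. A minimal usage sketch based only on the declarations in this
// header (error handling omitted):
//
//   auto *session = LiteSession::CreateSession(model_buf, buf_size, context);
//   auto inputs = session->GetInputs();    // fill each input tensor's data
//   session->RunGraph();                   // optionally pass before/after kernel callbacks
//   auto outputs = session->GetOutputs();  // tensor name -> output tensor
//   delete session;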
class MS_API LiteSession {
 public:
  LiteSession();
  virtual ~LiteSession();
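  // Factory methods: the first binds a new session to an existing inner context; the second additionally takes a
  // serialized model buffer to load and compile.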
  static LiteSession *CreateSession(const std::shared_ptr<InnerContext> &context);
  static LiteSession *CreateSession(const char *model_buf, size_t size, const std::shared_ptr<InnerContext> &context);

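  // Loads a serialized model from a memory buffer and compiles it. With ENABLE_LITE_HELPER defined, external infer
  // helpers may be supplied; returns a status code from the RET_* family.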
#ifdef ENABLE_LITE_HELPER
  int LoadModelAndCompileByBuf(const char *model_buf, mindspore::ModelType model_type, const size_t &buf_size,
                               mindspore::infer::helper::InferHelpers *infer_helpers = nullptr);
#else
  int LoadModelAndCompileByBuf(const char *model_buf, mindspore::ModelType model_type, const size_t &buf_size);
#endif
  virtual int LoadModelAndCompileByPath(const std::string &model_path, mindspore::ModelType model_type);
  mindspore::ModelType LoadModelByBuff(const char *model_buf, const size_t &buf_size, char **lite_buf, size_t *size,
                                       mindspore::ModelType model_type);
  virtual const char *LoadModelByPath(const std::string &file, mindspore::ModelType model_type, size_t *size,
                                      bool use_mmap);
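  // Session lifecycle: Init binds the inner context, CompileGraph converts and schedules the model into kernels,
  // and Resize applies new input dimensions (re-running shape inference as described near 'infer_along_running_').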
  virtual int Init(const std::shared_ptr<InnerContext> &context);
  virtual int CompileGraph(Model *model);
  virtual int Resize(const std::vector<mindspore::lite::Tensor *> &inputs, const std::vector<std::vector<int>> &dims);

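  // Inference and graph I/O accessors. RunGraph executes the compiled kernels; the optional callbacks run before
  // and after each kernel, which is handy for profiling and debugging.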
  virtual void BindThread(bool if_bind);
  virtual std::vector<mindspore::lite::Tensor *> GetInputs() const;
  virtual mindspore::lite::Tensor *GetInputsByTensorName(const std::string &name) const;
  virtual int RunGraph(const KernelCallBack &before = nullptr, const KernelCallBack &after = nullptr);
  virtual std::vector<mindspore::lite::Tensor *> GetOutputsByNodeName(const std::string &node_name) const;
  virtual std::vector<std::string> GetOutputTensorNames() const;
  virtual mindspore::lite::Tensor *GetOutputByTensorName(const std::string &tensor_name) const;
  virtual std::unordered_map<std::string, mindspore::lite::Tensor *> GetOutputs() const;
  virtual int BindGLTexture2DMemory(const std::map<std::string, unsigned int> &inputGLTexture,
                                    std::map<std::string, unsigned int> *outputGLTexture);
  void InitExecutionConfig(std::map<std::string, TypeId> *config) { execution_plan_ = config; }
  void set_model(Model *model) { this->model_ = model; }
  const std::vector<kernel::KernelExec *> &get_kernels() const { return this->kernels_; }
  const Delegate *get_delegate() const { return this->delegate_.get(); }
  void SetConfigInfo(const std::map<std::string, std::map<std::string, std::string>> *config_info) {
    config_info_ = config_info;
  }
  void SetPrepareSessionFlag(bool is_prepare_session) { is_prepare_session_ = is_prepare_session; }
  const std::vector<Tensor *> &GetTensors() const { return this->tensors_; }

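  // Training hooks. The base session is inference-only, so these return RET_ERROR or inert defaults; training
  // sessions (see friend class TransferSession below) are expected to override them.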
  virtual int Train() { return mindspore::lite::RET_ERROR; }
  virtual bool IsTrain() { return false; }
  virtual int Eval() { return mindspore::lite::RET_OK; }
  virtual bool IsEval() { return true; }
  virtual int SetLearningRate(float learning_rate) { return mindspore::lite::RET_ERROR; }
  virtual float GetLearningRate() { return 0.0; }
  virtual int SetupVirtualBatch(int virtual_batch_multiplier, float lr = -1.0f, float momentum = -1.0f) {
    return mindspore::lite::RET_ERROR;
  }
  virtual std::vector<lite::Tensor *> GetPredictions() const {
    std::vector<lite::Tensor *> outputs;
    return outputs;
  }
  virtual int Export(const std::string &file_name, lite::ModelType model_type = lite::MT_TRAIN,
                     lite::QuantizationType quant_type = lite::QT_DEFAULT, lite::FormatType = lite::FT_FLATBUFFERS,
                     std::vector<std::string> out_put_tensor_name = {}) {
    return mindspore::lite::RET_ERROR;
  }
  virtual int Export(Buffer *model_buffer, lite::ModelType model_type = lite::MT_TRAIN,
                     lite::QuantizationType quant_type = lite::QT_DEFAULT, lite::FormatType = lite::FT_FLATBUFFERS,
                     std::vector<std::string> out_put_tensor_name = {}) {
    return mindspore::lite::RET_ERROR;
  }
  virtual int ExportWeightsCollaborateWithMicro(const std::string &file_name,
                                                lite::ModelType model_type = lite::MT_TRAIN,
                                                lite::FormatType = lite::FT_FLATBUFFERS, bool enable_fp16 = false,
                                                const std::vector<std::string> &changeable_weights_name = {}) {
    return mindspore::lite::RET_ERROR;
  }
  virtual std::vector<lite::Tensor *> GetFeatureMaps() const {
    std::vector<lite::Tensor *> features;
    return features;
  }
  virtual std::vector<lite::Tensor *> GetTrainableParams() const {
    std::vector<lite::Tensor *> train_params;
    return train_params;
  }
  virtual int UpdateFeatureMaps(const std::vector<lite::Tensor *> &features) { return mindspore::lite::RET_ERROR; }
  virtual std::vector<lite::Tensor *> GetGradients() const {
    std::vector<lite::Tensor *> gradients;
    return gradients;
  }
  virtual int ApplyGradients(const std::vector<lite::Tensor *> &gradients) { return mindspore::lite::RET_ERROR; }
  virtual std::vector<lite::Tensor *> GetOptimizerParams() const {
    std::vector<lite::Tensor *> params;
    return params;
  }
  virtual int SetOptimizerParams(const std::vector<lite::Tensor *> &params) { return mindspore::lite::RET_ERROR; }

  bool GetKeepModelBuf() { return keep_model_buf_; }

  void SetKeepModelBuf(bool keep_model_buf) { keep_model_buf_ = keep_model_buf; }

  void SetModelId(std::string id) { model_id_ = id; }
  int UpdateWeights(std::vector<lite::Tensor *> modify_tensors);

 protected:
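  // Compilation internals: convert schema tensors into runtime tensors, build the graph input/output maps, and
  // prepare or resize the scheduled kernels.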
  static void ConvertTensorsQuantParam(const schema::Tensor *src_tensor, lite::Tensor *dst_tensor);
  int CheckTensorValid(lite::Tensor *dst_tensor);
  int ConvertTensorsData(const lite::LiteModel *model, size_t tensor_index, lite::Tensor *dst_tensor);
  lite::Tensor *ConvertTensor(const schema::Tensor &src_tensor);
  int ConvertTensors(const lite::Model *model);
  void InitGraphInOutTensorsMap(const lite::Model *model);
  void InitGraphInputTensors(const lite::Model *model);
  void InitGraphInputMSTensors();
  void InitGraphOutputTensors(const lite::Model *model);
  void InitGraphInputMap(const lite::Model *model);
  void InitGraphOutputNodeMap(const lite::Model *model);
  void InitGraphOutputTensorMap(const lite::Model *model);
  int UpdateInputShapeMap();
  int ResizeInputs(const std::vector<mindspore::lite::Tensor *> &inputs, const std::vector<std::vector<int>> &dims);
  int SetAllocatorForDelegateKernels(const kernel::KernelExec *kernel);
  int PrepareKernels(const Model *model);
  static int DrawGraph(kernel::SubGraphKernel *graph);
  int SetTensorInitRefCount();
  int SetNonTaiCallSubgraphOutputInitRefCount();
  void SetInitRefCountOfPartialSubgraphInputs(const Model *model);
  static int ReSizeKernels(
    const std::vector<kernel::KernelExec *> &kernels,
    const std::unordered_map<Tensor *, Tensor *> &isolate_input_map = std::unordered_map<Tensor *, Tensor *>());
  static void FreePackOpWeight(const std::vector<kernel::KernelExec *> &kernels);
  static void MarkSharedWeight(const std::vector<kernel::KernelExec *> &kernels);
  std::string ParseWeightPath();
  bool IsMmapEnable() const;
  virtual int InitExecutor();

 private:
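  // Context and delegate setup: each Create*Delegate builds a backend-specific delegate (TensorRT, NPU, NNAPI,
  // CoreML, NNRT); InitDelegate selects among them, presumably driven by 'delegate_device_type_' below.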
  int PreCheck(Model *model);
  void ResetInputsShape(const std::vector<std::vector<int>> &dims);
  int InitContext(const std::shared_ptr<InnerContext> &context);
  int CreateTensorRTDelegate();
  int CreateNPUDelegate();
  int CreateNNAPIDelegate();
  int CreateCoreMLDelegate();
  int CreateNNRTDelegate();
  int InitDelegate();
  int InitGPURuntime();
  int InitSharedThreadPool();
  int ReshapeWeightTensor(lite::Tensor *orig_tensor, lite::Tensor *new_tensor);

 private:
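  // Output isolation: graph outputs are decoupled from the tensors computed in-graph; the two maps below pair each
  // calculate-tensor with its graph-output or subgraph-input counterpart (intent inferred from the member comments).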
  int IsolateOutputTensor();
  bool IsIsolatedSubGraph(const kernel::KernelExec *kernel);
  void UpdateGraphOutputMap(const std::vector<kernel::KernelExec *> &kernel);
  void UpdateLinkInfoForIsolateOutput();
  void SynIsolateInOutputDataType();
  std::unordered_map<Tensor *, Tensor *> isolate_graph_output_map_; /* <calculate-tensor,  graph-output-tensor> */
  std::unordered_map<Tensor *, Tensor *> isolate_input_map_;        /* <calculate-tensor,  src-subgraph-input-tensor> */

 private:
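  // Runtime-allocator path: when RuntimeAllocatorValid() holds, subgraph and graph-output tensor memory is planned
  // through a shared RuntimeAllocator rather than allocated per tensor (a reading of the method names, not verified).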
  int InitRuntimeAllocator();
  int RuntimeAllocatorSetData();
  void RuntimeAllocatorInitGraphOutput();
  void RuntimeAllocatorInitSubgraph();
  virtual int RuntimeAllocatorValid();
  RuntimeAllocatorPtr runtime_allocator_ = nullptr;

 private:
  int InitAscend(const std::shared_ptr<InnerContext> &context);

 protected:
  std::shared_ptr<InnerContext> context_ = nullptr;
  mindspore::Context *ms_context_ = nullptr;
  std::vector<kernel::KernelExec *> kernels_;
  std::vector<Tensor *> tensors_;
  // graph input tensors
  std::vector<Tensor *> inputs_;
  // graph output tensors
  std::vector<Tensor *> outputs_;
  // graph input MSTensors
  std::vector<mindspore::lite::Tensor *> input_vec_;
  // graph input tensor name -- input tensors
  std::unordered_map<std::string, mindspore::lite::Tensor *> input_map_;
  // graph input tensor -- input tensor shape
  std::unordered_map<Tensor *, std::vector<int>> input_shape_map_;
  // graph output node name -- output tensors
  std::unordered_map<std::string, std::vector<mindspore::lite::Tensor *>> output_node_map_;

  std::vector<std::string> output_tensor_names_;
  // graph output tensor name -- output tensor
  std::unordered_map<std::string, mindspore::lite::Tensor *> output_tensor_map_;

  Executor *executor_ = nullptr;
  Model *model_ = nullptr;
  std::atomic<bool> is_running_ = {false};
  bool is_train_session_ = false;
  bool is_prepare_session_ = false;
  friend class TransferSession;
#if GPU_OPENCL
  opencl::OpenCLRuntimeInnerWrapper *opencl_runtime_wrapper_{nullptr};
#endif

  // In dynamic-shape scenarios, this flag controls when shape inference runs for kernels. If true, shape inference
  // is not triggered by 'Resize' but is performed along with running, and it is re-invoked only if an existing
  // input shape has changed. If false, shape inference is pre-called inside 'Resize', and at run time the outputs
  // are examined to decide whether it must be called again. Currently the value is true only in the pure-CPU
  // scenario, where both 'is_control_flow_' and 'is_train_session_' are false and 'runtime_allocator_' is nullptr.
  bool infer_along_running_{true};
  int is_infershape_{RET_ERROR};
  bool is_control_flow_ = false;
  bool keep_model_buf_ = false;
  std::unique_ptr<SchedulerCb> sched_cb_;
  std::shared_ptr<Delegate> delegate_ = nullptr;
  int delegate_device_type_ = -1;  // -1: not specified; 0: CPU; 1: GPU; 2: NPU
  std::map<std::string, TypeId> *execution_plan_ = nullptr;
  const std::map<std::string, std::map<std::string, std::string>> *config_info_ = nullptr;
  std::vector<kernel::KernelExec *> non_tail_call_kernels_;
  std::string model_id_;
  std::string runner_id_;
  int worker_id_;
  bool is_shared_weight_ = false;
  bool model_buff_changed_ = false;
};
}  // namespace lite
}  // namespace mindspore
#endif  // MINDSPORE_LITE_SRC_RUNTIME_LITE_SESSION_H_