/**
 * Copyright 2020-2023 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_RUNTIME_LITE_SESSION_H_
#define MINDSPORE_LITE_SRC_RUNTIME_LITE_SESSION_H_

#include <memory>
#include <vector>
#include <string>
#include <unordered_map>
#include <map>
#include <atomic>
#include "src/executor/kernel_exec.h"
#include "src/litert/lite_model.h"
#include "src/litert/inner_context.h"
#include "src/litert/runtime_allocator.h"
#include "schema/model_generated.h"
#include "src/litert/executor.h"
#include "src/tensor.h"
#include "src/tensorlist.h"
#include "src/common/dynamic_library_loader.h"
#include "include/api/delegate.h"
#if GPU_OPENCL
#include "src/litert/kernel/gpu/opencl/opencl_runtime.h"
#endif
#include "src/litert/scheduler_cb.h"
#include "src/executor/sub_graph_kernel.h"

#ifdef ENABLE_LITE_HELPER
#include "src/common/helper/infer_helpers.h"
#endif

namespace mindspore {
namespace lite {
class MS_API LiteSession {
 public:
  LiteSession();
  virtual ~LiteSession();
  static LiteSession *CreateSession(const std::shared_ptr<InnerContext> &context);
  static LiteSession *CreateSession(const char *model_buf, size_t size, const std::shared_ptr<InnerContext> &context);

#ifdef ENABLE_LITE_HELPER
  int LoadModelAndCompileByBuf(const char *model_buf, mindspore::ModelType model_type, const size_t &buf_size,
                               mindspore::infer::helper::InferHelpers *infer_helpers = nullptr);
#else
  int LoadModelAndCompileByBuf(const char *model_buf, mindspore::ModelType model_type, const size_t &buf_size);
#endif
  virtual int LoadModelAndCompileByPath(const std::string &model_path, mindspore::ModelType model_type);
  mindspore::ModelType LoadModelByBuff(const char *model_buf, const size_t &buf_size, char **lite_buf, size_t *size,
                                       mindspore::ModelType model_type);
  virtual const char *LoadModelByPath(const std::string &file, mindspore::ModelType model_type, size_t *size,
                                      bool use_mmap);
  virtual int Init(const std::shared_ptr<InnerContext> &context);
  virtual int CompileGraph(Model *model);
  virtual int Resize(const std::vector<mindspore::lite::Tensor *> &inputs, const std::vector<std::vector<int>> &dims);

  virtual void BindThread(bool if_bind);
  virtual std::vector<mindspore::lite::Tensor *> GetInputs() const;
  virtual mindspore::lite::Tensor *GetInputsByTensorName(const std::string &name) const;
  virtual int RunGraph(const KernelCallBack &before = nullptr, const KernelCallBack &after = nullptr);
  virtual std::vector<mindspore::lite::Tensor *> GetOutputsByNodeName(const std::string &node_name) const;
  virtual std::vector<std::string> GetOutputTensorNames() const;
  virtual mindspore::lite::Tensor *GetOutputByTensorName(const std::string &tensor_name) const;
  virtual std::unordered_map<std::string, mindspore::lite::Tensor *> GetOutputs() const;
  virtual int BindGLTexture2DMemory(const std::map<std::string, unsigned int> &inputGLTexture,
                                    std::map<std::string, unsigned int> *outputGLTexture);
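
  // Typical single-inference flow (illustrative sketch only, not a normative contract: error-code
  // checks are omitted, and the model path and ModelType value below are placeholders):
  //
  //   auto ctx = std::make_shared<mindspore::lite::InnerContext>();
  //   auto *session = mindspore::lite::LiteSession::CreateSession(ctx);
  //   session->LoadModelAndCompileByPath("model.ms", mindspore::kMindIR_Lite);
  //   auto inputs = session->GetInputs();
  //   // ... fill input tensor data; call Resize() first if the input shapes have changed ...
  //   session->RunGraph();
  //   auto outputs = session->GetOutputs();
  //   delete session;
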
  void InitExecutionConfig(std::map<std::string, TypeId> *config) { execution_plan_ = config; }
  void set_model(Model *model) { this->model_ = model; }
  const std::vector<kernel::KernelExec *> &get_kernels() const { return this->kernels_; }
  const Delegate *get_delegate() const { return this->delegate_.get(); }
  void SetConfigInfo(const std::map<std::string, std::map<std::string, std::string>> *config_info) {
    config_info_ = config_info;
  }
  void SetPrepareSessionFlag(bool is_prepare_session) { is_prepare_session_ = is_prepare_session; }
  const std::vector<Tensor *> &GetTensors() const { return this->tensors_; }
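
  // Training, gradient, and export hooks. In this base session they are stubs that return
  // RET_ERROR (or benign defaults) and are presumably overridden by training-capable sessions;
  // see also is_train_session_ and the TransferSession friend declaration below.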
  virtual int Train() { return mindspore::lite::RET_ERROR; }
  virtual bool IsTrain() { return false; }
  virtual int Eval() { return mindspore::lite::RET_OK; }
  virtual bool IsEval() { return true; }
  virtual int SetLearningRate(float learning_rate) { return mindspore::lite::RET_ERROR; }
  virtual float GetLearningRate() { return 0.0; }
  virtual int SetupVirtualBatch(int virtual_batch_multiplier, float lr = -1.0f, float momentum = -1.0f) {
    return mindspore::lite::RET_ERROR;
  }
  virtual std::vector<lite::Tensor *> GetPredictions() const {
    std::vector<lite::Tensor *> outputs;
    return outputs;
  }
  virtual int Export(const std::string &file_name, lite::ModelType model_type = lite::MT_TRAIN,
                     lite::QuantizationType quant_type = lite::QT_DEFAULT, lite::FormatType = lite::FT_FLATBUFFERS,
                     std::vector<std::string> out_put_tensor_name = {}) {
    return mindspore::lite::RET_ERROR;
  }
  virtual int Export(Buffer *model_buffer, lite::ModelType model_type = lite::MT_TRAIN,
                     lite::QuantizationType quant_type = lite::QT_DEFAULT, lite::FormatType = lite::FT_FLATBUFFERS,
                     std::vector<std::string> out_put_tensor_name = {}) {
    return mindspore::lite::RET_ERROR;
  }
  virtual int ExportWeightsCollaborateWithMicro(const std::string &file_name,
                                                lite::ModelType model_type = lite::MT_TRAIN,
                                                lite::FormatType = lite::FT_FLATBUFFERS, bool enable_fp16 = false,
                                                const std::vector<std::string> &changeable_weights_name = {}) {
    return mindspore::lite::RET_ERROR;
  }
  virtual std::vector<lite::Tensor *> GetFeatureMaps() const {
    std::vector<lite::Tensor *> features;
    return features;
  }
  virtual std::vector<lite::Tensor *> GetTrainableParams() const {
    std::vector<lite::Tensor *> train_params;
    return train_params;
  }
  virtual int UpdateFeatureMaps(const std::vector<lite::Tensor *> &features) { return mindspore::lite::RET_ERROR; }
  virtual std::vector<lite::Tensor *> GetGradients() const {
    std::vector<lite::Tensor *> gradients;
    return gradients;
  }
  virtual int ApplyGradients(const std::vector<lite::Tensor *> &gradients) { return mindspore::lite::RET_ERROR; }
  virtual std::vector<lite::Tensor *> GetOptimizerParams() const {
    std::vector<lite::Tensor *> params;
    return params;
  }
  virtual int SetOptimizerParams(const std::vector<lite::Tensor *> &params) { return mindspore::lite::RET_ERROR; }

  bool GetKeepModelBuf() { return keep_model_buf_; }

  void SetKeepModelBuf(bool keep_model_buf) { keep_model_buf_ = keep_model_buf; }

  void SetModelId(std::string id) { model_id_ = id; }
  int UpdateWeights(std::vector<lite::Tensor *> modify_tensors);

 protected:
  static void ConvertTensorsQuantParam(const schema::Tensor *src_tensor, lite::Tensor *dst_tensor);
  int CheckTensorValid(lite::Tensor *dst_tensor);
  int ConvertTensorsData(const lite::LiteModel *model, size_t tensor_index, lite::Tensor *dst_tensor);
  lite::Tensor *ConvertTensor(const schema::Tensor &src_tensor);
  int ConvertTensors(const lite::Model *model);
  void InitGraphInOutTensorsMap(const lite::Model *model);
  void InitGraphInputTensors(const lite::Model *model);
  void InitGraphInputMSTensors();
  void InitGraphOutputTensors(const lite::Model *model);
  void InitGraphInputMap(const lite::Model *model);
  void InitGraphOutputNodeMap(const lite::Model *model);
  void InitGraphOutputTensorMap(const lite::Model *model);
  int UpdateInputShapeMap();
  int ResizeInputs(const std::vector<mindspore::lite::Tensor *> &inputs, const std::vector<std::vector<int>> &dims);
  int SetAllocatorForDelegateKernels(const kernel::KernelExec *kernel);
  int PrepareKernels(const Model *model);
  static int DrawGraph(kernel::SubGraphKernel *graph);
  int SetTensorInitRefCount();
  int SetNonTaiCallSubgraphOutputInitRefCount();
  void SetInitRefCountOfPartialSubgraphInputs(const Model *model);
  static int ReSizeKernels(
    const std::vector<kernel::KernelExec *> &kernels,
    const std::unordered_map<Tensor *, Tensor *> &isolate_input_map = std::unordered_map<Tensor *, Tensor *>());
  static void FreePackOpWeight(const std::vector<kernel::KernelExec *> &kernels);
  static void MarkSharedWeight(const std::vector<kernel::KernelExec *> &kernels);
  std::string ParseWeightPath();
  bool IsMmapEnable() const;
  virtual int InitExecutor();

 private:
  int PreCheck(Model *model);
  void ResetInputsShape(const std::vector<std::vector<int>> &dims);
  int InitContext(const std::shared_ptr<InnerContext> &context);
  int CreateTensorRTDelegate();
  int CreateNPUDelegate();
  int CreateNNAPIDelegate();
  int CreateCoreMLDelegate();
  int CreateNNRTDelegate();
  int InitDelegate();
  int InitGPURuntime();
  int InitSharedThreadPool();
  int ReshapeWeightTensor(lite::Tensor *orig_tensor, lite::Tensor *new_tensor);

 private:
  int IsolateOutputTensor();
  bool IsIsolatedSubGraph(const kernel::KernelExec *kernel);
  void UpdateGraphOutputMap(const std::vector<kernel::KernelExec *> &kernel);
  void UpdateLinkInfoForIsolateOutput();
  void SynIsolateInOutputDataType();
  std::unordered_map<Tensor *, Tensor *> isolate_graph_output_map_; /* <calculate-tensor, graph-output-tensor> */
  std::unordered_map<Tensor *, Tensor *> isolate_input_map_;        /* <calculate-tensor, src-subgraph-input-tensor> */

 private:
  int InitRuntimeAllocator();
  int RuntimeAllocatorSetData();
  void RuntimeAllocatorInitGraphOutput();
  void RuntimeAllocatorInitSubgraph();
  virtual int RuntimeAllocatorValid();
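  // Shared allocator created by InitRuntimeAllocator(); it stays nullptr when the
  // runtime-allocator path is not in use (see the infer_along_running_ note below).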
  RuntimeAllocatorPtr runtime_allocator_ = nullptr;

 private:
  int InitAscend(const std::shared_ptr<InnerContext> &context);

 protected:
  std::shared_ptr<InnerContext> context_ = nullptr;
  mindspore::Context *ms_context_ = nullptr;
  std::vector<kernel::KernelExec *> kernels_;
  std::vector<Tensor *> tensors_;
  // graph input tensors
  std::vector<Tensor *> inputs_;
  // graph output tensors
  std::vector<Tensor *> outputs_;
  // graph input MSTensors
  std::vector<mindspore::lite::Tensor *> input_vec_;
  // graph input tensor name -- input tensors
  std::unordered_map<std::string, mindspore::lite::Tensor *> input_map_;
  // graph input tensor -- input tensor shape
  std::unordered_map<Tensor *, std::vector<int>> input_shape_map_;
  // graph output node name -- output tensors
  std::unordered_map<std::string, std::vector<mindspore::lite::Tensor *>> output_node_map_;

  std::vector<std::string> output_tensor_names_;
  // graph output tensor name -- output tensor
  std::unordered_map<std::string, mindspore::lite::Tensor *> output_tensor_map_;

  Executor *executor_ = nullptr;
  Model *model_ = nullptr;
  std::atomic<bool> is_running_ = {false};
  bool is_train_session_ = false;
  bool is_prepare_session_ = false;
  friend class TransferSession;
#if GPU_OPENCL
  opencl::OpenCLRuntimeInnerWrapper *opencl_runtime_wrapper_{nullptr};
#endif

  // In dynamic-shape scenarios, this flag indicates when shape inference is performed for kernels. If true,
  // shape inference is not triggered by 'Resize' but is deferred to graph execution, and it is re-run only
  // when an existing input has actually changed. If false, shape inference is performed eagerly in 'Resize',
  // and at run time the outputs are checked to decide whether it must be repeated. Currently the value is true
  // only in the pure-CPU scenario, where both 'is_control_flow_' and 'is_train_session_' are false and
  // 'runtime_allocator_' is nullptr.
  bool infer_along_running_{true};
  int is_infershape_{RET_ERROR};
  bool is_control_flow_ = false;
  bool keep_model_buf_ = false;
  std::unique_ptr<SchedulerCb> sched_cb_;
  std::shared_ptr<Delegate> delegate_ = nullptr;
  int delegate_device_type_ = -1;  // -1: not specified; 0: CPU; 1: GPU; 2: NPU
  std::map<std::string, TypeId> *execution_plan_ = nullptr;
  const std::map<std::string, std::map<std::string, std::string>> *config_info_ = nullptr;
  std::vector<kernel::KernelExec *> non_tail_call_kernels_;
  std::string model_id_;
  std::string runner_id_;
  int worker_id_;
  bool is_shared_weight_ = false;
  bool model_buff_changed_ = false;
};
}  // namespace lite
}  // namespace mindspore
#endif  // MINDSPORE_LITE_SRC_RUNTIME_LITE_SESSION_H_