/**
 * Copyright 2020-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "src/litert/inner_context.h"
#include <algorithm>
#include <memory>
#include <thread>
#include "include/errorcode.h"
#include "src/common/log_adapter.h"
#include "src/common/log_util.h"
#ifdef ENABLE_MINDRT
#include "thread/actor_threadpool.h"
#ifndef MS_COMPILE_IOS
#include "thread/parallel_threadpool.h"
#endif
#endif
#ifdef SUPPORT_NPU
#include "include/HiAiModelManagerType.h"
#endif
#ifdef GPU_OPENCL
#include "src/litert/kernel/gpu/opencl/opencl_runtime.h"
#endif
#include "src/litert/inner_allocator.h"
#include "nnacl/cxx_utils.h"
#include "src/litert/thread_pool_reuse_manager.h"

namespace mindspore::lite {
namespace {
constexpr int kMaxInnerContextDeviceNums = 3;
constexpr int kNumCoreNumTimes = 5;
constexpr int kDefaultParallelNum = 2;
}  // namespace

InnerContext::InnerContext() {
#if defined(ENABLE_ARM) && defined(ENABLE_FP16)
  CpuInfo cpu_info;
  device_and_pkg_support_fp16_ = cpu_info.ArmIsSupportFp16();
#endif
}

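// Wire the nnacl execution environment to this context: its allocator, its
// thread pool, and the default nnacl allocation and parallel-launch callbacks.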
void InnerContext::InitExecEnv() {
  exec_env_.allocator_ = this->allocator.get();
  exec_env_.thread_pool_ = this->thread_pool_;
  exec_env_.Alloc = nnacl::DefaultAllocatorMalloc;
  exec_env_.Free = nnacl::DefaultAllocatorFree;
  exec_env_.ParallelLaunch = nnacl::DefaultThreadPoolParallelLunch;
}

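// Create the thread pool for this context, preferring a cached pool from
// ThreadPoolReuseManager. If none matches, a ParallelThreadPool, plain
// ThreadPool or ActorThreadPool is built depending on inter_op_parallel_num_,
// thread_num_ and the build configuration.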
int InnerContext::CreateThreadPool(bool is_control_flow) {
  if (this->thread_pool_ == nullptr) {
    bind_mode_ = Power_NoBind;
    if (this->IsDeviceTypeEnabled(DT_CPU)) {
      bind_mode_ = static_cast<BindMode>(this->GetDeviceInfo(DT_CPU).cpu_device_info_.cpu_bind_mode_);
    }
    this->inter_op_parallel_num_ =
      (!this->enable_parallel_ && this->inter_op_parallel_num_ > 1) ? this->inter_op_parallel_num_ : 1;
    actor_thread_num_ = (inter_op_parallel_num_ > 1) ? 1 : (this->enable_parallel_ ? kDefaultParallelNum : 1);
    thread_pool_ = ThreadPoolReuseManager::GetInstance()->GetThreadPool(
      actor_thread_num_, inter_op_parallel_num_, thread_num_, bind_mode_, affinity_core_list_, runner_id_);
    if (thread_pool_ == nullptr) {
#ifdef ENABLE_MINDRT
#ifndef MS_COMPILE_IOS
      if (inter_op_parallel_num_ > 1) {
        thread_pool_ = ParallelThreadPool::CreateThreadPool(this->inter_op_parallel_num_, this->thread_num_,
                                                            this->affinity_core_list_, bind_mode_, runner_id_);
      } else if (thread_num_ == 1 && !IsCpuFloat16Enabled() && !is_control_flow) {
        thread_pool_ = ThreadPool::CreateThreadPool(thread_num_ - 1);
        thread_pool_->SetCpuAffinity(static_cast<mindspore::BindMode>(bind_mode_));
      } else {
#endif
        thread_pool_ = ActorThreadPool::CreateThreadPool(actor_thread_num_, this->thread_num_,
                                                         this->affinity_core_list_, bind_mode_);
#ifndef MS_COMPILE_IOS
      }
#endif
#else
      thread_pool_ = ThreadPool::CreateThreadPool(thread_num_ - 1);
      thread_pool_->SetCpuAffinity(static_cast<mindspore::BindMode>(bind_mode_));
#endif
    }
    MS_CHECK_TRUE_MSG(thread_pool_ != nullptr, RET_NULL_PTR, "Create ThreadPool failed");
    InitExecEnv();
  }

  return RET_OK;
}
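
// Initialize the context: validate the configuration, create the default
// allocator if none was supplied, clamp an out-of-range NPU frequency to
// HIGH, and create the thread pool.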
int InnerContext::Init() {
  if (this->IsValid() != RET_OK) {
    MS_LOG(ERROR) << "Context is not valid";
    return RET_NOT_SUPPORT;
  }

  if (this->allocator == nullptr) {
    this->allocator = mindspore::Allocator::Create();
    CHECK_NULL_RETURN(this->allocator);
  }
  if (IsDeviceTypeEnabled(DT_NPU)) {
    MS_LOG(DEBUG) << "NPU enabled.";
#ifdef SUPPORT_NPU
    for (auto &device_ctx : this->device_list_) {
      if (device_ctx.device_type_ == DT_NPU &&
          device_ctx.device_info_.npu_device_info_.frequency_ != hiai::AiModelDescription_Frequency_LOW &&
          device_ctx.device_info_.npu_device_info_.frequency_ != hiai::AiModelDescription_Frequency_MEDIUM &&
          device_ctx.device_info_.npu_device_info_.frequency_ != hiai::AiModelDescription_Frequency_HIGH &&
          device_ctx.device_info_.npu_device_info_.frequency_ != hiai::AiModelDescription_Frequency_EXTREME) {
        MS_LOG(WARNING) << "Invalid NPU frequency " << device_ctx.device_info_.npu_device_info_.frequency_
                        << ", reset to HIGH.";
        device_ctx.device_info_.npu_device_info_.frequency_ = hiai::AiModelDescription_Frequency_HIGH;
      }
    }
#endif
  }

  if (IsDeviceTypeEnabled(DT_NNRT)) {
    MS_LOG(DEBUG) << "NNRT enabled.";
  }

  if (CreateThreadPool(false)) {
    MS_LOG(ERROR) << "CreateThreadPool failed.";
    return RET_ERROR;
  }

  return RET_OK;
}

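// Delete the thread pool owned by this context and reset the pointer.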
void InnerContext::DeleteThreadPool() {
  MS_LOG(INFO) << "delete ThreadPool.";
  if (thread_pool_ != nullptr) {
    delete thread_pool_;
    thread_pool_ = nullptr;
  }
}

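// On destruction the thread pool is handed back to ThreadPoolReuseManager for
// possible reuse rather than deleted here.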
InnerContext::~InnerContext() {
  MS_LOG(INFO) << "delete InnerContext.";
  ThreadPoolReuseManager::GetInstance()->RetrieveThreadPool(actor_thread_num_, inter_op_parallel_num_, thread_num_,
                                                            bind_mode_, affinity_core_list_, thread_pool_);
  thread_pool_ = nullptr;
  MS_LOG(INFO) << "delete InnerContext done.";
}

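// Check that the device list, thread numbers and CPU bind mode are usable on
// this build; thread_num_ is clamped to kNumCoreNumTimes times the core count.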
int InnerContext::IsValid() {
  if (this->device_list_.empty()) {
    MS_LOG(ERROR) << "Device list is empty.";
    return RET_NOT_SUPPORT;
  }
  if (this->device_list_.size() > kMaxInnerContextDeviceNums) {
    MS_LOG(ERROR) << "Not support device list more than " << kMaxInnerContextDeviceNums;
    return RET_NOT_SUPPORT;
  }
  if (thread_num_ < 1) {
    MS_LOG(ERROR) << "Thread num smaller than 1 is not allowed.";
    return RET_NOT_SUPPORT;
  }
  int core_num = static_cast<int>(std::max<size_t>(1, std::thread::hardware_concurrency()));
  int Threshold_thread_num = kNumCoreNumTimes * core_num;
  if (thread_num_ > Threshold_thread_num) {
    MS_LOG(WARNING) << "Thread num: " << thread_num_ << " is more than " << kNumCoreNumTimes
                    << " times the core num: " << core_num << ", change it to " << Threshold_thread_num
                    << ". Please check whether the thread num is reasonable.";
    thread_num_ = Threshold_thread_num;
  }

  if (inter_op_parallel_num_ < 1) {
    MS_LOG(ERROR) << "InterOpParallelNum smaller than 1 is not allowed.";
    return RET_NOT_SUPPORT;
  }

  if (!IsAllDeviceTypeValid()) {
    MS_LOG(ERROR) << "Device type should be one of DT_CPU, DT_GPU or DT_NPU.";
    return RET_NOT_SUPPORT;
  }

  if (IsCpuBindModeInvalid()) {
    MS_LOG(ERROR) << "CPU bind mode should be one of NO_BIND, HIGHER_CPU or MID_CPU.";
    return RET_NOT_SUPPORT;
  }

#ifndef SUPPORT_GPU
  if (IsDeviceTypeEnabled(DT_GPU)) {
    MS_LOG(ERROR) << "GPU is not supported.";
    return RET_NOT_SUPPORT;
  }
#endif
#if !defined(SUPPORT_NPU) && !defined(SUPPORT_NNAPI)
  if (IsDeviceTypeEnabled(DT_NPU)) {
    MS_LOG(ERROR) << "NPU is not supported.";
    return RET_NOT_SUPPORT;
  }
#endif
#ifdef DELEGATE_CLIP
  if (this->delegate != nullptr) {
    MS_LOG(ERROR) << unsupport_delegate_log;
    return RET_NOT_SUPPORT;
  }
#endif
  return RET_OK;
}

bool InnerContext::IsCpuFloat16Enabled() const {
  if (!IsDeviceTypeEnabled(DT_CPU)) {
    return false;
  }
  if (!device_and_pkg_support_fp16_) {
    return false;
  }
  return GetDeviceInfo(DT_CPU).cpu_device_info_.enable_float16_;
}

bool InnerContext::IsGpuFloat16Enabled() const {
#ifdef GPU_OPENCL
  if (!IsDeviceTypeEnabled(DT_GPU)) {
    return false;
  }
  opencl::OpenCLRuntimeInnerWrapper wrapper;
  if (!wrapper.GetInstance()->GetFp16Enable()) {
    return false;
  }
  return GetDeviceInfo(DT_GPU).gpu_device_info_.enable_float16_;
#else
  return false;
#endif
}

bool InnerContext::IsNpuFloat16Enabled() const {
  if (!IsDeviceTypeEnabled(DT_NPU)) {
    return false;
  }
  if (!device_and_pkg_support_fp16_) {
    return false;
  }
  return GetDeviceInfo(DT_NPU).npu_device_info_.enable_float16_;
}

bool InnerContext::IsGLTextureEnabled() const {
#ifdef GPU_OPENCL
  if (!IsDeviceTypeEnabled(DT_GPU)) {
    return false;
  }
  return GetDeviceInfo(DT_GPU).gpu_device_info_.enable_gl_texture_;
#else
  return false;
#endif
}

bool InnerContext::IsDeviceTypeEnabled(DeviceType type) const {
  return device_list_.end() !=
         std::find_if(device_list_.begin(), device_list_.end(),
                      [type](const DeviceContext &device) { return device.device_type_ == type; });
}

bool InnerContext::IsProviderEnabled() const {
  return this->device_list_.end() !=
         std::find_if(this->device_list_.begin(), this->device_list_.end(),
                      [](const DeviceContext &device) { return !device.provider_.empty(); });
}

bool InnerContext::IsAllDeviceTypeValid() const {
  return std::all_of(this->device_list_.begin(), this->device_list_.end(), [](const DeviceContext &device) {
    return device.device_type_ >= DT_CPU && device.device_type_ < DT_END;
  });
}

bool InnerContext::IsCpuBindModeInvalid() const {
  return this->device_list_.end() !=
         std::find_if(this->device_list_.begin(), this->device_list_.end(), [](const DeviceContext &device) {
           return device.device_type_ == DT_CPU && (device.device_info_.cpu_device_info_.cpu_bind_mode_ < NO_BIND ||
                                                    device.device_info_.cpu_device_info_.cpu_bind_mode_ > MID_CPU);
         });
}

int InnerContext::GetDelegateMode() const { return delegate_mode_; }

std::set<std::string> InnerContext::GetProviders() const {
  std::set<std::string> providers;
  for (auto &&device : device_list_) {
    if (!device.provider_.empty()) {
      providers.insert(device.provider_);
    }
  }
  return providers;
}

DeviceInfo InnerContext::GetDeviceInfo(DeviceType type) const {
  auto iter = std::find_if(device_list_.begin(), device_list_.end(),
                           [type](const DeviceContext &device) { return device.device_type_ == type; });
  if (iter == device_list_.end()) {
    return {};
  } else {
    return iter->device_info_;
  }
}

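// link_info_ maps a precursor pointer to the set of successor pointers that
// depend on it; the helpers below query and update that mapping.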
std::set<void *> InnerContext::GetLinkInfo(void *pre) const {
  auto iter = link_info_.find(pre);
  if (iter == link_info_.end()) {
    MS_LOG(DEBUG) << "Not found precursor in link information.";
    return {};
  }
  return iter->second;
}

std::unordered_map<void *, std::set<void *>> InnerContext::GetAllLinkInfo() const { return link_info_; }

void InnerContext::SetLinkInfo(void *pre, void *suc) {
  auto iter = link_info_.find(pre);
  if (iter != link_info_.end()) {
    (void)iter->second.insert(suc);
    return;
  }
  std::set<void *> suc_set{suc};
  link_info_[pre] = suc_set;
}

void InnerContext::SetAllLinkInfo(const std::unordered_map<void *, std::set<void *>> &all_link_info) {
  link_info_ = all_link_info;
}

void InnerContext::ReplaceLinkInfoReceiverWithNewOne(void *new_receiver, void *old_receiver) {
  for (auto &info : link_info_) {
    auto &receivers = info.second;
    auto iter = receivers.find(old_receiver);
    if (iter != receivers.end()) {
      (void)receivers.erase(iter);
      (void)receivers.insert(new_receiver);
    }
  }
}

void InnerContext::ReplaceLinkInfoSenderWithNewOne(void *new_sender, void *old_sender) {
  auto receiver_set = this->GetLinkInfo(old_sender);
  for (auto item : receiver_set) {
    this->SetLinkInfo(new_sender, item);
  }
}

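// Free-function helper that runs func over task_num tasks on the context's
// thread pool; returns RET_NULL_PTR if the pool has not been created yet.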
int ParallelLaunch(const InnerContext *context, const Func &func, Content content, int task_num) {
  ThreadPool *pool = context->thread_pool_;
  if (pool == nullptr) {
    MS_LOG(ERROR) << "thread pool is nullptr";
    return RET_NULL_PTR;
  }
  return pool->ParallelLaunch(func, content, task_num);
}
}  // namespace mindspore::lite