/**
 * Copyright 2020-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "src/litert/inner_context.h"
#include <algorithm>
#include <memory>
#include "include/errorcode.h"
#include "src/common/log_adapter.h"
#include "src/common/log_util.h"
#ifdef ENABLE_MINDRT
#include "thread/actor_threadpool.h"
#ifndef MS_COMPILE_IOS
#include "thread/parallel_threadpool.h"
#endif
#endif
#ifdef SUPPORT_NPU
#include "include/HiAiModelManagerType.h"
#endif
#ifdef GPU_OPENCL
#include "src/litert/kernel/gpu/opencl/opencl_runtime.h"
#endif
#include "src/litert/inner_allocator.h"
#include "nnacl/cxx_utils.h"
#include "src/litert/thread_pool_reuse_manager.h"

namespace mindspore::lite {
namespace {
constexpr int kMaxInnerContextDeviceNums = 3;
constexpr int kNumCoreNumTimes = 5;
constexpr int kDefaultParallelNum = 2;
}  // namespace

InnerContext::InnerContext() {
#if defined(ENABLE_ARM) && defined(ENABLE_FP16)
  CpuInfo cpu_info;
  device_and_pkg_support_fp16_ = cpu_info.ArmIsSupportFp16();
#endif
}

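// Wires the nnacl execution environment to this context: kernels reach the
// allocator and thread pool through exec_env_, and the default Alloc/Free/
// ParallelLaunch callbacks from nnacl::cxx_utils are installed.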
void InnerContext::InitExecEnv() {
  exec_env_.allocator_ = this->allocator.get();
  exec_env_.thread_pool_ = this->thread_pool_;
  exec_env_.Alloc = nnacl::DefaultAllocatorMalloc;
  exec_env_.Free = nnacl::DefaultAllocatorFree;
  exec_env_.ParallelLaunch = nnacl::DefaultThreadPoolParallelLunch;
}

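// Creates (or reuses) the backing thread pool. A cached pool is requested from
// ThreadPoolReuseManager first; if none matches, the choice depends on the build
// and the configuration:
//   - inter_op_parallel_num_ > 1               -> ParallelThreadPool (inter-op parallelism)
//   - single thread, no fp16, no control flow  -> plain ThreadPool with no extra workers
//   - otherwise                                -> ActorThreadPool (MindRT actors)
// Builds without ENABLE_MINDRT always fall back to a plain ThreadPool.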
int InnerContext::CreateThreadPool(bool is_control_flow) {
  if (this->thread_pool_ == nullptr) {
    bind_mode_ = Power_NoBind;
    if (this->IsDeviceTypeEnabled(DT_CPU)) {
      bind_mode_ = static_cast<BindMode>(this->GetDeviceInfo(DT_CPU).cpu_device_info_.cpu_bind_mode_);
    }
    this->inter_op_parallel_num_ =
      (!this->enable_parallel_ && this->inter_op_parallel_num_ > 1) ? this->inter_op_parallel_num_ : 1;
    actor_thread_num_ = (inter_op_parallel_num_ > 1) ? 1 : (this->enable_parallel_ ? kDefaultParallelNum : 1);
    thread_pool_ = ThreadPoolReuseManager::GetInstance()->GetThreadPool(
      actor_thread_num_, inter_op_parallel_num_, thread_num_, bind_mode_, affinity_core_list_, runner_id_);
    if (thread_pool_ == nullptr) {
#ifdef ENABLE_MINDRT
#ifndef MS_COMPILE_IOS
      if (inter_op_parallel_num_ > 1) {
        thread_pool_ = ParallelThreadPool::CreateThreadPool(this->inter_op_parallel_num_, this->thread_num_,
                                                            this->affinity_core_list_, bind_mode_, runner_id_);
      } else if (thread_num_ == 1 && !IsCpuFloat16Enabled() && !is_control_flow) {
        thread_pool_ = ThreadPool::CreateThreadPool(thread_num_ - 1);
        if (thread_pool_ != nullptr) {
          thread_pool_->SetCpuAffinity(static_cast<mindspore::BindMode>(bind_mode_));
        }
      } else {
#endif
        thread_pool_ = ActorThreadPool::CreateThreadPool(actor_thread_num_, this->thread_num_,
                                                         this->affinity_core_list_, bind_mode_);
#ifndef MS_COMPILE_IOS
      }
#endif
#else
      thread_pool_ = ThreadPool::CreateThreadPool(thread_num_ - 1);
      if (thread_pool_ != nullptr) {
        thread_pool_->SetCpuAffinity(static_cast<mindspore::BindMode>(bind_mode_));
      }
#endif
    }
    MS_CHECK_TRUE_MSG(thread_pool_ != nullptr, RET_NULL_PTR, "Create ThreadPool failed");
    InitExecEnv();
  }

  return RET_OK;
}
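
// Init() validates the context, lazily creates the default allocator, clamps an
// out-of-range NPU frequency to HIGH, and finally creates the thread pool.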
int InnerContext::Init() {
  if (this->IsValid() != RET_OK) {
    MS_LOG(ERROR) << "Context is not valid";
    return RET_NOT_SUPPORT;
  }

  if (this->allocator == nullptr) {
    this->allocator = mindspore::Allocator::Create();
    CHECK_NULL_RETURN(this->allocator);
  }
  if (IsDeviceTypeEnabled(DT_NPU)) {
    MS_LOG(DEBUG) << "NPU enabled.";
#ifdef SUPPORT_NPU
    for (auto &device_ctx : this->device_list_) {
      if (device_ctx.device_type_ == DT_NPU &&
          device_ctx.device_info_.npu_device_info_.frequency_ != hiai::AiModelDescription_Frequency_LOW &&
          device_ctx.device_info_.npu_device_info_.frequency_ != hiai::AiModelDescription_Frequency_MEDIUM &&
          device_ctx.device_info_.npu_device_info_.frequency_ != hiai::AiModelDescription_Frequency_HIGH &&
          device_ctx.device_info_.npu_device_info_.frequency_ != hiai::AiModelDescription_Frequency_EXTREME) {
        MS_LOG(WARNING) << "Invalid NPU frequency " << device_ctx.device_info_.npu_device_info_.frequency_
                        << ", reset it to HIGH.";
        device_ctx.device_info_.npu_device_info_.frequency_ = hiai::AiModelDescription_Frequency_HIGH;
      }
    }
#endif
  }

  if (IsDeviceTypeEnabled(DT_NNRT)) {
    MS_LOG(DEBUG) << "NNRT enabled.";
  }

  if (CreateThreadPool(false) != RET_OK) {
    MS_LOG(ERROR) << "CreateThreadPool failed.";
    return RET_ERROR;
  }

  return RET_OK;
}

void InnerContext::DeleteThreadPool() {
  MS_LOG(INFO) << "delete ThreadPool.";
  if (thread_pool_ != nullptr) {
    delete thread_pool_;
    thread_pool_ = nullptr;
  }
}

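// The destructor does not free the pool directly: it is handed back to
// ThreadPoolReuseManager so a later context with the same configuration
// (thread num, bind mode, affinity list) can reuse it. DeleteThreadPool()
// above is the path that actually destroys a pool.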
InnerContext::~InnerContext() {
  MS_LOG(INFO) << "delete InnerContext.";
  ThreadPoolReuseManager::GetInstance()->RetrieveThreadPool(actor_thread_num_, inter_op_parallel_num_, thread_num_,
                                                            bind_mode_, affinity_core_list_, thread_pool_);
  thread_pool_ = nullptr;
  MS_LOG(INFO) << "delete InnerContext done.";
}

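// Checks that the context is usable before Init() proceeds. A minimal sketch of a
// configuration this accepts (field names as used in this file; the exact set of
// DeviceContext members to fill may differ, see the context headers):
//   auto ctx = std::make_shared<InnerContext>();
//   ctx->thread_num_ = 2;
//   DeviceContext cpu_ctx;
//   cpu_ctx.device_type_ = DT_CPU;
//   ctx->device_list_.push_back(cpu_ctx);
//   if (ctx->Init() != RET_OK) { /* handle the error */ }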
int InnerContext::IsValid() {
  if (this->device_list_.empty()) {
    MS_LOG(ERROR) << "Device list is empty.";
    return RET_NOT_SUPPORT;
  }
  if (this->device_list_.size() > kMaxInnerContextDeviceNums) {
    MS_LOG(ERROR) << "Not support device list more than " << kMaxInnerContextDeviceNums;
    return RET_NOT_SUPPORT;
  }
  if (thread_num_ < 1) {
    MS_LOG(ERROR) << "Thread num smaller than 1 is not allowed.";
    return RET_NOT_SUPPORT;
  }
  int core_num = static_cast<int>(std::max<size_t>(1, std::thread::hardware_concurrency()));
  int threshold_thread_num = kNumCoreNumTimes * core_num;
  if (thread_num_ > threshold_thread_num) {
    MS_LOG(WARNING) << "Thread num: " << thread_num_ << " exceeds " << kNumCoreNumTimes
                    << " times the core num: " << core_num << ", cap it to " << threshold_thread_num
                    << ". Please check whether the thread num is reasonable.";
    thread_num_ = threshold_thread_num;
  }

  if (inter_op_parallel_num_ < 1) {
    MS_LOG(ERROR) << "InterOpParallelNum smaller than 1 is not allowed.";
    return RET_NOT_SUPPORT;
  }

  if (!IsAllDeviceTypeValid()) {
    MS_LOG(ERROR) << "Device type should be one of DT_CPU, DT_GPU or DT_NPU.";
    return RET_NOT_SUPPORT;
  }

  if (IsCpuBindModeInvalid()) {
    MS_LOG(ERROR) << "CPU bind mode should be one of NO_BIND, HIGHER_CPU or MID_CPU.";
    return RET_NOT_SUPPORT;
  }

#ifndef SUPPORT_GPU
  if (IsDeviceTypeEnabled(DT_GPU)) {
    MS_LOG(ERROR) << "GPU is not supported.";
    return RET_NOT_SUPPORT;
  }
#endif
#if !defined(SUPPORT_NPU) && !defined(SUPPORT_NNAPI)
  if (IsDeviceTypeEnabled(DT_NPU)) {
    MS_LOG(ERROR) << "NPU is not supported.";
    return RET_NOT_SUPPORT;
  }
#endif
#ifdef DELEGATE_CLIP
  if (this->delegate != nullptr) {
    MS_LOG(ERROR) << unsupport_delegate_log;
    return RET_NOT_SUPPORT;
  }
#endif
  return RET_OK;
}

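// The fp16 queries below require both platform support (detected in the
// constructor for CPU/NPU, queried from the OpenCL runtime for GPU) and the
// user-facing enable_float16_ flag on the matching device info.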
bool InnerContext::IsCpuFloat16Enabled() const {
  if (!IsDeviceTypeEnabled(DT_CPU)) {
    return false;
  }
  if (!device_and_pkg_support_fp16_) {
    return false;
  }
  return GetDeviceInfo(DT_CPU).cpu_device_info_.enable_float16_;
}

bool InnerContext::IsGpuFloat16Enabled() const {
#ifdef GPU_OPENCL
  if (!IsDeviceTypeEnabled(DT_GPU)) {
    return false;
  }
  opencl::OpenCLRuntimeInnerWrapper wrapper;
  if (!wrapper.GetInstance()->GetFp16Enable()) {
    return false;
  }
  return GetDeviceInfo(DT_GPU).gpu_device_info_.enable_float16_;
#else
  return false;
#endif
}

bool InnerContext::IsNpuFloat16Enabled() const {
  if (!IsDeviceTypeEnabled(DT_NPU)) {
    return false;
  }
  if (!device_and_pkg_support_fp16_) {
    return false;
  }
  return GetDeviceInfo(DT_NPU).npu_device_info_.enable_float16_;
}

bool InnerContext::IsGLTextureEnabled() const {
#ifdef GPU_OPENCL
  if (!IsDeviceTypeEnabled(DT_GPU)) {
    return false;
  }
  return GetDeviceInfo(DT_GPU).gpu_device_info_.enable_gl_texture_;
#else
  return false;
#endif
}

bool InnerContext::IsDeviceTypeEnabled(DeviceType type) const {
  return device_list_.end() !=
         std::find_if(device_list_.begin(), device_list_.end(),
                      [type](const DeviceContext &device) { return device.device_type_ == type; });
}

bool InnerContext::IsProviderEnabled() const {
  return this->device_list_.end() !=
         std::find_if(this->device_list_.begin(), this->device_list_.end(),
                      [](const DeviceContext &device) { return !device.provider_.empty(); });
}

bool InnerContext::IsAllDeviceTypeValid() const {
  return std::all_of(this->device_list_.begin(), this->device_list_.end(), [](const DeviceContext &device) {
    return device.device_type_ >= DT_CPU && device.device_type_ < DT_END;
  });
}

bool InnerContext::IsCpuBindModeInvalid() const {
  return this->device_list_.end() !=
         std::find_if(this->device_list_.begin(), this->device_list_.end(), [](const DeviceContext &device) {
           return device.device_type_ == DT_CPU && (device.device_info_.cpu_device_info_.cpu_bind_mode_ < NO_BIND ||
                                                    device.device_info_.cpu_device_info_.cpu_bind_mode_ > MID_CPU);
         });
}

int InnerContext::GetDelegateMode() const { return delegate_mode_; }

std::set<std::string> InnerContext::GetProviders() const {
  std::set<std::string> providers;
  for (auto &&device : device_list_) {
    if (!device.provider_.empty()) {
      providers.insert(device.provider_);
    }
  }
  return providers;
}

DeviceInfo InnerContext::GetDeviceInfo(DeviceType type) const {
  auto iter = std::find_if(device_list_.begin(), device_list_.end(),
                           [type](const DeviceContext &device) { return device.device_type_ == type; });
  if (iter == device_list_.end()) {
    return {};
  } else {
    return iter->device_info_;
  }
}

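// link_info_ stores dependency edges as a map from a precursor pointer to the set
// of its successor pointers. The helpers below query and rewrite that map, e.g.
// when an object is replaced and its edges must point at the new one.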
std::set<void *> InnerContext::GetLinkInfo(void *pre) const {
  auto iter = link_info_.find(pre);
  if (iter == link_info_.end()) {
    MS_LOG(DEBUG) << "Not found precursor in link information.";
    return {};
  }
  return iter->second;
}

std::unordered_map<void *, std::set<void *>> InnerContext::GetAllLinkInfo() const { return link_info_; }

void InnerContext::SetLinkInfo(void *pre, void *suc) {
  auto iter = link_info_.find(pre);
  if (iter != link_info_.end()) {
    (void)iter->second.insert(suc);
    return;
  }
  std::set<void *> suc_set{suc};
  link_info_[pre] = suc_set;
}

void InnerContext::SetAllLinkInfo(const std::unordered_map<void *, std::set<void *>> &all_link_info) {
  link_info_ = all_link_info;
}

void InnerContext::ReplaceLinkInfoReceiverWithNewOne(void *new_receiver, void *old_receiver) {
  for (auto &info : link_info_) {
    auto &receivers = info.second;
    auto iter = receivers.find(old_receiver);
    if (iter != receivers.end()) {
      (void)receivers.erase(iter);
      (void)receivers.insert(new_receiver);
    }
  }
}

void InnerContext::ReplaceLinkInfoSenderWithNewOne(void *new_sender, void *old_sender) {
  auto receiver_set = this->GetLinkInfo(old_sender);
  for (auto item : receiver_set) {
    this->SetLinkInfo(new_sender, item);
  }
}

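// Convenience wrapper used by kernels to split work across the context's thread
// pool. A usage sketch with a hypothetical task function, assuming Func's
// signature is int(void *cdata, int task_id, float lhs_scale, float rhs_scale)
// as declared in the thread pool headers:
//   int DoSliceTask(void *cdata, int task_id, float, float) {
//     auto *args = static_cast<SliceArgs *>(cdata);  // hypothetical argument struct
//     // process the task_id-th slice of args here
//     return RET_OK;
//   }
//   ...
//   auto ret = ParallelLaunch(ctx, DoSliceTask, &args, task_num);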
int ParallelLaunch(const InnerContext *context, const Func &func, Content content, int task_num) {
  ThreadPool *pool = context->thread_pool_;
  if (pool == nullptr) {
    MS_LOG(ERROR) << "thread pool is nullptr";
    return RET_NULL_PTR;
  }
  return pool->ParallelLaunch(func, content, task_num);
}
}  // namespace mindspore::lite