/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "src/litert/cxx_api/converters.h"
#include "src/common/log_adapter.h"
#include "src/common/utils.h"

namespace mindspore {
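// Limits and defaults used when converting a user-facing Context into a lite::InnerContext.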
constexpr static int kMaxNumOfDevices = 3;
constexpr static int kDefaultThreadNumTwo = 2;
constexpr static int kDefaultThreadNumFour = 4;
constexpr static int kDefaultInterOpParallelNum = 1;
constexpr static int kCoreNumThreshold = 32;

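// Copies the user-facing threading, affinity, and delegate settings from the public Context
// into the internal lite::InnerContext.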
void ContextUtils::SetContextAttr(int32_t thread_num, int32_t inter_op_parallel_num, bool enable_parallel,
                                  const std::vector<int32_t> &affinity_core_list, int delegate_mode,
                                  const std::shared_ptr<Delegate> &delegate, lite::InnerContext *inner_context,
                                  bool float_mode) {
  inner_context->thread_num_ = thread_num;
  inner_context->inter_op_parallel_num_ = inter_op_parallel_num;
  inner_context->enable_parallel_ = enable_parallel;
  inner_context->affinity_core_list_ = affinity_core_list;
  inner_context->delegate_mode_ = delegate_mode;
  inner_context->delegate = delegate;
  inner_context->float_mode = float_mode;
}

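// Registers a CPU device in the inner context, validating the requested core-affinity mode
// and attaching the caller-supplied allocator.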
Status ContextUtils::AddCpuDevice(const std::shared_ptr<Allocator> &allocator, int affinity_mode, bool enable_fp16,
                                  const std::string &provider, const std::string &provider_device,
                                  lite::InnerContext *inner_context) {
  inner_context->allocator = allocator;
  if (!IsAffinityModeValid(affinity_mode)) {
    MS_LOG(ERROR) << "Invalid affinity mode, only supports 0:no affinities, 1:big cores first, 2:little cores first.";
    return kLiteInputParamInvalid;
  }
  lite::DeviceInfo device_info;
  device_info.cpu_device_info_ = {enable_fp16, static_cast<lite::CpuBindMode>(affinity_mode)};
  inner_context->device_list_.push_back({lite::DT_CPU, device_info, provider, provider_device, allocator});
  return kSuccess;
}

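// Registers a GPU device, including the distributed-inference settings (rank id, group size)
// and the optional OpenGL texture-sharing context/display.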
Status ContextUtils::AddGpuDevice(bool enable_fp16, uint32_t device_id, int rank_id, int group_size,
                                  bool enable_gl_texture, void *gl_context, void *gl_display,
                                  const std::string &provider, const std::string &provider_device,
                                  const std::shared_ptr<Allocator> &allocator, lite::InnerContext *inner_context) {
  lite::DeviceInfo device_info;
  device_info.gpu_device_info_ = {enable_fp16,       device_id,  rank_id,   group_size,
                                  enable_gl_texture, gl_context, gl_display};
  inner_context->device_list_.push_back({lite::DT_GPU, device_info, provider, provider_device, allocator});
  return kSuccess;
}

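// Registers a Kirin NPU device with its FP16 flag and frequency setting.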
Status ContextUtils::AddNpuDevice(bool enable_fp16, int frequency, lite::InnerContext *inner_context) {
  lite::DeviceInfo device_info;
  device_info.npu_device_info_ = {enable_fp16, frequency};
  inner_context->device_list_.push_back({lite::DT_NPU, device_info});
  return kSuccess;
}

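// Registers an Ascend device, copying the device id and the dynamic batch/image-size options
// from the caller's AscendDeviceInfo.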
Status ContextUtils::AddAscendDevice(lite::InnerContext *inner_context, DeviceInfoContext *device) {
  lite::DeviceInfo device_info;
  auto ascend_context = device->Cast<AscendDeviceInfo>();
  device_info.ascend_device_info_ = {ascend_context->GetDeviceID(), ascend_context->GetDynamicBatchSize(),
                                     ascend_context->GetDynamicImageSize()};
  inner_context->device_list_.push_back({lite::DT_ASCEND, device_info});
  return kSuccess;
}

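// Registers a user-defined (custom) device, storing the DeviceInfoContext itself in the device list.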
Status ContextUtils::AddCustomDevice(lite::InnerContext *inner_context,
                                     const std::shared_ptr<DeviceInfoContext> &device) {
  lite::DeviceInfo device_info;
  device_info.custom_device_info_ = {device};
  inner_context->device_list_.push_back({lite::DeviceType::DT_CUSTOM, device_info});
  return kSuccess;
}

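// Registers an NNRt device and copies each user extension (name/value pair) into the
// inner context's representation.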
Status ContextUtils::AddNNRtDevice(lite::InnerContext *inner_context, size_t device_id, int performance_mode,
                                   int priority, bool enable_fp16, const std::vector<Extension> &extensions) {
  lite::DeviceInfo device_info = {0};
  device_info.nnrt_device_info_.device_id_ = device_id;
  device_info.nnrt_device_info_.performance_mode_ = performance_mode;
  device_info.nnrt_device_info_.priority_ = priority;
  device_info.nnrt_device_info_.enable_fp16_ = enable_fp16;
  for (const auto &src_extension : extensions) {
    lite::Extension dest_extension;
    dest_extension.name = src_extension.name;
    dest_extension.value = src_extension.value;
    device_info.nnrt_device_info_.extensions_.push_back(dest_extension);
  }
  inner_context->device_list_.push_back({lite::DT_NNRT, device_info});
  return kSuccess;
}

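// Fills in defaults for an unconfigured Context: an inter-op parallel num of 0 becomes 1, and a
// thread num of 0 becomes 2 (Android/iOS, or <= 32 cores) or 4 (more than 32 cores).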
void ContextUtils::ResetContextDefaultParam(Context *context) {
  if (context->GetInterOpParallelNum() == 0) {
    context->SetInterOpParallelNum(kDefaultInterOpParallelNum);
  }
  if (context->GetThreadNum() != 0) {
    return;
  }
  MS_LOG(INFO) << "thread num is 0, will set the optimal number of threads";
#if defined(__ANDROID__) || defined(MS_COMPILE_IOS)
  context->SetThreadNum(kDefaultThreadNumTwo);
  MS_LOG(INFO) << "Set the number of threads to " << kDefaultThreadNumTwo;
  return;
#endif
  auto core_num = lite::GetCoreNum();
  if (core_num <= kCoreNumThreshold) {
    context->SetThreadNum(kDefaultThreadNumTwo);
    MS_LOG(INFO) << "Set the number of threads to " << kDefaultThreadNumTwo;
  } else {
    context->SetThreadNum(kDefaultThreadNumFour);
    MS_LOG(INFO) << "Set the number of threads to " << kDefaultThreadNumFour;
  }
  return;
}

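// Converts the public Context into a lite::InnerContext: applies defaults, validates the device
// list size and the inter-op parallel num, copies the threading/delegate attributes, then
// registers each configured device through the Add*Device helpers above.
//
// Illustrative usage sketch (not part of this file; assumes the standard C++ API types):
//   auto context = std::make_shared<mindspore::Context>();
//   context->MutableDeviceInfo().push_back(std::make_shared<mindspore::CPUDeviceInfo>());
//   std::shared_ptr<mindspore::lite::InnerContext> inner = ContextUtils::Convert(context.get());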
std::shared_ptr<lite::InnerContext> ContextUtils::Convert(Context *context) {
  auto inner_context = std::make_shared<lite::InnerContext>();
  if ((context == nullptr) || (inner_context == nullptr)) {
    MS_LOG(ERROR) << "Invalid context pointers.";
    return nullptr;
  }
  ResetContextDefaultParam(context);
  auto device_list = context->MutableDeviceInfo();
  if (device_list.size() == 0 || device_list.size() > kMaxNumOfDevices) {
    MS_LOG(ERROR) << "Device num, support min: 1, max: " << kMaxNumOfDevices;
    return nullptr;
  }
  if (context->GetInterOpParallelNum() <= 0 || context->GetInterOpParallelNum() > context->GetThreadNum()) {
    MS_LOG(ERROR) << "Invalid inter op parallel num : " << context->GetInterOpParallelNum()
                  << " | thread num: " << context->GetThreadNum();
    return nullptr;
  }
#ifdef ENABLE_CLOUD_FUSION_INFERENCE
  inner_context->thread_num_ = context->GetThreadNum();
  inner_context->inter_op_parallel_num_ = context->GetInterOpParallelNum();
  inner_context->affinity_core_list_ = context->GetThreadAffinityCoreList();
#else
  SetContextAttr(context->GetThreadNum(), context->GetInterOpParallelNum(), context->GetEnableParallel(),
                 context->GetThreadAffinityCoreList(), static_cast<int>(context->GetBuiltInDelegate()),
                 context->GetDelegate(), inner_context.get(), context->GetMultiModalHW());
#endif
  inner_context->device_list_.clear();
  Status ret = kLiteError;
  for (auto &device : device_list) {
    MS_CHECK_TRUE_RET(device != nullptr, nullptr);
    if (device->GetDeviceType() == kCPU) {
      auto cpu_context = device->Cast<CPUDeviceInfo>();
      if (cpu_context->GetAllocator() == nullptr) {
        cpu_context->SetAllocator(Allocator::Create());
      }
      ret = AddCpuDevice(cpu_context->GetAllocator(), context->GetThreadAffinityMode(), cpu_context->GetEnableFP16(),
                         cpu_context->GetProvider(), cpu_context->GetProviderDevice(), inner_context.get());
    } else if (device->GetDeviceType() == kGPU) {
      auto gpu_context = device->Cast<GPUDeviceInfo>();
      bool enable_gl_texture = gpu_context->GetEnableGLTexture();
      void *gl_context = gpu_context->GetGLContext();
      void *gl_display = gpu_context->GetGLDisplay();
      ret =
        AddGpuDevice(gpu_context->GetEnableFP16(), gpu_context->GetDeviceID(), gpu_context->GetRankID(),
                     gpu_context->GetGroupSize(), enable_gl_texture, gl_context, gl_display, gpu_context->GetProvider(),
                     gpu_context->GetProviderDevice(), gpu_context->GetAllocator(), inner_context.get());
    } else if (device->GetDeviceType() == kKirinNPU) {
      auto npu_context = device->Cast<KirinNPUDeviceInfo>();
      ret = AddNpuDevice(npu_context->GetEnableFP16(), npu_context->GetFrequency(), inner_context.get());
    } else if (device->GetDeviceType() == kAscend) {
      ret = AddAscendDevice(inner_context.get(), device.get());
    } else if (device->GetDeviceType() == kCustomDevice) {
      ret = AddCustomDevice(inner_context.get(), device);
    } else if (device->GetDeviceType() == kNNRt) {
      auto nnrt_device_info = device->Cast<NNRTDeviceInfo>();
      ret = AddNNRtDevice(inner_context.get(), nnrt_device_info->GetDeviceID(),
                          nnrt_device_info->GetPerformanceMode(), nnrt_device_info->GetPriority(),
                          nnrt_device_info->GetEnableFP16(), nnrt_device_info->GetExtensions());
    }
    if (ret != kSuccess) {
      MS_LOG(ERROR) << "Add device failed!";
      return nullptr;
    }
  }
  return inner_context;
}
}  // namespace mindspore