1 /**
2 * Copyright 2021-2023 Huawei Technologies Co., Ltd
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "thread/core_affinity.h"
18 #include <string.h>
19 #include <cstdlib>
20 #include <string>
21 #include <algorithm>
22 #ifdef MS_COMPILE_IOS
23 #include <sys/types.h>
24 #include <sys/sysctl.h>
25 #include <mach/machine.h>
26 #endif // MS_COMPILE_IOS
27 #include "thread/threadpool.h"
28 #ifdef _WIN32
29 #include <windows.h>
30 #endif
31
32 namespace mindspore {
33 #ifdef _WIN32
// Per-core thread affinity masks, indexed by logical core id.
// Sized and filled by CoreAffinity::InitHardwareCoreInfo(); read by SetWindowsSelfAffinity().
std::vector<DWORD_PTR> WindowsCoreList;
35 #endif
36
// CPU micro-architectures that GetArch() can report.
// The numeric values matter: CoreAffinity::InitHardwareCoreInfo() compares them to order cores
// whose maximum frequency is identical, so enumerators must not be reordered.
enum Arch {
  UnKnown_Arch = 0,  // part number not present in the lookup table
  Cortex_A5 = 1,
  Cortex_A7 = 2,
  Cortex_A8 = 3,
  Cortex_A9 = 4,
  Cortex_A12 = 5,
  Cortex_A15 = 6,
  Cortex_A17 = 7,
  Cortex_A32 = 8,
  Cortex_A34 = 9,
  Cortex_A35 = 10,
  Cortex_A53 = 11,
  Cortex_A55 = 12,
  Cortex_A57 = 13,
  Cortex_A65 = 14,
  Cortex_A72 = 15,
  Cortex_A73 = 16,
  Cortex_A75 = 17,
  Cortex_A76 = 18,
  Cortex_A77 = 19,
  Cortex_A78 = 20,
  Cortex_X1 = 21
};
61
62 typedef struct {
63 int core_id;
64 int max_freq;
65 enum Arch arch;
66 } CpuInfo;
67
GetArch(int cpu_part)68 enum Arch GetArch(int cpu_part) {
69 typedef struct {
70 int part;
71 enum Arch arch;
72 } ArchSet;
73 // https://en.wikipedia.org/wiki/Comparison_of_ARMv7-A_cores
74 // https://en.wikipedia.org/wiki/Comparison_of_ARMv8-A_cores
75 std::vector<ArchSet> arch_set = {
76 {0x800, Cortex_A73}, // High-performance Kryo 260 (r10p2) / Kryo 280 (r10p1) "Gold" -> Cortex-A73
77 {0x801, Cortex_A53}, // Low-power Kryo 260 / 280 "Silver" -> Cortex-A53
78 {0x802, Cortex_A75}, // High-performance Kryo 385 "Gold" -> Cortex-A75
79 {0x803, Cortex_A55}, // Low-power Kryo 385 "Silver" -> Cortex-A55r0
80 {0x804, Cortex_A76}, // High-performance Kryo 485 "Gold" / "Gold Prime" -> Cortex-A76
81 {0x805, Cortex_A55}, // Low-performance Kryo 485 "Silver" -> Cortex-A55
82 {0xC05, Cortex_A5},
83 {0xC07, Cortex_A7},
84 {0xC08, Cortex_A8},
85 {0xC09, Cortex_A9},
86 {0xC0C, Cortex_A12},
87 {0xC0D, Cortex_A12},
88 {0xC0E, Cortex_A17},
89 {0xC0F, Cortex_A15},
90 {0xD01, Cortex_A32}, // also Huawei Kunpeng 920
91 // series taishan_v110 when not
92 // on android
93 {0xD02, Cortex_A34},
94 {0xD03, Cortex_A53},
95 {0xD04, Cortex_A35},
96 {0xD05, Cortex_A55},
97 {0xD06, Cortex_A65},
98 {0xD07, Cortex_A57},
99 {0xD08, Cortex_A72},
100 {0xD09, Cortex_A73},
101 {0xD0A, Cortex_A75},
102 {0xD0B, Cortex_A76},
103 {0xD0D, Cortex_A77},
104 {0xD0E, Cortex_A76}, // Cortex-A76AE
105 {0xD40, Cortex_A76}, // Kirin 980 Big/Medium cores -> Cortex-A76
106 {0xD41, Cortex_A78},
107 {0xD43, Cortex_A65}, // Cortex-A65AE
108 {0xD44, Cortex_X1}};
109 auto item =
110 std::find_if(arch_set.begin(), arch_set.end(), [&cpu_part](const ArchSet &a) { return a.part == cpu_part; });
111 return item != arch_set.end() ? item->arch : UnKnown_Arch;
112 }
113
ParseCpuPart(const char * line,int start,int size)114 int ParseCpuPart(const char *line, int start, int size) {
115 int cpu_part = 0;
116 for (int i = start; i < size && i < start + PARSE_CPU_GAP; i++) {
117 char c = line[i];
118 int d;
119 if (c >= '0' && c <= '9') {
120 d = c - '0';
121 } else if ((c - 'A') < (PARSE_CPU_HEX - PARSE_CPU_DEC)) {
122 d = PARSE_CPU_DEC + (c - 'A');
123 } else if ((c - 'a') < (PARSE_CPU_HEX - PARSE_CPU_DEC)) {
124 d = PARSE_CPU_DEC + (c - 'a');
125 } else {
126 THREAD_ERROR("CPU part in /proc/cpuinfo is ignored due to unexpected non-hex character");
127 break;
128 }
129 cpu_part = cpu_part * PARSE_CPU_HEX + d;
130 }
131 return cpu_part;
132 }
133
SetArch(std::vector<CpuInfo> * freq_set,int core_num)134 int SetArch(std::vector<CpuInfo> *freq_set, int core_num) {
135 if (core_num <= 0) {
136 THREAD_ERROR("core_num must be greater than 0.");
137 return THREAD_ERROR;
138 }
139 FILE *fp = fopen("/proc/cpuinfo", "r");
140 if (fp == nullptr) {
141 THREAD_ERROR("read /proc/cpuinfo error.");
142 return THREAD_ERROR;
143 }
144 std::vector<Arch> archs;
145 archs.resize(core_num);
146 const int max_line_size = 1024;
147 char line[max_line_size] = {0};
148 int count = 0;
149 while (!feof(fp)) {
150 if (fgets(line, max_line_size, fp)) {
151 // line start with "CPU part"
152 if (0 == memcmp(line, "CPU part", 8)) {
153 // get number like 0xD03
154 for (int i = 0; i < max_line_size - 4; ++i) {
155 if (line[i] == '0' && line[i + 1] == 'x') {
156 int cpu_part = ParseCpuPart(line, i + 2, max_line_size);
157 enum Arch arch = GetArch(cpu_part);
158 if (arch == UnKnown_Arch) {
159 THREAD_ERROR("cpu's architecture is unknown.");
160 (void)fclose(fp);
161 return THREAD_ERROR;
162 }
163 count++;
164 if (count > core_num) {
165 THREAD_ERROR("number of cpu_part in /proc/cpuinfo is more than core_num.");
166 (void)fclose(fp);
167 return THREAD_ERROR;
168 }
169 archs[count - 1] = arch;
170 }
171 }
172 }
173 }
174 }
175 if (count < core_num) {
176 THREAD_ERROR("number of cpu_part in /proc/cpuinfo is less than core_num.");
177 (void)fclose(fp);
178 return THREAD_ERROR;
179 }
180 for (int i = 0; i < core_num; ++i) {
181 (*freq_set)[i].arch = archs[i];
182 }
183 (void)fclose(fp);
184 return THREAD_OK;
185 }
186
GetMaxFrequency(int core_id)187 int GetMaxFrequency(int core_id) {
188 FILE *fp;
189 std::vector<std::string> paths = {"/sys/devices/system/cpu/cpufreq/stats/cpu",
190 "/sys/devices/system/cpu/cpufreq/stats/cpu", "/sys/devices/system/cpu/cpu"};
191 std::vector<std::string> files = {"/time_in_state", "/cpufreq/stats/time_in_state", "/cpufreq/cpuinfo_max_freq"};
192 for (size_t i = 0; i < paths.size(); ++i) {
193 std::string file = paths[i] + std::to_string(core_id) + files[i];
194 fp = fopen(file.c_str(), "rb");
195 if (fp != nullptr) {
196 break;
197 }
198 }
199 int max_freq = -1;
200 if (fp == nullptr) {
201 THREAD_ERROR("open system file failed");
202 return max_freq;
203 }
204 while (feof(fp) == 0) {
205 int freq = 0;
206 int tmp = fscanf(fp, "%d", &freq);
207 if (tmp != 1) {
208 break;
209 }
210 if (freq > max_freq) {
211 max_freq = freq;
212 }
213 }
214 (void)fclose(fp);
215 return max_freq;
216 }
217
GetServerFrequency()218 float CoreAffinity::GetServerFrequency() {
219 float max_freq = -1.0f;
220 #if defined(__APPLE__) || defined(__MACOSX) || defined(_MSC_VER) || defined(_WIN32) || defined(ANDROID) || \
221 defined(__ANDROID__) || defined(MS_COMPILE_OHOS)
222 return max_freq; // MHz
223 #else
224 // The CPU cores in the server of the numa architecture are the same.
225 // The main frequency of the first core is obtained.
226 FILE *fp = popen("cat /proc/cpuinfo|grep cpu\\ MHz | sed -e 's/.*:[^0-9]//'", "r");
227 if (fp == nullptr) {
228 THREAD_ERROR("get system cpuinfo frequency failed");
229 return max_freq;
230 }
231
232 while (feof(fp) == 0) {
233 float freq = 0;
234 int tmp = fscanf(fp, "%f", &freq);
235 if (tmp != 1) {
236 break;
237 }
238 if (max_freq < freq) {
239 max_freq = freq;
240 }
241 }
242 (void)pclose(fp);
243 return max_freq; // MHz
244 #endif
245 }
246
247 #ifdef _WIN32
SetWindowsAffinity(HANDLE thread,DWORD_PTR mask)248 void SetWindowsAffinity(HANDLE thread, DWORD_PTR mask) {
249 THREAD_INFO("Bind thread[%ld] to core[%lld].", GetThreadId(thread), mask);
250 SetThreadAffinityMask(thread, mask);
251 return;
252 }
253
SetWindowsSelfAffinity(uint64_t core_id)254 void SetWindowsSelfAffinity(uint64_t core_id) {
255 if (WindowsCoreList.size() <= core_id) {
256 return;
257 }
258 DWORD_PTR mask = WindowsCoreList[core_id];
259 SetWindowsAffinity(GetCurrentThread(), mask);
260 return;
261 }
262 #endif
263
InitHardwareCoreInfo()264 int CoreAffinity::InitHardwareCoreInfo() {
265 core_num_ = std::thread::hardware_concurrency();
266 #ifdef _WIN32
267 WindowsCoreList.resize(core_num_);
268 for (size_t i = 0; i < core_num_; i++) {
269 WindowsCoreList[i] = 1 << i;
270 }
271 #endif
272 std::vector<CpuInfo> freq_set;
273 freq_set.resize(core_num_);
274 core_freq_.resize(core_num_);
275 for (size_t i = 0; i < core_num_; ++i) {
276 int max_freq = GetMaxFrequency(i);
277 core_freq_[i] = max_freq;
278 freq_set[i].core_id = i;
279 freq_set[i].max_freq = max_freq;
280 freq_set[i].arch = UnKnown_Arch;
281 }
282 int err_code = SetArch(&freq_set, core_num_);
283 if (err_code != THREAD_OK) {
284 THREAD_INFO("set arch failed, ignoring arch.");
285 }
286 // sort core id by frequency into descending order
287 for (size_t i = 0; i < core_num_; ++i) {
288 for (size_t j = i + 1; j < core_num_; ++j) {
289 if (freq_set[i].max_freq < freq_set[j].max_freq ||
290 (freq_set[i].max_freq == freq_set[j].max_freq && freq_set[i].arch <= freq_set[j].arch)) {
291 CpuInfo temp = freq_set[i];
292 freq_set[i] = freq_set[j];
293 freq_set[j] = temp;
294 }
295 }
296 }
297 higher_num_ = 0;
298 sorted_id_.clear();
299 int max_freq = freq_set.front().max_freq;
300 for (const auto &info : freq_set) {
301 THREAD_INFO("sorted core id: %d, max frequency: %d, arch: %d", info.core_id, info.max_freq, info.arch);
302 sorted_id_.push_back(info.core_id);
303 higher_num_ += info.max_freq == max_freq ? 1 : 0;
304 }
305 return THREAD_OK;
306 }
307
GetCoreId(size_t thread_num,BindMode bind_mode) const308 std::vector<int> CoreAffinity::GetCoreId(size_t thread_num, BindMode bind_mode) const {
309 std::vector<int> bind_id;
310 #ifdef _WIN32
311 return bind_id;
312 #elif defined(BIND_CORE)
313 if (core_num_ != sorted_id_.size()) {
314 THREAD_ERROR("init sorted core id failed");
315 return bind_id;
316 }
317 if (bind_mode == Power_Higher) {
318 for (size_t i = 0; i < thread_num; ++i) {
319 bind_id.push_back(sorted_id_[i % core_num_]);
320 }
321 } else if (bind_mode == Power_Middle) {
322 for (size_t i = 0; i < thread_num; ++i) {
323 bind_id.push_back(sorted_id_[(i + higher_num_) % core_num_]);
324 }
325 } else {
326 return bind_id;
327 }
328 #endif
329 return bind_id;
330 }
SetCoreId(const std::vector<int> & core_list)331 void CoreAffinity::SetCoreId(const std::vector<int> &core_list) { bind_id_ = core_list; }
332
InitBindCoreId(size_t thread_num,BindMode bind_mode)333 int CoreAffinity::InitBindCoreId(size_t thread_num, BindMode bind_mode) {
334 #ifndef _WIN32
335 bind_id_.clear();
336 bind_id_ = GetCoreId(thread_num, bind_mode);
337 if (bind_id_.empty()) {
338 return THREAD_ERROR;
339 }
340 #endif
341 return THREAD_OK;
342 }
343
#ifdef _WIN32
// Windows variant: performs no work and always reports success.
int CoreAffinity::SetAffinity() { return THREAD_OK; }
#elif defined(BIND_CORE)
// Applies the CPU set `cpu_set` to the thread identified by `thread_id`.
//   Android / OHOS: sched_setaffinity() on the kernel thread id (compiled in only for
//                   __ANDROID_API__ >= 21 or OHOS; otherwise the call is a no-op).
//   Apple:          binding is not performed; THREAD_ERROR is returned.
//   Other:          pthread_setaffinity_np().
// Returns THREAD_OK on success and THREAD_ERROR when the underlying call fails.
int CoreAffinity::SetAffinity(const pthread_t &thread_id, cpu_set_t *cpu_set) {
#if defined(__ANDROID__) || defined(MS_COMPILE_OHOS)
#if (__ANDROID_API__ >= 21) || defined(MS_COMPILE_OHOS)
  const auto tid = pthread_gettid_np(thread_id);
  THREAD_INFO("thread: %d, mask: %lu", tid, cpu_set->__bits[0]);
  const int rc = sched_setaffinity(tid, sizeof(cpu_set_t), cpu_set);
  if (rc != THREAD_OK) {
    THREAD_ERROR("bind thread %d to cpu failed. ERROR %d", tid, rc);
    return THREAD_ERROR;
  }
#endif
#elif defined(__APPLE__)
  THREAD_ERROR("not bind thread to apple's cpu.");
  return THREAD_ERROR;
#else
  const int rc = pthread_setaffinity_np(thread_id, sizeof(cpu_set_t), cpu_set);
  if (rc != THREAD_OK) {
    THREAD_ERROR("set thread: %lu to cpu failed", thread_id);
    return THREAD_ERROR;
  }
#endif
  return THREAD_OK;
}
#endif
372
FreeScheduleThreads(const std::vector<Worker * > & workers)373 int CoreAffinity::FreeScheduleThreads(const std::vector<Worker *> &workers) {
374 #ifdef _WIN32
375 return THREAD_OK;
376 #elif defined(BIND_CORE)
377 cpu_set_t mask;
378 CPU_ZERO(&mask);
379 for (int i : bind_id_) {
380 CPU_SET(i, &mask);
381 }
382 for (auto worker : workers) {
383 int ret = SetAffinity(worker->handle(), &mask);
384 if (ret != THREAD_OK) {
385 return THREAD_ERROR;
386 }
387 }
388 #endif // BIND_CORE
389 return THREAD_OK;
390 }
391
BindThreadsToCoreList(const std::vector<Worker * > & workers)392 int CoreAffinity::BindThreadsToCoreList(const std::vector<Worker *> &workers) {
393 #ifdef _WIN32
394 return THREAD_OK;
395 #elif defined(BIND_CORE)
396 if (bind_id_.empty()) {
397 THREAD_INFO("bind id is empty, it will not bind thread");
398 return THREAD_OK;
399 }
400 size_t window = bind_id_.size();
401 size_t thread_num = workers.size();
402 for (size_t i = 0; i < thread_num; ++i) {
403 cpu_set_t mask;
404 CPU_ZERO(&mask);
405 CPU_SET(bind_id_[i % window], &mask);
406 // affinity mask determines the CPU core which it is eligible to run
407 int ret = SetAffinity(workers[i]->handle(), &mask);
408 if (ret != THREAD_OK) {
409 return THREAD_ERROR;
410 }
411 THREAD_INFO("set thread[%zu] affinity to core[%d] success", i, bind_id_[i % window]);
412 workers[i]->set_frequency(core_freq_[bind_id_[i]]);
413 }
414 #endif // BIND_CORE
415 return THREAD_OK;
416 }
417
BindProcess(BindMode bind_mode)418 int CoreAffinity::BindProcess(BindMode bind_mode) {
419 #ifdef _WIN32
420 return THREAD_OK;
421 #elif defined(BIND_CORE)
422 if (bind_id_.empty()) {
423 // initializes bind id before bind currently process
424 THREAD_ERROR("bind id is empty");
425 return THREAD_ERROR;
426 }
427 cpu_set_t mask;
428 CPU_ZERO(&mask);
429 if (bind_mode != Power_NoBind) {
430 CPU_SET(bind_id_.front(), &mask);
431 } else {
432 for (int id : bind_id_) {
433 CPU_SET(id, &mask);
434 }
435 }
436 return SetAffinity(pthread_self(), &mask);
437 #else
438 return THREAD_OK;
439 #endif // BIND_CORE
440 }
441
BindThreads(const std::vector<Worker * > & workers,BindMode bind_mode)442 int CoreAffinity::BindThreads(const std::vector<Worker *> &workers, BindMode bind_mode) {
443 if (bind_id_.empty()) {
444 int ret = InitBindCoreId(workers.size(), bind_mode);
445 if (ret != THREAD_OK) {
446 THREAD_ERROR("init bind id failed");
447 return THREAD_ERROR;
448 }
449 }
450 if (bind_mode == Power_NoBind) {
451 return FreeScheduleThreads(workers);
452 } else {
453 return BindThreadsToCoreList(workers);
454 }
455 }
456
BindThreads(const std::vector<Worker * > & workers,const std::vector<int> & core_list)457 int CoreAffinity::BindThreads(const std::vector<Worker *> &workers, const std::vector<int> &core_list) {
458 // the size of core_list doesn't have to be the same as the size of workers(thread_num)
459 bind_id_ = core_list;
460 return BindThreadsToCoreList(workers);
461 }
462 } // namespace mindspore
463