• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright 2021-2023 Huawei Technologies Co., Ltd
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "thread/core_affinity.h"
18 #include <string.h>
19 #include <cstdlib>
20 #include <string>
21 #include <algorithm>
22 #ifdef MS_COMPILE_IOS
23 #include <sys/types.h>
24 #include <sys/sysctl.h>
25 #include <mach/machine.h>
26 #endif  // MS_COMPILE_IOS
27 #include "thread/threadpool.h"
28 #ifdef _WIN32
29 #include <windows.h>
30 #endif
31 
32 namespace mindspore {
#ifdef _WIN32
// Affinity masks for Windows, indexed by core id. The list is sized and filled
// by CoreAffinity::InitHardwareCoreInfo() and read by SetWindowsSelfAffinity().
std::vector<DWORD_PTR> WindowsCoreList{};
#endif
36 
// CPU micro-architectures that GetArch() can report for a "CPU part" id.
// The numeric value of an enumerator is used as a tie-breaker when cores with
// the same maximum frequency are sorted, so the order below must not change.
enum Arch {
  UnKnown_Arch = 0,
  Cortex_A5 = 1,
  Cortex_A7 = 2,
  Cortex_A8 = 3,
  Cortex_A9 = 4,
  Cortex_A12 = 5,
  Cortex_A15 = 6,
  Cortex_A17 = 7,
  Cortex_A32 = 8,
  Cortex_A34 = 9,
  Cortex_A35 = 10,
  Cortex_A53 = 11,
  Cortex_A55 = 12,
  Cortex_A57 = 13,
  Cortex_A65 = 14,
  Cortex_A72 = 15,
  Cortex_A73 = 16,
  Cortex_A75 = 17,
  Cortex_A76 = 18,
  Cortex_A77 = 19,
  Cortex_A78 = 20,
  Cortex_X1 = 21
};
61 
62 typedef struct {
63   int core_id;
64   int max_freq;
65   enum Arch arch;
66 } CpuInfo;
67 
GetArch(int cpu_part)68 enum Arch GetArch(int cpu_part) {
69   typedef struct {
70     int part;
71     enum Arch arch;
72   } ArchSet;
73   // https://en.wikipedia.org/wiki/Comparison_of_ARMv7-A_cores
74   // https://en.wikipedia.org/wiki/Comparison_of_ARMv8-A_cores
75   std::vector<ArchSet> arch_set = {
76     {0x800, Cortex_A73},  // High-performance Kryo 260 (r10p2) / Kryo 280 (r10p1) "Gold" -> Cortex-A73
77     {0x801, Cortex_A53},  // Low-power Kryo 260 / 280 "Silver" -> Cortex-A53
78     {0x802, Cortex_A75},  // High-performance Kryo 385 "Gold" -> Cortex-A75
79     {0x803, Cortex_A55},  // Low-power Kryo 385 "Silver" -> Cortex-A55r0
80     {0x804, Cortex_A76},  // High-performance Kryo 485 "Gold" / "Gold Prime" -> Cortex-A76
81     {0x805, Cortex_A55},  // Low-performance Kryo 485 "Silver" -> Cortex-A55
82     {0xC05, Cortex_A5},
83     {0xC07, Cortex_A7},
84     {0xC08, Cortex_A8},
85     {0xC09, Cortex_A9},
86     {0xC0C, Cortex_A12},
87     {0xC0D, Cortex_A12},
88     {0xC0E, Cortex_A17},
89     {0xC0F, Cortex_A15},
90     {0xD01, Cortex_A32},  // also Huawei Kunpeng 920
91                           // series taishan_v110 when not
92                           // on android
93     {0xD02, Cortex_A34},
94     {0xD03, Cortex_A53},
95     {0xD04, Cortex_A35},
96     {0xD05, Cortex_A55},
97     {0xD06, Cortex_A65},
98     {0xD07, Cortex_A57},
99     {0xD08, Cortex_A72},
100     {0xD09, Cortex_A73},
101     {0xD0A, Cortex_A75},
102     {0xD0B, Cortex_A76},
103     {0xD0D, Cortex_A77},
104     {0xD0E, Cortex_A76},  // Cortex-A76AE
105     {0xD40, Cortex_A76},  // Kirin 980 Big/Medium cores -> Cortex-A76
106     {0xD41, Cortex_A78},
107     {0xD43, Cortex_A65},  // Cortex-A65AE
108     {0xD44, Cortex_X1}};
109   auto item =
110     std::find_if(arch_set.begin(), arch_set.end(), [&cpu_part](const ArchSet &a) { return a.part == cpu_part; });
111   return item != arch_set.end() ? item->arch : UnKnown_Arch;
112 }
113 
ParseCpuPart(const char * line,int start,int size)114 int ParseCpuPart(const char *line, int start, int size) {
115   int cpu_part = 0;
116   for (int i = start; i < size && i < start + PARSE_CPU_GAP; i++) {
117     char c = line[i];
118     int d;
119     if (c >= '0' && c <= '9') {
120       d = c - '0';
121     } else if ((c - 'A') < (PARSE_CPU_HEX - PARSE_CPU_DEC)) {
122       d = PARSE_CPU_DEC + (c - 'A');
123     } else if ((c - 'a') < (PARSE_CPU_HEX - PARSE_CPU_DEC)) {
124       d = PARSE_CPU_DEC + (c - 'a');
125     } else {
126       THREAD_ERROR("CPU part in /proc/cpuinfo is ignored due to unexpected non-hex character");
127       break;
128     }
129     cpu_part = cpu_part * PARSE_CPU_HEX + d;
130   }
131   return cpu_part;
132 }
133 
SetArch(std::vector<CpuInfo> * freq_set,int core_num)134 int SetArch(std::vector<CpuInfo> *freq_set, int core_num) {
135   if (core_num <= 0) {
136     THREAD_ERROR("core_num must be greater than 0.");
137     return THREAD_ERROR;
138   }
139   FILE *fp = fopen("/proc/cpuinfo", "r");
140   if (fp == nullptr) {
141     THREAD_ERROR("read /proc/cpuinfo error.");
142     return THREAD_ERROR;
143   }
144   std::vector<Arch> archs;
145   archs.resize(core_num);
146   const int max_line_size = 1024;
147   char line[max_line_size] = {0};
148   int count = 0;
149   while (!feof(fp)) {
150     if (fgets(line, max_line_size, fp)) {
151       // line start with "CPU part"
152       if (0 == memcmp(line, "CPU part", 8)) {
153         // get number like 0xD03
154         for (int i = 0; i < max_line_size - 4; ++i) {
155           if (line[i] == '0' && line[i + 1] == 'x') {
156             int cpu_part = ParseCpuPart(line, i + 2, max_line_size);
157             enum Arch arch = GetArch(cpu_part);
158             if (arch == UnKnown_Arch) {
159               THREAD_ERROR("cpu's architecture is unknown.");
160               (void)fclose(fp);
161               return THREAD_ERROR;
162             }
163             count++;
164             if (count > core_num) {
165               THREAD_ERROR("number of cpu_part in /proc/cpuinfo is more than core_num.");
166               (void)fclose(fp);
167               return THREAD_ERROR;
168             }
169             archs[count - 1] = arch;
170           }
171         }
172       }
173     }
174   }
175   if (count < core_num) {
176     THREAD_ERROR("number of cpu_part in /proc/cpuinfo is less than core_num.");
177     (void)fclose(fp);
178     return THREAD_ERROR;
179   }
180   for (int i = 0; i < core_num; ++i) {
181     (*freq_set)[i].arch = archs[i];
182   }
183   (void)fclose(fp);
184   return THREAD_OK;
185 }
186 
GetMaxFrequency(int core_id)187 int GetMaxFrequency(int core_id) {
188   FILE *fp;
189   std::vector<std::string> paths = {"/sys/devices/system/cpu/cpufreq/stats/cpu",
190                                     "/sys/devices/system/cpu/cpufreq/stats/cpu", "/sys/devices/system/cpu/cpu"};
191   std::vector<std::string> files = {"/time_in_state", "/cpufreq/stats/time_in_state", "/cpufreq/cpuinfo_max_freq"};
192   for (size_t i = 0; i < paths.size(); ++i) {
193     std::string file = paths[i] + std::to_string(core_id) + files[i];
194     fp = fopen(file.c_str(), "rb");
195     if (fp != nullptr) {
196       break;
197     }
198   }
199   int max_freq = -1;
200   if (fp == nullptr) {
201     THREAD_ERROR("open system file failed");
202     return max_freq;
203   }
204   while (feof(fp) == 0) {
205     int freq = 0;
206     int tmp = fscanf(fp, "%d", &freq);
207     if (tmp != 1) {
208       break;
209     }
210     if (freq > max_freq) {
211       max_freq = freq;
212     }
213   }
214   (void)fclose(fp);
215   return max_freq;
216 }
217 
GetServerFrequency()218 float CoreAffinity::GetServerFrequency() {
219   float max_freq = -1.0f;
220 #if defined(__APPLE__) || defined(__MACOSX) || defined(_MSC_VER) || defined(_WIN32) || defined(ANDROID) || \
221   defined(__ANDROID__) || defined(MS_COMPILE_OHOS)
222   return max_freq;  // MHz
223 #else
224   // The CPU cores in the server of the numa architecture are the same.
225   // The main frequency of the first core is obtained.
226   FILE *fp = popen("cat /proc/cpuinfo|grep cpu\\ MHz | sed -e 's/.*:[^0-9]//'", "r");
227   if (fp == nullptr) {
228     THREAD_ERROR("get system cpuinfo frequency failed");
229     return max_freq;
230   }
231 
232   while (feof(fp) == 0) {
233     float freq = 0;
234     int tmp = fscanf(fp, "%f", &freq);
235     if (tmp != 1) {
236       break;
237     }
238     if (max_freq < freq) {
239       max_freq = freq;
240     }
241   }
242   (void)pclose(fp);
243   return max_freq;  // MHz
244 #endif
245 }
246 
247 #ifdef _WIN32
SetWindowsAffinity(HANDLE thread,DWORD_PTR mask)248 void SetWindowsAffinity(HANDLE thread, DWORD_PTR mask) {
249   THREAD_INFO("Bind thread[%ld] to core[%lld].", GetThreadId(thread), mask);
250   SetThreadAffinityMask(thread, mask);
251   return;
252 }
253 
SetWindowsSelfAffinity(uint64_t core_id)254 void SetWindowsSelfAffinity(uint64_t core_id) {
255   if (WindowsCoreList.size() <= core_id) {
256     return;
257   }
258   DWORD_PTR mask = WindowsCoreList[core_id];
259   SetWindowsAffinity(GetCurrentThread(), mask);
260   return;
261 }
262 #endif
263 
InitHardwareCoreInfo()264 int CoreAffinity::InitHardwareCoreInfo() {
265   core_num_ = std::thread::hardware_concurrency();
266 #ifdef _WIN32
267   WindowsCoreList.resize(core_num_);
268   for (size_t i = 0; i < core_num_; i++) {
269     WindowsCoreList[i] = 1 << i;
270   }
271 #endif
272   std::vector<CpuInfo> freq_set;
273   freq_set.resize(core_num_);
274   core_freq_.resize(core_num_);
275   for (size_t i = 0; i < core_num_; ++i) {
276     int max_freq = GetMaxFrequency(i);
277     core_freq_[i] = max_freq;
278     freq_set[i].core_id = i;
279     freq_set[i].max_freq = max_freq;
280     freq_set[i].arch = UnKnown_Arch;
281   }
282   int err_code = SetArch(&freq_set, core_num_);
283   if (err_code != THREAD_OK) {
284     THREAD_INFO("set arch failed, ignoring arch.");
285   }
286   // sort core id by frequency into descending order
287   for (size_t i = 0; i < core_num_; ++i) {
288     for (size_t j = i + 1; j < core_num_; ++j) {
289       if (freq_set[i].max_freq < freq_set[j].max_freq ||
290           (freq_set[i].max_freq == freq_set[j].max_freq && freq_set[i].arch <= freq_set[j].arch)) {
291         CpuInfo temp = freq_set[i];
292         freq_set[i] = freq_set[j];
293         freq_set[j] = temp;
294       }
295     }
296   }
297   higher_num_ = 0;
298   sorted_id_.clear();
299   int max_freq = freq_set.front().max_freq;
300   for (const auto &info : freq_set) {
301     THREAD_INFO("sorted core id: %d, max frequency: %d, arch: %d", info.core_id, info.max_freq, info.arch);
302     sorted_id_.push_back(info.core_id);
303     higher_num_ += info.max_freq == max_freq ? 1 : 0;
304   }
305   return THREAD_OK;
306 }
307 
GetCoreId(size_t thread_num,BindMode bind_mode) const308 std::vector<int> CoreAffinity::GetCoreId(size_t thread_num, BindMode bind_mode) const {
309   std::vector<int> bind_id;
310 #ifdef _WIN32
311   return bind_id;
312 #elif defined(BIND_CORE)
313   if (core_num_ != sorted_id_.size()) {
314     THREAD_ERROR("init sorted core id failed");
315     return bind_id;
316   }
317   if (bind_mode == Power_Higher) {
318     for (size_t i = 0; i < thread_num; ++i) {
319       bind_id.push_back(sorted_id_[i % core_num_]);
320     }
321   } else if (bind_mode == Power_Middle) {
322     for (size_t i = 0; i < thread_num; ++i) {
323       bind_id.push_back(sorted_id_[(i + higher_num_) % core_num_]);
324     }
325   } else {
326     return bind_id;
327   }
328 #endif
329   return bind_id;
330 }
SetCoreId(const std::vector<int> & core_list)331 void CoreAffinity::SetCoreId(const std::vector<int> &core_list) { bind_id_ = core_list; }
332 
InitBindCoreId(size_t thread_num,BindMode bind_mode)333 int CoreAffinity::InitBindCoreId(size_t thread_num, BindMode bind_mode) {
334 #ifndef _WIN32
335   bind_id_.clear();
336   bind_id_ = GetCoreId(thread_num, bind_mode);
337   if (bind_id_.empty()) {
338     return THREAD_ERROR;
339   }
340 #endif
341   return THREAD_OK;
342 }
343 
#ifdef _WIN32
// Windows build: this overload does nothing and always reports success.
int CoreAffinity::SetAffinity() { return THREAD_OK; }
#elif defined(BIND_CORE)
// Applies the affinity mask `cpu_set` to the thread `thread_id`.
// Android (API level >= 21) and OHOS use sched_setaffinity() on the kernel
// thread id; on Android below API level 21 nothing is done and THREAD_OK is
// returned. Apple platforms are not supported and yield THREAD_ERROR. All other
// platforms use pthread_setaffinity_np().
// Returns THREAD_OK on success and THREAD_ERROR when the system call fails.
int CoreAffinity::SetAffinity(const pthread_t &thread_id, cpu_set_t *cpu_set) {
#if defined(__ANDROID__) || defined(MS_COMPILE_OHOS)
#if (__ANDROID_API__ >= 21) || defined(MS_COMPILE_OHOS)
  const auto tid = pthread_gettid_np(thread_id);
  THREAD_INFO("thread: %d, mask: %lu", tid, cpu_set->__bits[0]);
  const int bind_ret = sched_setaffinity(tid, sizeof(cpu_set_t), cpu_set);
  if (bind_ret != THREAD_OK) {
    THREAD_ERROR("bind thread %d to cpu failed. ERROR %d", tid, bind_ret);
    return THREAD_ERROR;
  }
#endif
#elif defined(__APPLE__)
  THREAD_ERROR("not bind thread to apple's cpu.");
  return THREAD_ERROR;
#else
  const int bind_ret = pthread_setaffinity_np(thread_id, sizeof(cpu_set_t), cpu_set);
  if (bind_ret != THREAD_OK) {
    THREAD_ERROR("set thread: %lu to cpu failed", thread_id);
    return THREAD_ERROR;
  }
#endif
  return THREAD_OK;
}
#endif
372 
FreeScheduleThreads(const std::vector<Worker * > & workers)373 int CoreAffinity::FreeScheduleThreads(const std::vector<Worker *> &workers) {
374 #ifdef _WIN32
375   return THREAD_OK;
376 #elif defined(BIND_CORE)
377   cpu_set_t mask;
378   CPU_ZERO(&mask);
379   for (int i : bind_id_) {
380     CPU_SET(i, &mask);
381   }
382   for (auto worker : workers) {
383     int ret = SetAffinity(worker->handle(), &mask);
384     if (ret != THREAD_OK) {
385       return THREAD_ERROR;
386     }
387   }
388 #endif  // BIND_CORE
389   return THREAD_OK;
390 }
391 
BindThreadsToCoreList(const std::vector<Worker * > & workers)392 int CoreAffinity::BindThreadsToCoreList(const std::vector<Worker *> &workers) {
393 #ifdef _WIN32
394   return THREAD_OK;
395 #elif defined(BIND_CORE)
396   if (bind_id_.empty()) {
397     THREAD_INFO("bind id is empty, it will not bind thread");
398     return THREAD_OK;
399   }
400   size_t window = bind_id_.size();
401   size_t thread_num = workers.size();
402   for (size_t i = 0; i < thread_num; ++i) {
403     cpu_set_t mask;
404     CPU_ZERO(&mask);
405     CPU_SET(bind_id_[i % window], &mask);
406     // affinity mask determines the CPU core which it is eligible to run
407     int ret = SetAffinity(workers[i]->handle(), &mask);
408     if (ret != THREAD_OK) {
409       return THREAD_ERROR;
410     }
411     THREAD_INFO("set thread[%zu] affinity to core[%d] success", i, bind_id_[i % window]);
412     workers[i]->set_frequency(core_freq_[bind_id_[i]]);
413   }
414 #endif  // BIND_CORE
415   return THREAD_OK;
416 }
417 
BindProcess(BindMode bind_mode)418 int CoreAffinity::BindProcess(BindMode bind_mode) {
419 #ifdef _WIN32
420   return THREAD_OK;
421 #elif defined(BIND_CORE)
422   if (bind_id_.empty()) {
423     // initializes bind id before bind currently process
424     THREAD_ERROR("bind id is empty");
425     return THREAD_ERROR;
426   }
427   cpu_set_t mask;
428   CPU_ZERO(&mask);
429   if (bind_mode != Power_NoBind) {
430     CPU_SET(bind_id_.front(), &mask);
431   } else {
432     for (int id : bind_id_) {
433       CPU_SET(id, &mask);
434     }
435   }
436   return SetAffinity(pthread_self(), &mask);
437 #else
438   return THREAD_OK;
439 #endif  // BIND_CORE
440 }
441 
BindThreads(const std::vector<Worker * > & workers,BindMode bind_mode)442 int CoreAffinity::BindThreads(const std::vector<Worker *> &workers, BindMode bind_mode) {
443   if (bind_id_.empty()) {
444     int ret = InitBindCoreId(workers.size(), bind_mode);
445     if (ret != THREAD_OK) {
446       THREAD_ERROR("init bind id failed");
447       return THREAD_ERROR;
448     }
449   }
450   if (bind_mode == Power_NoBind) {
451     return FreeScheduleThreads(workers);
452   } else {
453     return BindThreadsToCoreList(workers);
454   }
455 }
456 
BindThreads(const std::vector<Worker * > & workers,const std::vector<int> & core_list)457 int CoreAffinity::BindThreads(const std::vector<Worker *> &workers, const std::vector<int> &core_list) {
458   // the size of core_list doesn't have to be the same as the size of workers(thread_num)
459   bind_id_ = core_list;
460   return BindThreadsToCoreList(workers);
461 }
462 }  // namespace mindspore
463