/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
17 #include "thread/core_affinity.h"
18 #include <string.h>
19 #include <stdlib.h>
20 #include <string>
21 #include <algorithm>
22 #ifdef MS_COMPILE_IOS
23 #include <sys/types.h>
24 #include <sys/sysctl.h>
25 #include <mach/machine.h>
26 #endif  // MS_COMPILE_IOS
27 #include "thread/threadpool.h"
28 
29 namespace mindspore {
30 
// CPU core types that GetArch() can report for a "CPU part" id.
// The numeric order is significant: InitHardwareCoreInfo() compares these
// values to order cores whose maximum frequencies are equal.
enum Arch {
  UnKnown_Arch = 0,  // part id not present in the lookup table
  Cortex_A5 = 1,
  Cortex_A7 = 2,
  Cortex_A8 = 3,
  Cortex_A9 = 4,
  Cortex_A12 = 5,
  Cortex_A15 = 6,
  Cortex_A17 = 7,
  Cortex_A32 = 8,
  Cortex_A34 = 9,
  Cortex_A35 = 10,
  Cortex_A53 = 11,
  Cortex_A55 = 12,
  Cortex_A57 = 13,
  Cortex_A65 = 14,
  Cortex_A72 = 15,
  Cortex_A73 = 16,
  Cortex_A75 = 17,
  Cortex_A76 = 18,
  Cortex_A77 = 19,
  Cortex_A78 = 20,
  Cortex_X1 = 21
};
55 
56 typedef struct {
57   int core_id;
58   int max_freq;
59   enum Arch arch;
60 } CpuInfo;
61 
GetArch(int cpu_part)62 enum Arch GetArch(int cpu_part) {
63   typedef struct {
64     int part;
65     enum Arch arch;
66   } ArchSet;
67   // https://en.wikipedia.org/wiki/Comparison_of_ARMv7-A_cores
68   // https://en.wikipedia.org/wiki/Comparison_of_ARMv8-A_cores
69   std::vector<ArchSet> arch_set = {
70     {0x800, Cortex_A73},  // High-performance Kryo 260 (r10p2) / Kryo 280 (r10p1) "Gold" -> Cortex-A73
71     {0x801, Cortex_A53},  // Low-power Kryo 260 / 280 "Silver" -> Cortex-A53
72     {0x802, Cortex_A75},  // High-performance Kryo 385 "Gold" -> Cortex-A75
73     {0x803, Cortex_A55},  // Low-power Kryo 385 "Silver" -> Cortex-A55r0
74     {0x804, Cortex_A76},  // High-performance Kryo 485 "Gold" / "Gold Prime" -> Cortex-A76
75     {0x805, Cortex_A55},  // Low-performance Kryo 485 "Silver" -> Cortex-A55
76     {0xC05, Cortex_A5},
77     {0xC07, Cortex_A7},
78     {0xC08, Cortex_A8},
79     {0xC09, Cortex_A9},
80     {0xC0C, Cortex_A12},
81     {0xC0D, Cortex_A12},
82     {0xC0E, Cortex_A17},
83     {0xC0F, Cortex_A15},
84     {0xD01, Cortex_A32},  // also Huawei Kunpeng 920
85                           // series taishan_v110 when not
86                           // on android
87     {0xD02, Cortex_A34},
88     {0xD03, Cortex_A53},
89     {0xD04, Cortex_A35},
90     {0xD05, Cortex_A55},
91     {0xD06, Cortex_A65},
92     {0xD07, Cortex_A57},
93     {0xD08, Cortex_A72},
94     {0xD09, Cortex_A73},
95     {0xD0A, Cortex_A75},
96     {0xD0B, Cortex_A76},
97     {0xD0D, Cortex_A77},
98     {0xD0E, Cortex_A76},  // Cortex-A76AE
99     {0xD40, Cortex_A76},  // Kirin 980 Big/Medium cores -> Cortex-A76
100     {0xD41, Cortex_A78},
101     {0xD43, Cortex_A65},  // Cortex-A65AE
102     {0xD44, Cortex_X1}};
103   auto item =
104     std::find_if(arch_set.begin(), arch_set.end(), [&cpu_part](const ArchSet &a) { return a.part == cpu_part; });
105   return item != arch_set.end() ? item->arch : UnKnown_Arch;
106 }
107 
ParseCpuPart(const char * line,int start,int size)108 int ParseCpuPart(const char *line, int start, int size) {
109   int cpu_part = 0;
110   for (int i = start; i < size && i < start + 3; i++) {
111     char c = line[i];
112     int d;
113     if (c >= '0' && c <= '9') {
114       d = c - '0';
115     } else if ((c - 'A') < 6) {
116       d = 10 + (c - 'A');
117     } else if ((c - 'a') < 6) {
118       d = 10 + (c - 'a');
119     } else {
120       THREAD_ERROR("CPU part in /proc/cpuinfo is ignored due to unexpected non-hex character");
121       break;
122     }
123     cpu_part = cpu_part * 16 + d;
124   }
125   return cpu_part;
126 }
127 
SetArch(std::vector<CpuInfo> * freq_set,int core_num)128 int SetArch(std::vector<CpuInfo> *freq_set, int core_num) {
129   if (core_num <= 0) {
130     THREAD_ERROR("core_num must be greater than 0.");
131     return THREAD_ERROR;
132   }
133   FILE *fp = fopen("/proc/cpuinfo", "r");
134   if (fp == nullptr) {
135     THREAD_ERROR("read /proc/cpuinfo error.");
136     return THREAD_ERROR;
137   }
138   std::vector<Arch> archs;
139   archs.resize(core_num);
140   const int max_line_size = 1024;
141   char line[max_line_size] = {0};
142   int count = 0;
143   while (!feof(fp)) {
144     if (fgets(line, max_line_size, fp)) {
145       // line start with "CPU part"
146       if (0 == memcmp(line, "CPU part", 8)) {
147         // get number like 0xD03
148         for (int i = 0; i < max_line_size - 4; ++i) {
149           if (line[i] == '0' && line[i + 1] == 'x') {
150             int cpu_part = ParseCpuPart(line, i + 2, max_line_size);
151             enum Arch arch = GetArch(cpu_part);
152             if (arch == UnKnown_Arch) {
153               THREAD_ERROR("cpu's architecture is unknown.");
154               (void)fclose(fp);
155               return THREAD_ERROR;
156             }
157             count++;
158             if (count > core_num) {
159               THREAD_ERROR("number of cpu_part in /proc/cpuinfo is more than core_num.");
160               (void)fclose(fp);
161               return THREAD_ERROR;
162             }
163             archs[count - 1] = arch;
164           }
165         }
166       }
167     }
168   }
169   if (count < core_num) {
170     THREAD_ERROR("number of cpu_part in /proc/cpuinfo is less than core_num.");
171     (void)fclose(fp);
172     return THREAD_ERROR;
173   }
174   for (int i = 0; i < core_num; ++i) {
175     (*freq_set)[i].arch = archs[i];
176   }
177   (void)fclose(fp);
178   return THREAD_OK;
179 }
180 
GetMaxFrequency(int core_id)181 int GetMaxFrequency(int core_id) {
182   FILE *fp;
183   std::vector<std::string> paths = {"/sys/devices/system/cpu/cpufreq/stats/cpu",
184                                     "/sys/devices/system/cpu/cpufreq/stats/cpu", "/sys/devices/system/cpu/cpu"};
185   std::vector<std::string> files = {"/time_in_state", "/cpufreq/stats/time_in_state", "/cpufreq/cpuinfo_max_freq"};
186   for (size_t i = 0; i < paths.size(); ++i) {
187     std::string file = paths[i] + std::to_string(core_id) + files[i];
188     fp = fopen(file.c_str(), "rb");
189     if (fp != nullptr) {
190       break;
191     }
192   }
193   int max_freq = -1;
194   if (fp == nullptr) {
195     THREAD_ERROR("open system file failed");
196     return max_freq;
197   }
198   while (feof(fp) == 0) {
199     int freq = 0;
200     int tmp = fscanf(fp, "%d", &freq);
201     if (tmp != 1) {
202       break;
203     }
204     if (freq > max_freq) {
205       max_freq = freq;
206     }
207   }
208   (void)fclose(fp);
209   return max_freq;
210 }
211 
InitHardwareCoreInfo()212 int CoreAffinity::InitHardwareCoreInfo() {
213   core_num_ = std::thread::hardware_concurrency();
214   std::vector<CpuInfo> freq_set;
215   freq_set.resize(core_num_);
216   core_freq_.resize(core_num_);
217   for (size_t i = 0; i < core_num_; ++i) {
218     int max_freq = GetMaxFrequency(i);
219     core_freq_[i] = max_freq;
220     freq_set[i].core_id = i;
221     freq_set[i].max_freq = max_freq;
222     freq_set[i].arch = UnKnown_Arch;
223   }
224   int err_code = SetArch(&freq_set, core_num_);
225   if (err_code != THREAD_OK) {
226     THREAD_INFO("set arch failed, ignoring arch.");
227   }
228   // sort core id by frequency into descending order
229   for (size_t i = 0; i < core_num_; ++i) {
230     for (size_t j = i + 1; j < core_num_; ++j) {
231       if (freq_set[i].max_freq < freq_set[j].max_freq ||
232           (freq_set[i].max_freq == freq_set[j].max_freq && freq_set[i].arch <= freq_set[j].arch)) {
233         CpuInfo temp = freq_set[i];
234         freq_set[i] = freq_set[j];
235         freq_set[j] = temp;
236       }
237     }
238   }
239   higher_num_ = 0;
240   sorted_id_.clear();
241   int max_freq = freq_set.front().max_freq;
242   for (const auto &info : freq_set) {
243     THREAD_INFO("sorted core id: %d, max frequency: %d, arch: %d", info.core_id, info.max_freq, info.arch);
244     sorted_id_.push_back(info.core_id);
245     higher_num_ += info.max_freq == max_freq ? 1 : 0;
246   }
247   return THREAD_OK;
248 }
249 
GetCoreId(size_t thread_num,BindMode bind_mode)250 std::vector<int> CoreAffinity::GetCoreId(size_t thread_num, BindMode bind_mode) {
251   std::vector<int> bind_id;
252   if (core_num_ != sorted_id_.size()) {
253     THREAD_ERROR("init sorted core id failed");
254     return bind_id;
255   }
256   if (bind_mode == Power_Higher || bind_mode == Power_NoBind) {
257     for (size_t i = 0; i < thread_num; ++i) {
258       bind_id.push_back(sorted_id_[i % core_num_]);
259     }
260   } else if (bind_mode == Power_Middle) {
261     for (size_t i = 0; i < thread_num; ++i) {
262       bind_id.push_back(sorted_id_[(i + higher_num_) % core_num_]);
263     }
264   } else {
265     return bind_id;
266   }
267   return bind_id;
268 }
SetCoreId(const std::vector<int> & core_list)269 void CoreAffinity::SetCoreId(const std::vector<int> &core_list) { bind_id_ = core_list; }
270 
InitBindCoreId(size_t thread_num,BindMode bind_mode)271 int CoreAffinity::InitBindCoreId(size_t thread_num, BindMode bind_mode) {
272   bind_id_.clear();
273   bind_id_ = GetCoreId(thread_num, bind_mode);
274   if (bind_id_.empty()) {
275     return THREAD_ERROR;
276   }
277   return THREAD_OK;
278 }
279 
#ifdef BIND_CORE
// Applies the affinity mask *cpu_set to the thread identified by thread_id.
// Android: uses sched_setaffinity() when __ANDROID_API__ >= 21; on lower API
// levels nothing is done and THREAD_OK is returned.
// Apple: binding is not performed and THREAD_ERROR is returned.
// Elsewhere: uses pthread_setaffinity_np().
// Returns THREAD_OK on success, THREAD_ERROR when the call reports failure.
int CoreAffinity::SetAffinity(const pthread_t &thread_id, cpu_set_t *cpu_set) const {
#if defined(__ANDROID__)
#if __ANDROID_API__ >= 21
  THREAD_INFO("thread: %d, mask: %lu", pthread_gettid_np(thread_id), cpu_set->__bits[0]);
  const int status = sched_setaffinity(pthread_gettid_np(thread_id), sizeof(cpu_set_t), cpu_set);
  if (status == THREAD_OK) {
    return THREAD_OK;
  }
  THREAD_ERROR("bind thread %d to cpu failed. ERROR %d", pthread_gettid_np(thread_id), status);
  return THREAD_ERROR;
#else
  return THREAD_OK;
#endif  // __ANDROID_API__ >= 21
#elif defined(__APPLE__)
  THREAD_ERROR("not bind thread to apple's cpu.");
  return THREAD_ERROR;
#else
  const int status = pthread_setaffinity_np(thread_id, sizeof(cpu_set_t), cpu_set);
  if (status == THREAD_OK) {
    return THREAD_OK;
  }
  THREAD_ERROR("set thread: %lu to cpu failed", thread_id);
  return THREAD_ERROR;
#endif  // __ANDROID__ / __APPLE__
}
#endif  // BIND_CORE
306 
FreeScheduleThreads(const std::vector<Worker * > & workers) const307 int CoreAffinity::FreeScheduleThreads(const std::vector<Worker *> &workers) const {
308 #ifdef BIND_CORE
309   cpu_set_t mask;
310   CPU_ZERO(&mask);
311   for (int i : bind_id_) {
312     CPU_SET(i, &mask);
313   }
314   for (auto worker : workers) {
315     int ret = SetAffinity(worker->handle(), &mask);
316     if (ret != THREAD_OK) {
317       return THREAD_ERROR;
318     }
319   }
320 #endif  // BIND_CORE
321   return THREAD_OK;
322 }
323 
BindThreadsToCoreList(const std::vector<Worker * > & workers) const324 int CoreAffinity::BindThreadsToCoreList(const std::vector<Worker *> &workers) const {
325 #ifdef BIND_CORE
326   if (bind_id_.empty()) {
327     THREAD_ERROR("bind id is empty");
328     return THREAD_ERROR;
329   }
330   size_t window = bind_id_.size();
331   size_t thread_num = workers.size();
332   for (size_t i = 0; i < thread_num; ++i) {
333     cpu_set_t mask;
334     CPU_ZERO(&mask);
335     CPU_SET(bind_id_[i % window], &mask);
336     // affinity mask determines the CPU core which it is eligible to run
337     int ret = SetAffinity(workers[i]->handle(), &mask);
338     if (ret != THREAD_OK) {
339       return THREAD_ERROR;
340     }
341     THREAD_INFO("set thread[%zu] affinity to core[%d] success", i, bind_id_[i % window]);
342     workers[i]->set_frequency(core_freq_[bind_id_[i]]);
343   }
344 #endif  // BIND_CORE
345   return THREAD_OK;
346 }
347 
BindProcess(BindMode bind_mode) const348 int CoreAffinity::BindProcess(BindMode bind_mode) const {
349 #ifdef BIND_CORE
350   if (bind_id_.empty()) {
351     // initializes bind id before bind currently process
352     THREAD_ERROR("bind id is empty");
353     return THREAD_ERROR;
354   }
355   cpu_set_t mask;
356   CPU_ZERO(&mask);
357   if (bind_mode != Power_NoBind) {
358     CPU_SET(bind_id_.front(), &mask);
359   } else {
360     for (int id : bind_id_) {
361       CPU_SET(id, &mask);
362     }
363   }
364   return SetAffinity(pthread_self(), &mask);
365 #else
366   return THREAD_OK;
367 #endif  // BIND_CORE
368 }
369 
BindThreads(const std::vector<Worker * > & workers,BindMode bind_mode)370 int CoreAffinity::BindThreads(const std::vector<Worker *> &workers, BindMode bind_mode) {
371   if (bind_id_.empty()) {
372     int ret = InitBindCoreId(workers.size(), bind_mode);
373     if (ret != THREAD_OK) {
374       THREAD_ERROR("init bind id failed");
375       return THREAD_ERROR;
376     }
377   }
378   if (bind_mode == Power_NoBind) {
379     return FreeScheduleThreads(workers);
380   } else {
381     return BindThreadsToCoreList(workers);
382   }
383 }
384 
BindThreads(const std::vector<Worker * > & workers,const std::vector<int> & core_list)385 int CoreAffinity::BindThreads(const std::vector<Worker *> &workers, const std::vector<int> &core_list) {
386   // the size of core_list doesn't have to be the same as the size of workers(thread_num)
387   bind_id_ = core_list;
388   return BindThreadsToCoreList(workers);
389 }
390 }  // namespace mindspore
391