• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2018-2020 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "src/runtime/CPUUtils.h"
25 
26 #include "arm_compute/core/CPP/CPPTypes.h"
27 #include "arm_compute/core/Error.h"
28 #include "support/StringSupport.h"
29 
30 #include <algorithm>
31 #include <array>
32 #include <cstdlib>
33 #include <cstring>
34 #include <fstream>
35 #include <map>
36 
37 #ifndef BARE_METAL
38 /* C++ std::regex takes up a lot of space in the standalone builds */
39 #include <regex.h>
40 #include <thread>
41 #endif /* BARE_METAL */
42 
43 #if !defined(BARE_METAL) && (defined(__arm__) || defined(__aarch64__))
44 #include <sys/auxv.h>
45 
46 /* Get HWCAP bits from asm/hwcap.h */
47 #include <asm/hwcap.h>
48 #endif /* !BARE_METAL */
49 
50 /* Make sure the bits we care about are defined, just in case asm/hwcap.h is
51  * out of date (or for bare metal mode) */
52 #ifndef HWCAP_ASIMDHP
53 #define HWCAP_ASIMDHP (1 << 10) // NOLINT
54 #endif                          /* HWCAP_ASIMDHP */
55 
56 #ifndef HWCAP_CPUID
57 #define HWCAP_CPUID (1 << 11) // NOLINT
58 #endif                        /* HWCAP_CPUID */
59 
60 #ifndef HWCAP_ASIMDDP
61 #define HWCAP_ASIMDDP (1 << 20) // NOLINT
62 #endif                          /* HWCAP_ASIMDDP */
63 
64 namespace
65 {
66 using namespace arm_compute;
67 
68 #if !defined(BARE_METAL) && (defined(__arm__) || defined(__aarch64__))
69 
model_supports_dot(CPUModel model)70 bool model_supports_dot(CPUModel model)
71 {
72     switch(model)
73     {
74         case CPUModel::GENERIC_FP16_DOT:
75         case CPUModel::A55r1:
76         case CPUModel::X1:
77             return true;
78         default:
79             return false;
80     }
81 }
82 
model_supports_fp16(CPUModel model)83 bool model_supports_fp16(CPUModel model)
84 {
85     switch(model)
86     {
87         case CPUModel::GENERIC_FP16:
88         case CPUModel::GENERIC_FP16_DOT:
89         case CPUModel::A55r1:
90         case CPUModel::X1:
91             return true;
92         default:
93             return false;
94     }
95 }
96 
97 /* Convert an MIDR register value to a CPUModel enum value. */
midr_to_model(const unsigned int midr)98 CPUModel midr_to_model(const unsigned int midr)
99 {
100     CPUModel model = CPUModel::GENERIC;
101 
102     // Unpack variant and CPU ID
103     const int implementer = (midr >> 24) & 0xFF;
104     const int variant     = (midr >> 20) & 0xF;
105     const int cpunum      = (midr >> 4) & 0xFFF;
106 
107     if(implementer == 0x41) // Arm CPUs
108     {
109         // Only CPUs we have code paths for are detected.  All other CPUs can be safely classed as "GENERIC"
110         switch(cpunum)
111         {
112             case 0xd03: // A53
113             case 0xd04: // A35
114                 model = CPUModel::A53;
115                 break;
116             case 0xd05: // A55
117                 if(variant != 0)
118                 {
119                     model = CPUModel::A55r1;
120                 }
121                 else
122                 {
123                     model = CPUModel::A55r0;
124                 }
125                 break;
126             case 0xd44: // X1
127                 model = CPUModel::X1;
128                 break;
129             case 0xd09: // A73
130                 model = CPUModel::A73;
131                 break;
132             case 0xd0a: // A75
133                 if(variant != 0)
134                 {
135                     model = CPUModel::GENERIC_FP16_DOT;
136                 }
137                 else
138                 {
139                     model = CPUModel::GENERIC_FP16;
140                 }
141                 break;
142             case 0xd0b: // A76
143             case 0xd06:
144             case 0xd0c:
145             case 0xd0d:
146                 model = CPUModel::GENERIC_FP16_DOT;
147                 break;
148             default:
149                 model = CPUModel::GENERIC;
150                 break;
151         }
152     }
153     else if(implementer == 0x48)
154     {
155         // Only CPUs we have code paths for are detected.  All other CPUs can be safely classed as "GENERIC"
156         switch(cpunum)
157         {
158             case 0xd40: // A76
159                 model = CPUModel::GENERIC_FP16_DOT;
160                 break;
161             default:
162                 model = CPUModel::GENERIC;
163                 break;
164         }
165     }
166 
167     return model;
168 }
169 
populate_models_cpuid(std::vector<CPUModel> & cpusv)170 void populate_models_cpuid(std::vector<CPUModel> &cpusv)
171 {
172     // If the CPUID capability is present, MIDR information is provided in /sys. Use that to populate the CPU model table.
173     uint32_t i = 0;
174     for(auto &c : cpusv)
175     {
176         std::stringstream str;
177         str << "/sys/devices/system/cpu/cpu" << i++ << "/regs/identification/midr_el1";
178         std::ifstream file;
179         file.open(str.str(), std::ios::in);
180         if(file.is_open())
181         {
182             std::string line;
183             if(bool(getline(file, line)))
184             {
185                 const uint32_t midr = support::cpp11::stoul(line, nullptr, support::cpp11::NumericBase::BASE_16);
186                 c                   = midr_to_model(midr & 0xffffffff);
187             }
188         }
189     }
190 }
191 
populate_models_cpuinfo(std::vector<CPUModel> & cpusv)192 void populate_models_cpuinfo(std::vector<CPUModel> &cpusv)
193 {
194     regex_t proc_regex;
195     regex_t imp_regex;
196     regex_t var_regex;
197     regex_t part_regex;
198     regex_t rev_regex;
199 
200     memset(&proc_regex, 0, sizeof(regex_t));
201     memset(&imp_regex, 0, sizeof(regex_t));
202     memset(&var_regex, 0, sizeof(regex_t));
203     memset(&part_regex, 0, sizeof(regex_t));
204     memset(&rev_regex, 0, sizeof(regex_t));
205 
206     int ret_status = 0;
207     // If "long-form" cpuinfo is present, parse that to populate models.
208     ret_status |= regcomp(&proc_regex, R"(^processor.*([[:digit:]]+)$)", REG_EXTENDED);
209     ret_status |= regcomp(&imp_regex, R"(^CPU implementer.*0x(..)$)", REG_EXTENDED);
210     ret_status |= regcomp(&var_regex, R"(^CPU variant.*0x(.)$)", REG_EXTENDED);
211     ret_status |= regcomp(&part_regex, R"(^CPU part.*0x(...)$)", REG_EXTENDED);
212     ret_status |= regcomp(&rev_regex, R"(^CPU revision.*([[:digit:]]+)$)", REG_EXTENDED);
213     ARM_COMPUTE_UNUSED(ret_status);
214     ARM_COMPUTE_ERROR_ON_MSG(ret_status != 0, "Regex compilation failed.");
215 
216     std::ifstream file;
217     file.open("/proc/cpuinfo", std::ios::in);
218 
219     if(file.is_open())
220     {
221         std::string line;
222         int         midr   = 0;
223         int         curcpu = -1;
224 
225         while(bool(getline(file, line)))
226         {
227             std::array<regmatch_t, 2> match;
228             ret_status = regexec(&proc_regex, line.c_str(), 2, match.data(), 0);
229             if(ret_status == 0)
230             {
231                 std::string id     = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
232                 int         newcpu = support::cpp11::stoi(id, nullptr);
233 
234                 if(curcpu >= 0 && midr == 0)
235                 {
236                     // Matched a new CPU ID without any description of the previous one - looks like old format.
237                     return;
238                 }
239 
240                 if(curcpu >= 0)
241                 {
242                     cpusv[curcpu] = midr_to_model(midr);
243                 }
244 
245                 midr   = 0;
246                 curcpu = newcpu;
247 
248                 continue;
249             }
250 
251             ret_status = regexec(&imp_regex, line.c_str(), 2, match.data(), 0);
252             if(ret_status == 0)
253             {
254                 std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
255                 int         impv   = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16);
256                 midr |= (impv << 24);
257 
258                 continue;
259             }
260 
261             ret_status = regexec(&var_regex, line.c_str(), 2, match.data(), 0);
262             if(ret_status == 0)
263             {
264                 std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
265                 int         varv   = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16);
266                 midr |= (varv << 20);
267 
268                 continue;
269             }
270 
271             ret_status = regexec(&part_regex, line.c_str(), 2, match.data(), 0);
272             if(ret_status == 0)
273             {
274                 std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
275                 int         partv  = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16);
276                 midr |= (partv << 4);
277 
278                 continue;
279             }
280 
281             ret_status = regexec(&rev_regex, line.c_str(), 2, match.data(), 0);
282             if(ret_status == 0)
283             {
284                 std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
285                 int         regv   = support::cpp11::stoi(subexp, nullptr);
286                 midr |= (regv);
287                 midr |= (0xf << 16);
288 
289                 continue;
290             }
291         }
292 
293         if(curcpu >= 0)
294         {
295             cpusv[curcpu] = midr_to_model(midr);
296         }
297     }
298 
299     // Free allocated memory
300     regfree(&proc_regex);
301     regfree(&imp_regex);
302     regfree(&var_regex);
303     regfree(&part_regex);
304     regfree(&rev_regex);
305 }
306 
get_max_cpus()307 int get_max_cpus()
308 {
309     int           max_cpus = 1;
310     std::ifstream CPUspresent;
311     CPUspresent.open("/sys/devices/system/cpu/present", std::ios::in);
312     bool success = false;
313 
314     if(CPUspresent.is_open())
315     {
316         std::string line;
317 
318         if(bool(getline(CPUspresent, line)))
319         {
320             /* The content of this file is a list of ranges or single values, e.g.
321                  * 0-5, or 1-3,5,7 or similar.  As we are interested in the
322                  * max valid ID, we just need to find the last valid
323                  * delimiter ('-' or ',') and parse the integer immediately after that.
324                  */
325             auto startfrom = line.begin();
326 
327             for(auto i = line.begin(); i < line.end(); ++i)
328             {
329                 if(*i == '-' || *i == ',')
330                 {
331                     startfrom = i + 1;
332                 }
333             }
334 
335             line.erase(line.begin(), startfrom);
336 
337             max_cpus = support::cpp11::stoi(line, nullptr) + 1;
338             success  = true;
339         }
340     }
341 
342     // Return std::thread::hardware_concurrency() as a fallback.
343     if(!success)
344     {
345         max_cpus = std::thread::hardware_concurrency();
346     }
347     return max_cpus;
348 }
349 #endif /* !defined(BARE_METAL) && (defined(__arm__) || defined(__aarch64__)) */
350 
351 } // namespace
352 
353 namespace arm_compute
354 {
355 namespace utils
356 {
357 namespace cpu
358 {
get_cpu_configuration(CPUInfo & cpuinfo)359 void get_cpu_configuration(CPUInfo &cpuinfo)
360 {
361 #if !defined(BARE_METAL) && (defined(__arm__) || defined(__aarch64__))
362     bool cpuid               = false;
363     bool hwcaps_fp16_support = false;
364     bool hwcaps_dot_support  = false;
365 
366     const uint32_t hwcaps = getauxval(AT_HWCAP);
367 
368     if((hwcaps & HWCAP_CPUID) != 0)
369     {
370         cpuid = true;
371     }
372 
373     if((hwcaps & HWCAP_ASIMDHP) != 0)
374     {
375         hwcaps_fp16_support = true;
376     }
377 
378 #if defined(__aarch64__)
379     if((hwcaps & HWCAP_ASIMDDP) != 0)
380     {
381         hwcaps_dot_support = true;
382     }
383 #endif /* defined(__aarch64__) */
384 
385     const unsigned int max_cpus = get_max_cpus();
386     cpuinfo.set_cpu_num(max_cpus);
387     std::vector<CPUModel> percpu(max_cpus, CPUModel::GENERIC);
388     if(cpuid)
389     {
390         populate_models_cpuid(percpu);
391     }
392     else
393     {
394         populate_models_cpuinfo(percpu);
395     }
396     int j(0);
397     // Update dot product and FP16 support if one of the CPUs support these features
398     // We assume that the system does not have mixed architectures
399     bool one_supports_dot  = false;
400     bool one_supports_fp16 = false;
401     for(const auto &v : percpu)
402     {
403         one_supports_dot  = one_supports_dot || model_supports_dot(v);
404         one_supports_fp16 = one_supports_fp16 || model_supports_fp16(v);
405         cpuinfo.set_cpu_model(j++, v);
406     }
407     cpuinfo.set_dotprod(one_supports_dot || hwcaps_dot_support);
408     cpuinfo.set_fp16(one_supports_fp16 || hwcaps_fp16_support);
409 #else  /* !defined(BARE_METAL) && (defined(__arm__) || defined(__aarch64__)) */
410     ARM_COMPUTE_UNUSED(cpuinfo);
411 #endif /* !defined(BARE_METAL) && (defined(__arm__) || defined(__aarch64__)) */
412 }
413 
get_threads_hint()414 unsigned int get_threads_hint()
415 {
416     unsigned int num_threads_hint = 1;
417 
418 #if !defined(BARE_METAL)
419     std::map<std::string, unsigned int> cpu_part_occurrence_map;
420 
421     // CPU part regex
422     regex_t cpu_part_rgx;
423     memset(&cpu_part_rgx, 0, sizeof(regex_t));
424     int ret_status = regcomp(&cpu_part_rgx, R"(.*CPU part.+/?\:[[:space:]]+([[:alnum:]]+).*)", REG_EXTENDED);
425     ARM_COMPUTE_UNUSED(ret_status);
426     ARM_COMPUTE_ERROR_ON_MSG(ret_status != 0, "Regex compilation failed.");
427 
428     // Read cpuinfo and get occurrence of each core
429     std::ifstream cpuinfo;
430     cpuinfo.open("/proc/cpuinfo", std::ios::in);
431     if(cpuinfo.is_open())
432     {
433         std::string line;
434         while(bool(getline(cpuinfo, line)))
435         {
436             std::array<regmatch_t, 2> match;
437             ret_status = regexec(&cpu_part_rgx, line.c_str(), 2, match.data(), 0);
438             if(ret_status == 0)
439             {
440                 std::string cpu_part = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
441                 if(cpu_part_occurrence_map.find(cpu_part) != cpu_part_occurrence_map.end())
442                 {
443                     cpu_part_occurrence_map[cpu_part]++;
444                 }
445                 else
446                 {
447                     cpu_part_occurrence_map[cpu_part] = 1;
448                 }
449             }
450         }
451     }
452     regfree(&cpu_part_rgx);
453 
454     // Get min number of threads
455     auto min_common_cores = std::min_element(cpu_part_occurrence_map.begin(), cpu_part_occurrence_map.end(),
456                                              [](const std::pair<std::string, unsigned int> &p1, const std::pair<std::string, unsigned int> &p2)
457     {
458         return p1.second < p2.second;
459     });
460 
461     // Set thread hint
462     num_threads_hint = cpu_part_occurrence_map.empty() ? std::thread::hardware_concurrency() : min_common_cores->second;
463 #endif /* !defined(BARE_METAL) */
464 
465     return num_threads_hint;
466 }
467 } // namespace cpu
468 } // namespace utils
469 } // namespace arm_compute
470