• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2023 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #include "src/common/cpuinfo/CpuInfo.h"
25 
26 #include "arm_compute/core/Error.h"
27 #include "arm_compute/core/Log.h"
28 #include "support/StringSupport.h"
29 #include "support/ToolchainSupport.h"
30 
31 #include <sstream>
32 
33 #if !defined(BARE_METAL)
34 #include <algorithm>
35 #include <cstring>
36 #include <fstream>
37 #if !defined(_WIN64)
38 #include <regex.h> /* C++ std::regex takes up a lot of space in the standalone builds */
39 #include <sched.h>
40 #endif /* !defined(_WIN64) */
41 
42 #include <thread>
43 #include <unordered_map>
44 #endif /* !defined(BARE_METAL) */
45 
46 #if !defined(_WIN64)
47 #if !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__))
48 #include <asm/hwcap.h> /* Get HWCAP bits from asm/hwcap.h */
49 #include <sys/auxv.h>
50 #elif defined(__APPLE__) && defined(__aarch64__)
51 #include <sys/sysctl.h>
52 #include <sys/types.h>
53 #endif /* defined(__APPLE__) && defined(__aarch64__)) */
54 #endif /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */
55 
/** AT_HWCAP bit (bit 11) tested before reading MIDR values from the per-CPU sysfs
 *  identification registers; on arm64 Linux this is the value of HWCAP_CPUID. */
#define ARM_COMPUTE_CPU_FEATURE_HWCAP_CPUID (1 << 11)
/** Read system register @p freg into @p var using an MRS instruction. */
#define ARM_COMPUTE_GET_FEATURE_REG(var, freg) \
    __asm __volatile("MRS %0, " #freg : "=r"(var))
59 namespace arm_compute
60 {
61 namespace cpuinfo
62 {
63 namespace
64 {
65 #if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__))
66 /** Extract MIDR using CPUID information that are exposed to user-space
67  *
68  * @param[in] max_num_cpus Maximum number of possible CPUs
69  *
70  * @return std::vector<uint32_t> A list of the MIDR of each core
71  */
midr_from_cpuid(uint32_t max_num_cpus)72 std::vector<uint32_t> midr_from_cpuid(uint32_t max_num_cpus)
73 {
74     std::vector<uint32_t> cpus;
75     for(unsigned int i = 0; i < max_num_cpus; ++i)
76     {
77         std::stringstream str;
78         str << "/sys/devices/system/cpu/cpu" << i << "/regs/identification/midr_el1";
79         std::ifstream file(str.str(), std::ios::in);
80         if(file.is_open())
81         {
82             std::string line;
83             if(bool(getline(file, line)))
84             {
85                 cpus.emplace_back(support::cpp11::stoul(line, nullptr, support::cpp11::NumericBase::BASE_16));
86             }
87         }
88     }
89     return cpus;
90 }
91 
92 /** Extract MIDR by parsing the /proc/cpuinfo meta-data
93  *
94  * @param[in] max_num_cpus Maximum number of possible CPUs
95  *
96  * @return std::vector<uint32_t> A list of the MIDR of each core
97  */
midr_from_proc_cpuinfo(int max_num_cpus)98 std::vector<uint32_t> midr_from_proc_cpuinfo(int max_num_cpus)
99 {
100     std::vector<uint32_t> cpus;
101 
102     regex_t proc_regex;
103     regex_t imp_regex;
104     regex_t var_regex;
105     regex_t part_regex;
106     regex_t rev_regex;
107 
108     memset(&proc_regex, 0, sizeof(regex_t));
109     memset(&imp_regex, 0, sizeof(regex_t));
110     memset(&var_regex, 0, sizeof(regex_t));
111     memset(&part_regex, 0, sizeof(regex_t));
112     memset(&rev_regex, 0, sizeof(regex_t));
113 
114     int ret_status = 0;
115     // If "long-form" cpuinfo is present, parse that to populate models.
116     ret_status |= regcomp(&proc_regex, R"(^processor.*([[:digit:]]+)$)", REG_EXTENDED);
117     ret_status |= regcomp(&imp_regex, R"(^CPU implementer.*0x(..)$)", REG_EXTENDED);
118     ret_status |= regcomp(&var_regex, R"(^CPU variant.*0x(.)$)", REG_EXTENDED);
119     ret_status |= regcomp(&part_regex, R"(^CPU part.*0x(...)$)", REG_EXTENDED);
120     ret_status |= regcomp(&rev_regex, R"(^CPU revision.*([[:digit:]]+)$)", REG_EXTENDED);
121     ARM_COMPUTE_UNUSED(ret_status);
122     ARM_COMPUTE_ERROR_ON_MSG(ret_status != 0, "Regex compilation failed.");
123 
124     std::ifstream file("/proc/cpuinfo", std::ios::in);
125     if(file.is_open())
126     {
127         std::string line;
128         int         midr   = 0;
129         int         curcpu = -1;
130 
131         while(bool(getline(file, line)))
132         {
133             std::array<regmatch_t, 2> match;
134             ret_status = regexec(&proc_regex, line.c_str(), 2, match.data(), 0);
135             if(ret_status == 0)
136             {
137                 std::string id     = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
138                 int         newcpu = support::cpp11::stoi(id, nullptr);
139 
140                 if(curcpu >= 0 && midr == 0)
141                 {
142                     // Matched a new CPU ID without any description of the previous one - looks like old format.
143                     return {};
144                 }
145 
146                 if(curcpu >= 0 && curcpu < max_num_cpus)
147                 {
148                     cpus.emplace_back(midr);
149                 }
150                 else
151                 {
152                     ARM_COMPUTE_LOG_INFO_MSG_CORE("Trying to populate a core id with id greater than the expected number of cores!");
153                 }
154 
155                 midr   = 0;
156                 curcpu = newcpu;
157 
158                 continue;
159             }
160 
161             ret_status = regexec(&imp_regex, line.c_str(), 2, match.data(), 0);
162             if(ret_status == 0)
163             {
164                 std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
165                 int         impv   = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16);
166                 midr |= (impv << 24);
167 
168                 continue;
169             }
170 
171             ret_status = regexec(&var_regex, line.c_str(), 2, match.data(), 0);
172             if(ret_status == 0)
173             {
174                 std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
175                 int         varv   = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16);
176                 midr |= (varv << 20);
177 
178                 continue;
179             }
180 
181             ret_status = regexec(&part_regex, line.c_str(), 2, match.data(), 0);
182             if(ret_status == 0)
183             {
184                 std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
185                 int         partv  = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16);
186                 midr |= (partv << 4);
187 
188                 continue;
189             }
190 
191             ret_status = regexec(&rev_regex, line.c_str(), 2, match.data(), 0);
192             if(ret_status == 0)
193             {
194                 std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
195                 int         regv   = support::cpp11::stoi(subexp, nullptr);
196                 midr |= (regv);
197                 midr |= (0xf << 16);
198 
199                 continue;
200             }
201         }
202 
203         if(curcpu >= 0 && curcpu < max_num_cpus)
204         {
205             cpus.emplace_back(midr);
206         }
207         else
208         {
209             ARM_COMPUTE_LOG_INFO_MSG_CORE("Trying to populate a core id with id greater than the expected number of cores!");
210         }
211     }
212 
213     // Free allocated memory
214     regfree(&proc_regex);
215     regfree(&imp_regex);
216     regfree(&var_regex);
217     regfree(&part_regex);
218     regfree(&rev_regex);
219 
220     return cpus;
221 }
222 
223 /** Get the maximim number of CPUs in the system by parsing /sys/devices/system/cpu/present
224  *
225  * @return int Maximum number of CPUs
226  */
get_max_cpus()227 int get_max_cpus()
228 {
229     int           max_cpus = 1;
230     std::ifstream CPUspresent;
231     CPUspresent.open("/sys/devices/system/cpu/present", std::ios::in);
232     bool success = false;
233 
234     if(CPUspresent.is_open())
235     {
236         std::string line;
237 
238         if(bool(getline(CPUspresent, line)))
239         {
240             /* The content of this file is a list of ranges or single values, e.g.
241                  * 0-5, or 1-3,5,7 or similar.  As we are interested in the
242                  * max valid ID, we just need to find the last valid
243                  * delimiter ('-' or ',') and parse the integer immediately after that.
244                  */
245             auto startfrom = line.begin();
246 
247             for(auto i = line.begin(); i < line.end(); ++i)
248             {
249                 if(*i == '-' || *i == ',')
250                 {
251                     startfrom = i + 1;
252                 }
253             }
254 
255             line.erase(line.begin(), startfrom);
256 
257             max_cpus = support::cpp11::stoi(line, nullptr) + 1;
258             success  = true;
259         }
260     }
261 
262     // Return std::thread::hardware_concurrency() as a fallback.
263     if(!success)
264     {
265         max_cpus = std::thread::hardware_concurrency();
266     }
267     return max_cpus;
268 }
269 #elif defined(__aarch64__) && defined(__APPLE__) /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */
270 /** Query features through sysctlbyname
271   *
272   * @return int value queried
273   */
274 int get_hw_capability(const std::string &cap)
275 {
276     int64_t result(0);
277     size_t  size = sizeof(result);
278     sysctlbyname(cap.c_str(), &result, &size, NULL, 0);
279     return result;
280 }
281 #endif                                           /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */
282 
283 #if defined(BARE_METAL) && defined(__aarch64__)
/** Read the SVE feature register ID_AA64ZFR0_EL1
 *
 * The MRS is emitted as a raw instruction word targeting x3; the value is then copied
 * from x3 into the result, and x3 is declared as clobbered.
 *
 * @return uint64_t Raw value of the register
 */
uint64_t get_sve_feature_reg()
{
    uint64_t zfr0_value = 0;
    __asm __volatile(
        ".inst 0xd5380483 // mrs x3, ID_AA64ZFR0_EL1\n"
        "MOV  %0, X3"
        : "=r"(zfr0_value)
        :
        : "x3");
    return zfr0_value;
}
295 #endif /* defined(BARE_METAL) && defined(__aarch64__) */
296 } // namespace
297 
CpuInfo(CpuIsaInfo isa,std::vector<CpuModel> cpus)298 CpuInfo::CpuInfo(CpuIsaInfo isa, std::vector<CpuModel> cpus)
299     : _isa(std::move(isa)), _cpus(std::move(cpus))
300 {
301 }
302 
build()303 CpuInfo CpuInfo::build()
304 {
305 #if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__))
306     const uint32_t hwcaps   = getauxval(AT_HWCAP);
307     const uint32_t hwcaps2  = getauxval(AT_HWCAP2);
308     const uint32_t max_cpus = get_max_cpus();
309 
310     // Populate midr values
311     std::vector<uint32_t> cpus_midr;
312     if(hwcaps & ARM_COMPUTE_CPU_FEATURE_HWCAP_CPUID)
313     {
314         cpus_midr = midr_from_cpuid(max_cpus);
315     }
316     if(cpus_midr.empty())
317     {
318         cpus_midr = midr_from_proc_cpuinfo(max_cpus);
319     }
320     if(cpus_midr.empty())
321     {
322         cpus_midr.resize(max_cpus, 0);
323     }
324 
325     // Populate isa (Assume homogeneous ISA specification)
326     CpuIsaInfo isa = init_cpu_isa_from_hwcaps(hwcaps, hwcaps2, cpus_midr.back());
327 
328     // Convert midr to models
329     std::vector<CpuModel> cpus_model;
330     std::transform(std::begin(cpus_midr), std::end(cpus_midr), std::back_inserter(cpus_model),
331                    [](uint32_t midr) -> CpuModel { return midr_to_model(midr); });
332 
333     CpuInfo info(isa, cpus_model);
334     return info;
335 
336 #elif(BARE_METAL) && defined(__aarch64__)        /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */
337 
338     // Assume single CPU in bare metal mode.  Just read the ID register and feature bits directly.
339     uint64_t isar0 = 0, isar1 = 0, pfr0 = 0, pfr1 = 0, svefr0 = 0, midr = 0;
340     ARM_COMPUTE_GET_FEATURE_REG(isar0, ID_AA64ISAR0_EL1);
341     ARM_COMPUTE_GET_FEATURE_REG(isar1, ID_AA64ISAR1_EL1);
342     ARM_COMPUTE_GET_FEATURE_REG(pfr0, ID_AA64PFR0_EL1);
343     ARM_COMPUTE_GET_FEATURE_REG(pfr1, ID_AA64PFR1_EL1);
344     ARM_COMPUTE_GET_FEATURE_REG(midr, MIDR_EL1);
345     if((pfr0 >> 32) & 0xf)
346     {
347         svefr0 = get_sve_feature_reg();
348     }
349 
350     CpuIsaInfo            isa = init_cpu_isa_from_regs(isar0, isar1, pfr0, pfr1, svefr0, midr);
351     std::vector<CpuModel> cpus_model(1, midr_to_model(midr));
352     CpuInfo               info(isa, cpus_model);
353     return info;
354 #elif defined(__aarch64__) && defined(__APPLE__) /* #elif(BARE_METAL) && defined(__aarch64__) */
355     int                   ncpus = get_hw_capability("hw.perflevel0.logicalcpu");
356     CpuIsaInfo            isainfo;
357     std::vector<CpuModel> cpus_model(ncpus);
358     isainfo.neon = get_hw_capability("hw.optional.neon");
359     isainfo.fp16 = get_hw_capability("hw.optional.neon_fp16");
360     isainfo.dot  = get_hw_capability("hw.optional.arm.FEAT_DotProd");
361     CpuInfo info(isainfo, cpus_model);
362     return info;
363 #else                                            /* #elif defined(__aarch64__) && defined(__APPLE__) */
364     CpuInfo info(CpuIsaInfo(), { CpuModel::GENERIC });
365     return info;
366 #endif                                           /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */
367 }
368 
cpu_model(uint32_t cpuid) const369 CpuModel CpuInfo::cpu_model(uint32_t cpuid) const
370 {
371     if(cpuid < _cpus.size())
372     {
373         return _cpus[cpuid];
374     }
375     return CpuModel::GENERIC;
376 }
377 
cpu_model() const378 CpuModel CpuInfo::cpu_model() const
379 {
380 #if defined(_WIN64) || defined(BARE_METAL) || defined(__APPLE__) || defined(__OpenBSD__) || (!defined(__arm__) && !defined(__aarch64__))
381     return cpu_model(0);
382 #else  /* defined(BARE_METAL) || defined(__APPLE__) || defined(__OpenBSD__) || (!defined(__arm__) && !defined(__aarch64__)) */
383     return cpu_model(sched_getcpu());
384 #endif /* defined(BARE_METAL) || defined(__APPLE__) || defined(__OpenBSD__) || (!defined(__arm__) && !defined(__aarch64__)) */
385 }
386 
num_cpus() const387 uint32_t CpuInfo::num_cpus() const
388 {
389     return _cpus.size();
390 }
391 
num_threads_hint()392 uint32_t num_threads_hint()
393 {
394     unsigned int num_threads_hint = 1;
395 
396 #if !defined(BARE_METAL) && !defined(_WIN64)
397     std::vector<std::string> cpus;
398     cpus.reserve(64);
399 
400     // CPU part regex
401     regex_t cpu_part_rgx;
402     memset(&cpu_part_rgx, 0, sizeof(regex_t));
403     int ret_status = regcomp(&cpu_part_rgx, R"(.*CPU part.+/?\:[[:space:]]+([[:alnum:]]+).*)", REG_EXTENDED);
404     ARM_COMPUTE_UNUSED(ret_status);
405     ARM_COMPUTE_ERROR_ON_MSG(ret_status != 0, "Regex compilation failed.");
406 
407     // Read cpuinfo and get occurrence of each core
408     std::ifstream cpuinfo_file("/proc/cpuinfo", std::ios::in);
409     if(cpuinfo_file.is_open())
410     {
411         std::string line;
412         while(bool(getline(cpuinfo_file, line)))
413         {
414             std::array<regmatch_t, 2> match;
415             if(regexec(&cpu_part_rgx, line.c_str(), 2, match.data(), 0) == 0)
416             {
417                 cpus.emplace_back(line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so)));
418             }
419         }
420     }
421     regfree(&cpu_part_rgx);
422 
423     // Get min number of threads
424     std::sort(std::begin(cpus), std::end(cpus));
425     auto least_frequent_cpu_occurences = [](const std::vector<std::string> &cpus) -> uint32_t
426     {
427         std::unordered_map<std::string, uint32_t> cpus_freq;
428         for(const auto &cpu : cpus)
429         {
430             cpus_freq[cpu]++;
431         }
432 
433         uint32_t vmin = cpus.size() + 1;
434         for(const auto &cpu_freq : cpus_freq)
435         {
436             vmin = std::min(vmin, cpu_freq.second);
437         }
438         return vmin;
439     };
440 
441     // Set thread hint
442     num_threads_hint = cpus.empty() ? std::thread::hardware_concurrency() : least_frequent_cpu_occurences(cpus);
443 #endif /* !defined(BARE_METAL) */
444 
445     return num_threads_hint;
446 }
447 } // namespace cpuinfo
448 } // namespace arm_compute
449