1 /*
2 * Copyright (c) 2021-2023 Arm Limited.
3 *
4 * SPDX-License-Identifier: MIT
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24 #include "src/common/cpuinfo/CpuInfo.h"
25
26 #include "arm_compute/core/Error.h"
27 #include "arm_compute/core/Log.h"
28 #include "support/StringSupport.h"
29 #include "support/ToolchainSupport.h"
30
31 #include <sstream>
32
33 #if !defined(BARE_METAL)
34 #include <algorithm>
35 #include <cstring>
36 #include <fstream>
37 #if !defined(_WIN64)
38 #include <regex.h> /* C++ std::regex takes up a lot of space in the standalone builds */
39 #include <sched.h>
40 #endif /* !defined(_WIN64) */
41
42 #include <thread>
43 #include <unordered_map>
44 #endif /* !defined(BARE_METAL) */
45
46 #if !defined(_WIN64)
47 #if !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__))
48 #include <asm/hwcap.h> /* Get HWCAP bits from asm/hwcap.h */
49 #include <sys/auxv.h>
50 #elif defined(__APPLE__) && defined(__aarch64__)
51 #include <sys/sysctl.h>
52 #include <sys/types.h>
53 #endif /* defined(__APPLE__) && defined(__aarch64__)) */
54 #endif /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */
55
/** Bit tested against the AT_HWCAP auxiliary-vector value in CpuInfo::build() before the per-CPU
 *  midr_el1 sysfs entries are read.
 */
#define ARM_COMPUTE_CPU_FEATURE_HWCAP_CPUID (1 << 11)
/** Emit an MRS instruction that reads the system register @p freg into the lvalue @p var. */
#define ARM_COMPUTE_GET_FEATURE_REG(var, freg) __asm __volatile("MRS %0, " #freg : "=r"(var))
59 namespace arm_compute
60 {
61 namespace cpuinfo
62 {
63 namespace
64 {
65 #if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__))
66 /** Extract MIDR using CPUID information that are exposed to user-space
67 *
68 * @param[in] max_num_cpus Maximum number of possible CPUs
69 *
70 * @return std::vector<uint32_t> A list of the MIDR of each core
71 */
midr_from_cpuid(uint32_t max_num_cpus)72 std::vector<uint32_t> midr_from_cpuid(uint32_t max_num_cpus)
73 {
74 std::vector<uint32_t> cpus;
75 for(unsigned int i = 0; i < max_num_cpus; ++i)
76 {
77 std::stringstream str;
78 str << "/sys/devices/system/cpu/cpu" << i << "/regs/identification/midr_el1";
79 std::ifstream file(str.str(), std::ios::in);
80 if(file.is_open())
81 {
82 std::string line;
83 if(bool(getline(file, line)))
84 {
85 cpus.emplace_back(support::cpp11::stoul(line, nullptr, support::cpp11::NumericBase::BASE_16));
86 }
87 }
88 }
89 return cpus;
90 }
91
92 /** Extract MIDR by parsing the /proc/cpuinfo meta-data
93 *
94 * @param[in] max_num_cpus Maximum number of possible CPUs
95 *
96 * @return std::vector<uint32_t> A list of the MIDR of each core
97 */
midr_from_proc_cpuinfo(int max_num_cpus)98 std::vector<uint32_t> midr_from_proc_cpuinfo(int max_num_cpus)
99 {
100 std::vector<uint32_t> cpus;
101
102 regex_t proc_regex;
103 regex_t imp_regex;
104 regex_t var_regex;
105 regex_t part_regex;
106 regex_t rev_regex;
107
108 memset(&proc_regex, 0, sizeof(regex_t));
109 memset(&imp_regex, 0, sizeof(regex_t));
110 memset(&var_regex, 0, sizeof(regex_t));
111 memset(&part_regex, 0, sizeof(regex_t));
112 memset(&rev_regex, 0, sizeof(regex_t));
113
114 int ret_status = 0;
115 // If "long-form" cpuinfo is present, parse that to populate models.
116 ret_status |= regcomp(&proc_regex, R"(^processor.*([[:digit:]]+)$)", REG_EXTENDED);
117 ret_status |= regcomp(&imp_regex, R"(^CPU implementer.*0x(..)$)", REG_EXTENDED);
118 ret_status |= regcomp(&var_regex, R"(^CPU variant.*0x(.)$)", REG_EXTENDED);
119 ret_status |= regcomp(&part_regex, R"(^CPU part.*0x(...)$)", REG_EXTENDED);
120 ret_status |= regcomp(&rev_regex, R"(^CPU revision.*([[:digit:]]+)$)", REG_EXTENDED);
121 ARM_COMPUTE_UNUSED(ret_status);
122 ARM_COMPUTE_ERROR_ON_MSG(ret_status != 0, "Regex compilation failed.");
123
124 std::ifstream file("/proc/cpuinfo", std::ios::in);
125 if(file.is_open())
126 {
127 std::string line;
128 int midr = 0;
129 int curcpu = -1;
130
131 while(bool(getline(file, line)))
132 {
133 std::array<regmatch_t, 2> match;
134 ret_status = regexec(&proc_regex, line.c_str(), 2, match.data(), 0);
135 if(ret_status == 0)
136 {
137 std::string id = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
138 int newcpu = support::cpp11::stoi(id, nullptr);
139
140 if(curcpu >= 0 && midr == 0)
141 {
142 // Matched a new CPU ID without any description of the previous one - looks like old format.
143 return {};
144 }
145
146 if(curcpu >= 0 && curcpu < max_num_cpus)
147 {
148 cpus.emplace_back(midr);
149 }
150 else
151 {
152 ARM_COMPUTE_LOG_INFO_MSG_CORE("Trying to populate a core id with id greater than the expected number of cores!");
153 }
154
155 midr = 0;
156 curcpu = newcpu;
157
158 continue;
159 }
160
161 ret_status = regexec(&imp_regex, line.c_str(), 2, match.data(), 0);
162 if(ret_status == 0)
163 {
164 std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
165 int impv = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16);
166 midr |= (impv << 24);
167
168 continue;
169 }
170
171 ret_status = regexec(&var_regex, line.c_str(), 2, match.data(), 0);
172 if(ret_status == 0)
173 {
174 std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
175 int varv = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16);
176 midr |= (varv << 20);
177
178 continue;
179 }
180
181 ret_status = regexec(&part_regex, line.c_str(), 2, match.data(), 0);
182 if(ret_status == 0)
183 {
184 std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
185 int partv = support::cpp11::stoi(subexp, nullptr, support::cpp11::NumericBase::BASE_16);
186 midr |= (partv << 4);
187
188 continue;
189 }
190
191 ret_status = regexec(&rev_regex, line.c_str(), 2, match.data(), 0);
192 if(ret_status == 0)
193 {
194 std::string subexp = line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so));
195 int regv = support::cpp11::stoi(subexp, nullptr);
196 midr |= (regv);
197 midr |= (0xf << 16);
198
199 continue;
200 }
201 }
202
203 if(curcpu >= 0 && curcpu < max_num_cpus)
204 {
205 cpus.emplace_back(midr);
206 }
207 else
208 {
209 ARM_COMPUTE_LOG_INFO_MSG_CORE("Trying to populate a core id with id greater than the expected number of cores!");
210 }
211 }
212
213 // Free allocated memory
214 regfree(&proc_regex);
215 regfree(&imp_regex);
216 regfree(&var_regex);
217 regfree(&part_regex);
218 regfree(&rev_regex);
219
220 return cpus;
221 }
222
223 /** Get the maximim number of CPUs in the system by parsing /sys/devices/system/cpu/present
224 *
225 * @return int Maximum number of CPUs
226 */
get_max_cpus()227 int get_max_cpus()
228 {
229 int max_cpus = 1;
230 std::ifstream CPUspresent;
231 CPUspresent.open("/sys/devices/system/cpu/present", std::ios::in);
232 bool success = false;
233
234 if(CPUspresent.is_open())
235 {
236 std::string line;
237
238 if(bool(getline(CPUspresent, line)))
239 {
240 /* The content of this file is a list of ranges or single values, e.g.
241 * 0-5, or 1-3,5,7 or similar. As we are interested in the
242 * max valid ID, we just need to find the last valid
243 * delimiter ('-' or ',') and parse the integer immediately after that.
244 */
245 auto startfrom = line.begin();
246
247 for(auto i = line.begin(); i < line.end(); ++i)
248 {
249 if(*i == '-' || *i == ',')
250 {
251 startfrom = i + 1;
252 }
253 }
254
255 line.erase(line.begin(), startfrom);
256
257 max_cpus = support::cpp11::stoi(line, nullptr) + 1;
258 success = true;
259 }
260 }
261
262 // Return std::thread::hardware_concurrency() as a fallback.
263 if(!success)
264 {
265 max_cpus = std::thread::hardware_concurrency();
266 }
267 return max_cpus;
268 }
269 #elif defined(__aarch64__) && defined(__APPLE__) /* !defined(BARE_METAL) && !defined(__APPLE__) && (defined(__arm__) || defined(__aarch64__)) */
/** Query a hardware capability through sysctlbyname
 *
 * The return status of sysctlbyname is not inspected; the value stays 0 when the call
 * leaves the output buffer untouched.
 *
 * @param[in] cap Name of the sysctl entry to read
 *
 * @return int value queried
 */
int get_hw_capability(const std::string &cap)
{
    int64_t value       = 0;
    size_t  value_bytes = sizeof(value);
    sysctlbyname(cap.c_str(), &value, &value_bytes, nullptr, 0);
    return static_cast<int>(value);
}
281 #endif /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */
282
283 #if defined(BARE_METAL) && defined(__aarch64__)
/** Read the ID_AA64ZFR0_EL1 feature register
 *
 * The MRS instruction is emitted as a raw encoding (see the comment inside the assembly text);
 * it places the value in x3, which is then copied to the output operand and listed as clobbered.
 *
 * @return uint64_t Raw content of the register
 */
uint64_t get_sve_feature_reg()
{
    uint64_t zfr0_value = 0;
    __asm __volatile(
        ".inst 0xd5380483 // mrs x3, ID_AA64ZFR0_EL1\n"
        "MOV %0, X3"
        : "=r"(zfr0_value)
        :
        : "x3");
    return zfr0_value;
}
295 #endif /* defined(BARE_METAL) && defined(__aarch64__) */
296 } // namespace
297
CpuInfo(CpuIsaInfo isa,std::vector<CpuModel> cpus)298 CpuInfo::CpuInfo(CpuIsaInfo isa, std::vector<CpuModel> cpus)
299 : _isa(std::move(isa)), _cpus(std::move(cpus))
300 {
301 }
302
build()303 CpuInfo CpuInfo::build()
304 {
305 #if !defined(_WIN64) && !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__))
306 const uint32_t hwcaps = getauxval(AT_HWCAP);
307 const uint32_t hwcaps2 = getauxval(AT_HWCAP2);
308 const uint32_t max_cpus = get_max_cpus();
309
310 // Populate midr values
311 std::vector<uint32_t> cpus_midr;
312 if(hwcaps & ARM_COMPUTE_CPU_FEATURE_HWCAP_CPUID)
313 {
314 cpus_midr = midr_from_cpuid(max_cpus);
315 }
316 if(cpus_midr.empty())
317 {
318 cpus_midr = midr_from_proc_cpuinfo(max_cpus);
319 }
320 if(cpus_midr.empty())
321 {
322 cpus_midr.resize(max_cpus, 0);
323 }
324
325 // Populate isa (Assume homogeneous ISA specification)
326 CpuIsaInfo isa = init_cpu_isa_from_hwcaps(hwcaps, hwcaps2, cpus_midr.back());
327
328 // Convert midr to models
329 std::vector<CpuModel> cpus_model;
330 std::transform(std::begin(cpus_midr), std::end(cpus_midr), std::back_inserter(cpus_model),
331 [](uint32_t midr) -> CpuModel { return midr_to_model(midr); });
332
333 CpuInfo info(isa, cpus_model);
334 return info;
335
336 #elif(BARE_METAL) && defined(__aarch64__) /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */
337
338 // Assume single CPU in bare metal mode. Just read the ID register and feature bits directly.
339 uint64_t isar0 = 0, isar1 = 0, pfr0 = 0, pfr1 = 0, svefr0 = 0, midr = 0;
340 ARM_COMPUTE_GET_FEATURE_REG(isar0, ID_AA64ISAR0_EL1);
341 ARM_COMPUTE_GET_FEATURE_REG(isar1, ID_AA64ISAR1_EL1);
342 ARM_COMPUTE_GET_FEATURE_REG(pfr0, ID_AA64PFR0_EL1);
343 ARM_COMPUTE_GET_FEATURE_REG(pfr1, ID_AA64PFR1_EL1);
344 ARM_COMPUTE_GET_FEATURE_REG(midr, MIDR_EL1);
345 if((pfr0 >> 32) & 0xf)
346 {
347 svefr0 = get_sve_feature_reg();
348 }
349
350 CpuIsaInfo isa = init_cpu_isa_from_regs(isar0, isar1, pfr0, pfr1, svefr0, midr);
351 std::vector<CpuModel> cpus_model(1, midr_to_model(midr));
352 CpuInfo info(isa, cpus_model);
353 return info;
354 #elif defined(__aarch64__) && defined(__APPLE__) /* #elif(BARE_METAL) && defined(__aarch64__) */
355 int ncpus = get_hw_capability("hw.perflevel0.logicalcpu");
356 CpuIsaInfo isainfo;
357 std::vector<CpuModel> cpus_model(ncpus);
358 isainfo.neon = get_hw_capability("hw.optional.neon");
359 isainfo.fp16 = get_hw_capability("hw.optional.neon_fp16");
360 isainfo.dot = get_hw_capability("hw.optional.arm.FEAT_DotProd");
361 CpuInfo info(isainfo, cpus_model);
362 return info;
363 #else /* #elif defined(__aarch64__) && defined(__APPLE__) */
364 CpuInfo info(CpuIsaInfo(), { CpuModel::GENERIC });
365 return info;
366 #endif /* !defined(BARE_METAL) && !defined(__APPLE__) && !defined(__OpenBSD__) && (defined(__arm__) || defined(__aarch64__)) */
367 }
368
cpu_model(uint32_t cpuid) const369 CpuModel CpuInfo::cpu_model(uint32_t cpuid) const
370 {
371 if(cpuid < _cpus.size())
372 {
373 return _cpus[cpuid];
374 }
375 return CpuModel::GENERIC;
376 }
377
cpu_model() const378 CpuModel CpuInfo::cpu_model() const
379 {
380 #if defined(_WIN64) || defined(BARE_METAL) || defined(__APPLE__) || defined(__OpenBSD__) || (!defined(__arm__) && !defined(__aarch64__))
381 return cpu_model(0);
382 #else /* defined(BARE_METAL) || defined(__APPLE__) || defined(__OpenBSD__) || (!defined(__arm__) && !defined(__aarch64__)) */
383 return cpu_model(sched_getcpu());
384 #endif /* defined(BARE_METAL) || defined(__APPLE__) || defined(__OpenBSD__) || (!defined(__arm__) && !defined(__aarch64__)) */
385 }
386
num_cpus() const387 uint32_t CpuInfo::num_cpus() const
388 {
389 return _cpus.size();
390 }
391
num_threads_hint()392 uint32_t num_threads_hint()
393 {
394 unsigned int num_threads_hint = 1;
395
396 #if !defined(BARE_METAL) && !defined(_WIN64)
397 std::vector<std::string> cpus;
398 cpus.reserve(64);
399
400 // CPU part regex
401 regex_t cpu_part_rgx;
402 memset(&cpu_part_rgx, 0, sizeof(regex_t));
403 int ret_status = regcomp(&cpu_part_rgx, R"(.*CPU part.+/?\:[[:space:]]+([[:alnum:]]+).*)", REG_EXTENDED);
404 ARM_COMPUTE_UNUSED(ret_status);
405 ARM_COMPUTE_ERROR_ON_MSG(ret_status != 0, "Regex compilation failed.");
406
407 // Read cpuinfo and get occurrence of each core
408 std::ifstream cpuinfo_file("/proc/cpuinfo", std::ios::in);
409 if(cpuinfo_file.is_open())
410 {
411 std::string line;
412 while(bool(getline(cpuinfo_file, line)))
413 {
414 std::array<regmatch_t, 2> match;
415 if(regexec(&cpu_part_rgx, line.c_str(), 2, match.data(), 0) == 0)
416 {
417 cpus.emplace_back(line.substr(match[1].rm_so, (match[1].rm_eo - match[1].rm_so)));
418 }
419 }
420 }
421 regfree(&cpu_part_rgx);
422
423 // Get min number of threads
424 std::sort(std::begin(cpus), std::end(cpus));
425 auto least_frequent_cpu_occurences = [](const std::vector<std::string> &cpus) -> uint32_t
426 {
427 std::unordered_map<std::string, uint32_t> cpus_freq;
428 for(const auto &cpu : cpus)
429 {
430 cpus_freq[cpu]++;
431 }
432
433 uint32_t vmin = cpus.size() + 1;
434 for(const auto &cpu_freq : cpus_freq)
435 {
436 vmin = std::min(vmin, cpu_freq.second);
437 }
438 return vmin;
439 };
440
441 // Set thread hint
442 num_threads_hint = cpus.empty() ? std::thread::hardware_concurrency() : least_frequent_cpu_occurences(cpus);
443 #endif /* !defined(BARE_METAL) */
444
445 return num_threads_hint;
446 }
447 } // namespace cpuinfo
448 } // namespace arm_compute
449