• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2015 Google Inc. All rights reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "internal_macros.h"
16 
17 #ifdef BENCHMARK_OS_WINDOWS
18 #include <Shlwapi.h>
19 #include <VersionHelpers.h>
20 #include <Windows.h>
21 #else
22 #include <fcntl.h>
23 #include <sys/resource.h>
24 #include <sys/time.h>
25 #include <sys/types.h>  // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD
26 #include <unistd.h>
27 #if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX || \
28     defined BENCHMARK_OS_NETBSD
29 #define BENCHMARK_HAS_SYSCTL
30 #include <sys/sysctl.h>
31 #endif
32 #endif
33 
34 #include <algorithm>
35 #include <array>
36 #include <bitset>
37 #include <cerrno>
38 #include <climits>
39 #include <cstdint>
40 #include <cstdio>
41 #include <cstdlib>
42 #include <cstring>
43 #include <fstream>
44 #include <iostream>
45 #include <iterator>
46 #include <limits>
47 #include <memory>
48 #include <sstream>
49 
50 #include "check.h"
51 #include "cycleclock.h"
52 #include "internal_macros.h"
53 #include "log.h"
54 #include "sleep.h"
55 #include "string_util.h"
56 
57 namespace benchmark {
58 namespace {
59 
// Base case of the variadic printer: nothing is left to stream, so terminate
// the line and flush `out`.
void PrintImp(std::ostream& out) { std::endl(out); }

// Streams `f` into `out` and then recurses on the remaining arguments; the
// recursion ends in the single-argument overload.
template <class Head, class... Tail>
void PrintImp(std::ostream& out, Head&& f, Tail&&... rest) {
  out << std::forward<Head>(f);
  PrintImp(out, std::forward<Tail>(rest)...);
}
67 
68 template <class... Args>
PrintErrorAndDie(Args &&...args)69 BENCHMARK_NORETURN void PrintErrorAndDie(Args&&... args) {
70   PrintImp(std::cerr, std::forward<Args>(args)...);
71   std::exit(EXIT_FAILURE);
72 }
73 
74 #ifdef BENCHMARK_HAS_SYSCTL
75 
76 /// ValueUnion - A type used to correctly alias the byte-for-byte output of
77 /// `sysctl` with the result type it's to be interpreted as.
78 struct ValueUnion {
79   union DataT {
80     uint32_t uint32_value;
81     uint64_t uint64_value;
82     // For correct aliasing of union members from bytes.
83     char bytes[8];
84   };
85   using DataPtr = std::unique_ptr<DataT, decltype(&std::free)>;
86 
87   // The size of the data union member + its trailing array size.
88   size_t Size;
89   DataPtr Buff;
90 
91  public:
ValueUnionbenchmark::__anond2d052880111::ValueUnion92   ValueUnion() : Size(0), Buff(nullptr, &std::free) {}
93 
ValueUnionbenchmark::__anond2d052880111::ValueUnion94   explicit ValueUnion(size_t BuffSize)
95       : Size(sizeof(DataT) + BuffSize),
96         Buff(::new (std::malloc(Size)) DataT(), &std::free) {}
97 
98   ValueUnion(ValueUnion&& other) = default;
99 
operator boolbenchmark::__anond2d052880111::ValueUnion100   explicit operator bool() const { return bool(Buff); }
101 
databenchmark::__anond2d052880111::ValueUnion102   char* data() const { return Buff->bytes; }
103 
GetAsStringbenchmark::__anond2d052880111::ValueUnion104   std::string GetAsString() const { return std::string(data()); }
105 
GetAsIntegerbenchmark::__anond2d052880111::ValueUnion106   int64_t GetAsInteger() const {
107     if (Size == sizeof(Buff->uint32_value))
108       return static_cast<int32_t>(Buff->uint32_value);
109     else if (Size == sizeof(Buff->uint64_value))
110       return static_cast<int64_t>(Buff->uint64_value);
111     BENCHMARK_UNREACHABLE();
112   }
113 
GetAsUnsignedbenchmark::__anond2d052880111::ValueUnion114   uint64_t GetAsUnsigned() const {
115     if (Size == sizeof(Buff->uint32_value))
116       return Buff->uint32_value;
117     else if (Size == sizeof(Buff->uint64_value))
118       return Buff->uint64_value;
119     BENCHMARK_UNREACHABLE();
120   }
121 
122   template <class T, int N>
GetAsArraybenchmark::__anond2d052880111::ValueUnion123   std::array<T, N> GetAsArray() {
124     const int ArrSize = sizeof(T) * N;
125     CHECK_LE(ArrSize, Size);
126     std::array<T, N> Arr;
127     std::memcpy(Arr.data(), data(), ArrSize);
128     return Arr;
129   }
130 };
131 
GetSysctlImp(std::string const & Name)132 ValueUnion GetSysctlImp(std::string const& Name) {
133   size_t CurBuffSize = 0;
134   if (sysctlbyname(Name.c_str(), nullptr, &CurBuffSize, nullptr, 0) == -1)
135     return ValueUnion();
136 
137   ValueUnion buff(CurBuffSize);
138   if (sysctlbyname(Name.c_str(), buff.data(), &buff.Size, nullptr, 0) == 0)
139     return buff;
140   return ValueUnion();
141 }
142 
143 BENCHMARK_MAYBE_UNUSED
GetSysctl(std::string const & Name,std::string * Out)144 bool GetSysctl(std::string const& Name, std::string* Out) {
145   Out->clear();
146   auto Buff = GetSysctlImp(Name);
147   if (!Buff) return false;
148   Out->assign(Buff.data());
149   return true;
150 }
151 
152 template <class Tp,
153           class = typename std::enable_if<std::is_integral<Tp>::value>::type>
GetSysctl(std::string const & Name,Tp * Out)154 bool GetSysctl(std::string const& Name, Tp* Out) {
155   *Out = 0;
156   auto Buff = GetSysctlImp(Name);
157   if (!Buff) return false;
158   *Out = static_cast<Tp>(Buff.GetAsUnsigned());
159   return true;
160 }
161 
162 template <class Tp, size_t N>
GetSysctl(std::string const & Name,std::array<Tp,N> * Out)163 bool GetSysctl(std::string const& Name, std::array<Tp, N>* Out) {
164   auto Buff = GetSysctlImp(Name);
165   if (!Buff) return false;
166   *Out = Buff.GetAsArray<Tp, N>();
167   return true;
168 }
169 #endif
170 
// Reads one formatted value (via operator>>) from the file `fname` into
// `*arg`.
//
// `*arg` is reset to a value-initialized ArgT before anything else happens,
// so callers never observe a stale value. Returns true when the file could be
// opened and the extraction succeeded, false otherwise.
template <class ArgT>
bool ReadFromFile(std::string const& fname, ArgT* arg) {
  *arg = ArgT();
  std::ifstream f(fname.c_str());
  if (!f.is_open()) return false;
  f >> *arg;
  // Test fail() rather than good(): a value that runs up to end-of-file (no
  // trailing newline) sets eofbit even though it was extracted successfully.
  return !f.fail();
}
179 
CpuScalingEnabled(int num_cpus)180 bool CpuScalingEnabled(int num_cpus) {
181   // We don't have a valid CPU count, so don't even bother.
182   if (num_cpus <= 0) return false;
183 #ifndef BENCHMARK_OS_WINDOWS
184   // On Linux, the CPUfreq subsystem exposes CPU information as files on the
185   // local file system. If reading the exported files fails, then we may not be
186   // running on Linux, so we silently ignore all the read errors.
187   std::string res;
188   for (int cpu = 0; cpu < num_cpus; ++cpu) {
189     std::string governor_file =
190         StrCat("/sys/devices/system/cpu/cpu", cpu, "/cpufreq/scaling_governor");
191     if (ReadFromFile(governor_file, &res) && res != "performance") return true;
192   }
193 #endif
194   return false;
195 }
196 
// Counts the set bits in a CPU map given as comma-separated hexadecimal
// groups (for example "00000000,0000000f"). Every group that is followed by
// a comma is parsed; the text after the last comma is parsed only when it is
// not empty.
int CountSetBitsInCPUMap(std::string Val) {
  using CPUMask = std::bitset<sizeof(std::uintptr_t) * CHAR_BIT>;
  int total = 0;
  size_t Begin = 0;
  for (;;) {
    const size_t Comma = Val.find(',', Begin);
    const bool IsLast = (Comma == std::string::npos);
    const std::string Part =
        Val.substr(Begin, IsLast ? std::string::npos : Comma - Begin);
    if (!IsLast || !Part.empty()) {
      const CPUMask Mask(std::stoul("0x" + Part, nullptr, 16));
      total += static_cast<int>(Mask.count());
    }
    if (IsLast) break;
    Begin = Comma + 1;
  }
  return total;
}
215 
216 BENCHMARK_MAYBE_UNUSED
GetCacheSizesFromKVFS()217 std::vector<CPUInfo::CacheInfo> GetCacheSizesFromKVFS() {
218   std::vector<CPUInfo::CacheInfo> res;
219   std::string dir = "/sys/devices/system/cpu/cpu0/cache/";
220   int Idx = 0;
221   while (true) {
222     CPUInfo::CacheInfo info;
223     std::string FPath = StrCat(dir, "index", Idx++, "/");
224     std::ifstream f(StrCat(FPath, "size").c_str());
225     if (!f.is_open()) break;
226     std::string suffix;
227     f >> info.size;
228     if (f.fail())
229       PrintErrorAndDie("Failed while reading file '", FPath, "size'");
230     if (f.good()) {
231       f >> suffix;
232       if (f.bad())
233         PrintErrorAndDie(
234             "Invalid cache size format: failed to read size suffix");
235       else if (f && suffix != "K")
236         PrintErrorAndDie("Invalid cache size format: Expected bytes ", suffix);
237       else if (suffix == "K")
238         info.size *= 1000;
239     }
240     if (!ReadFromFile(StrCat(FPath, "type"), &info.type))
241       PrintErrorAndDie("Failed to read from file ", FPath, "type");
242     if (!ReadFromFile(StrCat(FPath, "level"), &info.level))
243       PrintErrorAndDie("Failed to read from file ", FPath, "level");
244     std::string map_str;
245     if (!ReadFromFile(StrCat(FPath, "shared_cpu_map"), &map_str))
246       PrintErrorAndDie("Failed to read from file ", FPath, "shared_cpu_map");
247     info.num_sharing = CountSetBitsInCPUMap(map_str);
248     res.push_back(info);
249   }
250 
251   return res;
252 }
253 
254 #ifdef BENCHMARK_OS_MACOSX
GetCacheSizesMacOSX()255 std::vector<CPUInfo::CacheInfo> GetCacheSizesMacOSX() {
256   std::vector<CPUInfo::CacheInfo> res;
257   std::array<uint64_t, 4> CacheCounts{{0, 0, 0, 0}};
258   GetSysctl("hw.cacheconfig", &CacheCounts);
259 
260   struct {
261     std::string name;
262     std::string type;
263     int level;
264     size_t num_sharing;
265   } Cases[] = {{"hw.l1dcachesize", "Data", 1, CacheCounts[1]},
266                {"hw.l1icachesize", "Instruction", 1, CacheCounts[1]},
267                {"hw.l2cachesize", "Unified", 2, CacheCounts[2]},
268                {"hw.l3cachesize", "Unified", 3, CacheCounts[3]}};
269   for (auto& C : Cases) {
270     int val;
271     if (!GetSysctl(C.name, &val)) continue;
272     CPUInfo::CacheInfo info;
273     info.type = C.type;
274     info.level = C.level;
275     info.size = val;
276     info.num_sharing = static_cast<int>(C.num_sharing);
277     res.push_back(std::move(info));
278   }
279   return res;
280 }
281 #elif defined(BENCHMARK_OS_WINDOWS)
GetCacheSizesWindows()282 std::vector<CPUInfo::CacheInfo> GetCacheSizesWindows() {
283   std::vector<CPUInfo::CacheInfo> res;
284   DWORD buffer_size = 0;
285   using PInfo = SYSTEM_LOGICAL_PROCESSOR_INFORMATION;
286   using CInfo = CACHE_DESCRIPTOR;
287 
288   using UPtr = std::unique_ptr<PInfo, decltype(&std::free)>;
289   GetLogicalProcessorInformation(nullptr, &buffer_size);
290   UPtr buff((PInfo*)malloc(buffer_size), &std::free);
291   if (!GetLogicalProcessorInformation(buff.get(), &buffer_size))
292     PrintErrorAndDie("Failed during call to GetLogicalProcessorInformation: ",
293                      GetLastError());
294 
295   PInfo* it = buff.get();
296   PInfo* end = buff.get() + (buffer_size / sizeof(PInfo));
297 
298   for (; it != end; ++it) {
299     if (it->Relationship != RelationCache) continue;
300     using BitSet = std::bitset<sizeof(ULONG_PTR) * CHAR_BIT>;
301     BitSet B(it->ProcessorMask);
302     // To prevent duplicates, only consider caches where CPU 0 is specified
303     if (!B.test(0)) continue;
304     CInfo* Cache = &it->Cache;
305     CPUInfo::CacheInfo C;
306     C.num_sharing = B.count();
307     C.level = Cache->Level;
308     C.size = Cache->Size;
309     switch (Cache->Type) {
310       case CacheUnified:
311         C.type = "Unified";
312         break;
313       case CacheInstruction:
314         C.type = "Instruction";
315         break;
316       case CacheData:
317         C.type = "Data";
318         break;
319       case CacheTrace:
320         C.type = "Trace";
321         break;
322       default:
323         C.type = "Unknown";
324         break;
325     }
326     res.push_back(C);
327   }
328   return res;
329 }
330 #endif
331 
GetCacheSizes()332 std::vector<CPUInfo::CacheInfo> GetCacheSizes() {
333 #ifdef BENCHMARK_OS_MACOSX
334   return GetCacheSizesMacOSX();
335 #elif defined(BENCHMARK_OS_WINDOWS)
336   return GetCacheSizesWindows();
337 #else
338   return GetCacheSizesFromKVFS();
339 #endif
340 }
341 
GetNumCPUs()342 int GetNumCPUs() {
343 #ifdef BENCHMARK_HAS_SYSCTL
344   int NumCPU = -1;
345   if (GetSysctl("hw.ncpu", &NumCPU)) return NumCPU;
346   fprintf(stderr, "Err: %s\n", strerror(errno));
347   std::exit(EXIT_FAILURE);
348 #elif defined(BENCHMARK_OS_WINDOWS)
349   SYSTEM_INFO sysinfo;
350   // Use memset as opposed to = {} to avoid GCC missing initializer false
351   // positives.
352   std::memset(&sysinfo, 0, sizeof(SYSTEM_INFO));
353   GetSystemInfo(&sysinfo);
354   return sysinfo.dwNumberOfProcessors;  // number of logical
355                                         // processors in the current
356                                         // group
357 #else
358   int NumCPUs = 0;
359   int MaxID = -1;
360   std::ifstream f("/proc/cpuinfo");
361   if (!f.is_open()) {
362     std::cerr << "failed to open /proc/cpuinfo\n";
363     return -1;
364   }
365   const std::string Key = "processor";
366   std::string ln;
367   while (std::getline(f, ln)) {
368     if (ln.empty()) continue;
369     size_t SplitIdx = ln.find(':');
370     std::string value;
371     if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1);
372     if (ln.size() >= Key.size() && ln.compare(0, Key.size(), Key) == 0) {
373       NumCPUs++;
374       if (!value.empty()) {
375         int CurID = std::stoi(value);
376         MaxID = std::max(CurID, MaxID);
377       }
378     }
379   }
380   if (f.bad()) {
381     std::cerr << "Failure reading /proc/cpuinfo\n";
382     return -1;
383   }
384   if (!f.eof()) {
385     std::cerr << "Failed to read to end of /proc/cpuinfo\n";
386     return -1;
387   }
388   f.close();
389 
390   if ((MaxID + 1) != NumCPUs) {
391     fprintf(stderr,
392             "CPU ID assignments in /proc/cpuinfo seem messed up."
393             " This is usually caused by a bad BIOS.\n");
394   }
395   return NumCPUs;
396 #endif
397   BENCHMARK_UNREACHABLE();
398 }
399 
GetCPUCyclesPerSecond()400 double GetCPUCyclesPerSecond() {
401 #if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN
402   long freq;
403 
404   // If the kernel is exporting the tsc frequency use that. There are issues
405   // where cpuinfo_max_freq cannot be relied on because the BIOS may be
406   // exporintg an invalid p-state (on x86) or p-states may be used to put the
407   // processor in a new mode (turbo mode). Essentially, those frequencies
408   // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as
409   // well.
410   if (ReadFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq)
411       // If CPU scaling is in effect, we want to use the *maximum* frequency,
412       // not whatever CPU speed some random processor happens to be using now.
413       || ReadFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
414                       &freq)) {
415     // The value is in kHz (as the file name suggests).  For example, on a
416     // 2GHz warpstation, the file contains the value "2000000".
417     return freq * 1000.0;
418   }
419 
420   const double error_value = -1;
421   double bogo_clock = error_value;
422 
423   std::ifstream f("/proc/cpuinfo");
424   if (!f.is_open()) {
425     std::cerr << "failed to open /proc/cpuinfo\n";
426     return error_value;
427   }
428 
429   auto startsWithKey = [](std::string const& Value, std::string const& Key) {
430     if (Key.size() > Value.size()) return false;
431     auto Cmp = [&](char X, char Y) {
432       return std::tolower(X) == std::tolower(Y);
433     };
434     return std::equal(Key.begin(), Key.end(), Value.begin(), Cmp);
435   };
436 
437   std::string ln;
438   while (std::getline(f, ln)) {
439     if (ln.empty()) continue;
440     size_t SplitIdx = ln.find(':');
441     std::string value;
442     if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1);
443     // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only
444     // accept postive values. Some environments (virtual machines) report zero,
445     // which would cause infinite looping in WallTime_Init.
446     if (startsWithKey(ln, "cpu MHz")) {
447       if (!value.empty()) {
448         double cycles_per_second = std::stod(value) * 1000000.0;
449         if (cycles_per_second > 0) return cycles_per_second;
450       }
451     } else if (startsWithKey(ln, "bogomips")) {
452       if (!value.empty()) {
453         bogo_clock = std::stod(value) * 1000000.0;
454         if (bogo_clock < 0.0) bogo_clock = error_value;
455       }
456     }
457   }
458   if (f.bad()) {
459     std::cerr << "Failure reading /proc/cpuinfo\n";
460     return error_value;
461   }
462   if (!f.eof()) {
463     std::cerr << "Failed to read to end of /proc/cpuinfo\n";
464     return error_value;
465   }
466   f.close();
467   // If we found the bogomips clock, but nothing better, we'll use it (but
468   // we're not happy about it); otherwise, fallback to the rough estimation
469   // below.
470   if (bogo_clock >= 0.0) return bogo_clock;
471 
472 #elif defined BENCHMARK_HAS_SYSCTL
473   constexpr auto* FreqStr =
474 #if defined(BENCHMARK_OS_FREEBSD) || defined(BENCHMARK_OS_NETBSD)
475       "machdep.tsc_freq";
476 #else
477       "hw.cpufrequency";
478 #endif
479   unsigned long long hz = 0;
480   if (GetSysctl(FreqStr, &hz)) return hz;
481 
482   fprintf(stderr, "Unable to determine clock rate from sysctl: %s: %s\n",
483           FreqStr, strerror(errno));
484 
485 #elif defined BENCHMARK_OS_WINDOWS
486   // In NT, read MHz from the registry. If we fail to do so or we're in win9x
487   // then make a crude estimate.
488   DWORD data, data_size = sizeof(data);
489   if (IsWindowsXPOrGreater() &&
490       SUCCEEDED(
491           SHGetValueA(HKEY_LOCAL_MACHINE,
492                       "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
493                       "~MHz", nullptr, &data, &data_size)))
494     return static_cast<double>((int64_t)data *
495                                (int64_t)(1000 * 1000));  // was mhz
496 #endif
497   // If we've fallen through, attempt to roughly estimate the CPU clock rate.
498   const int estimate_time_ms = 1000;
499   const auto start_ticks = cycleclock::Now();
500   SleepForMilliseconds(estimate_time_ms);
501   return static_cast<double>(cycleclock::Now() - start_ticks);
502 }
503 
504 }  // end namespace
505 
Get()506 const CPUInfo& CPUInfo::Get() {
507   static const CPUInfo* info = new CPUInfo();
508   return *info;
509 }
510 
// Fills in every field by probing the host: CPU count, clock rate, cache
// hierarchy, and whether CPU frequency scaling appears to be enabled.
CPUInfo::CPUInfo()
    : num_cpus(GetNumCPUs()),
      cycles_per_second(GetCPUCyclesPerSecond()),
      caches(GetCacheSizes()),
      // NOTE(review): this reads the num_cpus member, so it relies on
      // num_cpus being declared before scaling_enabled in the class; the
      // declaration is not visible here, confirm against the header.
      scaling_enabled(CpuScalingEnabled(num_cpus)) {}
516 
517 }  // end namespace benchmark
518