#include "ruy/cpuinfo.h"

#include <algorithm>
#include <cstdint>
#include <limits>

#include "ruy/check_macros.h"
#include "ruy/cpu_cache_params.h"
#include "ruy/platform.h"

#ifdef RUY_HAVE_CPUINFO
#include <cpuinfo.h>
#endif

namespace ruy {

namespace {
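// Fallback cache parameters, used when the cpuinfo library is unavailable or
// fails to initialize.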
void MakeDummyCacheParams(CpuCacheParams* result) {
  // Reasonable dummy values
  result->local_cache_size = 32 * 1024;
  result->last_level_cache_size = 512 * 1024;
}
}  // end namespace

#ifdef RUY_HAVE_CPUINFO

CpuInfo::~CpuInfo() {
  if (init_status_ == InitStatus::kInitialized) {
    cpuinfo_deinitialize();
  }
}

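// Lazily initializes the cpuinfo library on the first call; subsequent calls
// just return whether that one-time initialization succeeded.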
bool CpuInfo::EnsureInitialized() {
  if (init_status_ == InitStatus::kNotYetAttempted) {
    init_status_ = Initialize();
    RUY_DCHECK_NE(init_status_, InitStatus::kNotYetAttempted);
  }
  return init_status_ == InitStatus::kInitialized;
}

namespace {
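// Queries local and last-level data cache sizes for every logical processor
// and keeps the minimum of each across processors, so the result holds on
// whichever core the code ends up running on (e.g. big.LITTLE systems).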
void QueryCacheParams(CpuCacheParams* cache_params) {
  const int processors_count = cpuinfo_get_processors_count();
  RUY_DCHECK_GT(processors_count, 0);
  int overall_local_cache_size = std::numeric_limits<int>::max();
  int overall_last_level_cache_size = std::numeric_limits<int>::max();
  for (int i = 0; i < processors_count; i++) {
    int local_cache_size = 0;
    int last_level_cache_size = 0;
    const cpuinfo_processor* processor = cpuinfo_get_processor(i);
    // Loop over cache levels. Ignoring L4 for now: it seems that in CPUs that
    // have L4, we would still prefer to stay in lower-latency L3.
    for (const cpuinfo_cache* cache :
         {processor->cache.l1d, processor->cache.l2, processor->cache.l3}) {
      if (!cache) {
        continue;  // continue, not break: it is possible to have L1+L3 but
                   // no L2.
      }
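      // A cache level is treated as "local" when the first and last logical
      // processors sharing it belong to the same core, i.e. the cache is
      // private to a single core rather than shared across cores.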
      const bool is_local =
          cpuinfo_get_processor(cache->processor_start)->core ==
          cpuinfo_get_processor(cache->processor_start +
                                cache->processor_count - 1)
              ->core;
      if (is_local) {
        local_cache_size = cache->size;
      }
      last_level_cache_size = cache->size;
    }
    // If no local cache was found, use the last-level cache.
    if (!local_cache_size) {
      local_cache_size = last_level_cache_size;
    }
    RUY_DCHECK_GT(local_cache_size, 0);
    RUY_DCHECK_GT(last_level_cache_size, 0);
    RUY_DCHECK_GE(last_level_cache_size, local_cache_size);
    overall_local_cache_size =
        std::min(overall_local_cache_size, local_cache_size);
    overall_last_level_cache_size =
        std::min(overall_last_level_cache_size, last_level_cache_size);
  }
  cache_params->local_cache_size = overall_local_cache_size;
  cache_params->last_level_cache_size = overall_last_level_cache_size;
}
}  // end namespace

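// Attempts to bring up the cpuinfo library. On failure, installs dummy cache
// parameters so that CacheParams() still returns usable values.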
CpuInfo::InitStatus CpuInfo::Initialize() {
  RUY_DCHECK_EQ(init_status_, InitStatus::kNotYetAttempted);
  if (!cpuinfo_initialize()) {
    MakeDummyCacheParams(&cache_params_);
    return InitStatus::kFailed;
  }
  QueryCacheParams(&cache_params_);
  return InitStatus::kInitialized;
}

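// The feature queries below all return false if cpuinfo failed to initialize.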
bool CpuInfo::NeonDotprod() {
  return EnsureInitialized() && cpuinfo_has_arm_neon_dot();
}

bool CpuInfo::Sse42() {
  return EnsureInitialized() && cpuinfo_has_x86_sse4_2();
}

bool CpuInfo::Avx2Fma() {
  return EnsureInitialized() && cpuinfo_has_x86_avx2() &&
         cpuinfo_has_x86_fma3();
}

bool CpuInfo::Avx() { return EnsureInitialized() && cpuinfo_has_x86_avx(); }

bool CpuInfo::Avx512() {
  return EnsureInitialized() && cpuinfo_has_x86_avx512f() &&
         cpuinfo_has_x86_avx512dq() && cpuinfo_has_x86_avx512cd() &&
         cpuinfo_has_x86_avx512bw() && cpuinfo_has_x86_avx512vl();
}

bool CpuInfo::AvxVnni() {
  return EnsureInitialized() && cpuinfo_has_x86_avx512vnni();
}

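// Returns true when the core we are currently running on is one of the
// in-order ARM cores (Cortex-A53, Cortex-A55r0, Cortex-A55) that this class
// treats as "A55-ish".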
bool CpuInfo::CurrentCpuIsA55ish() {
  if (!EnsureInitialized()) {
    return false;
  }

  switch (cpuinfo_get_uarch(cpuinfo_get_current_uarch_index())->uarch) {
    case cpuinfo_uarch_cortex_a53:
    case cpuinfo_uarch_cortex_a55r0:
    case cpuinfo_uarch_cortex_a55:
      return true;
    default:
      return false;
  }
}

#else  // not defined RUY_HAVE_CPUINFO

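// Fallback implementation for builds without the cpuinfo library: report no
// optional CPU features and rely on the dummy cache parameters.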
CpuInfo::~CpuInfo() {}
bool CpuInfo::EnsureInitialized() {
  if (init_status_ == InitStatus::kNotYetAttempted) {
    MakeDummyCacheParams(&cache_params_);
    init_status_ = InitStatus::kInitialized;
  }
  RUY_DCHECK_EQ(init_status_, InitStatus::kInitialized);
  return true;
}
bool CpuInfo::NeonDotprod() { return false; }
bool CpuInfo::Sse42() { return false; }
bool CpuInfo::Avx() { return false; }
bool CpuInfo::Avx2Fma() { return false; }
bool CpuInfo::Avx512() { return false; }
bool CpuInfo::AvxVnni() { return false; }
bool CpuInfo::CurrentCpuIsA55ish() { return false; }

#endif

const CpuCacheParams& CpuInfo::CacheParams() {
  EnsureInitialized();
  // On failure, EnsureInitialized leaves dummy values in cache_params_.
  return cache_params_;
}

}  // namespace ruy