1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/core/platform/cpu_info.h"
17
18 #include "absl/base/call_once.h"
19 #include "tensorflow/core/platform/logging.h"
20 #include "tensorflow/core/platform/platform.h"
21 #include "tensorflow/core/platform/types.h"
22 #if defined(PLATFORM_IS_X86)
23 #include <mutex> // NOLINT
24 #endif
25
26 // SIMD extension querying is only available on x86.
27 #ifdef PLATFORM_IS_X86
28 #ifdef PLATFORM_WINDOWS
// Visual Studio defines a builtin function for CPUID, so use that if possible.
//
// GETCPUID(a, b, c, d, a_inp, c_inp) calls the __cpuidex intrinsic with a_inp
// and c_inp as its two inputs and then copies the four result words into a, b,
// c and d, in that order.
//
// The expansion is a braced block that declares a local array named
// `cpu_info`, so none of the macro arguments may be an expression that refers
// to a caller variable with that name.
#define GETCPUID(a, b, c, d, a_inp, c_inp) \
  {                                        \
    int cpu_info[4] = {-1};                \
    __cpuidex(cpu_info, a_inp, c_inp);     \
    a = cpu_info[0];                       \
    b = cpu_info[1];                       \
    c = cpu_info[2];                       \
    d = cpu_info[3];                       \
  }
39 #else
// Otherwise use gcc-format assembler to implement the underlying instructions.
//
// GETCPUID(a, b, c, d, a_inp, c_inp) executes CPUID with EAX = a_inp and
// ECX = c_inp (the "2" constraint ties c_inp to output operand 2, which is the
// ECX output) and stores the resulting EAX, EBX, ECX and EDX into a, b, c and
// d respectively.
//
// RBX is copied into RDI before CPUID and the two are exchanged afterwards, so
// RBX holds its original value once the sequence finishes and the EBX result
// is delivered through RDI (the "=D" output operand).
// NOTE(review): presumably RBX is preserved because the compiler may reserve
// it (e.g. as the PIC base register) -- confirm.
// The sequence names the 64-bit registers explicitly, so it is written for
// x86-64 targets.
#define GETCPUID(a, b, c, d, a_inp, c_inp) \
  asm("mov %%rbx, %%rdi\n"                 \
      "cpuid\n"                            \
      "xchg %%rdi, %%rbx\n"                \
      : "=a"(a), "=D"(b), "=c"(c), "=d"(d) \
      : "a"(a_inp), "2"(c_inp))
47 #endif
48 #endif
49
50 namespace tensorflow {
51 namespace port {
52 namespace {
53
54 #ifdef PLATFORM_IS_X86
// Forward declarations: CPUIDInfo::TestFeature() calls InitCPUIDInfo(), which
// is defined after the class, and the `cpuid` pointer below is declared before
// the class definition.
class CPUIDInfo;
void InitCPUIDInfo();

// Process-wide CPUID record. Starts out null and is assigned a heap-allocated
// CPUIDInfo by CPUIDInfo::Initialize(); nothing in this file deletes it.
CPUIDInfo *cpuid = nullptr;
59
// Returns the low 32 bits of extended control register 0 (XCR0), which the
// callers in this file test for the OS-enabled register-state bits.
#ifdef PLATFORM_WINDOWS
// Visual Studio provides an intrinsic for XGETBV; only the low half is kept.
int GetXCR0EAX() {
  const auto xcr0 = _xgetbv(0);
  return static_cast<int>(xcr0);
}
#else
int GetXCR0EAX() {
  int xcr0_low;
  // ECX = 0 selects XCR0. XGETBV also writes EDX (the high half), which is not
  // needed here, so it is listed as a clobber rather than captured.
  asm("XGETBV" : "=a"(xcr0_low) : "c"(0) : "edx");
  return xcr0_low;
}
#endif
70
71 // Structure for basic CPUID info
72 class CPUIDInfo {
73 public:
CPUIDInfo()74 CPUIDInfo()
75 : have_adx_(0),
76 have_aes_(0),
77 have_avx_(0),
78 have_avx2_(0),
79 have_avx512f_(0),
80 have_avx512cd_(0),
81 have_avx512er_(0),
82 have_avx512pf_(0),
83 have_avx512vl_(0),
84 have_avx512bw_(0),
85 have_avx512dq_(0),
86 have_avx512vbmi_(0),
87 have_avx512ifma_(0),
88 have_avx512_4vnniw_(0),
89 have_avx512_4fmaps_(0),
90 have_bmi1_(0),
91 have_bmi2_(0),
92 have_cmov_(0),
93 have_cmpxchg16b_(0),
94 have_cmpxchg8b_(0),
95 have_f16c_(0),
96 have_fma_(0),
97 have_mmx_(0),
98 have_pclmulqdq_(0),
99 have_popcnt_(0),
100 have_prefetchw_(0),
101 have_prefetchwt1_(0),
102 have_rdrand_(0),
103 have_rdseed_(0),
104 have_smap_(0),
105 have_sse_(0),
106 have_sse2_(0),
107 have_sse3_(0),
108 have_sse4_1_(0),
109 have_sse4_2_(0),
110 have_ssse3_(0),
111 have_hypervisor_(0) {}
112
Initialize()113 static void Initialize() {
114 // Initialize cpuid struct
115 CHECK(cpuid == nullptr) << __func__ << " ran more than once";
116 cpuid = new CPUIDInfo;
117
118 uint32 eax, ebx, ecx, edx;
119
120 // Get vendor string (issue CPUID with eax = 0)
121 GETCPUID(eax, ebx, ecx, edx, 0, 0);
122 cpuid->vendor_str_.append(reinterpret_cast<char *>(&ebx), 4);
123 cpuid->vendor_str_.append(reinterpret_cast<char *>(&edx), 4);
124 cpuid->vendor_str_.append(reinterpret_cast<char *>(&ecx), 4);
125
126 // To get general information and extended features we send eax = 1 and
127 // ecx = 0 to cpuid. The response is returned in eax, ebx, ecx and edx.
128 // (See Intel 64 and IA-32 Architectures Software Developer's Manual
129 // Volume 2A: Instruction Set Reference, A-M CPUID).
130 GETCPUID(eax, ebx, ecx, edx, 1, 0);
131
132 cpuid->model_num_ = static_cast<int>((eax >> 4) & 0xf);
133 cpuid->family_ = static_cast<int>((eax >> 8) & 0xf);
134
135 cpuid->have_aes_ = (ecx >> 25) & 0x1;
136 cpuid->have_cmov_ = (edx >> 15) & 0x1;
137 cpuid->have_cmpxchg16b_ = (ecx >> 13) & 0x1;
138 cpuid->have_cmpxchg8b_ = (edx >> 8) & 0x1;
139 cpuid->have_mmx_ = (edx >> 23) & 0x1;
140 cpuid->have_pclmulqdq_ = (ecx >> 1) & 0x1;
141 cpuid->have_popcnt_ = (ecx >> 23) & 0x1;
142 cpuid->have_rdrand_ = (ecx >> 30) & 0x1;
143 cpuid->have_sse2_ = (edx >> 26) & 0x1;
144 cpuid->have_sse3_ = ecx & 0x1;
145 cpuid->have_sse4_1_ = (ecx >> 19) & 0x1;
146 cpuid->have_sse4_2_ = (ecx >> 20) & 0x1;
147 cpuid->have_sse_ = (edx >> 25) & 0x1;
148 cpuid->have_ssse3_ = (ecx >> 9) & 0x1;
149 cpuid->have_hypervisor_ = (ecx >> 31) & 1;
150
151 const uint64 xcr0_xmm_mask = 0x2;
152 const uint64 xcr0_ymm_mask = 0x4;
153 const uint64 xcr0_maskreg_mask = 0x20;
154 const uint64 xcr0_zmm0_15_mask = 0x40;
155 const uint64 xcr0_zmm16_31_mask = 0x80;
156
157 const uint64 xcr0_avx_mask = xcr0_xmm_mask | xcr0_ymm_mask;
158 const uint64 xcr0_avx512_mask = xcr0_avx_mask | xcr0_maskreg_mask |
159 xcr0_zmm0_15_mask | xcr0_zmm16_31_mask;
160
161 const bool have_avx =
162 // Does the OS support XGETBV instruction use by applications?
163 ((ecx >> 27) & 0x1) &&
164 // Does the OS save/restore XMM and YMM state?
165 ((GetXCR0EAX() & xcr0_avx_mask) == xcr0_avx_mask) &&
166 // Is AVX supported in hardware?
167 ((ecx >> 28) & 0x1);
168
169 const bool have_avx512 =
170 // Does the OS support XGETBV instruction use by applications?
171 ((ecx >> 27) & 0x1) &&
172 // Does the OS save/restore ZMM state?
173 ((GetXCR0EAX() & xcr0_avx512_mask) == xcr0_avx512_mask);
174
175 cpuid->have_avx_ = have_avx;
176 cpuid->have_fma_ = have_avx && ((ecx >> 12) & 0x1);
177 cpuid->have_f16c_ = have_avx && ((ecx >> 29) & 0x1);
178
179 // Get standard level 7 structured extension features (issue CPUID with
180 // eax = 7 and ecx= 0), which is required to check for AVX2 support as
181 // well as other Haswell (and beyond) features. (See Intel 64 and IA-32
182 // Architectures Software Developer's Manual Volume 2A: Instruction Set
183 // Reference, A-M CPUID).
184 GETCPUID(eax, ebx, ecx, edx, 7, 0);
185
186 cpuid->have_adx_ = (ebx >> 19) & 0x1;
187 cpuid->have_avx2_ = have_avx && ((ebx >> 5) & 0x1);
188 cpuid->have_bmi1_ = (ebx >> 3) & 0x1;
189 cpuid->have_bmi2_ = (ebx >> 8) & 0x1;
190 cpuid->have_prefetchwt1_ = ecx & 0x1;
191 cpuid->have_rdseed_ = (ebx >> 18) & 0x1;
192 cpuid->have_smap_ = (ebx >> 20) & 0x1;
193
194 cpuid->have_avx512f_ = have_avx512 && ((ebx >> 16) & 0x1);
195 cpuid->have_avx512cd_ = have_avx512 && ((ebx >> 28) & 0x1);
196 cpuid->have_avx512er_ = have_avx512 && ((ebx >> 27) & 0x1);
197 cpuid->have_avx512pf_ = have_avx512 && ((ebx >> 26) & 0x1);
198 cpuid->have_avx512vl_ = have_avx512 && ((ebx >> 31) & 0x1);
199 cpuid->have_avx512bw_ = have_avx512 && ((ebx >> 30) & 0x1);
200 cpuid->have_avx512dq_ = have_avx512 && ((ebx >> 17) & 0x1);
201 cpuid->have_avx512vbmi_ = have_avx512 && ((ecx >> 1) & 0x1);
202 cpuid->have_avx512ifma_ = have_avx512 && ((ebx >> 21) & 0x1);
203 cpuid->have_avx512_4vnniw_ = have_avx512 && ((edx >> 2) & 0x1);
204 cpuid->have_avx512_4fmaps_ = have_avx512 && ((edx >> 3) & 0x1);
205 }
206
TestFeature(CPUFeature feature)207 static bool TestFeature(CPUFeature feature) {
208 InitCPUIDInfo();
209 // clang-format off
210 switch (feature) {
211 case ADX: return cpuid->have_adx_;
212 case AES: return cpuid->have_aes_;
213 case AVX2: return cpuid->have_avx2_;
214 case AVX: return cpuid->have_avx_;
215 case AVX512F: return cpuid->have_avx512f_;
216 case AVX512CD: return cpuid->have_avx512cd_;
217 case AVX512PF: return cpuid->have_avx512pf_;
218 case AVX512ER: return cpuid->have_avx512er_;
219 case AVX512VL: return cpuid->have_avx512vl_;
220 case AVX512BW: return cpuid->have_avx512bw_;
221 case AVX512DQ: return cpuid->have_avx512dq_;
222 case AVX512VBMI: return cpuid->have_avx512vbmi_;
223 case AVX512IFMA: return cpuid->have_avx512ifma_;
224 case AVX512_4VNNIW: return cpuid->have_avx512_4vnniw_;
225 case AVX512_4FMAPS: return cpuid->have_avx512_4fmaps_;
226 case BMI1: return cpuid->have_bmi1_;
227 case BMI2: return cpuid->have_bmi2_;
228 case CMOV: return cpuid->have_cmov_;
229 case CMPXCHG16B: return cpuid->have_cmpxchg16b_;
230 case CMPXCHG8B: return cpuid->have_cmpxchg8b_;
231 case F16C: return cpuid->have_f16c_;
232 case FMA: return cpuid->have_fma_;
233 case MMX: return cpuid->have_mmx_;
234 case PCLMULQDQ: return cpuid->have_pclmulqdq_;
235 case POPCNT: return cpuid->have_popcnt_;
236 case PREFETCHW: return cpuid->have_prefetchw_;
237 case PREFETCHWT1: return cpuid->have_prefetchwt1_;
238 case RDRAND: return cpuid->have_rdrand_;
239 case RDSEED: return cpuid->have_rdseed_;
240 case SMAP: return cpuid->have_smap_;
241 case SSE2: return cpuid->have_sse2_;
242 case SSE3: return cpuid->have_sse3_;
243 case SSE4_1: return cpuid->have_sse4_1_;
244 case SSE4_2: return cpuid->have_sse4_2_;
245 case SSE: return cpuid->have_sse_;
246 case SSSE3: return cpuid->have_ssse3_;
247 case HYPERVISOR: return cpuid->have_hypervisor_;
248 default:
249 break;
250 }
251 // clang-format on
252 return false;
253 }
254
vendor_str() const255 string vendor_str() const { return vendor_str_; }
family() const256 int family() const { return family_; }
model_num()257 int model_num() { return model_num_; }
258
259 private:
260 int have_adx_ : 1;
261 int have_aes_ : 1;
262 int have_avx_ : 1;
263 int have_avx2_ : 1;
264 int have_avx512f_ : 1;
265 int have_avx512cd_ : 1;
266 int have_avx512er_ : 1;
267 int have_avx512pf_ : 1;
268 int have_avx512vl_ : 1;
269 int have_avx512bw_ : 1;
270 int have_avx512dq_ : 1;
271 int have_avx512vbmi_ : 1;
272 int have_avx512ifma_ : 1;
273 int have_avx512_4vnniw_ : 1;
274 int have_avx512_4fmaps_ : 1;
275 int have_bmi1_ : 1;
276 int have_bmi2_ : 1;
277 int have_cmov_ : 1;
278 int have_cmpxchg16b_ : 1;
279 int have_cmpxchg8b_ : 1;
280 int have_f16c_ : 1;
281 int have_fma_ : 1;
282 int have_mmx_ : 1;
283 int have_pclmulqdq_ : 1;
284 int have_popcnt_ : 1;
285 int have_prefetchw_ : 1;
286 int have_prefetchwt1_ : 1;
287 int have_rdrand_ : 1;
288 int have_rdseed_ : 1;
289 int have_smap_ : 1;
290 int have_sse_ : 1;
291 int have_sse2_ : 1;
292 int have_sse3_ : 1;
293 int have_sse4_1_ : 1;
294 int have_sse4_2_ : 1;
295 int have_ssse3_ : 1;
296 int have_hypervisor_ : 1;
297 string vendor_str_;
298 int family_;
299 int model_num_;
300 };
301
302 absl::once_flag cpuid_once_flag;
303
InitCPUIDInfo()304 void InitCPUIDInfo() {
305 // This ensures that CPUIDInfo::Initialize() is called exactly
306 // once regardless of how many threads concurrently call us
307 absl::call_once(cpuid_once_flag, CPUIDInfo::Initialize);
308 }
309
310 #endif // PLATFORM_IS_X86
311
312 } // namespace
313
TestCPUFeature(CPUFeature feature)314 bool TestCPUFeature(CPUFeature feature) {
315 #ifdef PLATFORM_IS_X86
316 return CPUIDInfo::TestFeature(feature);
317 #else
318 return false;
319 #endif
320 }
321
// Returns the CPU vendor identification string, or an empty string on builds
// that do not define PLATFORM_IS_X86.
std::string CPUVendorIDString() {
#ifdef PLATFORM_IS_X86
  InitCPUIDInfo();
  std::string vendor = cpuid->vendor_str();
  return vendor;
#else
  return std::string();
#endif
}
330
// Returns the CPU family number recorded from CPUID, or 0 on builds that do
// not define PLATFORM_IS_X86.
int CPUFamily() {
  int family = 0;
#ifdef PLATFORM_IS_X86
  InitCPUIDInfo();
  family = cpuid->family();
#endif
  return family;
}
339
// Returns the CPU model number recorded from CPUID, or 0 on builds that do
// not define PLATFORM_IS_X86.
int CPUModelNum() {
  int model = 0;
#ifdef PLATFORM_IS_X86
  InitCPUIDInfo();
  model = cpuid->model_num();
#endif
  return model;
}
348
// Returns 2 ^ SMT_Mask_Width as enumerated by CPUID leaf 11, or 0 when that
// information is not available (leaf 11 missing or not describing the SMT
// level, or a build that does not define PLATFORM_IS_X86).
//
// References:
// https://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration
// https://software.intel.com/en-us/articles/intel-sdm (Vol 3A)
// Section: Detecting Hardware Multi-threads Support and Topology
int CPUIDNumSMT() {
#ifdef PLATFORM_IS_X86
  uint32 reg_a, reg_b, reg_c, reg_d;

  // Leaf 0 reports the highest basic leaf in EAX; leaf 11 must be in range.
  GETCPUID(reg_a, reg_b, reg_c, reg_d, 0, 0);
  const bool leaf_11_in_range = reg_a >= 11;
  if (leaf_11_in_range) {
    GETCPUID(reg_a, reg_b, reg_c, reg_d, 11, 0);
    // EBX != 0 means the leaf is populated; ECX[15:8] == 1 means sub-leaf 0
    // describes the SMT level.
    const uint32 level_type = (reg_c >> 8) & 0xff;
    const bool describes_smt = reg_b != 0 && level_type == 1;
    if (describes_smt) {
      // EAX[4:0] is SMT_Mask_Width.
      const uint32 smt_mask_width = reg_a & 0x1f;
      return 1 << smt_mask_width;
    }
  }
#endif  // PLATFORM_IS_X86
  return 0;
}
371
372 } // namespace port
373 } // namespace tensorflow
374