• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/core/platform/cpu_info.h"
17 
18 #include "absl/base/call_once.h"
19 #include "tensorflow/core/platform/logging.h"
20 #include "tensorflow/core/platform/platform.h"
21 #include "tensorflow/core/platform/types.h"
22 #if defined(PLATFORM_IS_X86)
23 #include <mutex>  // NOLINT
24 #endif
25 
26 // SIMD extension querying is only available on x86.
27 #ifdef PLATFORM_IS_X86
// GETCPUID(a, b, c, d, a_inp, c_inp)
//
// Executes the CPUID instruction with EAX = a_inp and ECX = c_inp and stores
// the resulting EAX, EBX, ECX and EDX values into the lvalues a, b, c and d.
// Intended to be used as a statement: `GETCPUID(...);`.
#ifdef PLATFORM_WINDOWS
// Visual Studio defines a builtin function for CPUID, so use that if possible.
// The body is wrapped in do { } while (0) so the macro expands to exactly one
// statement and stays safe inside unbraced if/else.
#define GETCPUID(a, b, c, d, a_inp, c_inp) \
  do {                                     \
    int cpu_info[4] = {-1};                \
    __cpuidex(cpu_info, a_inp, c_inp);     \
    a = cpu_info[0];                       \
    b = cpu_info[1];                       \
    c = cpu_info[2];                       \
    d = cpu_info[3];                       \
  } while (0)
#else
// Otherwise use gcc-format assembler to implement the underlying instructions.
// RBX is copied into RDI before CPUID and swapped back afterwards, so RBX
// holds its original value when the asm ends and the EBX result is delivered
// through RDI (the "=D" output).  The "2" input constraint places c_inp in
// the same register as output operand 2, i.e. ECX.
#define GETCPUID(a, b, c, d, a_inp, c_inp) \
  asm("mov %%rbx, %%rdi\n"                 \
      "cpuid\n"                            \
      "xchg %%rdi, %%rbx\n"                \
      : "=a"(a), "=D"(b), "=c"(c), "=d"(d) \
      : "a"(a_inp), "2"(c_inp))
#endif
48 #endif
49 
50 namespace tensorflow {
51 namespace port {
52 namespace {
53 
54 #ifdef PLATFORM_IS_X86
// Forward declarations: CPUIDInfo::TestFeature() calls InitCPUIDInfo(), which
// is only defined after the class body.
class CPUIDInfo;
void InitCPUIDInfo();

// File-local CPUID snapshot.  Stays null until CPUIDInfo::Initialize()
// allocates it; readers call InitCPUIDInfo() before dereferencing it.
CPUIDInfo *cpuid = nullptr;
59 
// Returns the low 32 bits of extended control register XCR0 (XGETBV with
// ECX = 0).  Callers in this file only invoke it after confirming the OSXSAVE
// bit from CPUID leaf 1.
#ifdef PLATFORM_WINDOWS
// Visual Studio defines a builtin function, so use that if possible.
int GetXCR0EAX() {
  const int xcr0_low = _xgetbv(0);
  return xcr0_low;
}
#else
int GetXCR0EAX() {
  // XGETBV writes its result to EDX:EAX; only the EAX half is returned.
  int xcr0_low, xcr0_high;
  asm("XGETBV" : "=a"(xcr0_low), "=d"(xcr0_high) : "c"(0));
  return xcr0_low;
}
#endif
70 
71 // Structure for basic CPUID info
72 class CPUIDInfo {
73  public:
CPUIDInfo()74   CPUIDInfo()
75       : have_adx_(0),
76         have_aes_(0),
77         have_avx_(0),
78         have_avx2_(0),
79         have_avx512f_(0),
80         have_avx512cd_(0),
81         have_avx512er_(0),
82         have_avx512pf_(0),
83         have_avx512vl_(0),
84         have_avx512bw_(0),
85         have_avx512dq_(0),
86         have_avx512vbmi_(0),
87         have_avx512ifma_(0),
88         have_avx512_4vnniw_(0),
89         have_avx512_4fmaps_(0),
90         have_bmi1_(0),
91         have_bmi2_(0),
92         have_cmov_(0),
93         have_cmpxchg16b_(0),
94         have_cmpxchg8b_(0),
95         have_f16c_(0),
96         have_fma_(0),
97         have_mmx_(0),
98         have_pclmulqdq_(0),
99         have_popcnt_(0),
100         have_prefetchw_(0),
101         have_prefetchwt1_(0),
102         have_rdrand_(0),
103         have_rdseed_(0),
104         have_smap_(0),
105         have_sse_(0),
106         have_sse2_(0),
107         have_sse3_(0),
108         have_sse4_1_(0),
109         have_sse4_2_(0),
110         have_ssse3_(0),
111         have_hypervisor_(0) {}
112 
Initialize()113   static void Initialize() {
114     // Initialize cpuid struct
115     CHECK(cpuid == nullptr) << __func__ << " ran more than once";
116     cpuid = new CPUIDInfo;
117 
118     uint32 eax, ebx, ecx, edx;
119 
120     // Get vendor string (issue CPUID with eax = 0)
121     GETCPUID(eax, ebx, ecx, edx, 0, 0);
122     cpuid->vendor_str_.append(reinterpret_cast<char *>(&ebx), 4);
123     cpuid->vendor_str_.append(reinterpret_cast<char *>(&edx), 4);
124     cpuid->vendor_str_.append(reinterpret_cast<char *>(&ecx), 4);
125 
126     // To get general information and extended features we send eax = 1 and
127     // ecx = 0 to cpuid.  The response is returned in eax, ebx, ecx and edx.
128     // (See Intel 64 and IA-32 Architectures Software Developer's Manual
129     // Volume 2A: Instruction Set Reference, A-M CPUID).
130     GETCPUID(eax, ebx, ecx, edx, 1, 0);
131 
132     cpuid->model_num_ = static_cast<int>((eax >> 4) & 0xf);
133     cpuid->family_ = static_cast<int>((eax >> 8) & 0xf);
134 
135     cpuid->have_aes_ = (ecx >> 25) & 0x1;
136     cpuid->have_cmov_ = (edx >> 15) & 0x1;
137     cpuid->have_cmpxchg16b_ = (ecx >> 13) & 0x1;
138     cpuid->have_cmpxchg8b_ = (edx >> 8) & 0x1;
139     cpuid->have_mmx_ = (edx >> 23) & 0x1;
140     cpuid->have_pclmulqdq_ = (ecx >> 1) & 0x1;
141     cpuid->have_popcnt_ = (ecx >> 23) & 0x1;
142     cpuid->have_rdrand_ = (ecx >> 30) & 0x1;
143     cpuid->have_sse2_ = (edx >> 26) & 0x1;
144     cpuid->have_sse3_ = ecx & 0x1;
145     cpuid->have_sse4_1_ = (ecx >> 19) & 0x1;
146     cpuid->have_sse4_2_ = (ecx >> 20) & 0x1;
147     cpuid->have_sse_ = (edx >> 25) & 0x1;
148     cpuid->have_ssse3_ = (ecx >> 9) & 0x1;
149     cpuid->have_hypervisor_ = (ecx >> 31) & 1;
150 
151     const uint64 xcr0_xmm_mask = 0x2;
152     const uint64 xcr0_ymm_mask = 0x4;
153     const uint64 xcr0_maskreg_mask = 0x20;
154     const uint64 xcr0_zmm0_15_mask = 0x40;
155     const uint64 xcr0_zmm16_31_mask = 0x80;
156 
157     const uint64 xcr0_avx_mask = xcr0_xmm_mask | xcr0_ymm_mask;
158     const uint64 xcr0_avx512_mask = xcr0_avx_mask | xcr0_maskreg_mask |
159                                     xcr0_zmm0_15_mask | xcr0_zmm16_31_mask;
160 
161     const bool have_avx =
162         // Does the OS support XGETBV instruction use by applications?
163         ((ecx >> 27) & 0x1) &&
164         // Does the OS save/restore XMM and YMM state?
165         ((GetXCR0EAX() & xcr0_avx_mask) == xcr0_avx_mask) &&
166         // Is AVX supported in hardware?
167         ((ecx >> 28) & 0x1);
168 
169     const bool have_avx512 =
170         // Does the OS support XGETBV instruction use by applications?
171         ((ecx >> 27) & 0x1) &&
172         // Does the OS save/restore ZMM state?
173         ((GetXCR0EAX() & xcr0_avx512_mask) == xcr0_avx512_mask);
174 
175     cpuid->have_avx_ = have_avx;
176     cpuid->have_fma_ = have_avx && ((ecx >> 12) & 0x1);
177     cpuid->have_f16c_ = have_avx && ((ecx >> 29) & 0x1);
178 
179     // Get standard level 7 structured extension features (issue CPUID with
180     // eax = 7 and ecx= 0), which is required to check for AVX2 support as
181     // well as other Haswell (and beyond) features.  (See Intel 64 and IA-32
182     // Architectures Software Developer's Manual Volume 2A: Instruction Set
183     // Reference, A-M CPUID).
184     GETCPUID(eax, ebx, ecx, edx, 7, 0);
185 
186     cpuid->have_adx_ = (ebx >> 19) & 0x1;
187     cpuid->have_avx2_ = have_avx && ((ebx >> 5) & 0x1);
188     cpuid->have_bmi1_ = (ebx >> 3) & 0x1;
189     cpuid->have_bmi2_ = (ebx >> 8) & 0x1;
190     cpuid->have_prefetchwt1_ = ecx & 0x1;
191     cpuid->have_rdseed_ = (ebx >> 18) & 0x1;
192     cpuid->have_smap_ = (ebx >> 20) & 0x1;
193 
194     cpuid->have_avx512f_ = have_avx512 && ((ebx >> 16) & 0x1);
195     cpuid->have_avx512cd_ = have_avx512 && ((ebx >> 28) & 0x1);
196     cpuid->have_avx512er_ = have_avx512 && ((ebx >> 27) & 0x1);
197     cpuid->have_avx512pf_ = have_avx512 && ((ebx >> 26) & 0x1);
198     cpuid->have_avx512vl_ = have_avx512 && ((ebx >> 31) & 0x1);
199     cpuid->have_avx512bw_ = have_avx512 && ((ebx >> 30) & 0x1);
200     cpuid->have_avx512dq_ = have_avx512 && ((ebx >> 17) & 0x1);
201     cpuid->have_avx512vbmi_ = have_avx512 && ((ecx >> 1) & 0x1);
202     cpuid->have_avx512ifma_ = have_avx512 && ((ebx >> 21) & 0x1);
203     cpuid->have_avx512_4vnniw_ = have_avx512 && ((edx >> 2) & 0x1);
204     cpuid->have_avx512_4fmaps_ = have_avx512 && ((edx >> 3) & 0x1);
205   }
206 
TestFeature(CPUFeature feature)207   static bool TestFeature(CPUFeature feature) {
208     InitCPUIDInfo();
209     // clang-format off
210     switch (feature) {
211       case ADX:           return cpuid->have_adx_;
212       case AES:           return cpuid->have_aes_;
213       case AVX2:          return cpuid->have_avx2_;
214       case AVX:           return cpuid->have_avx_;
215       case AVX512F:       return cpuid->have_avx512f_;
216       case AVX512CD:      return cpuid->have_avx512cd_;
217       case AVX512PF:      return cpuid->have_avx512pf_;
218       case AVX512ER:      return cpuid->have_avx512er_;
219       case AVX512VL:      return cpuid->have_avx512vl_;
220       case AVX512BW:      return cpuid->have_avx512bw_;
221       case AVX512DQ:      return cpuid->have_avx512dq_;
222       case AVX512VBMI:    return cpuid->have_avx512vbmi_;
223       case AVX512IFMA:    return cpuid->have_avx512ifma_;
224       case AVX512_4VNNIW: return cpuid->have_avx512_4vnniw_;
225       case AVX512_4FMAPS: return cpuid->have_avx512_4fmaps_;
226       case BMI1:          return cpuid->have_bmi1_;
227       case BMI2:          return cpuid->have_bmi2_;
228       case CMOV:          return cpuid->have_cmov_;
229       case CMPXCHG16B:    return cpuid->have_cmpxchg16b_;
230       case CMPXCHG8B:     return cpuid->have_cmpxchg8b_;
231       case F16C:          return cpuid->have_f16c_;
232       case FMA:           return cpuid->have_fma_;
233       case MMX:           return cpuid->have_mmx_;
234       case PCLMULQDQ:     return cpuid->have_pclmulqdq_;
235       case POPCNT:        return cpuid->have_popcnt_;
236       case PREFETCHW:     return cpuid->have_prefetchw_;
237       case PREFETCHWT1:   return cpuid->have_prefetchwt1_;
238       case RDRAND:        return cpuid->have_rdrand_;
239       case RDSEED:        return cpuid->have_rdseed_;
240       case SMAP:          return cpuid->have_smap_;
241       case SSE2:          return cpuid->have_sse2_;
242       case SSE3:          return cpuid->have_sse3_;
243       case SSE4_1:        return cpuid->have_sse4_1_;
244       case SSE4_2:        return cpuid->have_sse4_2_;
245       case SSE:           return cpuid->have_sse_;
246       case SSSE3:         return cpuid->have_ssse3_;
247       case HYPERVISOR:    return cpuid->have_hypervisor_;
248       default:
249         break;
250     }
251     // clang-format on
252     return false;
253   }
254 
vendor_str() const255   string vendor_str() const { return vendor_str_; }
family() const256   int family() const { return family_; }
model_num()257   int model_num() { return model_num_; }
258 
259  private:
260   int have_adx_ : 1;
261   int have_aes_ : 1;
262   int have_avx_ : 1;
263   int have_avx2_ : 1;
264   int have_avx512f_ : 1;
265   int have_avx512cd_ : 1;
266   int have_avx512er_ : 1;
267   int have_avx512pf_ : 1;
268   int have_avx512vl_ : 1;
269   int have_avx512bw_ : 1;
270   int have_avx512dq_ : 1;
271   int have_avx512vbmi_ : 1;
272   int have_avx512ifma_ : 1;
273   int have_avx512_4vnniw_ : 1;
274   int have_avx512_4fmaps_ : 1;
275   int have_bmi1_ : 1;
276   int have_bmi2_ : 1;
277   int have_cmov_ : 1;
278   int have_cmpxchg16b_ : 1;
279   int have_cmpxchg8b_ : 1;
280   int have_f16c_ : 1;
281   int have_fma_ : 1;
282   int have_mmx_ : 1;
283   int have_pclmulqdq_ : 1;
284   int have_popcnt_ : 1;
285   int have_prefetchw_ : 1;
286   int have_prefetchwt1_ : 1;
287   int have_rdrand_ : 1;
288   int have_rdseed_ : 1;
289   int have_smap_ : 1;
290   int have_sse_ : 1;
291   int have_sse2_ : 1;
292   int have_sse3_ : 1;
293   int have_sse4_1_ : 1;
294   int have_sse4_2_ : 1;
295   int have_ssse3_ : 1;
296   int have_hypervisor_ : 1;
297   string vendor_str_;
298   int family_;
299   int model_num_;
300 };
301 
302 absl::once_flag cpuid_once_flag;
303 
InitCPUIDInfo()304 void InitCPUIDInfo() {
305   // This ensures that CPUIDInfo::Initialize() is called exactly
306   // once regardless of how many threads concurrently call us
307   absl::call_once(cpuid_once_flag, CPUIDInfo::Initialize);
308 }
309 
310 #endif  // PLATFORM_IS_X86
311 
312 }  // namespace
313 
TestCPUFeature(CPUFeature feature)314 bool TestCPUFeature(CPUFeature feature) {
315 #ifdef PLATFORM_IS_X86
316   return CPUIDInfo::TestFeature(feature);
317 #else
318   return false;
319 #endif
320 }
321 
// Returns the CPUID vendor identification string on x86 and an empty string
// on every other platform.
std::string CPUVendorIDString() {
#if defined(PLATFORM_IS_X86)
  // Make sure the CPUID snapshot exists before reading from it.
  InitCPUIDInfo();
  const CPUIDInfo &info = *cpuid;
  return info.vendor_str();
#else
  return "";
#endif
}
330 
// Returns the CPU family recorded from CPUID on x86, or 0 on every other
// platform.
int CPUFamily() {
#if defined(PLATFORM_IS_X86)
  // Make sure the CPUID snapshot exists before reading from it.
  InitCPUIDInfo();
  const int family_id = cpuid->family();
  return family_id;
#else
  return 0;
#endif
}
339 
// Returns the CPU model number recorded from CPUID on x86, or 0 on every
// other platform.
int CPUModelNum() {
#if defined(PLATFORM_IS_X86)
  // Make sure the CPUID snapshot exists before reading from it.
  InitCPUIDInfo();
  const int model_id = cpuid->model_num();
  return model_id;
#else
  return 0;
#endif
}
348 
// Returns 2 ^ SMT_Mask_Width as enumerated by CPUID leaf 11, or 0 when that
// information is unavailable (non-x86 build, leaf 11 not implemented, or the
// first sub-leaf does not describe the SMT level).
int CPUIDNumSMT() {
#ifdef PLATFORM_IS_X86
  // https://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration
  // https://software.intel.com/en-us/articles/intel-sdm (Vol 3A)
  // Section: Detecting Hardware Multi-threads Support and Topology
  // Uses CPUID Leaf 11 to enumerate system topology on Intel x86 architectures
  // Other cases not supported
  uint32 eax, ebx, ecx, edx;

  // Leaf 0 reports the highest basic leaf in EAX; bail out if leaf 11 is
  // beyond it.
  GETCPUID(eax, ebx, ecx, edx, 0, 0);
  if (eax < 11) {
    return 0;
  }

  // 1) Leaf 11 available? CPUID.(EAX=11, ECX=0):EBX != 0
  // 2) SMT_Mask_Width = CPUID.(EAX=11, ECX=0):EAX[4:0] if CPUID.(EAX=11,
  // ECX=0):ECX[15:8] is 1
  GETCPUID(eax, ebx, ecx, edx, 11, 0);
  const bool leaf_populated = (ebx != 0);
  const uint32 level_type = (ecx >> 8) & 0xff;
  if (leaf_populated && level_type == 1) {
    const uint32 smt_mask_width = eax & 0x1f;
    return 1 << smt_mask_width;  // 2 ^ SMT_Mask_Width
  }
#endif  // PLATFORM_IS_X86
  return 0;
}
371 
372 }  // namespace port
373 }  // namespace tensorflow
374