• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/core/platform/cpu_info.h"
17 #include "tensorflow/core/platform/logging.h"
18 #include "tensorflow/core/platform/platform.h"
19 #include "tensorflow/core/platform/types.h"
20 #if defined(PLATFORM_IS_X86)
21 #include <mutex>  // NOLINT
22 #endif
23 
24 // SIMD extension querying is only available on x86.
25 #ifdef PLATFORM_IS_X86
#ifdef PLATFORM_WINDOWS
// Visual Studio defines a builtin function for CPUID, so use that if possible.
//
// GETCPUID(a, b, c, d, a_inp, c_inp) executes CPUID with EAX = a_inp and
// ECX = c_inp and stores the resulting EAX/EBX/ECX/EDX in the lvalues a, b, c
// and d. The body is wrapped in do { ... } while (0) so that the expansion is
// a single statement and `GETCPUID(...);` is safe inside an unbraced if/else.
#define GETCPUID(a, b, c, d, a_inp, c_inp) \
  do {                                     \
    int cpu_info[4] = {-1};                \
    __cpuidex(cpu_info, a_inp, c_inp);     \
    a = cpu_info[0];                       \
    b = cpu_info[1];                       \
    c = cpu_info[2];                       \
    d = cpu_info[3];                       \
  } while (0)
#else
// Otherwise use gcc-format assembler to implement the underlying instructions.
//
// GETCPUID(a, b, c, d, a_inp, c_inp) executes CPUID with EAX = a_inp and
// ECX = c_inp and stores the resulting EAX/EBX/ECX/EDX in the lvalues a, b, c
// and d. CPUID overwrites RBX, so the caller's RBX is first copied to RDI; the
// xchg after CPUID puts that saved value back into RBX and leaves CPUID's EBX
// result in RDI, which is what the "=D" output operand hands to `b`.
#define GETCPUID(a, b, c, d, a_inp, c_inp) \
  asm("mov %%rbx, %%rdi\n"                 \
      "cpuid\n"                            \
      "xchg %%rdi, %%rbx\n"                \
      : "=a"(a), "=D"(b), "=c"(c), "=d"(d) \
      : "a"(a_inp), "2"(c_inp))
#endif
46 #endif
47 
48 namespace tensorflow {
49 namespace port {
50 namespace {
51 
52 #ifdef PLATFORM_IS_X86
// Declared up front because CPUIDInfo's member functions and the lazy
// initializer refer to each other before either is fully defined.
void InitCPUIDInfo();
class CPUIDInfo;

// File-wide CPUID cache. Null until CPUIDInfo::Initialize() allocates it.
CPUIDInfo* cpuid = nullptr;
57 
#ifndef PLATFORM_WINDOWS
// Executes XGETBV with ECX = 0 and returns the value it leaves in EAX; the
// value it leaves in EDX is captured but discarded.
int GetXCR0EAX() {
  int xcr0_low, xcr0_high;
  asm("XGETBV" : "=a"(xcr0_low), "=d"(xcr0_high) : "c"(0));
  return xcr0_low;
}
#else
// Visual Studio defines a builtin function, so use that if possible. The
// intrinsic's result is converted to int on return.
int GetXCR0EAX() {
  return _xgetbv(0);
}
#endif
68 
69 // Structure for basic CPUID info
70 class CPUIDInfo {
71  public:
CPUIDInfo()72   CPUIDInfo()
73       : have_adx_(0),
74         have_aes_(0),
75         have_avx_(0),
76         have_avx2_(0),
77         have_avx512f_(0),
78         have_avx512cd_(0),
79         have_avx512er_(0),
80         have_avx512pf_(0),
81         have_avx512vl_(0),
82         have_avx512bw_(0),
83         have_avx512dq_(0),
84         have_avx512vbmi_(0),
85         have_avx512ifma_(0),
86         have_avx512_4vnniw_(0),
87         have_avx512_4fmaps_(0),
88         have_bmi1_(0),
89         have_bmi2_(0),
90         have_cmov_(0),
91         have_cmpxchg16b_(0),
92         have_cmpxchg8b_(0),
93         have_f16c_(0),
94         have_fma_(0),
95         have_mmx_(0),
96         have_pclmulqdq_(0),
97         have_popcnt_(0),
98         have_prefetchw_(0),
99         have_prefetchwt1_(0),
100         have_rdrand_(0),
101         have_rdseed_(0),
102         have_smap_(0),
103         have_sse_(0),
104         have_sse2_(0),
105         have_sse3_(0),
106         have_sse4_1_(0),
107         have_sse4_2_(0),
108         have_ssse3_(0),
109         have_hypervisor_(0) {}
110 
Initialize()111   static void Initialize() {
112     // Initialize cpuid struct
113     CHECK(cpuid == nullptr) << __func__ << " ran more than once";
114     cpuid = new CPUIDInfo;
115 
116     uint32 eax, ebx, ecx, edx;
117 
118     // Get vendor string (issue CPUID with eax = 0)
119     GETCPUID(eax, ebx, ecx, edx, 0, 0);
120     cpuid->vendor_str_.append(reinterpret_cast<char *>(&ebx), 4);
121     cpuid->vendor_str_.append(reinterpret_cast<char *>(&edx), 4);
122     cpuid->vendor_str_.append(reinterpret_cast<char *>(&ecx), 4);
123 
124     // To get general information and extended features we send eax = 1 and
125     // ecx = 0 to cpuid.  The response is returned in eax, ebx, ecx and edx.
126     // (See Intel 64 and IA-32 Architectures Software Developer's Manual
127     // Volume 2A: Instruction Set Reference, A-M CPUID).
128     GETCPUID(eax, ebx, ecx, edx, 1, 0);
129 
130     cpuid->model_num_ = static_cast<int>((eax >> 4) & 0xf);
131     cpuid->family_ = static_cast<int>((eax >> 8) & 0xf);
132 
133     cpuid->have_aes_ = (ecx >> 25) & 0x1;
134     cpuid->have_cmov_ = (edx >> 15) & 0x1;
135     cpuid->have_cmpxchg16b_ = (ecx >> 13) & 0x1;
136     cpuid->have_cmpxchg8b_ = (edx >> 8) & 0x1;
137     cpuid->have_mmx_ = (edx >> 23) & 0x1;
138     cpuid->have_pclmulqdq_ = (ecx >> 1) & 0x1;
139     cpuid->have_popcnt_ = (ecx >> 23) & 0x1;
140     cpuid->have_rdrand_ = (ecx >> 30) & 0x1;
141     cpuid->have_sse2_ = (edx >> 26) & 0x1;
142     cpuid->have_sse3_ = ecx & 0x1;
143     cpuid->have_sse4_1_ = (ecx >> 19) & 0x1;
144     cpuid->have_sse4_2_ = (ecx >> 20) & 0x1;
145     cpuid->have_sse_ = (edx >> 25) & 0x1;
146     cpuid->have_ssse3_ = (ecx >> 9) & 0x1;
147     cpuid->have_hypervisor_ = (ecx >> 31) & 1;
148 
149     const uint64 xcr0_xmm_mask = 0x2;
150     const uint64 xcr0_ymm_mask = 0x4;
151     const uint64 xcr0_maskreg_mask = 0x20;
152     const uint64 xcr0_zmm0_15_mask = 0x40;
153     const uint64 xcr0_zmm16_31_mask = 0x80;
154 
155     const uint64 xcr0_avx_mask = xcr0_xmm_mask | xcr0_ymm_mask;
156     const uint64 xcr0_avx512_mask = xcr0_avx_mask | xcr0_maskreg_mask |
157                                     xcr0_zmm0_15_mask | xcr0_zmm16_31_mask;
158 
159     const bool have_avx =
160         // Does the OS support XGETBV instruction use by applications?
161         ((ecx >> 27) & 0x1) &&
162         // Does the OS save/restore XMM and YMM state?
163         ((GetXCR0EAX() & xcr0_avx_mask) == xcr0_avx_mask) &&
164         // Is AVX supported in hardware?
165         ((ecx >> 28) & 0x1);
166 
167     const bool have_avx512 =
168         // Does the OS support XGETBV instruction use by applications?
169         ((ecx >> 27) & 0x1) &&
170         // Does the OS save/restore ZMM state?
171         ((GetXCR0EAX() & xcr0_avx512_mask) == xcr0_avx512_mask);
172 
173     cpuid->have_avx_ = have_avx;
174     cpuid->have_fma_ = have_avx && ((ecx >> 12) & 0x1);
175     cpuid->have_f16c_ = have_avx && ((ecx >> 29) & 0x1);
176 
177     // Get standard level 7 structured extension features (issue CPUID with
178     // eax = 7 and ecx= 0), which is required to check for AVX2 support as
179     // well as other Haswell (and beyond) features.  (See Intel 64 and IA-32
180     // Architectures Software Developer's Manual Volume 2A: Instruction Set
181     // Reference, A-M CPUID).
182     GETCPUID(eax, ebx, ecx, edx, 7, 0);
183 
184     cpuid->have_adx_ = (ebx >> 19) & 0x1;
185     cpuid->have_avx2_ = have_avx && ((ebx >> 5) & 0x1);
186     cpuid->have_bmi1_ = (ebx >> 3) & 0x1;
187     cpuid->have_bmi2_ = (ebx >> 8) & 0x1;
188     cpuid->have_prefetchwt1_ = ecx & 0x1;
189     cpuid->have_rdseed_ = (ebx >> 18) & 0x1;
190     cpuid->have_smap_ = (ebx >> 20) & 0x1;
191 
192     cpuid->have_avx512f_ = have_avx512 && ((ebx >> 16) & 0x1);
193     cpuid->have_avx512cd_ = have_avx512 && ((ebx >> 28) & 0x1);
194     cpuid->have_avx512er_ = have_avx512 && ((ebx >> 27) & 0x1);
195     cpuid->have_avx512pf_ = have_avx512 && ((ebx >> 26) & 0x1);
196     cpuid->have_avx512vl_ = have_avx512 && ((ebx >> 31) & 0x1);
197     cpuid->have_avx512bw_ = have_avx512 && ((ebx >> 30) & 0x1);
198     cpuid->have_avx512dq_ = have_avx512 && ((ebx >> 17) & 0x1);
199     cpuid->have_avx512vbmi_ = have_avx512 && ((ecx >> 1) & 0x1);
200     cpuid->have_avx512ifma_ = have_avx512 && ((ebx >> 21) & 0x1);
201     cpuid->have_avx512_4vnniw_ = have_avx512 && ((edx >> 2) & 0x1);
202     cpuid->have_avx512_4fmaps_ = have_avx512 && ((edx >> 3) & 0x1);
203   }
204 
TestFeature(CPUFeature feature)205   static bool TestFeature(CPUFeature feature) {
206     InitCPUIDInfo();
207     // clang-format off
208     switch (feature) {
209       case ADX:           return cpuid->have_adx_;
210       case AES:           return cpuid->have_aes_;
211       case AVX2:          return cpuid->have_avx2_;
212       case AVX:           return cpuid->have_avx_;
213       case AVX512F:       return cpuid->have_avx512f_;
214       case AVX512CD:      return cpuid->have_avx512cd_;
215       case AVX512PF:      return cpuid->have_avx512pf_;
216       case AVX512ER:      return cpuid->have_avx512er_;
217       case AVX512VL:      return cpuid->have_avx512vl_;
218       case AVX512BW:      return cpuid->have_avx512bw_;
219       case AVX512DQ:      return cpuid->have_avx512dq_;
220       case AVX512VBMI:    return cpuid->have_avx512vbmi_;
221       case AVX512IFMA:    return cpuid->have_avx512ifma_;
222       case AVX512_4VNNIW: return cpuid->have_avx512_4vnniw_;
223       case AVX512_4FMAPS: return cpuid->have_avx512_4fmaps_;
224       case BMI1:          return cpuid->have_bmi1_;
225       case BMI2:          return cpuid->have_bmi2_;
226       case CMOV:          return cpuid->have_cmov_;
227       case CMPXCHG16B:    return cpuid->have_cmpxchg16b_;
228       case CMPXCHG8B:     return cpuid->have_cmpxchg8b_;
229       case F16C:          return cpuid->have_f16c_;
230       case FMA:           return cpuid->have_fma_;
231       case MMX:           return cpuid->have_mmx_;
232       case PCLMULQDQ:     return cpuid->have_pclmulqdq_;
233       case POPCNT:        return cpuid->have_popcnt_;
234       case PREFETCHW:     return cpuid->have_prefetchw_;
235       case PREFETCHWT1:   return cpuid->have_prefetchwt1_;
236       case RDRAND:        return cpuid->have_rdrand_;
237       case RDSEED:        return cpuid->have_rdseed_;
238       case SMAP:          return cpuid->have_smap_;
239       case SSE2:          return cpuid->have_sse2_;
240       case SSE3:          return cpuid->have_sse3_;
241       case SSE4_1:        return cpuid->have_sse4_1_;
242       case SSE4_2:        return cpuid->have_sse4_2_;
243       case SSE:           return cpuid->have_sse_;
244       case SSSE3:         return cpuid->have_ssse3_;
245       case HYPERVISOR:    return cpuid->have_hypervisor_;
246       default:
247         break;
248     }
249     // clang-format on
250     return false;
251   }
252 
vendor_str() const253   string vendor_str() const { return vendor_str_; }
family() const254   int family() const { return family_; }
model_num()255   int model_num() { return model_num_; }
256 
257  private:
258   int have_adx_ : 1;
259   int have_aes_ : 1;
260   int have_avx_ : 1;
261   int have_avx2_ : 1;
262   int have_avx512f_ : 1;
263   int have_avx512cd_ : 1;
264   int have_avx512er_ : 1;
265   int have_avx512pf_ : 1;
266   int have_avx512vl_ : 1;
267   int have_avx512bw_ : 1;
268   int have_avx512dq_ : 1;
269   int have_avx512vbmi_ : 1;
270   int have_avx512ifma_ : 1;
271   int have_avx512_4vnniw_ : 1;
272   int have_avx512_4fmaps_ : 1;
273   int have_bmi1_ : 1;
274   int have_bmi2_ : 1;
275   int have_cmov_ : 1;
276   int have_cmpxchg16b_ : 1;
277   int have_cmpxchg8b_ : 1;
278   int have_f16c_ : 1;
279   int have_fma_ : 1;
280   int have_mmx_ : 1;
281   int have_pclmulqdq_ : 1;
282   int have_popcnt_ : 1;
283   int have_prefetchw_ : 1;
284   int have_prefetchwt1_ : 1;
285   int have_rdrand_ : 1;
286   int have_rdseed_ : 1;
287   int have_smap_ : 1;
288   int have_sse_ : 1;
289   int have_sse2_ : 1;
290   int have_sse3_ : 1;
291   int have_sse4_1_ : 1;
292   int have_sse4_2_ : 1;
293   int have_ssse3_ : 1;
294   int have_hypervisor_ : 1;
295   string vendor_str_;
296   int family_;
297   int model_num_;
298 };
299 
300 std::once_flag cpuid_once_flag;
301 
InitCPUIDInfo()302 void InitCPUIDInfo() {
303   // This ensures that CPUIDInfo::Initialize() is called exactly
304   // once regardless of how many threads concurrently call us
305   std::call_once(cpuid_once_flag, CPUIDInfo::Initialize);
306 }
307 
308 #endif  // PLATFORM_IS_X86
309 
310 }  // namespace
311 
TestCPUFeature(CPUFeature feature)312 bool TestCPUFeature(CPUFeature feature) {
313 #ifdef PLATFORM_IS_X86
314   return CPUIDInfo::TestFeature(feature);
315 #else
316   return false;
317 #endif
318 }
319 
// Returns the CPUID vendor string, or an empty string on builds without
// PLATFORM_IS_X86.
std::string CPUVendorIDString() {
#ifndef PLATFORM_IS_X86
  return "";
#else
  InitCPUIDInfo();
  return cpuid->vendor_str();
#endif
}
328 
// Returns the cached CPU family value, or 0 on builds without
// PLATFORM_IS_X86.
int CPUFamily() {
#ifndef PLATFORM_IS_X86
  return 0;
#else
  InitCPUIDInfo();
  return cpuid->family();
#endif
}
337 
// Returns the cached CPU model number, or 0 on builds without
// PLATFORM_IS_X86.
int CPUModelNum() {
#ifndef PLATFORM_IS_X86
  return 0;
#else
  InitCPUIDInfo();
  return cpuid->model_num();
#endif
}
346 
// Returns 2 ^ SMT_Mask_Width as enumerated by CPUID leaf 11, or 0 when the
// leaf is missing, unpopulated, does not describe the SMT level at sub-leaf 0,
// or the build is not for x86.
int CPUIDNumSMT() {
#ifdef PLATFORM_IS_X86
  // https://software.intel.com/en-us/articles/intel-64-architecture-processor-topology-enumeration
  // https://software.intel.com/en-us/articles/intel-sdm (Vol 3A)
  // Section: Detecting Hardware Multi-threads Support and Topology
  // Uses CPUID Leaf 11 to enumerate system topology on Intel x86 architectures
  // Other cases not supported
  uint32 eax, ebx, ecx, edx;

  // Leaf 0 reports the highest basic leaf in EAX; stop if leaf 11 is beyond it.
  GETCPUID(eax, ebx, ecx, edx, 0, 0);
  if (eax < 11) {
    return 0;
  }

  // 1) Leaf 11 available? CPUID.(EAX=11, ECX=0):EBX != 0
  // 2) SMT_Mask_Width = CPUID.(EAX=11, ECX=0):EAX[4:0] if CPUID.(EAX=11,
  // ECX=0):ECX[15:8] is 1
  GETCPUID(eax, ebx, ecx, edx, 11, 0);
  const bool leaf_populated = (ebx != 0);
  const uint32 level_type = (ecx >> 8) & 0xff;
  if (!leaf_populated || level_type != 1) {
    return 0;
  }

  const uint32 smt_mask_width = eax & 0x1f;
  return 1 << smt_mask_width;  // 2 ^ SMT_Mask_Width
#else
  return 0;
#endif  // PLATFORM_IS_X86
}
369 
370 }  // namespace port
371 }  // namespace tensorflow
372