1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_CORE_PLATFORM_CPU_INFO_H_ 17 #define TENSORFLOW_CORE_PLATFORM_CPU_INFO_H_ 18 19 #include <string> 20 21 // TODO(ahentz): This is not strictly required here but, for historical 22 // reasons, many people depend on cpu_info.h in order to use kLittleEndian. 23 #include "tensorflow/core/platform/byte_order.h" 24 25 #if defined(_MSC_VER) 26 // included so __cpuidex function is available for GETCPUID on Windows 27 #include <intrin.h> 28 #endif 29 30 namespace tensorflow { 31 namespace port { 32 33 // Returns an estimate of the number of schedulable CPUs for this 34 // process. Usually, it's constant throughout the lifetime of a 35 // process, but it might change if the underlying cluster management 36 // software can change it dynamically. If the underlying call fails, a default 37 // value (e.g. `4`) may be returned. 38 int NumSchedulableCPUs(); 39 40 // Returns an estimate for the maximum parallelism for this process. 41 // Applications should avoid running more than this number of threads with 42 // intensive workloads concurrently to avoid performance degradation and 43 // contention. 44 // This value is either the number of schedulable CPUs, or a value specific to 45 // the underlying cluster management. Applications should assume this value can 46 // change throughout the lifetime of the process. This function must not be 47 // called during initialization, i.e., before main() has started. 48 int MaxParallelism(); 49 50 // Returns an estimate for the maximum parallelism for this process on the 51 // provided numa node, or any numa node if `numa_node` is kNUMANoAffinity. 52 // See MaxParallelism() for more information. 53 int MaxParallelism(int numa_node); 54 55 // Returns the total number of CPUs on the system. This number should 56 // not change even if the underlying cluster management software may 57 // change the number of schedulable CPUs. Unlike `NumSchedulableCPUs`, if the 58 // underlying call fails, an invalid value of -1 will be returned; 59 // the user must check for validity. 60 static constexpr int kUnknownCPU = -1; 61 int NumTotalCPUs(); 62 63 // Returns the id of the current CPU. Returns -1 if the current CPU cannot be 64 // identified. If successful, the return value will be in [0, NumTotalCPUs()). 65 int GetCurrentCPU(); 66 67 // Returns an estimate of the number of hyperthreads per physical core 68 // on the CPU 69 int NumHyperthreadsPerCore(); 70 71 // Mostly ISA related features that we care about 72 enum CPUFeature { 73 // Do not change numeric assignments. 74 MMX = 0, 75 SSE = 1, 76 SSE2 = 2, 77 SSE3 = 3, 78 SSSE3 = 4, 79 SSE4_1 = 5, 80 SSE4_2 = 6, 81 CMOV = 7, 82 CMPXCHG8B = 8, 83 CMPXCHG16B = 9, 84 POPCNT = 10, 85 AES = 11, 86 AVX = 12, 87 RDRAND = 13, 88 AVX2 = 14, 89 FMA = 15, 90 F16C = 16, 91 PCLMULQDQ = 17, 92 RDSEED = 18, 93 ADX = 19, 94 SMAP = 20, 95 96 // Prefetch Vector Data Into Caches with Intent to Write and T1 Hint 97 // http://www.felixcloutier.com/x86/PREFETCHWT1.html. 98 // You probably want PREFETCHW instead. 99 PREFETCHWT1 = 21, 100 101 BMI1 = 22, 102 BMI2 = 23, 103 HYPERVISOR = 25, // 0 when on a real CPU, 1 on (well-behaved) hypervisor. 104 105 // Prefetch Data into Caches in Anticipation of a Write (3D Now!). 106 // http://www.felixcloutier.com/x86/PREFETCHW.html 107 PREFETCHW = 26, 108 109 // AVX-512: 512-bit vectors (plus masking, etc.) in Knights Landing, 110 // Skylake 111 // Xeon, etc.; each of these entries is a different subset of 112 // instructions, 113 // various combinations of which occur on various CPU types. 114 AVX512F = 27, // Foundation 115 AVX512CD = 28, // Conflict detection 116 AVX512ER = 29, // Exponential and reciprocal 117 AVX512PF = 30, // Prefetching 118 AVX512VL = 31, // Shorter vector lengths 119 AVX512BW = 32, // Byte and word 120 AVX512DQ = 33, // Dword and qword 121 AVX512VBMI = 34, // Bit manipulation 122 AVX512IFMA = 35, // Integer multiply-add 123 AVX512_4VNNIW = 36, // Integer neural network 124 AVX512_4FMAPS = 37, // Floating point neural network 125 }; 126 127 // Checks whether the current processor supports one of the features above. 128 // Checks CPU registers to return hardware capabilities. 129 bool TestCPUFeature(CPUFeature feature); 130 131 // Returns CPU Vendor string (i.e. 'GenuineIntel', 'AuthenticAMD', etc.) 132 std::string CPUVendorIDString(); 133 134 // Returns CPU family. 135 int CPUFamily(); 136 137 // Returns CPU model number. 138 int CPUModelNum(); 139 140 // Returns nominal core processor cycles per second of each processor. 141 double NominalCPUFrequency(); 142 143 // Returns num of hyperthreads per physical core 144 int CPUIDNumSMT(); 145 146 } // namespace port 147 } // namespace tensorflow 148 149 #endif // TENSORFLOW_CORE_PLATFORM_CPU_INFO_H_ 150