/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "absl/base/internal/sysinfo.h"

#include "tensorflow/core/platform/cpu_info.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/mem.h"
#include "tensorflow/core/platform/numa.h"
#include "tensorflow/core/platform/snappy.h"
#include "tensorflow/core/platform/types.h"

#if defined(__linux__) && !defined(__ANDROID__)
#include <sched.h>
#include <sys/sysinfo.h>
#else
#include <sys/syscall.h>
#endif

#if (__x86_64__ || __i386__)
#include <cpuid.h>
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#ifdef TF_USE_SNAPPY
#include "snappy.h"
#endif
#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) || \
    defined(__HAIKU__)
#include <thread>
#endif

#ifdef TENSORFLOW_USE_NUMA
#include "hwloc.h"  // TF:hwloc
#endif

namespace tensorflow {
namespace port {

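// Platform-specific initialization hook invoked from a program's main(); the
// POSIX implementation is a no-op.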
void InitMain(const char* usage, int* argc, char*** argv) {}

string Hostname() {
  char hostname[1024];
  gethostname(hostname, sizeof hostname);
  hostname[sizeof hostname - 1] = 0;
  return string(hostname);
}

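// Returns the number of CPUs on which the calling thread may be scheduled:
// the affinity mask on Linux, std::thread::hardware_concurrency() on
// macOS/FreeBSD/Haiku, and a fixed guess when neither is available.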
int NumSchedulableCPUs() {
#if defined(__linux__) && !defined(__ANDROID__)
  cpu_set_t cpuset;
  if (sched_getaffinity(0, sizeof(cpu_set_t), &cpuset) == 0) {
    return CPU_COUNT(&cpuset);
  }
  perror("sched_getaffinity");
#endif
#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) || \
    defined(__HAIKU__)
  unsigned int count = std::thread::hardware_concurrency();
  if (count > 0) return static_cast<int>(count);
#endif
  const int kDefaultCores = 4;  // Semi-conservative guess
  fprintf(stderr, "can't determine number of CPU cores: assuming %d\n",
          kDefaultCores);
  return kDefaultCores;
}

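// Returns the total number of CPUs on the machine, or kUnknownCPU when it
// cannot be determined.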
int NumTotalCPUs() {
  int count = absl::base_internal::NumCPUs();
  return (count <= 0) ? kUnknownCPU : count;
}

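// Returns the CPU on which the calling thread is currently running, or
// kUnknownCPU when no mechanism for querying it is available.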
int GetCurrentCPU() {
#if defined(__EMSCRIPTEN__)
  return sched_getcpu();
#elif defined(__linux__) && !defined(__ANDROID__)
  return sched_getcpu();
  // Attempt to use cpuid on all other platforms.  If that fails, perform a
  // syscall.
#elif defined(__cpuid) && !defined(__APPLE__)
  // TODO(b/120919972): __cpuid returns invalid APIC ids on OS X.
  uint32_t eax = 0;
  uint32_t ebx = 0;
  uint32_t ecx = 0;
  uint32_t edx = 0;
  __cpuid(/*level=*/1, eax, ebx, ecx, edx);
  if ((edx & /*bit_APIC=*/(1 << 9)) != 0) {
    // EBX bits 24-31 are APIC ID
    return (ebx >> 24) & 0xFF;
  }
#elif defined(__NR_getcpu)
  unsigned int cpu;
  if (syscall(__NR_getcpu, &cpu, NULL, NULL) < 0) {
    return kUnknownCPU;
  } else {
    return static_cast<int>(cpu);
  }
#endif
  return kUnknownCPU;
}

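// Returns the number of SMT (hyper-)threads per physical core as reported by
// CPUID, defaulting to 1 when the value cannot be determined.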
int NumHyperthreadsPerCore() {
  static const int ht_per_core = tensorflow::port::CPUIDNumSMT();
  return (ht_per_core > 0) ? ht_per_core : 1;
}

#ifdef TENSORFLOW_USE_NUMA
namespace {
static hwloc_topology_t hwloc_topology_handle;

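// Lazily initializes and loads the hwloc topology exactly once; returns false
// if either hwloc_topology_init() or hwloc_topology_load() failed.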
bool HaveHWLocTopology() {
  // One time initialization
  static bool init = []() {
    if (hwloc_topology_init(&hwloc_topology_handle)) {
      LOG(ERROR) << "Call to hwloc_topology_init() failed";
      return false;
    }
    if (hwloc_topology_load(hwloc_topology_handle)) {
      LOG(ERROR) << "Call to hwloc_topology_load() failed";
      return false;
    }
    return true;
  }();
  return init;
}

// Return the first hwloc object of the given type whose os_index
// matches 'index'.
hwloc_obj_t GetHWLocTypeIndex(hwloc_obj_type_t tp, int index) {
  hwloc_obj_t obj = nullptr;
  if (index >= 0) {
    while ((obj = hwloc_get_next_obj_by_type(hwloc_topology_handle, tp, obj)) !=
           nullptr) {
      if (obj->os_index == index) break;
    }
  }
  return obj;
}
}  // namespace
#endif  // TENSORFLOW_USE_NUMA

bool NUMAEnabled() { return (NUMANumNodes() > 1); }

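// Returns the number of NUMA nodes reported by hwloc, or 1 when NUMA support
// is compiled out or the topology is unavailable.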
int NUMANumNodes() {
#ifdef TENSORFLOW_USE_NUMA
  if (HaveHWLocTopology()) {
    int num_numanodes =
        hwloc_get_nbobjs_by_type(hwloc_topology_handle, HWLOC_OBJ_NUMANODE);
    return std::max(1, num_numanodes);
  } else {
    return 1;
  }
#else
  return 1;
#endif  // TENSORFLOW_USE_NUMA
}

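// Binds the calling thread to the CPUs of the given NUMA node when hwloc
// knows about that node; otherwise logs an error and leaves affinity alone.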
void NUMASetThreadNodeAffinity(int node) {
#ifdef TENSORFLOW_USE_NUMA
  if (HaveHWLocTopology()) {
    // Find the corresponding NUMA node topology object.
    hwloc_obj_t obj = GetHWLocTypeIndex(HWLOC_OBJ_NUMANODE, node);
    if (obj) {
      hwloc_set_cpubind(hwloc_topology_handle, obj->cpuset,
                        HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT);
    } else {
      LOG(ERROR) << "Could not find hwloc NUMA node " << node;
    }
  }
#endif  // TENSORFLOW_USE_NUMA
}

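// Returns the NUMA node whose cpuset contains the calling thread's current
// CPU binding, or kNUMANoAffinity when the thread is not bound to one node.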
int NUMAGetThreadNodeAffinity() {
  int node_index = kNUMANoAffinity;
#ifdef TENSORFLOW_USE_NUMA
  if (HaveHWLocTopology()) {
    hwloc_cpuset_t thread_cpuset = hwloc_bitmap_alloc();
    hwloc_get_cpubind(hwloc_topology_handle, thread_cpuset,
                      HWLOC_CPUBIND_THREAD);
    hwloc_obj_t obj = nullptr;
    // Return the first NUMA node whose cpuset is a (non-proper) superset of
    // that of the current thread.
    while ((obj = hwloc_get_next_obj_by_type(
                hwloc_topology_handle, HWLOC_OBJ_NUMANODE, obj)) != nullptr) {
      if (hwloc_bitmap_isincluded(thread_cpuset, obj->cpuset)) {
        node_index = obj->os_index;
        break;
      }
    }
    hwloc_bitmap_free(thread_cpuset);
  }
#endif  // TENSORFLOW_USE_NUMA
  return node_index;
}

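// Allocates 'size' bytes aligned to at least 'minimum_alignment'.  Alignments
// smaller than sizeof(void*) fall back to plain Malloc(), which already
// guarantees pointer alignment.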
void* AlignedMalloc(size_t size, int minimum_alignment) {
#if defined(__ANDROID__)
  return memalign(minimum_alignment, size);
#else  // !defined(__ANDROID__)
  void* ptr = nullptr;
  // posix_memalign requires that the requested alignment be at least
  // sizeof(void*).  In this case, fall back on malloc which should return
  // memory aligned to at least the size of a pointer.
  const int required_alignment = sizeof(void*);
  if (minimum_alignment < required_alignment) return Malloc(size);
  int err = posix_memalign(&ptr, minimum_alignment, size);
  if (err != 0) {
    return nullptr;
  } else {
    return ptr;
  }
#endif
}

void AlignedFree(void* aligned_memory) { Free(aligned_memory); }

void* Malloc(size_t size) { return malloc(size); }

void* Realloc(void* ptr, size_t size) { return realloc(ptr, size); }

void Free(void* ptr) { free(ptr); }

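// Allocates memory bound to the given NUMA node when hwloc knows about it;
// otherwise falls back to an ordinary aligned allocation.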
void* NUMAMalloc(int node, size_t size, int minimum_alignment) {
#ifdef TENSORFLOW_USE_NUMA
  if (HaveHWLocTopology()) {
    hwloc_obj_t numa_node = GetHWLocTypeIndex(HWLOC_OBJ_NUMANODE, node);
    if (numa_node) {
      return hwloc_alloc_membind(hwloc_topology_handle, size,
                                 numa_node->nodeset, HWLOC_MEMBIND_BIND,
                                 HWLOC_MEMBIND_BYNODESET);
    } else {
      LOG(ERROR) << "Failed to find hwloc NUMA node " << node;
    }
  }
#endif  // TENSORFLOW_USE_NUMA
  return AlignedMalloc(size, minimum_alignment);
}

void NUMAFree(void* ptr, size_t size) {
#ifdef TENSORFLOW_USE_NUMA
  if (HaveHWLocTopology()) {
    hwloc_free(hwloc_topology_handle, ptr, size);
    return;
  }
#endif  // TENSORFLOW_USE_NUMA
  Free(ptr);
}

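// Returns the NUMA node backing the memory at 'addr', or kNUMANoAffinity when
// the location cannot be determined.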
int NUMAGetMemAffinity(const void* addr) {
  int node = kNUMANoAffinity;
#ifdef TENSORFLOW_USE_NUMA
  if (HaveHWLocTopology() && addr) {
    hwloc_nodeset_t nodeset = hwloc_bitmap_alloc();
    if (!hwloc_get_area_memlocation(hwloc_topology_handle, addr, 4, nodeset,
                                    HWLOC_MEMBIND_BYNODESET)) {
      hwloc_obj_t obj = nullptr;
      while ((obj = hwloc_get_next_obj_by_type(
                  hwloc_topology_handle, HWLOC_OBJ_NUMANODE, obj)) != nullptr) {
        if (hwloc_bitmap_isincluded(nodeset, obj->nodeset)) {
          node = obj->os_index;
          break;
        }
      }
      hwloc_bitmap_free(nodeset);
    } else {
      LOG(ERROR) << "Failed call to hwloc_get_area_memlocation.";
    }
  }
#endif  // TENSORFLOW_USE_NUMA
  return node;
}

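// Malloc extension hooks are unimplemented here: releasing memory back to the
// system is a no-op and allocated sizes are reported as 0 (unknown).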
void MallocExtension_ReleaseToSystem(std::size_t num_bytes) {
  // No-op.
}

std::size_t MallocExtension_GetAllocatedSize(const void* p) { return 0; }

void AdjustFilenameForLogging(string* filename) {
  // Nothing to do
}

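// Thin wrappers around the Snappy compression library; each returns false
// when TF_USE_SNAPPY is not defined.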
bool Snappy_Compress(const char* input, size_t length, string* output) {
#ifdef TF_USE_SNAPPY
  output->resize(snappy::MaxCompressedLength(length));
  size_t outlen;
  snappy::RawCompress(input, length, &(*output)[0], &outlen);
  output->resize(outlen);
  return true;
#else
  return false;
#endif
}

bool Snappy_GetUncompressedLength(const char* input, size_t length,
                                  size_t* result) {
#ifdef TF_USE_SNAPPY
  return snappy::GetUncompressedLength(input, length, result);
#else
  return false;
#endif
}

bool Snappy_Uncompress(const char* input, size_t length, char* output) {
#ifdef TF_USE_SNAPPY
  return snappy::RawUncompress(input, length, output);
#else
  return false;
#endif
}

string Demangle(const char* mangled) { return mangled; }

double NominalCPUFrequency() {
  return absl::base_internal::NominalCPUFrequency();
}

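// Returns the amount of free RAM reported by sysinfo() on Linux, or INT64_MAX
// when the value cannot be determined.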
int64 AvailableRam() {
#if defined(__linux__) && !defined(__ANDROID__)
  struct sysinfo info;
  int err = sysinfo(&info);
  if (err == 0) {
    return info.freeram;
  }
#endif
  return INT64_MAX;
}

}  // namespace port
}  // namespace tensorflow