/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "absl/base/internal/sysinfo.h"

#include "tensorflow/core/platform/cpu_info.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/mem.h"
#include "tensorflow/core/platform/numa.h"
#include "tensorflow/core/platform/snappy.h"
#include "tensorflow/core/platform/types.h"

#if defined(__linux__) && !defined(__ANDROID__)
#include <sched.h>
#include <sys/sysinfo.h>
#else
#include <sys/syscall.h>
#endif

#if (__x86_64__ || __i386__)
#include <cpuid.h>
#endif

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#ifdef TF_USE_SNAPPY
#include "snappy.h"
#endif
#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) || \
    defined(__HAIKU__)
#include <thread>
#endif

#if TENSORFLOW_USE_NUMA
#include "hwloc.h"  // TF:hwloc
#endif

namespace tensorflow {
namespace port {

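// InitMain is a no-op on this platform.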
void InitMain(const char* usage, int* argc, char*** argv) {}

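// Returns the host name reported by gethostname(), truncated and
// NUL-terminated to fit the local buffer.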
string Hostname() {
  char hostname[1024];
  gethostname(hostname, sizeof hostname);
  hostname[sizeof hostname - 1] = 0;
  return string(hostname);
}

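// Returns the number of CPUs the calling thread may be scheduled on.
// Falls back to std::thread::hardware_concurrency() or a fixed default
// when the affinity mask cannot be queried.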
int NumSchedulableCPUs() {
#if defined(__linux__) && !defined(__ANDROID__)
  cpu_set_t cpuset;
  if (sched_getaffinity(0, sizeof(cpu_set_t), &cpuset) == 0) {
    return CPU_COUNT(&cpuset);
  }
  perror("sched_getaffinity");
#endif
#if (defined(__APPLE__) && defined(__MACH__)) || defined(__FreeBSD__) || \
    defined(__HAIKU__)
  unsigned int count = std::thread::hardware_concurrency();
  if (count > 0) return static_cast<int>(count);
#endif
  const int kDefaultCores = 4;  // Semi-conservative guess
  fprintf(stderr, "can't determine number of CPU cores: assuming %d\n",
          kDefaultCores);
  return kDefaultCores;
}

int NumTotalCPUs() {
  int count = absl::base_internal::NumCPUs();
  return (count <= 0) ? kUnknownCPU : count;
}

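// Best-effort lookup of the CPU the calling thread is currently running on.
// Returns kUnknownCPU when no suitable mechanism is available.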
int GetCurrentCPU() {
#if defined(__EMSCRIPTEN__)
  return sched_getcpu();
#elif defined(__linux__) && !defined(__ANDROID__)
  return sched_getcpu();
  // Attempt to use cpuid on all other platforms.  If that fails, perform a
  // syscall.
#elif defined(__cpuid) && !defined(__APPLE__)
  // TODO(b/120919972): __cpuid returns invalid APIC ids on OS X.
  uint32_t eax = 0;
  uint32_t ebx = 0;
  uint32_t ecx = 0;
  uint32_t edx = 0;
  __cpuid(/*level=*/1, eax, ebx, ecx, edx);
  if ((edx & /*bit_APIC=*/(1 << 9)) != 0) {
    // EBX bits 24-31 are APIC ID
    return (ebx & 0xFF000000) >> 24;
  }
#elif defined(__NR_getcpu)
  unsigned int cpu;
  if (syscall(__NR_getcpu, &cpu, NULL, NULL) < 0) {
    return kUnknownCPU;
  } else {
    return static_cast<int>(cpu);
  }
#endif
  return kUnknownCPU;
}

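// Returns the number of hyperthreads per physical core as reported by CPUID,
// or 1 if it cannot be determined.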
int NumHyperthreadsPerCore() {
  static const int ht_per_core = tensorflow::port::CPUIDNumSMT();
  return (ht_per_core > 0) ? ht_per_core : 1;
}

#ifdef TENSORFLOW_USE_NUMA
namespace {
static hwloc_topology_t hwloc_topology_handle;

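// Lazily initializes and loads the process-wide hwloc topology.
// Returns false if either step fails.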
bool HaveHWLocTopology() {
  // One time initialization
  static bool init = []() {
    if (hwloc_topology_init(&hwloc_topology_handle)) {
      LOG(ERROR) << "Call to hwloc_topology_init() failed";
      return false;
    }
    if (hwloc_topology_load(hwloc_topology_handle)) {
      LOG(ERROR) << "Call to hwloc_topology_load() failed";
      return false;
    }
    return true;
  }();
  return init;
}

// Return the first hwloc object of the given type whose os_index
// matches 'index'.
hwloc_obj_t GetHWLocTypeIndex(hwloc_obj_type_t tp, int index) {
  hwloc_obj_t obj = nullptr;
  if (index >= 0) {
    while ((obj = hwloc_get_next_obj_by_type(hwloc_topology_handle, tp, obj)) !=
           nullptr) {
      if (obj->os_index == index) break;
    }
  }
  return obj;
}
}  // namespace
#endif  // TENSORFLOW_USE_NUMA

bool NUMAEnabled() { return (NUMANumNodes() > 1); }

int NUMANumNodes() {
#ifdef TENSORFLOW_USE_NUMA
  if (HaveHWLocTopology()) {
    int num_numanodes =
        hwloc_get_nbobjs_by_type(hwloc_topology_handle, HWLOC_OBJ_NUMANODE);
    return std::max(1, num_numanodes);
  } else {
    return 1;
  }
#else
  return 1;
#endif  // TENSORFLOW_USE_NUMA
}

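// Binds the calling thread to the CPUs of the given NUMA node when hwloc is
// available; logs an error if the node cannot be found.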
void NUMASetThreadNodeAffinity(int node) {
#ifdef TENSORFLOW_USE_NUMA
  if (HaveHWLocTopology()) {
    // Find the corresponding NUMA node topology object.
    hwloc_obj_t obj = GetHWLocTypeIndex(HWLOC_OBJ_NUMANODE, node);
    if (obj) {
      hwloc_set_cpubind(hwloc_topology_handle, obj->cpuset,
                        HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT);
    } else {
      LOG(ERROR) << "Could not find hwloc NUMA node " << node;
    }
  }
#endif  // TENSORFLOW_USE_NUMA
}

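// Returns the NUMA node containing the calling thread's CPU binding, or
// kNUMANoAffinity if it cannot be determined.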
int NUMAGetThreadNodeAffinity() {
  int node_index = kNUMANoAffinity;
#ifdef TENSORFLOW_USE_NUMA
  if (HaveHWLocTopology()) {
    hwloc_cpuset_t thread_cpuset = hwloc_bitmap_alloc();
    hwloc_get_cpubind(hwloc_topology_handle, thread_cpuset,
                      HWLOC_CPUBIND_THREAD);
    hwloc_obj_t obj = nullptr;
    // Return the first NUMA node whose cpuset is a (non-proper) superset of
    // that of the current thread.
    while ((obj = hwloc_get_next_obj_by_type(
                hwloc_topology_handle, HWLOC_OBJ_NUMANODE, obj)) != nullptr) {
      if (hwloc_bitmap_isincluded(thread_cpuset, obj->cpuset)) {
        node_index = obj->os_index;
        break;
      }
    }
    hwloc_bitmap_free(thread_cpuset);
  }
#endif  // TENSORFLOW_USE_NUMA
  return node_index;
}

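// Allocates 'size' bytes aligned to at least 'minimum_alignment'.
// Returns nullptr on failure.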
void* AlignedMalloc(size_t size, int minimum_alignment) {
#if defined(__ANDROID__)
  return memalign(minimum_alignment, size);
#else  // !defined(__ANDROID__)
  void* ptr = nullptr;
  // posix_memalign requires that the requested alignment be at least
  // sizeof(void*). In this case, fall back on malloc which should return
  // memory aligned to at least the size of a pointer.
  const int required_alignment = sizeof(void*);
  if (minimum_alignment < required_alignment) return Malloc(size);
  int err = posix_memalign(&ptr, minimum_alignment, size);
  if (err != 0) {
    return nullptr;
  } else {
    return ptr;
  }
#endif
}

void AlignedFree(void* aligned_memory) { Free(aligned_memory); }

void* Malloc(size_t size) { return malloc(size); }

void* Realloc(void* ptr, size_t size) { return realloc(ptr, size); }

void Free(void* ptr) { free(ptr); }

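// Allocates memory bound to the given NUMA node when hwloc is available;
// otherwise falls back to AlignedMalloc.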
void* NUMAMalloc(int node, size_t size, int minimum_alignment) {
#ifdef TENSORFLOW_USE_NUMA
  if (HaveHWLocTopology()) {
    hwloc_obj_t numa_node = GetHWLocTypeIndex(HWLOC_OBJ_NUMANODE, node);
    if (numa_node) {
      return hwloc_alloc_membind(hwloc_topology_handle, size,
                                 numa_node->nodeset, HWLOC_MEMBIND_BIND,
                                 HWLOC_MEMBIND_BYNODESET);
    } else {
      LOG(ERROR) << "Failed to find hwloc NUMA node " << node;
    }
  }
#endif  // TENSORFLOW_USE_NUMA
  return AlignedMalloc(size, minimum_alignment);
}

void NUMAFree(void* ptr, size_t size) {
#ifdef TENSORFLOW_USE_NUMA
  if (HaveHWLocTopology()) {
    hwloc_free(hwloc_topology_handle, ptr, size);
    return;
  }
#endif  // TENSORFLOW_USE_NUMA
  Free(ptr);
}

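// Returns the NUMA node backing the memory at 'addr', or kNUMANoAffinity if
// it cannot be determined.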
int NUMAGetMemAffinity(const void* addr) {
  int node = kNUMANoAffinity;
#ifdef TENSORFLOW_USE_NUMA
  if (HaveHWLocTopology() && addr) {
    hwloc_nodeset_t nodeset = hwloc_bitmap_alloc();
    if (!hwloc_get_area_memlocation(hwloc_topology_handle, addr, 4, nodeset,
                                    HWLOC_MEMBIND_BYNODESET)) {
      hwloc_obj_t obj = nullptr;
      while ((obj = hwloc_get_next_obj_by_type(
                  hwloc_topology_handle, HWLOC_OBJ_NUMANODE, obj)) != nullptr) {
        if (hwloc_bitmap_isincluded(nodeset, obj->nodeset)) {
          node = obj->os_index;
          break;
        }
      }
    } else {
      LOG(ERROR) << "Failed call to hwloc_get_area_memlocation.";
    }
    // Free the temporary nodeset whether or not the query succeeded.
    hwloc_bitmap_free(nodeset);
  }
#endif  // TENSORFLOW_USE_NUMA
  return node;
}

void MallocExtension_ReleaseToSystem(std::size_t num_bytes) {
  // No-op.
}

std::size_t MallocExtension_GetAllocatedSize(const void* p) { return 0; }

void AdjustFilenameForLogging(string* filename) {
  // Nothing to do
}

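// Snappy wrappers; each returns false when TensorFlow is built without
// TF_USE_SNAPPY.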
bool Snappy_Compress(const char* input, size_t length, string* output) {
#ifdef TF_USE_SNAPPY
  output->resize(snappy::MaxCompressedLength(length));
  size_t outlen;
  snappy::RawCompress(input, length, &(*output)[0], &outlen);
  output->resize(outlen);
  return true;
#else
  return false;
#endif
}

bool Snappy_GetUncompressedLength(const char* input, size_t length,
                                  size_t* result) {
#ifdef TF_USE_SNAPPY
  return snappy::GetUncompressedLength(input, length, result);
#else
  return false;
#endif
}

bool Snappy_Uncompress(const char* input, size_t length, char* output) {
#ifdef TF_USE_SNAPPY
  return snappy::RawUncompress(input, length, output);
#else
  return false;
#endif
}

string Demangle(const char* mangled) { return mangled; }

double NominalCPUFrequency() {
  return absl::base_internal::NominalCPUFrequency();
}

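// Returns the amount of free RAM as reported by sysinfo() on Linux, or
// INT64_MAX when it cannot be determined.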
int64 AvailableRam() {
#if defined(__linux__) && !defined(__ANDROID__)
  struct sysinfo info;
  int err = sysinfo(&info);
  if (err == 0) {
    return info.freeram;
  }
#endif
  return INT64_MAX;
}

}  // namespace port
}  // namespace tensorflow