1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/core/grappler/clusters/utils.h"
17 
18 #include "third_party/eigen3/Eigen/Core"
19 
20 #if GOOGLE_CUDA
21 #include "third_party/gpus/cuda/include/cuda.h"
22 #include "third_party/gpus/cuda/include/cuda_runtime_api.h"
23 #include "third_party/gpus/cudnn/cudnn.h"
24 #endif
25 
26 #if TENSORFLOW_USE_ROCM
27 #include "rocm/include/hip/hip_runtime.h"
28 #endif
29 
30 #ifdef TENSORFLOW_USE_LIBXSMM
31 #include "include/libxsmm.h"
32 #endif
33 
34 #include "tensorflow/core/common_runtime/gpu/gpu_id.h"
35 #include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h"
36 #include "tensorflow/core/lib/core/status.h"
37 #include "tensorflow/core/lib/strings/numbers.h"
38 #include "tensorflow/core/lib/strings/strcat.h"
39 #include "tensorflow/core/platform/byte_order.h"
40 #include "tensorflow/core/platform/cpu_info.h"
41 #include "tensorflow/core/platform/mem.h"
42 
43 namespace tensorflow {
44 namespace grappler {
45 
GetLocalCPUInfo()46 DeviceProperties GetLocalCPUInfo() {
47   DeviceProperties device;
48   device.set_type("CPU");
49 
50   device.set_vendor(port::CPUVendorIDString());
51   // Combine cpu family and model into the model string.
52   device.set_model(
53       strings::StrCat((port::CPUFamily() << 4) + port::CPUModelNum()));
54   device.set_frequency(port::NominalCPUFrequency() * 1e-6);
55   device.set_num_cores(port::NumSchedulableCPUs());
56   device.set_l1_cache_size(Eigen::l1CacheSize());
57   device.set_l2_cache_size(Eigen::l2CacheSize());
58   device.set_l3_cache_size(Eigen::l3CacheSize());
59 
60   int64_t free_mem = port::AvailableRam();
61   if (free_mem < INT64_MAX) {
62     device.set_memory_size(free_mem);
63   }
64 
65   (*device.mutable_environment())["cpu_instruction_set"] =
66       Eigen::SimdInstructionSetsInUse();
67 
68   (*device.mutable_environment())["eigen"] = strings::StrCat(
69       EIGEN_WORLD_VERSION, ".", EIGEN_MAJOR_VERSION, ".", EIGEN_MINOR_VERSION);
70 #ifdef TENSORFLOW_USE_LIBXSMM
71   (*device.mutable_environment())["libxsmm"] = LIBXSMM_VERSION;
72 #endif
73 
74   return device;
75 }
76 
GetLocalGPUInfo(PlatformDeviceId platform_device_id)77 DeviceProperties GetLocalGPUInfo(PlatformDeviceId platform_device_id) {
78   DeviceProperties device;
79   device.set_type("GPU");
80 
81 #if GOOGLE_CUDA
82   cudaDeviceProp properties;
83   cudaError_t error =
84       cudaGetDeviceProperties(&properties, platform_device_id.value());
85   if (error != cudaSuccess) {
86     device.set_type("UNKNOWN");
87     LOG(ERROR) << "Failed to get device properties, error code: " << error;
88     return device;
89   }
90 
91   device.set_vendor("NVIDIA");
92   device.set_model(properties.name);
93   device.set_frequency(properties.clockRate * 1e-3);
94   device.set_num_cores(properties.multiProcessorCount);
95   device.set_num_registers(properties.regsPerMultiprocessor);
96   // For compute capability less than 5, l1 cache size is configurable to
97   // either 16 KB or 48 KB. We use the initial configuration 16 KB here. For
98   // compute capability larger or equal to 5, l1 cache (unified with texture
99   // cache) size is 24 KB. This number may need to be updated for future
100   // compute capabilities.
101   device.set_l1_cache_size((properties.major < 5) ? 16 * 1024 : 24 * 1024);
102   device.set_l2_cache_size(properties.l2CacheSize);
103   device.set_l3_cache_size(0);
104   device.set_shared_memory_size_per_multiprocessor(
105       properties.sharedMemPerMultiprocessor);
106   device.set_memory_size(properties.totalGlobalMem);
107   // 8 is the number of bits per byte. 2 is accounted for
108   // double data rate (DDR).
109   device.set_bandwidth(properties.memoryBusWidth / 8 *
110                        properties.memoryClockRate * 2);
111 
112   (*device.mutable_environment())["architecture"] =
113       strings::StrCat(properties.major, ".", properties.minor);
114   (*device.mutable_environment())["cuda"] = strings::StrCat(CUDA_VERSION);
115   (*device.mutable_environment())["cudnn"] = strings::StrCat(CUDNN_VERSION);
116 
117 #elif TENSORFLOW_USE_ROCM
118   hipDeviceProp_t properties;
119   hipError_t error =
120       hipGetDeviceProperties(&properties, platform_device_id.value());
121   if (error != hipSuccess) {
122     device.set_type("UNKNOWN");
123     LOG(ERROR) << "Failed to get device properties, error code: " << error;
124     return device;
125   }
126 
127   // ROCM TODO review if numbers here are valid
128   device.set_vendor("Advanced Micro Devices, Inc");
129   device.set_model(properties.name);
130   device.set_frequency(properties.clockRate * 1e-3);
131   device.set_num_cores(properties.multiProcessorCount);
132   device.set_num_registers(properties.regsPerBlock);
133   device.set_l1_cache_size(16 * 1024);
134   device.set_l2_cache_size(properties.l2CacheSize);
135   device.set_l3_cache_size(0);
136   device.set_shared_memory_size_per_multiprocessor(
137       properties.maxSharedMemoryPerMultiProcessor);
138   device.set_memory_size(properties.totalGlobalMem);
139   // 8 is the number of bits per byte. 2 is accounted for
140   // double data rate (DDR).
141   device.set_bandwidth(properties.memoryBusWidth / 8 *
142                        properties.memoryClockRate * 2);
143 
144   (*device.mutable_environment())["architecture"] =
145       strings::StrCat("gfx", properties.gcnArch);
146 #endif
147 
148   return device;
149 }
150 
GetDeviceInfo(const DeviceNameUtils::ParsedName & device)151 DeviceProperties GetDeviceInfo(const DeviceNameUtils::ParsedName& device) {
152   DeviceProperties unknown;
153   unknown.set_type("UNKNOWN");
154 
155   if (device.type == "CPU") {
156     return GetLocalCPUInfo();
157   } else if (device.type == "GPU") {
158     if (device.has_id) {
159       TfDeviceId tf_device_id(device.id);
160       PlatformDeviceId platform_device_id;
161       Status s =
162           GpuIdManager::TfToPlatformDeviceId(tf_device_id, &platform_device_id);
163       if (!s.ok()) {
164         LOG(ERROR) << s;
165         return unknown;
166       }
167       return GetLocalGPUInfo(platform_device_id);
168     } else {
169       return GetLocalGPUInfo(PlatformDeviceId(0));
170     }
171   }
172   return unknown;
173 }
174 
175 }  // end namespace grappler
176 }  // end namespace tensorflow
177