1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/core/grappler/clusters/utils.h"
17
18 #include "third_party/eigen3/Eigen/Core"
19
20 #if GOOGLE_CUDA
21 #include "cuda/include/cuda.h"
22 #include "cuda/include/cuda_runtime_api.h"
23 #include "cuda/include/cudnn.h"
24 #endif
25
26 #if TENSORFLOW_USE_ROCM
27 #include "rocm/include/hip/hip_runtime.h"
28 #endif
29
30 #ifdef EIGEN_USE_LIBXSMM
31 #include "include/libxsmm.h"
32 #endif
33
34 #include "tensorflow/core/common_runtime/gpu/gpu_id.h"
35 #include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h"
36 #include "tensorflow/core/lib/core/status.h"
37 #include "tensorflow/core/lib/strings/numbers.h"
38 #include "tensorflow/core/lib/strings/strcat.h"
39 #include "tensorflow/core/platform/byte_order.h"
40 #include "tensorflow/core/platform/cpu_info.h"
41 #include "tensorflow/core/platform/mem.h"
42
43 namespace tensorflow {
44 namespace grappler {
45
GetLocalCPUInfo()46 DeviceProperties GetLocalCPUInfo() {
47 DeviceProperties device;
48 device.set_type("CPU");
49
50 device.set_vendor(port::CPUVendorIDString());
51 // Combine cpu family and model into the model string.
52 device.set_model(
53 strings::StrCat((port::CPUFamily() << 4) + port::CPUModelNum()));
54 device.set_frequency(port::NominalCPUFrequency() * 1e-6);
55 device.set_num_cores(port::NumSchedulableCPUs());
56 device.set_l1_cache_size(Eigen::l1CacheSize());
57 device.set_l2_cache_size(Eigen::l2CacheSize());
58 device.set_l3_cache_size(Eigen::l3CacheSize());
59
60 int64 free_mem = port::AvailableRam();
61 if (free_mem < INT64_MAX) {
62 device.set_memory_size(free_mem);
63 }
64
65 (*device.mutable_environment())["cpu_instruction_set"] =
66 Eigen::SimdInstructionSetsInUse();
67
68 (*device.mutable_environment())["eigen"] = strings::StrCat(
69 EIGEN_WORLD_VERSION, ".", EIGEN_MAJOR_VERSION, ".", EIGEN_MINOR_VERSION);
70 #ifdef EIGEN_USE_LIBXSMM
71 (*device.mutable_environment())["libxsmm"] = LIBXSMM_VERSION;
72 #endif
73
74 return device;
75 }
76
GetLocalGPUInfo(PlatformGpuId platform_gpu_id)77 DeviceProperties GetLocalGPUInfo(PlatformGpuId platform_gpu_id) {
78 DeviceProperties device;
79 device.set_type("GPU");
80
81 #if GOOGLE_CUDA
82 cudaDeviceProp properties;
83 cudaError_t error =
84 cudaGetDeviceProperties(&properties, platform_gpu_id.value());
85 if (error != cudaSuccess) {
86 device.set_type("UNKNOWN");
87 LOG(ERROR) << "Failed to get device properties, error code: " << error;
88 return device;
89 }
90
91 device.set_vendor("NVIDIA");
92 device.set_model(properties.name);
93 device.set_frequency(properties.clockRate * 1e-3);
94 device.set_num_cores(properties.multiProcessorCount);
95 device.set_num_registers(properties.regsPerMultiprocessor);
96 // For compute capability less than 5, l1 cache size is configurable to
97 // either 16 KB or 48 KB. We use the initial configuration 16 KB here. For
98 // compute capability larger or equal to 5, l1 cache (unified with texture
99 // cache) size is 24 KB. This number may need to be updated for future
100 // compute capabilities.
101 device.set_l1_cache_size((properties.major < 5) ? 16 * 1024 : 24 * 1024);
102 device.set_l2_cache_size(properties.l2CacheSize);
103 device.set_l3_cache_size(0);
104 device.set_shared_memory_size_per_multiprocessor(
105 properties.sharedMemPerMultiprocessor);
106 device.set_memory_size(properties.totalGlobalMem);
107 // 8 is the number of bits per byte. 2 is accounted for
108 // double data rate (DDR).
109 device.set_bandwidth(properties.memoryBusWidth / 8 *
110 properties.memoryClockRate * 2);
111
112 (*device.mutable_environment())["architecture"] =
113 strings::StrCat(properties.major, ".", properties.minor);
114 (*device.mutable_environment())["cuda"] = strings::StrCat(CUDA_VERSION);
115 (*device.mutable_environment())["cudnn"] = strings::StrCat(CUDNN_VERSION);
116
117 #elif TENSORFLOW_USE_ROCM
118 hipDeviceProp_t properties;
119 hipError_t error =
120 hipGetDeviceProperties(&properties, platform_gpu_id.value());
121 if (error != hipSuccess) {
122 device.set_type("UNKNOWN");
123 LOG(ERROR) << "Failed to get device properties, error code: " << error;
124 return device;
125 }
126
127 // ROCM TODO review if numbers here are valid
128 device.set_vendor("Advanced Micro Devices, Inc");
129 device.set_model(properties.name);
130 device.set_frequency(properties.clockRate * 1e-3);
131 device.set_num_cores(properties.multiProcessorCount);
132 device.set_num_registers(properties.regsPerBlock);
133 device.set_l1_cache_size(16 * 1024);
134 device.set_l2_cache_size(properties.l2CacheSize);
135 device.set_l3_cache_size(0);
136 device.set_shared_memory_size_per_multiprocessor(
137 properties.maxSharedMemoryPerMultiProcessor);
138 device.set_memory_size(properties.totalGlobalMem);
139 // 8 is the number of bits per byte. 2 is accounted for
140 // double data rate (DDR).
141 device.set_bandwidth(properties.memoryBusWidth / 8 *
142 properties.memoryClockRate * 2);
143
144 (*device.mutable_environment())["architecture"] =
145 strings::StrCat("gfx", properties.gcnArch);
146 #endif
147
148 return device;
149 }
150
GetDeviceInfo(const DeviceNameUtils::ParsedName & device)151 DeviceProperties GetDeviceInfo(const DeviceNameUtils::ParsedName& device) {
152 DeviceProperties unknown;
153 unknown.set_type("UNKNOWN");
154
155 if (device.type == "CPU") {
156 return GetLocalCPUInfo();
157 } else if (device.type == "GPU") {
158 if (device.has_id) {
159 TfGpuId tf_gpu_id(device.id);
160 PlatformGpuId platform_gpu_id;
161 Status s = GpuIdManager::TfToPlatformGpuId(tf_gpu_id, &platform_gpu_id);
162 if (!s.ok()) {
163 LOG(ERROR) << s;
164 return unknown;
165 }
166 return GetLocalGPUInfo(platform_gpu_id);
167 } else {
168 return GetLocalGPUInfo(PlatformGpuId(0));
169 }
170 }
171 return unknown;
172 }
173
174 } // end namespace grappler
175 } // end namespace tensorflow
176