1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/compiler/xla/service/platform_util.h"
17
18 #include <algorithm>
19 #include <string>
20 #include <utility>
21
22 #include "absl/strings/ascii.h"
23 #include "absl/strings/str_join.h"
24 #include "tensorflow/compiler/xla/debug_options_flags.h"
25 #include "tensorflow/compiler/xla/service/compiler.h"
26 #include "tensorflow/compiler/xla/status_macros.h"
27 #include "tensorflow/compiler/xla/statusor.h"
28 #include "tensorflow/compiler/xla/types.h"
29 #include "tensorflow/compiler/xla/util.h"
30 #include "tensorflow/core/lib/core/threadpool.h"
31 #include "tensorflow/core/platform/logging.h"
32 #include "tensorflow/core/platform/stream_executor_no_cuda.h"
33
34 namespace xla {
35
// Minimum supported CUDA compute capability is 3.5. Devices reporting a lower
// capability are rejected by IsDeviceSupported() below.
constexpr int kMinCudaComputeCapabilityMajor = 3;
constexpr int kMinCudaComputeCapabilityMinor = 5;

// Minimum supported AMDGPU ISA version is 803 (gfx803). Older ISAs are
// rejected by IsDeviceSupported() below.
constexpr int kMinAMDGPUISAVersion = 803;

// The name of the interpreter platform; used by GetDefaultPlatform() to
// de-prioritize the interpreter when a real backend is also present.
constexpr char kInterpreter[] = "interpreter";
45
46 namespace {
47
// Maps a user-supplied platform name to its canonical form. Matching is
// case-insensitive: "cpu" is an alias for "host", and "gpu" resolves to
// whichever GPU backend ("cuda" or "rocm") this build was configured with.
// Any other name is returned lowercased and otherwise unchanged.
string CanonicalPlatformName(const string& platform_name) {
  const string lowered = absl::AsciiStrToLower(platform_name);
  // "cpu" and "host" mean the same thing.
  if (lowered == "cpu") {
    return "host";
  }
  if (lowered != "gpu") {
    return lowered;
  }
  // When configured on CUDA, "gpu" and "cuda" mean the same thing.
  // When configured on ROCm, "gpu" and "rocm" mean the same thing.
#if TENSORFLOW_USE_ROCM
  return "rocm";
#else
  return "cuda";
#endif
}
65
GetSupportedPlatforms()66 StatusOr<std::vector<se::Platform*>> GetSupportedPlatforms() {
67 return se::MultiPlatformManager::PlatformsWithFilter(
68 [](const se::Platform* platform) {
69 auto compiler_status = Compiler::GetForPlatform(platform);
70 bool supported = compiler_status.ok();
71 if (!supported) {
72 LOG(INFO) << "platform " << platform->Name() << " present but no "
73 << "XLA compiler available: "
74 << compiler_status.status().error_message();
75 }
76 return supported;
77 });
78 }
79
80 } // namespace
81
/* static */ StatusOr<std::vector<se::Platform*>>
PlatformUtil::GetSupportedPlatforms() {
  // Gather all platforms which have an XLA compiler. Delegates to the
  // file-local helper of the same name, which queries MultiPlatformManager
  // and filters on Compiler::GetForPlatform.
  return xla::GetSupportedPlatforms();
}
87
GetDefaultPlatform()88 /* static */ StatusOr<se::Platform*> PlatformUtil::GetDefaultPlatform() {
89 TF_ASSIGN_OR_RETURN(auto platforms, GetSupportedPlatforms());
90
91 se::Platform* platform = nullptr;
92 if (platforms.empty()) {
93 return NotFound("no platforms found");
94 } else if (platforms.size() == 1) {
95 platform = platforms[0];
96 } else if (platforms.size() == 2) {
97 for (int i = 0; i < 2; i++) {
98 if (absl::AsciiStrToLower(platforms[i]->Name()) == kInterpreter &&
99 absl::AsciiStrToLower(platforms[1 - i]->Name()) != kInterpreter) {
100 platform = platforms[1 - i];
101 break;
102 }
103 }
104 }
105 if (platform != nullptr) {
106 return platform;
107 }
108
109 // Multiple platforms present and we can't pick a reasonable default.
110 string platforms_string = absl::StrJoin(
111 platforms, ", ",
112 [](string* out, const se::Platform* p) { out->append(p->Name()); });
113 return InvalidArgument(
114 "must specify platform because more than one platform (except for the "
115 "interpreter platform) found: %s",
116 platforms_string);
117 }
118
GetPlatform(const string & platform_name)119 /*static*/ StatusOr<se::Platform*> PlatformUtil::GetPlatform(
120 const string& platform_name) {
121 TF_ASSIGN_OR_RETURN(se::Platform * platform,
122 se::MultiPlatformManager::PlatformWithName(
123 CanonicalPlatformName(platform_name)));
124 TF_RETURN_IF_ERROR(Compiler::GetForPlatform(platform).status());
125 return platform;
126 }
127
128 // Returns whether the device underlying the given StreamExecutor is supported
129 // by XLA.
IsDeviceSupported(se::StreamExecutor * executor)130 static bool IsDeviceSupported(se::StreamExecutor* executor) {
131 const auto& description = executor->GetDeviceDescription();
132 if (executor->platform()->id() == se::cuda::kCudaPlatformId) {
133 // CUDA devices must have a minimum compute capability.
134 int major_version, minor_version;
135 if (description.cuda_compute_capability(&major_version, &minor_version)) {
136 if (major_version < kMinCudaComputeCapabilityMajor ||
137 (major_version == kMinCudaComputeCapabilityMajor &&
138 minor_version < kMinCudaComputeCapabilityMinor)) {
139 LOG(INFO) << "StreamExecutor cuda device ("
140 << executor->device_ordinal() << ") is of "
141 << "insufficient compute capability: "
142 << kMinCudaComputeCapabilityMajor << "."
143 << kMinCudaComputeCapabilityMinor << " required, "
144 << "device is " << major_version << "." << minor_version;
145 return false;
146 }
147 }
148 } else if (executor->platform()->id() == se::rocm::kROCmPlatformId) {
149 int isa_version = 0;
150 if (description.rocm_amdgpu_isa_version(&isa_version)) {
151 if (isa_version < kMinAMDGPUISAVersion) {
152 LOG(INFO) << "StreamExecutor ROCM device ("
153 << executor->device_ordinal() << ") is of "
154 << "obsolete AMDGPU ISA version: "
155 << "gfx" << kMinAMDGPUISAVersion << " required, "
156 << "device is gfx" << isa_version;
157 return false;
158 }
159 }
160 }
161 return true;
162 }
163
// Creates (or fetches cached) StreamExecutors for every visible, supported
// device of `platform`, optionally restricted to the ordinals in
// `allowed_devices`. Initialization runs in parallel on a thread pool; each
// task writes only its own slot of `stream_executors`, so no locking is
// needed. Returns the executors in ordinal order, or an error if the
// platform has no devices / no supported devices.
/* static */ StatusOr<std::vector<se::StreamExecutor*>>
PlatformUtil::GetStreamExecutors(
    se::Platform* platform,
    const absl::optional<std::set<int>>& allowed_devices) {
  int device_count = platform->VisibleDeviceCount();
  if (device_count <= 0) {
    return NotFound("no %s devices found", platform->Name());
  }
  if (platform->id() == se::host::kHostPlatformId) {
    // On host "devices", StreamExecutor exports a device for each hardware
    // thread. Because we parallelize a single computation across threads, it
    // doesn't make sense to expose these as separate devices, so by default we
    // fix the number of devices to one. However we do let the user override
    // this behavior to help run tests on the host that run models in parallel
    // across multiple devices.
    device_count =
        GetDebugOptionsFromFlags().xla_force_host_platform_device_count();
  }
  // Slot i holds the executor for ordinal i, or nullptr if that device was
  // skipped, unsupported, or failed to initialize.
  std::vector<se::StreamExecutor*> stream_executors(device_count, nullptr);
  VLOG(1) << "Initializing devices";
  {
    tensorflow::thread::ThreadPool thread_pool(
        tensorflow::Env::Default(), "device_initialization", device_count);
    for (int i = 0; i < device_count; ++i) {
      // Once a stream executor is instantiated it will cause allocations on
      // the device, for example for GPUs cuda context, cudnn handles etc. will
      // be constructed. By constructing stream executors only on the
      // allowed_devices, we don't make any allocations on other devices.
      // This helps in multi-process executions on the same host like horovod or
      // shared hosts.
      if (allowed_devices && allowed_devices->count(i) == 0) {
        VLOG(1) << "Not initializing StreamExecutor for device " << i
                << " since it is not in the visible device list";
        continue;
      }
      // `i` is captured by value; `stream_executors` outlives the pool (see
      // the scope below), so capturing it by reference is safe.
      thread_pool.Schedule([platform, i, &stream_executors]() {
        VLOG(1) << "Started device init " << i;
        se::StreamExecutorConfig config;
        config.ordinal = i;
        auto executor_status = platform->GetExecutor(config);
        if (executor_status.ok()) {
          se::StreamExecutor* executor = executor_status.ValueOrDie();
          if (IsDeviceSupported(executor)) {
            stream_executors[i] = executor;
          }
        } else {
          LOG(WARNING) << "unable to create StreamExecutor for "
                       << platform->Name() << ":" << i << ": "
                       << executor_status.status().error_message();
        }
        VLOG(1) << "Finished device init " << i;
      });
    }
    // Block here in thread_pool destructor until all devices are initialized.
  }
  VLOG(1) << "Device initialization complete";

  // Compact out the nullptr slots (skipped or unsupported devices).
  std::vector<se::StreamExecutor*> out;
  for (se::StreamExecutor* executor : stream_executors) {
    if (executor != nullptr) {
      out.push_back(executor);
    }
  }
  if (out.empty()) {
    return InternalError("no supported devices found for platform %s",
                         platform->Name());
  }
  return out;
}
233
234 } // namespace xla
235