1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/stream_executor/rocm/rocm_platform.h"
17
18 #include "absl/base/call_once.h"
19 #include "absl/memory/memory.h"
20 #include "absl/strings/str_format.h"
21 #include "tensorflow/stream_executor/gpu/gpu_driver.h"
22 #include "tensorflow/stream_executor/gpu/gpu_executor.h"
23 #include "tensorflow/stream_executor/lib/error.h"
24 #include "tensorflow/stream_executor/lib/initialize.h"
25 #include "tensorflow/stream_executor/lib/status.h"
26 #include "tensorflow/stream_executor/rocm/rocm_platform_id.h"
27
28 namespace stream_executor {
29 namespace gpu {
30
ROCmPlatform()31 ROCmPlatform::ROCmPlatform()
32 : name_("ROCM"), min_numa_node_(0), limit_numa_node_(0) {}
33
~ROCmPlatform()34 ROCmPlatform::~ROCmPlatform() {}
35
36 // Due to legacy issues in user code, we can't currently call InpectNumaNodes
37 // at module initialization time, because non-GPU programs still include this
38 // plugin via various methods, so instead, it has to be init-on-reference.
InspectNumaNodes()39 void ROCmPlatform::InspectNumaNodes() {
40 // To get NUMA node information, we need to create all executors, so we can
41 // examine their device descriptions to see their bus assignments.
42 absl::once_flag once;
43 absl::call_once(once, [&] {
44 StreamExecutorConfig config;
45 for (int i = 0; i < VisibleDeviceCount(); i++) {
46 config.ordinal = i;
47 StreamExecutor* exec = GetExecutor(config).ValueOrDie();
48 if (i == 0) {
49 // NUMA nodes may not start at 0, so set the minimum node based on the
50 // first executor we see.
51 min_numa_node_ = exec->GetDeviceDescription().numa_node();
52 limit_numa_node_ = min_numa_node_ + 1;
53 } else {
54 min_numa_node_ =
55 std::min(min_numa_node_, exec->GetDeviceDescription().numa_node());
56 limit_numa_node_ = std::max(
57 limit_numa_node_, exec->GetDeviceDescription().numa_node() + 1);
58 }
59 }
60 });
61 }
62
BusCount()63 int ROCmPlatform::BusCount() {
64 InspectNumaNodes();
65 return limit_numa_node_ - min_numa_node_;
66 }
67
DeviceToBus(int device_ordinal)68 int ROCmPlatform::DeviceToBus(int device_ordinal) {
69 StreamExecutorConfig config;
70 config.ordinal = device_ordinal;
71 StreamExecutor* exec = GetExecutor(config).ValueOrDie();
72 return exec->GetDeviceDescription().numa_node() - min_numa_node_;
73 }
74
FirstExecutorForBus(int bus_ordinal)75 port::StatusOr<StreamExecutor*> ROCmPlatform::FirstExecutorForBus(
76 int bus_ordinal) {
77 InspectNumaNodes();
78 CHECK_LT(bus_ordinal, BusCount()) << "bus ordinal out of available range";
79 for (int i = 0; i < VisibleDeviceCount(); i++) {
80 if (DeviceToBus(i) == bus_ordinal) {
81 StreamExecutorConfig config;
82 config.ordinal = i;
83 return GetExecutor(config).ValueOrDie();
84 }
85 }
86
87 return port::Status{
88 port::error::NOT_FOUND,
89 absl::StrFormat("Executor for bus %d not found.", bus_ordinal)};
90 }
91
id() const92 Platform::Id ROCmPlatform::id() const { return rocm::kROCmPlatformId; }
93
VisibleDeviceCount() const94 int ROCmPlatform::VisibleDeviceCount() const {
95 // Throw away the result - it logs internally, and this [containing] function
96 // isn't in the path of user control. It's safe to call this > 1x.
97
98 if (!gpu::GpuDriver::Init().ok()) {
99 return -1;
100 }
101
102 return GpuDriver::GetDeviceCount();
103 }
104
Name() const105 const string& ROCmPlatform::Name() const { return name_; }
106
107 port::StatusOr<std::unique_ptr<DeviceDescription>>
DescriptionForDevice(int ordinal) const108 ROCmPlatform::DescriptionForDevice(int ordinal) const {
109 return GpuExecutor::CreateDeviceDescription(ordinal);
110 }
111
ExecutorForDevice(int ordinal)112 port::StatusOr<StreamExecutor*> ROCmPlatform::ExecutorForDevice(int ordinal) {
113 StreamExecutorConfig config;
114 config.ordinal = ordinal;
115 config.plugin_config = PluginConfig();
116 config.device_options = DeviceOptions::Default();
117 return GetExecutor(config);
118 }
119
ExecutorForDeviceWithPluginConfig(int device_ordinal,const PluginConfig & plugin_config)120 port::StatusOr<StreamExecutor*> ROCmPlatform::ExecutorForDeviceWithPluginConfig(
121 int device_ordinal, const PluginConfig& plugin_config) {
122 StreamExecutorConfig config;
123 config.ordinal = device_ordinal;
124 config.plugin_config = plugin_config;
125 config.device_options = DeviceOptions::Default();
126 return GetExecutor(config);
127 }
128
GetExecutor(const StreamExecutorConfig & config)129 port::StatusOr<StreamExecutor*> ROCmPlatform::GetExecutor(
130 const StreamExecutorConfig& config) {
131 return executor_cache_.GetOrCreate(
132 config, [&]() { return GetUncachedExecutor(config); });
133 }
134
135 port::StatusOr<std::unique_ptr<StreamExecutor>>
GetUncachedExecutor(const StreamExecutorConfig & config)136 ROCmPlatform::GetUncachedExecutor(const StreamExecutorConfig& config) {
137 auto executor = absl::make_unique<StreamExecutor>(
138 this, absl::make_unique<GpuExecutor>(config.plugin_config),
139 config.ordinal);
140 auto init_status = executor->Init(config.device_options);
141 if (!init_status.ok()) {
142 return port::Status{
143 port::error::INTERNAL,
144 absl::StrFormat(
145 "failed initializing StreamExecutor for ROCM device ordinal %d: %s",
146 config.ordinal, init_status.ToString().c_str())};
147 }
148
149 return std::move(executor);
150 }
151
RegisterTraceListener(std::unique_ptr<TraceListener> listener)152 void ROCmPlatform::RegisterTraceListener(
153 std::unique_ptr<TraceListener> listener) {
154 LOG(FATAL) << "not yet implemented: register ROCM trace listener";
155 }
156
UnregisterTraceListener(TraceListener * listener)157 void ROCmPlatform::UnregisterTraceListener(TraceListener* listener) {
158 LOG(FATAL) << "not yet implemented: unregister ROCM trace listener";
159 }
160
161 } // namespace gpu
162
InitializeROCmPlatform()163 static void InitializeROCmPlatform() {
164 // Disabling leak checking, MultiPlatformManager does not destroy its
165 // registered platforms.
166 auto status = MultiPlatformManager::PlatformWithName("ROCM");
167 if (!status.ok()) {
168 std::unique_ptr<gpu::ROCmPlatform> platform(new gpu::ROCmPlatform);
169 SE_CHECK_OK(MultiPlatformManager::RegisterPlatform(std::move(platform)));
170 }
171 }
172
173 } // namespace stream_executor
174
175 REGISTER_MODULE_INITIALIZER(rocm_platform,
176 stream_executor::InitializeROCmPlatform());
177
178 DECLARE_MODULE_INITIALIZER(multi_platform_manager);
179 // Note that module initialization sequencing is not supported in the
180 // open-source project, so this will be a no-op there.
181 REGISTER_MODULE_INITIALIZER_SEQUENCE(rocm_platform, multi_platform_manager);
182