• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/stream_executor/rocm/rocm_platform.h"
17 
18 #include "absl/base/call_once.h"
19 #include "absl/memory/memory.h"
20 #include "absl/strings/str_format.h"
21 #include "tensorflow/stream_executor/gpu/gpu_driver.h"
22 #include "tensorflow/stream_executor/gpu/gpu_executor.h"
23 #include "tensorflow/stream_executor/lib/error.h"
24 #include "tensorflow/stream_executor/lib/initialize.h"
25 #include "tensorflow/stream_executor/lib/status.h"
26 #include "tensorflow/stream_executor/rocm/rocm_platform_id.h"
27 
28 namespace stream_executor {
29 namespace gpu {
30 
ROCmPlatform()31 ROCmPlatform::ROCmPlatform()
32     : name_("ROCM"), min_numa_node_(0), limit_numa_node_(0) {}
33 
~ROCmPlatform()34 ROCmPlatform::~ROCmPlatform() {}
35 
36 // Due to legacy issues in user code, we can't currently call InpectNumaNodes
37 // at module initialization time, because non-GPU programs still include this
38 // plugin via various methods, so instead, it has to be init-on-reference.
InspectNumaNodes()39 void ROCmPlatform::InspectNumaNodes() {
40   // To get NUMA node information, we need to create all executors, so we can
41   // examine their device descriptions to see their bus assignments.
42   absl::once_flag once;
43   absl::call_once(once, [&] {
44     StreamExecutorConfig config;
45     for (int i = 0; i < VisibleDeviceCount(); i++) {
46       config.ordinal = i;
47       StreamExecutor* exec = GetExecutor(config).ValueOrDie();
48       if (i == 0) {
49         // NUMA nodes may not start at 0, so set the minimum node  based on the
50         // first executor we see.
51         min_numa_node_ = exec->GetDeviceDescription().numa_node();
52         limit_numa_node_ = min_numa_node_ + 1;
53       } else {
54         min_numa_node_ =
55             std::min(min_numa_node_, exec->GetDeviceDescription().numa_node());
56         limit_numa_node_ = std::max(
57             limit_numa_node_, exec->GetDeviceDescription().numa_node() + 1);
58       }
59     }
60   });
61 }
62 
BusCount()63 int ROCmPlatform::BusCount() {
64   InspectNumaNodes();
65   return limit_numa_node_ - min_numa_node_;
66 }
67 
DeviceToBus(int device_ordinal)68 int ROCmPlatform::DeviceToBus(int device_ordinal) {
69   StreamExecutorConfig config;
70   config.ordinal = device_ordinal;
71   StreamExecutor* exec = GetExecutor(config).ValueOrDie();
72   return exec->GetDeviceDescription().numa_node() - min_numa_node_;
73 }
74 
FirstExecutorForBus(int bus_ordinal)75 port::StatusOr<StreamExecutor*> ROCmPlatform::FirstExecutorForBus(
76     int bus_ordinal) {
77   InspectNumaNodes();
78   CHECK_LT(bus_ordinal, BusCount()) << "bus ordinal out of available range";
79   for (int i = 0; i < VisibleDeviceCount(); i++) {
80     if (DeviceToBus(i) == bus_ordinal) {
81       StreamExecutorConfig config;
82       config.ordinal = i;
83       return GetExecutor(config).ValueOrDie();
84     }
85   }
86 
87   return port::Status{
88       port::error::NOT_FOUND,
89       absl::StrFormat("Executor for bus %d not found.", bus_ordinal)};
90 }
91 
id() const92 Platform::Id ROCmPlatform::id() const { return rocm::kROCmPlatformId; }
93 
VisibleDeviceCount() const94 int ROCmPlatform::VisibleDeviceCount() const {
95   // Throw away the result - it logs internally, and this [containing] function
96   // isn't in the path of user control. It's safe to call this > 1x.
97 
98   if (!gpu::GpuDriver::Init().ok()) {
99     return -1;
100   }
101 
102   return GpuDriver::GetDeviceCount();
103 }
104 
Name() const105 const string& ROCmPlatform::Name() const { return name_; }
106 
107 port::StatusOr<std::unique_ptr<DeviceDescription>>
DescriptionForDevice(int ordinal) const108 ROCmPlatform::DescriptionForDevice(int ordinal) const {
109   return GpuExecutor::CreateDeviceDescription(ordinal);
110 }
111 
ExecutorForDevice(int ordinal)112 port::StatusOr<StreamExecutor*> ROCmPlatform::ExecutorForDevice(int ordinal) {
113   StreamExecutorConfig config;
114   config.ordinal = ordinal;
115   config.plugin_config = PluginConfig();
116   config.device_options = DeviceOptions::Default();
117   return GetExecutor(config);
118 }
119 
ExecutorForDeviceWithPluginConfig(int device_ordinal,const PluginConfig & plugin_config)120 port::StatusOr<StreamExecutor*> ROCmPlatform::ExecutorForDeviceWithPluginConfig(
121     int device_ordinal, const PluginConfig& plugin_config) {
122   StreamExecutorConfig config;
123   config.ordinal = device_ordinal;
124   config.plugin_config = plugin_config;
125   config.device_options = DeviceOptions::Default();
126   return GetExecutor(config);
127 }
128 
GetExecutor(const StreamExecutorConfig & config)129 port::StatusOr<StreamExecutor*> ROCmPlatform::GetExecutor(
130     const StreamExecutorConfig& config) {
131   return executor_cache_.GetOrCreate(
132       config, [&]() { return GetUncachedExecutor(config); });
133 }
134 
135 port::StatusOr<std::unique_ptr<StreamExecutor>>
GetUncachedExecutor(const StreamExecutorConfig & config)136 ROCmPlatform::GetUncachedExecutor(const StreamExecutorConfig& config) {
137   auto executor = absl::make_unique<StreamExecutor>(
138       this, absl::make_unique<GpuExecutor>(config.plugin_config),
139       config.ordinal);
140   auto init_status = executor->Init(config.device_options);
141   if (!init_status.ok()) {
142     return port::Status{
143         port::error::INTERNAL,
144         absl::StrFormat(
145             "failed initializing StreamExecutor for ROCM device ordinal %d: %s",
146             config.ordinal, init_status.ToString().c_str())};
147   }
148 
149   return std::move(executor);
150 }
151 
RegisterTraceListener(std::unique_ptr<TraceListener> listener)152 void ROCmPlatform::RegisterTraceListener(
153     std::unique_ptr<TraceListener> listener) {
154   LOG(FATAL) << "not yet implemented: register ROCM trace listener";
155 }
156 
UnregisterTraceListener(TraceListener * listener)157 void ROCmPlatform::UnregisterTraceListener(TraceListener* listener) {
158   LOG(FATAL) << "not yet implemented: unregister ROCM trace listener";
159 }
160 
161 }  // namespace gpu
162 
InitializeROCmPlatform()163 static void InitializeROCmPlatform() {
164   // Disabling leak checking, MultiPlatformManager does not destroy its
165   // registered platforms.
166   auto status = MultiPlatformManager::PlatformWithName("ROCM");
167   if (!status.ok()) {
168     std::unique_ptr<gpu::ROCmPlatform> platform(new gpu::ROCmPlatform);
169     SE_CHECK_OK(MultiPlatformManager::RegisterPlatform(std::move(platform)));
170   }
171 }
172 
173 }  // namespace stream_executor
174 
// Registers stream_executor::InitializeROCmPlatform() under the initializer
// name `rocm_platform`.
// NOTE(review): presumably the macro arranges for the call to run at module
// initialization time — confirm against the macro's definition.
175 REGISTER_MODULE_INITIALIZER(rocm_platform,
176                             stream_executor::InitializeROCmPlatform());
177 
// Declares the `multi_platform_manager` initializer so that it can be named in
// the sequencing request below.
// NOTE(review): which of the two initializers the sequence macro orders first
// is defined by the macro, not visible here — confirm before relying on it.
178 DECLARE_MODULE_INITIALIZER(multi_platform_manager);
179 // Note that module initialization sequencing is not supported in the
180 // open-source project, so this will be a no-op there.
181 REGISTER_MODULE_INITIALIZER_SEQUENCE(rocm_platform, multi_platform_manager);
182