/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// Classes for managing access to XLA resources.

#include "tensorflow/compiler/xrt/xrt_device.h"

#include <map>

#include "absl/container/node_hash_map.h"
#include "tensorflow/compiler/jit/xla_device.h"
#include "tensorflow/core/common_runtime/gpu/gpu_process_state.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/resource_mgr.h"
#include "tensorflow/core/lib/core/status.h"

namespace tensorflow {
namespace {

// Process-wide arena handing out one ResourceMgr per XLA platform name. The
// managers are created lazily on first use and are intentionally never
// destroyed.
class ResourceMgrArena {
 public:
  static ResourceMgrArena* Get() {
    static ResourceMgrArena* arena = new ResourceMgrArena();
    return arena;
  }

  ResourceMgr* GetResourceMgr(const std::string& platform_name) {
    mutex_lock lock(mutex_);
    auto it = resource_managers_.find(platform_name);
    if (it == resource_managers_.end()) {
      it = resource_managers_.emplace(platform_name, new ResourceMgr()).first;
    }
    return it->second;
  }

 private:
  mutex mutex_;
  std::map<std::string, ResourceMgr*> resource_managers_;
};

}  // namespace

/*static*/ Status XRTGenericDeviceAccessor::GetResourceManager(
    OpKernelContext* ctx, ResourceMgr** rm) {
  const XlaDevice::Metadata* metadata;
  TF_RETURN_IF_ERROR(XlaDevice::GetMetadata(ctx, &metadata));
  *rm = ResourceMgrArena::Get()->GetResourceMgr(metadata->platform()->Name());
  return Status::OK();
}

/* static */ xla::StatusOr<RefPtr<XRTCompilationCache>>
XRTGenericDeviceAccessor::GetOrCreateCompilationCache(
    OpKernelContext* ctx, int64_t max_number_of_entries) {
  ResourceMgr* rm;
  TF_RETURN_IF_ERROR(GetResourceManager(ctx, &rm));
  return tensorflow::GetOrCreateCompilationCache(rm, max_number_of_entries);
}
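
// Illustrative sketch, not part of the original file: an XRT op kernel might
// obtain the per-platform ResourceMgr and the compilation cache through the
// two accessors above roughly as follows (`max_cache_entries` is a
// hypothetical value used only for this example):
//
//   ResourceMgr* rm;
//   OP_REQUIRES_OK(ctx, XRTGenericDeviceAccessor::GetResourceManager(ctx, &rm));
//   auto cache_or = XRTGenericDeviceAccessor::GetOrCreateCompilationCache(
//       ctx, /*max_number_of_entries=*/max_cache_entries);
//   OP_REQUIRES_OK(ctx, cache_or.status());
//   // ... unwrap cache_or into a RefPtr<XRTCompilationCache> and use it ...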

/*static*/ Status XRTGenericDeviceAccessor::InitScopedRef(
    OpKernelContext* ctx, int device_ordinal, ScopedRef* scoped_ref) {
  const XlaDevice::Metadata* metadata;
  TF_RETURN_IF_ERROR(XlaDevice::GetMetadata(ctx, &metadata));
  if (device_ordinal != metadata->device_ordinal()) {
    return errors::Internal("XRT device ordinal requested ", device_ordinal,
                            " on device with ordinal ",
                            metadata->device_ordinal());
  }
  scoped_ref->Acquire(metadata->client(), device_ordinal,
                      metadata->platform()->Name(), ctx);
  return Status::OK();
}

/*static*/ Status XRTGenericDeviceAccessor::InitScopedRef(
    OpKernelContext* ctx, ScopedRef* scoped_ref) {
  const XlaDevice::Metadata* metadata;
  TF_RETURN_IF_ERROR(XlaDevice::GetMetadata(ctx, &metadata));
  scoped_ref->Acquire(metadata->client(), metadata->device_ordinal(),
                      metadata->platform()->Name(), ctx);
  return Status::OK();
}
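
// Illustrative sketch, not part of the original file: callers typically
// create a ScopedRef and let one of the InitScopedRef overloads above bind it
// to the kernel's device, e.g.:
//
//   XRTGenericDeviceAccessor::ScopedRef device_ref;
//   OP_REQUIRES_OK(ctx,
//                  XRTGenericDeviceAccessor::InitScopedRef(ctx, &device_ref));
//   // device_ref now carries the xla::LocalClient, device ordinal and device
//   // memory allocator selected by Acquire() below.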

// Per-stream allocator adapters shared by all ScopedRef instances. Lazily
// populated in Acquire(); insertions are serialized by cuda_allocator_mutex_.
/* static */ tensorflow::mutex
    XRTGenericDeviceAccessor::ScopedRef::cuda_allocator_mutex_(
        tensorflow::LINKER_INITIALIZED);
/* static */ absl::flat_hash_map<stream_executor::Stream*,
                                 std::unique_ptr<se::TfAllocatorAdapter>>*
    XRTGenericDeviceAccessor::ScopedRef::cuda_allocators_ =
        new absl::flat_hash_map<stream_executor::Stream*,
                                std::unique_ptr<se::TfAllocatorAdapter>>;

void XRTGenericDeviceAccessor::ScopedRef::Acquire(
    xla::LocalClient* client, int ordinal, const std::string& platform_name,
    OpKernelContext* ctx) {
  client_ = client;
  ordinal_ = ordinal;
  allocator_ = client_->mutable_backend()->memory_allocator();
#if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
    (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
  if (platform_name == "CUDA") {
    // On the CUDA platform, replace the backend's default allocator with a
    // per-stream TfAllocatorAdapter wrapping the process-wide BFC GPU
    // allocator.
    auto stream = ctx->op_device_context()->stream();
    if (!cuda_allocators_->count(stream)) {
      // Double-checked lookup: take the mutex only when this stream has no
      // cached adapter yet.
      mutex_lock lock(cuda_allocator_mutex_);
      if (!cuda_allocators_->count(stream)) {
        GPUOptions gpu_options;
        Allocator* raw_allocator =
            GPUProcessState::singleton()->GetGPUAllocator(TfDeviceId(ordinal_));
        (*cuda_allocators_)[stream] =
            std::make_unique<se::TfAllocatorAdapter>(raw_allocator, stream);
      }
    }
    allocator_ = static_cast<se::DeviceMemoryAllocator*>(
        (*cuda_allocators_)[stream].get());
  }
#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
}
}  // namespace tensorflow