1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_STREAM_EXECUTOR_TF_ALLOCATOR_ADAPTER_H_ 17 #define TENSORFLOW_STREAM_EXECUTOR_TF_ALLOCATOR_ADAPTER_H_ 18 19 #include "tensorflow/core/framework/allocator.h" 20 #include "tensorflow/stream_executor/device_memory.h" 21 #include "tensorflow/stream_executor/device_memory_allocator.h" 22 #include "tensorflow/stream_executor/lib/statusor.h" 23 #include "tensorflow/stream_executor/platform.h" 24 25 namespace stream_executor { 26 27 // Adapter class that wraps a Tensorflow allocator. 28 // 29 // Assumes that the Tensorflow allocator permits asynchronous deallocation: 30 // see comment on `AllowsAsynchronousDeallocation()`. 31 class TfAllocatorAdapter : public DeviceMemoryAllocator { 32 public: 33 // stream: a Stream on which the allocator can only be used. If non-null, the 34 // allocator can not be used on any other stream. 35 TfAllocatorAdapter(tensorflow::Allocator *wrapped, Stream *stream); 36 37 // Constructor for the cases where `stream` can not be provided. 38 TfAllocatorAdapter(tensorflow::Allocator *wrapped, Platform *platform); 39 40 ~TfAllocatorAdapter() override; 41 42 port::StatusOr<OwningDeviceMemory> Allocate(int device_ordinal, uint64 size, 43 bool retry_on_failure, 44 int64 memory_space) override; 45 46 port::Status Deallocate(int device_ordinal, DeviceMemoryBase mem) override; 47 48 // The Tensorflow BFC allocator used on GPU allows host-side deallocation 49 // before GPU execution takes place. Tensorflow uses the ordering of the main 50 // compute stream to enforce a happens-before relationship between a memory 51 // allocation and code that reuses the same memory. If Tensorflow adds 52 // support for multiple GPU streams or allocators with different ordering 53 // requirements, this code may need to change. 54 // (This attribute has no effect on CPU.) AllowsAsynchronousDeallocation()55 bool AllowsAsynchronousDeallocation() const override { return true; } 56 57 port::StatusOr<Stream *> GetStream(int device_ordinal) override; 58 59 private: 60 tensorflow::Allocator *wrapped_; 61 Stream *stream_; 62 }; 63 64 // Adapter class that wraps per-device TF allocators with corresponding streams 65 // as a TfAllocatorAdapter. Assumes that the Tensorflow allocator permits 66 // asynchronous deallocation; see comment on `AllowsAsynchronousDeallocation()`. 67 class MultiDeviceAdapter : public DeviceMemoryAllocator { 68 public: 69 using AllocatorWithStream = 70 std::pair<std::unique_ptr<tensorflow::Allocator>, Stream *>; MultiDeviceAdapter(const Platform * platform,std::vector<AllocatorWithStream> tf_allocators)71 MultiDeviceAdapter(const Platform *platform, 72 std::vector<AllocatorWithStream> tf_allocators) 73 : DeviceMemoryAllocator(platform) { 74 tf_allocators_.reserve(tf_allocators.size()); 75 for (AllocatorWithStream &p : tf_allocators) { 76 per_device_allocators_.emplace_back(p.first.get(), p.second); 77 tf_allocators_.push_back(std::move(p.first)); 78 } 79 } 80 Allocate(int device_ordinal,uint64 size,bool retry_on_failure,int64 memory_space)81 port::StatusOr<OwningDeviceMemory> Allocate(int device_ordinal, uint64 size, 82 bool retry_on_failure, 83 int64 memory_space) override { 84 CHECK_LT(device_ordinal, per_device_allocators_.size()); 85 return per_device_allocators_[device_ordinal].Allocate( 86 device_ordinal, size, retry_on_failure, memory_space); 87 } 88 Deallocate(int device_ordinal,DeviceMemoryBase mem)89 port::Status Deallocate(int device_ordinal, DeviceMemoryBase mem) override { 90 CHECK_LT(device_ordinal, per_device_allocators_.size()); 91 return per_device_allocators_[device_ordinal].Deallocate(device_ordinal, 92 mem); 93 } 94 95 // The Tensorflow BFC allocator used on GPU allows host-side deallocation 96 // before GPU execution takes place. Tensorflow uses the ordering of the main 97 // compute stream to enforce a happens-before relationship between a memory 98 // allocation and code that reuses the same memory. If Tensorflow adds 99 // support for multiple GPU streams or allocators with different ordering 100 // requirements, this code may need to change. 101 // (This attribute has no effect on CPU.) AllowsAsynchronousDeallocation()102 bool AllowsAsynchronousDeallocation() const override { return true; } 103 GetStream(int device_ordinal)104 port::StatusOr<Stream *> GetStream(int device_ordinal) override { 105 return per_device_allocators_[device_ordinal].GetStream(device_ordinal); 106 } 107 108 private: 109 std::vector<TfAllocatorAdapter> per_device_allocators_; 110 // The wrapped TF allocators backing per_device_allocators_ 111 // (TfAllocatorAdapter does not take ownership of its underlying Allocator). 112 std::vector<std::unique_ptr<tensorflow::Allocator>> tf_allocators_; 113 }; 114 115 } // namespace stream_executor 116 117 #endif // TENSORFLOW_STREAM_EXECUTOR_TF_ALLOCATOR_ADAPTER_H_ 118