/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_PROCESS_STATE_H_
#define TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_PROCESS_STATE_H_

#include <functional>
#include <map>
#include <unordered_map>
#include <vector>

#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
#include "tensorflow/core/common_runtime/process_state.h"
#include "tensorflow/core/common_runtime/shared_counter.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/thread_annotations.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/protobuf/config.pb.h"

namespace tensorflow {

class Allocator;
class GPUBFCAllocator;
class PoolAllocator;
class SharedCounter;

// Singleton that manages per-process state when GPUs are present.
class GPUProcessState {
 public:
  // If ps == nullptr, returns a pointer to the single instance of this class
  // to be used within this process.
  //
  // If ps != nullptr, accepts a value to be returned by all subsequent calls.
  // A non-null ps may ONLY be provided during program static storage
  // initialization.  Must not be called more than once with a non-null ps.
  //
  // If a derived class of GPUProcessState is ever used in a process, it must
  // always be used in place of this class.  In order to ensure that existing
  // calls to GPUProcessState::singleton() all resolve to the derived instance
  // instead, this function must be called once during startup, supplying the
  // derived instance value, prior to any accessor call to this function.
  static GPUProcessState* singleton(GPUProcessState* ps = nullptr);

  // Query whether any GPU device has been created so far.
  // Disable thread safety analysis since a race is benign here.
  bool HasGPUDevice() const TF_NO_THREAD_SAFETY_ANALYSIS {
    return gpu_device_enabled_;
  }

  // Set the flag to indicate a GPU device has been created.
  // Disable thread safety analysis since a race is benign here.
  void EnableGPUDevice() TF_NO_THREAD_SAFETY_ANALYSIS {
    gpu_device_enabled_ = true;
  }

  // Returns the one GPU allocator used for the indexed GPU.
  // Note that this is a system GPU index, not (necessarily) a brain
  // device index.
  //
  // 'total_bytes' is the total number of bytes that should be made
  // available to the allocator.  The first call to this function for
  // a given tf_gpu_id creates the allocator, so only the total_bytes
  // used on that first call is used.
  //
  // "Allocator type" describes the type of algorithm to use for the
  // underlying allocator.  REQUIRES: Must be a valid type (see
  // config.proto for the list of supported strings).
  //
  // REQUIRES: tf_gpu_id must be a valid id for a BaseGPUDevice available in
  // the current system environment.  Otherwise returns nullptr.
  virtual Allocator* GetGPUAllocator(const GPUOptions& options,
                                     TfGpuId tf_gpu_id, size_t total_bytes,
                                     const std::vector<TfGpuId>& peer_gpu_ids);
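  // Illustrative usage sketch (not part of the API contract): a caller might
  // obtain the per-GPU allocator through the process-wide singleton as shown
  // below.  The default GPUOptions, the TfGpuId(0), the 1 GiB byte budget, and
  // the empty peer list are assumptions for illustration only; real callers
  // derive these values from the device/session configuration.
  //
  //   GPUOptions gpu_options;             // assumed default options
  //   TfGpuId tf_gpu_id(0);               // hypothetical valid GPU id
  //   std::vector<TfGpuId> peer_gpu_ids;  // no peer GPUs in this sketch
  //   Allocator* allocator = GPUProcessState::singleton()->GetGPUAllocator(
  //       gpu_options, tf_gpu_id, /*total_bytes=*/1LL << 30, peer_gpu_ids);
  //   if (allocator == nullptr) {
  //     // tf_gpu_id did not name a valid BaseGPUDevice in this environment.
  //   }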
  int NumGPUAllocators() {
    mutex_lock l(mu_);
    return gpu_allocators_.size();
  }

  virtual Allocator* GetGpuHostAllocator(int numa_node);

  // Registers a Visitor to be invoked on new chunks of memory allocated by the
  // SubAllocator of every GPU proximate to the specified bus.  The AllocVisitor
  // is provided with a memory pointer, a GPU id, and the size of the area it
  // identifies.  The pointer is not guaranteed to be valid after the call
  // terminates.  The intention is for this interface to be used for network
  // device memory registration.  "bus_id" is platform-specific.  On many
  // platforms it should be 0.  On machines with multiple PCIe buses, it should
  // be the index of one of the PCIe buses (maybe the NUMA node at which the
  // PCIe is rooted).  If the bus_id is invalid, results are undefined.
  virtual void AddGPUAllocVisitor(int bus_id,
                                  const SubAllocator::Visitor& visitor);

  // Registers a Visitor to be invoked on new chunks of memory allocated by
  // the SubAllocator of the GpuHostAllocator for the given numa_node.
  virtual void AddGpuHostAllocVisitor(int numa_node,
                                      const SubAllocator::Visitor& visitor);

  // Registers a Visitor to be invoked on each chunk handed back for freeing to
  // the SubAllocator of the GpuHostAllocator for the given numa_node.
  virtual void AddGpuHostFreeVisitor(int numa_node,
                                     const SubAllocator::Visitor& visitor);

  // Returns bus_id for the given GPU id.
  virtual int BusIdForGPU(TfGpuId tf_gpu_id);

  SharedCounter* GPUAllocatorCounter(TfGpuId tf_gpu_id);

 protected:
  // GPUProcessState is a singleton that should not normally be deleted except
  // at process shutdown.
  GPUProcessState();
  virtual ~GPUProcessState() {}
  friend class GPUDeviceTest;

  // Helper method for unit tests to reset the ProcessState singleton by
  // cleaning up everything.  Never use in production.
  virtual void TestOnlyReset();

  ProcessState::MDMap* mem_desc_map() {
    if (process_state_) return &process_state_->mem_desc_map_;
    return nullptr;
  }

  static GPUProcessState* instance_;
  ProcessState* process_state_;  // Not owned.
  bool gpu_device_enabled_;

  mutex mu_;

  struct AllocatorParts {
    std::unique_ptr<Allocator> allocator;
    std::unique_ptr<SharedCounter> counter;
    GPUBFCAllocator* bfc_allocator;
    SubAllocator* sub_allocator;  // owned by allocator
    std::unique_ptr<Allocator> recording_allocator;
  };
  std::vector<AllocatorParts> gpu_allocators_ TF_GUARDED_BY(mu_);
  std::vector<std::vector<SubAllocator::Visitor>> gpu_visitors_
      TF_GUARDED_BY(mu_);

  std::vector<AllocatorParts> gpu_host_allocators_ TF_GUARDED_BY(mu_);
  std::vector<std::vector<SubAllocator::Visitor>> gpu_host_alloc_visitors_
      TF_GUARDED_BY(mu_);
  std::vector<std::vector<SubAllocator::Visitor>> gpu_host_free_visitors_
      TF_GUARDED_BY(mu_);
};

}  // namespace tensorflow
#endif  // TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_PROCESS_STATE_H_
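// Illustrative usage sketch (not part of this header): registering a
// SubAllocator::Visitor so that, for example, a networking layer could
// register each new chunk of GPU memory with a NIC.  The bus_id of 0 and the
// empty visitor body are placeholders; registration is typically done early
// in startup, before the corresponding allocators are created.
//
//   GPUProcessState::singleton()->AddGPUAllocVisitor(
//       /*bus_id=*/0, [](void* ptr, int gpu_id, size_t num_bytes) {
//         // A real visitor would register [ptr, ptr + num_bytes) for
//         // `gpu_id` with the network device here.
//       });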