1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_CORE_COMMON_RUNTIME_POOL_ALLOCATOR_H_ 17 #define TENSORFLOW_CORE_COMMON_RUNTIME_POOL_ALLOCATOR_H_ 18 19 // Simple LRU pool allocators for various flavors of CPU RAM. 20 21 #include <atomic> 22 #include <map> 23 #include <memory> 24 #include <vector> 25 26 #include "tensorflow/core/framework/allocator.h" 27 #include "tensorflow/core/lib/core/bits.h" 28 #include "tensorflow/core/platform/logging.h" 29 #include "tensorflow/core/platform/macros.h" 30 #include "tensorflow/core/platform/mutex.h" 31 #include "tensorflow/core/platform/types.h" 32 33 namespace tensorflow { 34 35 // Interface of an object that rounds up integers. 36 class RoundUpInterface { 37 public: ~RoundUpInterface()38 virtual ~RoundUpInterface() {} 39 virtual size_t RoundUp(size_t num_bytes) = 0; 40 }; 41 42 // Size-limited pool of memory buffers obtained from a SubAllocator 43 // instance. Pool eviction policy is LRU. 44 class PoolAllocator : public Allocator { 45 public: 46 // "pool_size_limit" is the maximum number of returned, re-usable 47 // memory buffers to keep in the pool. If pool_size_limit == 0, the 48 // pool is effectively a thin wrapper around the allocator. 49 // If "auto_resize" is true, then the pool_size_limit will gradually 50 // be raised so that deallocations happen very rarely, if at all. 51 // Transitory start-up objects may deallocate, but the long-term 52 // working-set should not. Auto-resizing can raise pool_size_limit 53 // but will never lower it. 54 // "allocator" is the object that performs the underlying memory 55 // malloc/free operations. This object takes ownership of allocator. 56 PoolAllocator(size_t pool_size_limit, bool auto_resize, 57 SubAllocator* allocator, RoundUpInterface* size_rounder, 58 string name); 59 ~PoolAllocator() override; 60 Name()61 string Name() override { return name_; } 62 63 void* AllocateRaw(size_t alignment, size_t num_bytes) override; 64 65 void DeallocateRaw(void* ptr) override; 66 67 // Allocate an unused memory region of size "num_bytes". Fetch from 68 // the pool if available, otherwise call allocator_. 69 void* Get(size_t num_bytes); 70 71 // Return a no-longer needed memory region to the pool. It is an error 72 // to deference "ptr" after this call. If the pool is full, the least 73 // recently used region will be deallocated. 74 void Put(void* ptr, size_t num_bytes); 75 76 // Reset the pool to empty. 77 void Clear(); 78 79 // The following accessors permit monitoring the effectiveness of 80 // the pool at avoiding repeated malloc/frees on the underlying 81 // allocator. Read locks are not taken on the theory that value 82 // consistency with other threads is not important. 83 84 // Number of Get() requests satisfied from pool. get_from_pool_count()85 int64 get_from_pool_count() const TF_NO_THREAD_SAFETY_ANALYSIS { 86 return get_from_pool_count_; 87 } 88 // Number of Put() requests. put_count()89 int64 put_count() const TF_NO_THREAD_SAFETY_ANALYSIS { return put_count_; } 90 // Number of Get() requests requiring a fresh allocation. allocated_count()91 int64 allocated_count() const TF_NO_THREAD_SAFETY_ANALYSIS { 92 return allocated_count_; 93 } 94 // Number of pool evictions. evicted_count()95 int64 evicted_count() const TF_NO_THREAD_SAFETY_ANALYSIS { 96 return evicted_count_; 97 } 98 // Current size limit. size_limit()99 size_t size_limit() const TF_NO_THREAD_SAFETY_ANALYSIS { 100 return pool_size_limit_; 101 } 102 103 private: 104 struct PtrRecord { 105 void* ptr; 106 size_t num_bytes; 107 PtrRecord* prev; 108 PtrRecord* next; 109 }; 110 111 // Remove "pr" from the double-linked LRU list. 112 void RemoveFromList(PtrRecord* pr) TF_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 113 114 // Add "pr" to the head of the double-linked LRU list. 115 void AddToList(PtrRecord* pr) TF_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 116 117 // Delete the least recently used record. 118 void EvictOne() TF_EXCLUSIVE_LOCKS_REQUIRED(mutex_); 119 120 const string name_; 121 const bool has_size_limit_; 122 const bool auto_resize_; 123 size_t pool_size_limit_; 124 std::unique_ptr<SubAllocator> allocator_; 125 std::unique_ptr<RoundUpInterface> size_rounder_; 126 mutex mutex_; 127 std::multimap<const size_t, PtrRecord*> pool_ TF_GUARDED_BY(mutex_); 128 PtrRecord* lru_head_ TF_GUARDED_BY(mutex_) = nullptr; 129 PtrRecord* lru_tail_ TF_GUARDED_BY(mutex_) = nullptr; 130 int64 get_from_pool_count_ TF_GUARDED_BY(mutex_) = 0; 131 int64 put_count_ TF_GUARDED_BY(mutex_) = 0; 132 int64 allocated_count_ TF_GUARDED_BY(mutex_) = 0; 133 int64 evicted_count_ TF_GUARDED_BY(mutex_) = 0; 134 }; 135 136 // Do-nothing rounder. Passes through sizes unchanged. 137 class NoopRounder : public RoundUpInterface { 138 public: RoundUp(size_t num_bytes)139 size_t RoundUp(size_t num_bytes) override { return num_bytes; } 140 }; 141 142 // Power of 2 rounder: rounds up to nearest power of 2 size. 143 class Pow2Rounder : public RoundUpInterface { 144 public: RoundUp(size_t num_bytes)145 size_t RoundUp(size_t num_bytes) override { 146 return 1uLL << Log2Ceiling64(num_bytes); 147 } 148 }; 149 150 class BasicCPUAllocator : public SubAllocator { 151 public: BasicCPUAllocator(int numa_node,const std::vector<Visitor> & alloc_visitors,const std::vector<Visitor> & free_visitors)152 BasicCPUAllocator(int numa_node, const std::vector<Visitor>& alloc_visitors, 153 const std::vector<Visitor>& free_visitors) 154 : SubAllocator(alloc_visitors, free_visitors), numa_node_(numa_node) {} 155 ~BasicCPUAllocator()156 ~BasicCPUAllocator() override {} 157 158 void* Alloc(size_t alignment, size_t num_bytes, 159 size_t* bytes_received) override; 160 161 void Free(void* ptr, size_t num_bytes) override; 162 SupportsCoalescing()163 bool SupportsCoalescing() const override { return false; } 164 165 private: 166 int numa_node_; 167 168 TF_DISALLOW_COPY_AND_ASSIGN(BasicCPUAllocator); 169 }; 170 171 } // namespace tensorflow 172 #endif // TENSORFLOW_CORE_COMMON_RUNTIME_POOL_ALLOCATOR_H_ 173