/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_BFC_ALLOCATOR_H_
#define TENSORFLOW_CORE_COMMON_RUNTIME_BFC_ALLOCATOR_H_

#include <algorithm>
#include <array>
#include <atomic>
#include <cstdint>
#include <deque>
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "absl/container/flat_hash_set.h"
#include "tensorflow/core/common_runtime/allocator_retry.h"
#include "tensorflow/core/common_runtime/shared_counter.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/lib/strings/numbers.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/thread_annotations.h"
#include "tensorflow/core/platform/types.h"

namespace tensorflow {

class MemoryDump;

// A memory allocator that implements a 'best-fit with coalescing'
// algorithm.  This is essentially a very simple version of Doug Lea's
// malloc (dlmalloc).
//
// The goal of this allocator is to support defragmentation via
// coalescing.  One assumption we make is that the process using this
// allocator owns pretty much all of the memory, and that nearly
// all requests to allocate memory go through this interface.
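//
// Illustrative usage (a sketch only; `device_sub_allocator` and the sizes
// below are placeholders supplied by the caller):
//
//   BFCAllocator* allocator = new BFCAllocator(
//       device_sub_allocator, /*total_memory=*/1 << 30,
//       /*allow_growth=*/true, /*name=*/"device_bfc");
//   void* ptr = allocator->AllocateRaw(Allocator::kAllocatorAlignment, 4096);
//   // ... use the 4KB buffer ...
//   allocator->DeallocateRaw(ptr);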
class BFCAllocator : public Allocator {
 public:
  // Takes ownership of sub_allocator.
  BFCAllocator(SubAllocator* sub_allocator, size_t total_memory,
               bool allow_growth, const string& name,
               bool garbage_collection = false);
  ~BFCAllocator() override;

  string Name() override { return name_; }

  void* AllocateRaw(size_t alignment, size_t num_bytes) override {
    return AllocateRaw(alignment, num_bytes, AllocationAttributes());
  }

  void* AllocateRaw(size_t alignment, size_t num_bytes,
                    const AllocationAttributes& allocation_attr) override;

  void DeallocateRaw(void* ptr) override;

  bool TracksAllocationSizes() const override;

  size_t RequestedSize(const void* ptr) const override;

  size_t AllocatedSize(const void* ptr) const override;

  int64 AllocationId(const void* ptr) const override;

  absl::optional<AllocatorStats> GetStats() override;

  void ClearStats() override;

  void SetTimingCounter(SharedCounter* sc) { timing_counter_ = sc; }

  void SetSafeFrontier(uint64 count) override;

  bool ShouldRecordOpName() const { return true; }

  MemoryDump RecordMemoryMap();

 private:
  struct Bin;

  void* AllocateRawInternal(size_t alignment, size_t num_bytes,
                            bool dump_log_on_failure,
                            uint64 freed_before_count);

  void* AllocateRawInternalWithRetry(
      size_t alignment, size_t num_bytes,
      const AllocationAttributes& allocation_attr);

  void DeallocateRawInternal(void* ptr);

  // Chunks whose freed_at_count is later than the safe frontier value are kept
  // on a special list and not subject to merging immediately upon being freed.
  //
  // This function sweeps that list looking for Chunks whose timestamp is now
  // safe.  When found their freed_at_count is set to 0 and we attempt to merge
  // them with their neighbors.
  //
  // If required_bytes > 0 then this function is being called in the context of
  // a need for this many bytes that could not be satisfied without merging
  // unsafe chunks, so we go ahead and merge the unsafe chunks too, just up to
  // the point that a free chunk of required_bytes is produced.  Note that
  // unsafe merged chunks adopt the most conservative timestamp from their
  // constituents so they're only useful for allocations not requiring a
  // particular timestamp.
  bool MergeTimestampedChunks(size_t required_bytes)
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Return the largest free chunk bytes from the largest bin in constant time.
  // The free chunks are sorted by size (and then address) in a bin.
  int64 LargestFreeChunk() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Add TraceMe (in memory allocation and deallocation) for memory stats
  // profiling.  The chunk_ptr is passed to get information such as address,
  // chunk size and requested_size.
  void AddTraceMe(absl::string_view traceme_name, const void* ptr)
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Overloaded AddTraceMe function with chunk information.
  void AddTraceMe(absl::string_view traceme_name, const void* chunk_ptr,
                  int64 req_bytes, int64 alloc_bytes)
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // A ChunkHandle is an index into the chunks_ vector in BFCAllocator
  // kInvalidChunkHandle means an invalid chunk
  typedef size_t ChunkHandle;
  static constexpr ChunkHandle kInvalidChunkHandle = SIZE_MAX;

  typedef int BinNum;
  static constexpr int kInvalidBinNum = -1;
  // The following means that the largest bin'd chunk size is 256 << 21 =
  // 512MB.
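  // Concretely (a worked example of the BinNumToSize/BinNumForSize mapping
  // defined further below): bin i collects free chunks of at least 256 << i
  // bytes, so bin 0 holds chunks in [256B, 512B), bin 1 holds [512B, 1KB),
  // ..., and the last bin (i = 20) holds every free chunk of 256MB or more.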
  static constexpr int kNumBins = 21;

  // A Chunk points to a piece of memory that's either entirely free or
  // entirely in use by one user memory allocation.
  //
  // An AllocationRegion's memory is split up into one or more disjoint Chunks,
  // which together cover the whole region without gaps.  Chunks participate in
  // a doubly-linked list, and the prev/next pointers point to the physically
  // adjacent chunks.
  //
  // Since a chunk cannot be partially in use, we may need to split a free
  // chunk in order to service a user allocation.  We always merge adjacent
  // free chunks.
  //
  // Chunks contain information about whether they are in use or whether they
  // are free, and contain a pointer to the bin they are in.
  struct Chunk {
    size_t size = 0;  // Full size of buffer.

    // We sometimes give chunks that are larger than needed to reduce
    // fragmentation.  requested_size keeps track of what the client
    // actually wanted so we can understand whether our splitting
    // strategy is efficient.
    size_t requested_size = 0;

    // allocation_id is set to -1 when the chunk is not in use.  It is assigned
    // a value greater than zero before the chunk is returned from
    // AllocateRaw, and this value is unique among values assigned by
    // the parent allocator.
    int64 allocation_id = -1;
    void* ptr = nullptr;  // pointer to granted subbuffer.

    // If not kInvalidChunkHandle, the memory referred to by 'prev' is directly
    // preceding the memory used by this chunk, e.g. it should start
    // at 'ptr - prev->size'.
    ChunkHandle prev = kInvalidChunkHandle;

    // If not kInvalidChunkHandle, the memory referred to by 'next' is directly
    // following the memory used by this chunk, e.g. it should be at
    // 'ptr + size'.
    ChunkHandle next = kInvalidChunkHandle;

    // What bin are we in?
    BinNum bin_num = kInvalidBinNum;

    // Optional count when this chunk was most recently made free.
    uint64 freed_at_count = 0;

    bool in_use() const { return allocation_id != -1; }

    // optional debugging info
    const char* op_name = nullptr;
    uint64 step_id = 0;
    uint64 action_count = 0;

    // Get the op name used for memory debugging.
    const char* GetDebugOpName() const {
      // If the chunk is not in use, the op_name pointer may be non-null even
      // though the corresponding OpKernel has already been deallocated, so
      // op_name may point to invalid memory.  In that case, return the
      // special op name "UNUSED".
      if (!in_use())
        return "UNUSED";
      else if (op_name)
        return op_name;
      else
        return "UNKNOWN";
    }

    string DebugString(BFCAllocator* a,
                       bool recurse) TF_NO_THREAD_SAFETY_ANALYSIS {
      string dbg;
      strings::StrAppend(
          &dbg, "  Size: ", strings::HumanReadableNumBytes(size),
          " | Requested Size: ", strings::HumanReadableNumBytes(requested_size),
          " | in_use: ", in_use(), " | bin_num: ", bin_num);
      if (recurse && prev != BFCAllocator::kInvalidChunkHandle) {
        Chunk* p = a->ChunkFromHandle(prev);
        strings::StrAppend(&dbg, ", prev: ", p->DebugString(a, false));
      }
      if (recurse && next != BFCAllocator::kInvalidChunkHandle) {
        Chunk* n = a->ChunkFromHandle(next);
        strings::StrAppend(&dbg, ", next: ", n->DebugString(a, false));
      }
      strings::StrAppend(&dbg, ", for: ", GetDebugOpName(),
                         ", stepid: ", step_id,
                         ", last_action: ", action_count);
      return dbg;
    }
  };

  // A Bin is a collection of similar-sized free chunks.
  // Allocated chunks are never in a Bin.
  struct Bin {
    // All chunks in this bin have >= bin_size memory.
    size_t bin_size = 0;

    class ChunkComparator {
     public:
      explicit ChunkComparator(BFCAllocator* allocator)
          : allocator_(allocator) {}
      // Sort first by size and then use pointer address as a tie breaker.
      bool operator()(const ChunkHandle ha,
                      const ChunkHandle hb) const TF_NO_THREAD_SAFETY_ANALYSIS {
        const Chunk* a = allocator_->ChunkFromHandle(ha);
        const Chunk* b = allocator_->ChunkFromHandle(hb);
        if (a->size != b->size) {
          return a->size < b->size;
        }
        return a->ptr < b->ptr;
      }

     private:
      BFCAllocator* allocator_;  // The parent allocator
    };

    typedef std::set<ChunkHandle, ChunkComparator> FreeChunkSet;
    // List of free chunks within the bin, sorted by chunk size.
    // Chunk * not owned.
    FreeChunkSet free_chunks;
    Bin(BFCAllocator* allocator, size_t bs)
        : bin_size(bs), free_chunks(ChunkComparator(allocator)) {}
  };

  static constexpr size_t kMinAllocationBits = 8;
  static constexpr size_t kMinAllocationSize = 1 << kMinAllocationBits;

  // BFCAllocator allocates memory into a collection of disjoint
  // AllocationRegions.  Each AllocationRegion corresponds to one call to
  // SubAllocator::Alloc().  (Actually, if a subsequent call to
  // SubAllocator::Alloc() returns another region immediately adjacent to the
  // last, it will be used to extend the first AllocationRegion, not create a
  // separate one.)
  //
  // An AllocationRegion contains one or more Chunks, covering all of its
  // memory.  Its primary job is to map pointers to ChunkHandles.
  //
  // This class is thread-compatible.
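  //
  // For example (sizes are illustrative): with kMinAllocationSize = 256, a
  // 1MB AllocationRegion carries 4096 handle slots, and a pointer at
  // ptr_ + 1000 maps to handles_[1000 >> kMinAllocationBits] = handles_[3].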
  class AllocationRegion {
   public:
    AllocationRegion(void* ptr, size_t memory_size)
        : ptr_(ptr),
          memory_size_(memory_size),
          end_ptr_(
              static_cast<void*>(static_cast<char*>(ptr_) + memory_size_)) {
      DCHECK_EQ(0, memory_size % kMinAllocationSize);
      const size_t n_handles =
          (memory_size + kMinAllocationSize - 1) / kMinAllocationSize;
      handles_.resize(n_handles, kInvalidChunkHandle);
    }

    AllocationRegion() = default;
    AllocationRegion(AllocationRegion&& other) { Swap(&other); }
    AllocationRegion& operator=(AllocationRegion&& other) {
      Swap(&other);
      return *this;
    }

    void* ptr() const { return ptr_; }
    void* end_ptr() const { return end_ptr_; }
    size_t memory_size() const { return memory_size_; }
    void extend(size_t size) {
      memory_size_ += size;
      DCHECK_EQ(0, memory_size_ % kMinAllocationSize);

      end_ptr_ = static_cast<void*>(static_cast<char*>(end_ptr_) + size);
      const size_t n_handles =
          (memory_size_ + kMinAllocationSize - 1) / kMinAllocationSize;
      handles_.resize(n_handles, kInvalidChunkHandle);
    }
    ChunkHandle get_handle(const void* p) const {
      return handles_[IndexFor(p)];
    }
    void set_handle(const void* p, ChunkHandle h) { handles_[IndexFor(p)] = h; }
    void erase(const void* p) { set_handle(p, kInvalidChunkHandle); }

   private:
    void Swap(AllocationRegion* other) {
      std::swap(ptr_, other->ptr_);
      std::swap(memory_size_, other->memory_size_);
      std::swap(end_ptr_, other->end_ptr_);
      std::swap(handles_, other->handles_);
    }

    size_t IndexFor(const void* p) const {
      std::uintptr_t p_int = reinterpret_cast<std::uintptr_t>(p);
      std::uintptr_t base_int = reinterpret_cast<std::uintptr_t>(ptr_);
      DCHECK_GE(p_int, base_int);
      DCHECK_LT(p_int, base_int + memory_size_);
      return static_cast<size_t>(((p_int - base_int) >> kMinAllocationBits));
    }

    // Metadata about the allocation region.
    void* ptr_ = nullptr;
    size_t memory_size_ = 0;
    void* end_ptr_ = nullptr;

    // Array of size "memory_size / kMinAllocationSize".  It is
    // indexed by (p-base) / kMinAllocationSize, contains ChunkHandle
    // for the memory allocation represented by "p"
    std::vector<ChunkHandle> handles_;

    TF_DISALLOW_COPY_AND_ASSIGN(AllocationRegion);
  };

  // RegionManager aggregates one or more "AllocationRegions" and provides
  // a layer of indirection from pointers to the underlying ChunkHandle,
  // allowing allocation across multiple discontiguous memory regions.
  //
  // This class is thread-compatible.
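  //
  // A note on lookup (addresses below are illustrative): regions_ is kept
  // sorted by end_ptr, and RegionFor(p) uses std::upper_bound with the
  // comparator `p < region.end_ptr()` to pick the first region whose end_ptr
  // lies beyond p.  With regions [0x1000, 0x2000) and [0x3000, 0x4000), a
  // lookup for 0x1800 resolves to the first region; a pointer at or past the
  // last end_ptr fails with LOG(FATAL).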
  class RegionManager {
   public:
    RegionManager() {}
    ~RegionManager() {}

    void AddAllocationRegion(void* ptr, size_t memory_size) {
      // Insert sorted by end_ptr.
      auto entry =
          std::upper_bound(regions_.begin(), regions_.end(), ptr, &Comparator);
      regions_.insert(entry, AllocationRegion(ptr, memory_size));
    }

    // Adds an allocation region for the given ptr and size, potentially
    // extending a region if ptr matches the end_ptr of an existing region.
    // If a region is extended, returns a pointer to the extended region so
    // that the BFC allocator can reason about chunkification.
    AllocationRegion* AddOrExtendAllocationRegion(void* ptr,
                                                  size_t memory_size) {
      // Insert sorted by end_ptr.
      auto entry =
          std::upper_bound(regions_.begin(), regions_.end(), ptr, &Comparator);
      // Check if it can be coalesced with the preceding region.
      if (entry != regions_.begin()) {
        auto preceding_region = entry - 1;
        if (preceding_region->end_ptr() == ptr) {
          if (VLOG_IS_ON(1)) {
            LOG(INFO) << "Extending region " << preceding_region->ptr()
                      << " of "
                      << strings::HumanReadableNumBytes(
                             preceding_region->memory_size())
                      << " by " << strings::HumanReadableNumBytes(memory_size)
                      << " bytes";
          }
          preceding_region->extend(memory_size);
          return &*preceding_region;
        }
      }
      VLOG(1) << "Inserting new region " << ptr << " of "
              << strings::HumanReadableNumBytes(memory_size);
      regions_.insert(entry, AllocationRegion(ptr, memory_size));
      return nullptr;
    }

    std::vector<AllocationRegion>::iterator RemoveAllocationRegion(
        std::vector<AllocationRegion>::iterator it) {
      return regions_.erase(it);
    }

    ChunkHandle get_handle(const void* p) const {
      return RegionFor(p)->get_handle(p);
    }

    void set_handle(const void* p, ChunkHandle h) {
      return MutableRegionFor(p)->set_handle(p, h);
    }
    void erase(const void* p) { return MutableRegionFor(p)->erase(p); }

    const std::vector<AllocationRegion>& regions() const { return regions_; }

   private:
    static bool Comparator(const void* ptr, const AllocationRegion& other) {
      return ptr < other.end_ptr();
    }

    AllocationRegion* MutableRegionFor(const void* p) {
      return const_cast<AllocationRegion*>(RegionFor(p));
    }

    const AllocationRegion* RegionFor(const void* p) const {
      auto entry =
          std::upper_bound(regions_.begin(), regions_.end(), p, &Comparator);

      if (entry != regions_.end()) {
        return &(*entry);
      }

      LOG(FATAL) << "Could not find Region for " << p;
      return nullptr;
    }

   private:
    std::vector<AllocationRegion> regions_;
  };

  // Returns 'bytes' rounded up to the next highest kMinAllocationSize.
  static size_t RoundedBytes(size_t bytes);

  // Try to add a new memory region that can satisfy an allocation of
  // 'rounded_bytes' bytes.  Returns true on success and false on
  // failure.
  bool Extend(size_t alignment, size_t rounded_bytes)
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Deallocate free regions to give back the memory to the suballocator, so
  // that we can re-allocate a larger region.  The main use scenario of this
  // function is when OOM happens but we have free regions and the sum of
  // sizes of free regions and unallocated bytes is larger than the requested
  // size, implying (external) memory fragmentation.  Returns true if any free
  // regions are found and freed; false otherwise.
  bool DeallocateFreeRegions(size_t rounded_bytes);

  // Helper function to deallocate regions.
  void DeallocateRegions(const absl::flat_hash_set<void*>& region_ptrs)
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Returns a pointer to an underlying allocated chunk of size
  // 'rounded_bytes'.
  void* FindChunkPtr(BinNum bin_num, size_t rounded_bytes, size_t num_bytes,
                     uint64 freed_before) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Splits the chunk specified by 'h' into two chunks, one at least
  // of size 'num_bytes'.
  void SplitChunk(ChunkHandle h, size_t num_bytes)
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Merges the two chunk handles.  Requires that the chunks are
  // contiguous in their allocation.
  void Merge(ChunkHandle h, ChunkHandle h2) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Adds the chunk 'h' to the proper free bin.
  void InsertFreeChunkIntoBin(ChunkHandle h) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Removes the free chunk pointed to by 'c' from the set free_chunks.
  void RemoveFreeChunkIterFromBin(Bin::FreeChunkSet* free_chunks,
                                  const Bin::FreeChunkSet::iterator& c)
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Removes a free chunk from the bin.
  void RemoveFreeChunkFromBin(ChunkHandle h) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
  void MaybeRemoveFreeChunkFromBin(ChunkHandle h)
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Removes the chunk metadata represented by 'h'.
  void DeleteChunk(ChunkHandle h) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  string RenderOccupancy() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
  void DumpMemoryLog(size_t num_bytes) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
  MemoryDump RecordMemoryMapInternal() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
  void MaybeWriteMemoryMap() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  ChunkHandle AllocateChunk() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
  void DeallocateChunk(ChunkHandle h) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  Chunk* ChunkFromHandle(ChunkHandle h) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
  const Chunk* ChunkFromHandle(ChunkHandle h) const
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  void MarkFree(ChunkHandle h) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  ChunkHandle TryToCoalesce(ChunkHandle h, bool ignore_freed_at)
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Fragmentation is calculated as one minus the ratio of the largest free
  // chunk size to total free memory; the result is within [0, 1].
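  // For instance, if 256MB of region memory is currently free but the largest
  // single free chunk is only 64MB, the fragmentation is 1 - 64/256 = 0.75.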
  double GetFragmentation() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Information about a Bin that is useful for debugging.
  struct BinDebugInfo {
    size_t total_bytes_in_use = 0;
    size_t total_bytes_in_bin = 0;
    size_t total_requested_bytes_in_use = 0;
    size_t total_chunks_in_use = 0;
    size_t total_chunks_in_bin = 0;
  };

  // Computes and returns a BinDebugInfo for each Bin.
  std::array<BinDebugInfo, kNumBins> get_bin_debug_info()
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  AllocatorRetry retry_helper_;

  // Structures immutable after construction
  size_t memory_limit_ = 0;

  inline int Log2FloorNonZeroSlow(uint64 n) {
    int r = 0;
    while (n > 0) {
      r++;
      n >>= 1;
    }
    return r - 1;
  }

  // Returns floor(log2(n)).
  inline int Log2FloorNonZero(uint64 n) {
#if defined(__GNUC__)
    return 63 ^ __builtin_clzll(n);
#elif defined(PLATFORM_WINDOWS) && (_WIN64)
    unsigned long index;
    _BitScanReverse64(&index, n);
    return index;
#else
    return Log2FloorNonZeroSlow(n);
#endif
  }

  // Map from bin size to Bin
  Bin* BinFromIndex(BinNum index) {
    return reinterpret_cast<Bin*>(&(bins_space_[index * sizeof(Bin)]));
  }
  size_t BinNumToSize(BinNum index) {
    return static_cast<size_t>(256) << index;
  }
  BinNum BinNumForSize(size_t bytes) {
    uint64 v = std::max<size_t>(bytes, 256) >> kMinAllocationBits;
    int b = std::min(kNumBins - 1, Log2FloorNonZero(v));
    return b;
  }
  Bin* BinForSize(size_t bytes) { return BinFromIndex(BinNumForSize(bytes)); }

  char bins_space_[sizeof(Bin) * kNumBins];

  // The size of the current region allocation.
  size_t curr_region_allocation_bytes_;

  // The total number of allocated bytes by the allocator.
  size_t total_region_allocated_bytes_ = 0;

  // An indicator that expansion of a region has hit the limits
  // of the available memory.
  bool started_backpedal_ = false;

  // Whether the allocator will deallocate free regions to avoid OOM due to
  // memory fragmentation.
  const bool garbage_collection_;

  // Whether the allocator will coalesce adjacent sub-allocator-provided
  // AllocationRegions.  This may be disabled if discrete sub-allocator
  // regions can't be treated as contiguous (e.g. if the allocation refers to
  // device-visible memory which is not adjacent to the other region in the
  // device's address space).
  const bool coalesce_regions_;

  std::unique_ptr<SubAllocator> sub_allocator_;
  string name_;
  SharedCounter* timing_counter_ = nullptr;
  std::deque<ChunkHandle> timestamped_chunks_;

  std::atomic<uint64> safe_frontier_ = {0};

  // Structures mutable after construction
  mutable mutex lock_;
  RegionManager region_manager_ TF_GUARDED_BY(lock_);

  std::vector<Chunk> chunks_ TF_GUARDED_BY(lock_);

  // Pointer to head of linked list of free Chunks
  ChunkHandle free_chunks_list_ TF_GUARDED_BY(lock_);

  // Counter containing the next unique identifier to assign to a
  // newly-created chunk.
  int64 next_allocation_id_ TF_GUARDED_BY(lock_);

  // Stats.
  AllocatorStats stats_ TF_GUARDED_BY(lock_);
  uint64 action_counter_ TF_GUARDED_BY(lock_);

  // The circular buffer used to track memory operation history.
  static constexpr uint64 kMemDebugHistorySize = 4096;
  int64 size_history_[kMemDebugHistorySize];

  friend class GPUBFCAllocatorPrivateMethodsTest;
  friend class GPUBFCAllocatorPrivateMethodsTest_SubAllocatorSpecific;
  TF_DISALLOW_COPY_AND_ASSIGN(BFCAllocator);
};

}  // namespace tensorflow

#endif  // TENSORFLOW_CORE_COMMON_RUNTIME_BFC_ALLOCATOR_H_