/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_BFC_ALLOCATOR_H_
#define TENSORFLOW_CORE_COMMON_RUNTIME_BFC_ALLOCATOR_H_

#include <algorithm>
#include <array>
#include <atomic>
#include <cstdint>
#include <deque>
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>

#include "absl/container/flat_hash_set.h"
#include "tensorflow/core/common_runtime/allocator_retry.h"
#include "tensorflow/core/common_runtime/shared_counter.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/lib/strings/numbers.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/thread_annotations.h"
#include "tensorflow/core/platform/types.h"

namespace tensorflow {

class MemoryDump;

// A memory allocator that implements a 'best-fit with coalescing'
// algorithm. This is essentially a very simple version of Doug Lea's
// malloc (dlmalloc).
//
// The goal of this allocator is to support defragmentation via
// coalescing. One assumption we make is that the process using this
// allocator owns pretty much all of the memory, and that nearly
// all requests to allocate memory go through this interface.
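//
// A minimal usage sketch (illustrative only; "CreateMyDeviceSubAllocator" is
// a hypothetical stand-in for a device-specific SubAllocator factory, and
// ownership of the sub-allocator passes to the BFCAllocator):
//
//   SubAllocator* sub = CreateMyDeviceSubAllocator();
//   BFCAllocator alloc(sub, 1 << 30 /* 1GiB */, /* allow_growth = */ true,
//                      "my_bfc");
//   void* p = alloc.AllocateRaw(Allocator::kAllocatorAlignment, 1024);
//   // ... use p ...
//   alloc.DeallocateRaw(p);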
class BFCAllocator : public Allocator {
 public:
  // Takes ownership of sub_allocator.
  BFCAllocator(SubAllocator* sub_allocator, size_t total_memory,
               bool allow_growth, const string& name,
               bool garbage_collection = false);
  ~BFCAllocator() override;

  string Name() override { return name_; }

  void* AllocateRaw(size_t alignment, size_t num_bytes) override {
    return AllocateRaw(alignment, num_bytes, AllocationAttributes());
  }

  void* AllocateRaw(size_t alignment, size_t num_bytes,
                    const AllocationAttributes& allocation_attr) override;

  void DeallocateRaw(void* ptr) override;

  bool TracksAllocationSizes() const override;

  size_t RequestedSize(const void* ptr) const override;

  size_t AllocatedSize(const void* ptr) const override;

  int64 AllocationId(const void* ptr) const override;

  absl::optional<AllocatorStats> GetStats() override;

  bool ClearStats() override;

  void SetTimingCounter(SharedCounter* sc) { timing_counter_ = sc; }

  void SetSafeFrontier(uint64 count) override;

  bool ShouldRecordOpName() const { return true; }

  MemoryDump RecordMemoryMap();

 protected:
  // This setting controls when a chunk should be split if its size exceeds
  // the requested allocation size. It is not expected to be changed after
  // initialization.
  void SetInternalFragmentationFraction(double fraction) {
    internal_fragmentation_fraction_ = fraction;
  }

 private:
  struct Bin;

  void* AllocateRawInternal(size_t alignment, size_t num_bytes,
                            bool dump_log_on_failure,
                            uint64 freed_before_count);

  void* AllocateRawInternalWithRetry(
      size_t alignment, size_t num_bytes,
      const AllocationAttributes& allocation_attr);

  void DeallocateRawInternal(void* ptr);

  // Chunks whose freed_at_count is later than the safe frontier value are kept
  // on a special list and not subject to merging immediately upon being freed.
  //
  // This function sweeps that list looking for Chunks whose timestamp is now
  // safe. When found their freed_at_count is set to 0 and we attempt to merge
  // them with their neighbors.
  //
  // If required_bytes > 0 then this function is being called in the context of
  // a need for this many bytes that could not be satisfied without merging
  // unsafe chunks, so we go ahead and merge the unsafe chunks too, just up to
  // the point that a free chunk of required_bytes is produced. Note that
  // unsafe merged chunks adopt the most conservative timestamp from their
  // constituents so they're only useful for allocations not requiring a
  // particular timestamp.
  bool MergeTimestampedChunks(size_t required_bytes)
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Returns the size in bytes of the largest free chunk in the largest bin,
  // in constant time. The free chunks in a bin are sorted by size (and then
  // by address).
  int64 LargestFreeChunk() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Add TraceMe (in memory allocation and deallocation) for memory stats
  // profiling. The chunk_ptr is passed to get information such as address,
  // chunk size and requested_size.
  void AddTraceMe(absl::string_view traceme_name, const void* ptr)
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Overloaded AddTraceMe function with chunk information.
  void AddTraceMe(absl::string_view traceme_name, const void* chunk_ptr,
                  int64_t req_bytes, int64_t alloc_bytes)
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // A ChunkHandle is an index into the chunks_ vector in BFCAllocator.
  // kInvalidChunkHandle means an invalid chunk handle.
  typedef size_t ChunkHandle;
  static constexpr ChunkHandle kInvalidChunkHandle = SIZE_MAX;

  typedef int BinNum;
  static constexpr int kInvalidBinNum = -1;
  // The following means that the largest bin'd chunk size is 256 << 21 = 512MB.
  static constexpr int kNumBins = 21;

  // A Chunk points to a piece of memory that's either entirely free or
  // entirely in use by one user memory allocation.
  //
  // An AllocationRegion's memory is split up into one or more disjoint Chunks,
  // which together cover the whole region without gaps. Chunks participate in
  // a doubly-linked list, and the prev/next pointers point to the physically
  // adjacent chunks.
  //
  // Since a chunk cannot be partially in use, we may need to split a free
  // chunk in order to service a user allocation. We always merge adjacent
  // free chunks.
  //
  // Chunks contain information about whether they are in use or whether they
  // are free, and contain a pointer to the bin they are in.
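  //
  // Illustrative sketch (assumed sizes, not from the implementation): if a
  // 4096-byte free chunk services a 1024-byte request, it is split into a
  // 1024-byte in-use chunk and a 3072-byte free chunk that stay physically
  // adjacent and linked through prev/next:
  //
  //   before:  [ free 4096 ]
  //   after:   [ in_use 1024 | free 3072 ]
  //
  // When the in-use chunk is later freed, it is merged with its free neighbor
  // to re-form the 4096-byte chunk.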
  struct Chunk {
    size_t size = 0;  // Full size of buffer.

    // We sometimes give chunks that are larger than needed to reduce
    // fragmentation. requested_size keeps track of what the client
    // actually wanted so we can understand whether our splitting
    // strategy is efficient.
    size_t requested_size = 0;

    // allocation_id is set to -1 when the chunk is not in use. It is assigned
    // a value greater than zero before the chunk is returned from
    // AllocateRaw, and this value is unique among values assigned by
    // the parent allocator.
    int64 allocation_id = -1;
    void* ptr = nullptr;  // pointer to granted subbuffer.

    // If not kInvalidChunkHandle, the memory referred to by 'prev' is directly
    // preceding the memory used by this chunk. E.g., it should start
    // at 'ptr - prev->size'.
    ChunkHandle prev = kInvalidChunkHandle;

    // If not kInvalidChunkHandle, the memory referred to by 'next' is directly
    // following the memory used by this chunk. E.g., it should be at
    // 'ptr + size'.
    ChunkHandle next = kInvalidChunkHandle;

    // What bin are we in?
    BinNum bin_num = kInvalidBinNum;

    // Optional count when this chunk was most recently made free.
    uint64 freed_at_count = 0;

    bool in_use() const { return allocation_id != -1; }

#ifdef TENSORFLOW_MEM_DEBUG
    // Optional debugging info.
    const char* op_name = nullptr;
    uint64 step_id = 0;
    int64 action_count = 0;
#endif

    string DebugString(BFCAllocator* a,
                       bool recurse) TF_NO_THREAD_SAFETY_ANALYSIS {
      string dbg;
      strings::StrAppend(
          &dbg, " Size: ", strings::HumanReadableNumBytes(size),
          " | Requested Size: ", strings::HumanReadableNumBytes(requested_size),
          " | in_use: ", in_use(), " | bin_num: ", bin_num);
      if (recurse && prev != BFCAllocator::kInvalidChunkHandle) {
        Chunk* p = a->ChunkFromHandle(prev);
        strings::StrAppend(&dbg, ", prev: ", p->DebugString(a, false));
      }
      if (recurse && next != BFCAllocator::kInvalidChunkHandle) {
        Chunk* n = a->ChunkFromHandle(next);
        strings::StrAppend(&dbg, ", next: ", n->DebugString(a, false));
      }
#ifdef TENSORFLOW_MEM_DEBUG
      strings::StrAppend(&dbg, ", for: ", op_name ? op_name : "UNKNOWN",
                         ", stepid: ", step_id,
                         ", last_action: ", action_count);
#endif
      return dbg;
    }
  };

  // A Bin is a collection of similar-sized free chunks.
  // Allocated chunks are never in a Bin.
  struct Bin {
    // All chunks in this bin have >= bin_size memory.
    size_t bin_size = 0;

    class ChunkComparator {
     public:
      explicit ChunkComparator(BFCAllocator* allocator)
          : allocator_(allocator) {}
      // Sort first by size and then use pointer address as a tie breaker.
      bool operator()(const ChunkHandle ha,
                      const ChunkHandle hb) const TF_NO_THREAD_SAFETY_ANALYSIS {
        const Chunk* a = allocator_->ChunkFromHandle(ha);
        const Chunk* b = allocator_->ChunkFromHandle(hb);
        if (a->size != b->size) {
          return a->size < b->size;
        }
        return a->ptr < b->ptr;
      }

     private:
      BFCAllocator* allocator_;  // The parent allocator.
    };

    typedef std::set<ChunkHandle, ChunkComparator> FreeChunkSet;
    // List of free chunks within the bin, sorted by chunk size.
    // Chunk * not owned.
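    // For example (illustrative sizes and addresses): free chunks of sizes
    // {512 @ 0x7000, 1024 @ 0x9000, 512 @ 0x3000} iterate in the order
    // (512, 0x3000), (512, 0x7000), (1024, 0x9000), so a search for a
    // best-fitting chunk can stop at the first chunk that is large enough.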
    FreeChunkSet free_chunks;
    Bin(BFCAllocator* allocator, size_t bs)
        : bin_size(bs), free_chunks(ChunkComparator(allocator)) {}
  };

  static constexpr size_t kMinAllocationBits = 8;
  static constexpr size_t kMinAllocationSize = 1 << kMinAllocationBits;

  // BFCAllocator allocates memory into a collection of disjoint
  // AllocationRegions. Each AllocationRegion corresponds to one call to
  // SubAllocator::Alloc(). (Actually, if a subsequent call to
  // SubAllocator::Alloc() returns another region immediately adjacent to the
  // last, it will be used to extend the first AllocationRegion, not create a
  // separate one.)
  //
  // An AllocationRegion contains one or more Chunks, covering all of its
  // memory. Its primary job is to map pointers to ChunkHandles.
  //
  // This class is thread-compatible.
  class AllocationRegion {
   public:
    AllocationRegion(void* ptr, size_t memory_size)
        : ptr_(ptr),
          memory_size_(memory_size),
          end_ptr_(
              static_cast<void*>(static_cast<char*>(ptr_) + memory_size_)) {
      DCHECK_EQ(0, memory_size % kMinAllocationSize);
      const size_t n_handles =
          (memory_size + kMinAllocationSize - 1) / kMinAllocationSize;
      handles_.resize(n_handles, kInvalidChunkHandle);
    }

    AllocationRegion() = default;
    AllocationRegion(AllocationRegion&& other) { Swap(&other); }
    AllocationRegion& operator=(AllocationRegion&& other) {
      Swap(&other);
      return *this;
    }

    void* ptr() const { return ptr_; }
    void* end_ptr() const { return end_ptr_; }
    size_t memory_size() const { return memory_size_; }
    void extend(size_t size) {
      memory_size_ += size;
      DCHECK_EQ(0, memory_size_ % kMinAllocationSize);

      end_ptr_ = static_cast<void*>(static_cast<char*>(end_ptr_) + size);
      const size_t n_handles =
          (memory_size_ + kMinAllocationSize - 1) / kMinAllocationSize;
      handles_.resize(n_handles, kInvalidChunkHandle);
    }
    ChunkHandle get_handle(const void* p) const {
      return handles_[IndexFor(p)];
    }
    void set_handle(const void* p, ChunkHandle h) { handles_[IndexFor(p)] = h; }
    void erase(const void* p) { set_handle(p, kInvalidChunkHandle); }

   private:
    void Swap(AllocationRegion* other) {
      std::swap(ptr_, other->ptr_);
      std::swap(memory_size_, other->memory_size_);
      std::swap(end_ptr_, other->end_ptr_);
      std::swap(handles_, other->handles_);
    }

    size_t IndexFor(const void* p) const {
      std::uintptr_t p_int = reinterpret_cast<std::uintptr_t>(p);
      std::uintptr_t base_int = reinterpret_cast<std::uintptr_t>(ptr_);
      DCHECK_GE(p_int, base_int);
      DCHECK_LT(p_int, base_int + memory_size_);
      return static_cast<size_t>(((p_int - base_int) >> kMinAllocationBits));
    }
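
    // Worked example (illustrative numbers): with kMinAllocationSize = 256, a
    // 1MB region keeps 4096 handle slots, and a pointer 768 bytes past the
    // region base maps to slot 768 >> kMinAllocationBits = 3.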

    // Metadata about the allocation region.
    void* ptr_ = nullptr;
    size_t memory_size_ = 0;
    void* end_ptr_ = nullptr;

    // Array of size "memory_size / kMinAllocationSize". It is indexed by
    // (p-base) / kMinAllocationSize, and contains the ChunkHandle for the
    // memory allocation represented by "p".
    std::vector<ChunkHandle> handles_;

    TF_DISALLOW_COPY_AND_ASSIGN(AllocationRegion);
  };

  // RegionManager aggregates one or more "AllocationRegions" and provides
  // a layer of indirection from pointers to the underlying ChunkHandle,
  // allowing allocation across multiple discontiguous memory regions.
  //
  // This class is thread-compatible.
  class RegionManager {
   public:
    RegionManager() {}
    ~RegionManager() {}

    void AddAllocationRegion(void* ptr, size_t memory_size) {
      // Insert sorted by end_ptr.
      auto entry =
          std::upper_bound(regions_.begin(), regions_.end(), ptr, &Comparator);
      regions_.insert(entry, AllocationRegion(ptr, memory_size));
    }

    // Adds an allocation region for the given ptr and size, potentially
    // extending a region if ptr matches the end_ptr of an existing region.
    // If a region is extended, returns a pointer to the extended region so
    // that the BFC allocator can reason about chunkification.
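    // Extension sketch (illustrative addresses): if an existing region covers
    // [0x10000, 0x20000) and the sub-allocator later returns ptr = 0x20000
    // with memory_size = 0x10000, the existing region is grown to cover
    // [0x10000, 0x30000) and returned, instead of inserting a new region.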
    AllocationRegion* AddOrExtendAllocationRegion(void* ptr,
                                                  size_t memory_size) {
      // Insert sorted by end_ptr.
      auto entry =
          std::upper_bound(regions_.begin(), regions_.end(), ptr, &Comparator);
      // Check if can be coalesced with preceding region.
      if (entry != regions_.begin()) {
        auto preceding_region = entry - 1;
        if (preceding_region->end_ptr() == ptr) {
          if (VLOG_IS_ON(1)) {
            LOG(INFO) << "Extending region " << preceding_region->ptr()
                      << " of "
                      << strings::HumanReadableNumBytes(
                             preceding_region->memory_size())
                      << " by " << strings::HumanReadableNumBytes(memory_size)
                      << " bytes";
          }
          preceding_region->extend(memory_size);
          return &*preceding_region;
        }
      }
      VLOG(1) << "Inserting new region " << ptr << " of "
              << strings::HumanReadableNumBytes(memory_size);
      regions_.insert(entry, AllocationRegion(ptr, memory_size));
      return nullptr;
    }

    std::vector<AllocationRegion>::iterator RemoveAllocationRegion(
        std::vector<AllocationRegion>::iterator it) {
      return regions_.erase(it);
    }

    ChunkHandle get_handle(const void* p) const {
      return RegionFor(p)->get_handle(p);
    }

    void set_handle(const void* p, ChunkHandle h) {
      return MutableRegionFor(p)->set_handle(p, h);
    }
    void erase(const void* p) { return MutableRegionFor(p)->erase(p); }

    const std::vector<AllocationRegion>& regions() const { return regions_; }

   private:
    static bool Comparator(const void* ptr, const AllocationRegion& other) {
      return ptr < other.end_ptr();
    }

    AllocationRegion* MutableRegionFor(const void* p) {
      return const_cast<AllocationRegion*>(RegionFor(p));
    }

    const AllocationRegion* RegionFor(const void* p) const {
      auto entry =
          std::upper_bound(regions_.begin(), regions_.end(), p, &Comparator);

      if (entry != regions_.end()) {
        return &(*entry);
      }

      LOG(FATAL) << "Could not find Region for " << p;
      return nullptr;
    }

   private:
    std::vector<AllocationRegion> regions_;
  };

  // Returns 'bytes' rounded up to the next highest kMinAllocationSize.
  static size_t RoundedBytes(size_t bytes);

  // Try to add a new memory region that can satisfy an allocation of
  // 'rounded_bytes' bytes. Returns true on success and false on
  // failure.
  bool Extend(size_t alignment, size_t rounded_bytes)
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Deallocate free regions to give back the memory to the suballocator, so
  // that we can re-allocate a larger region. The main use scenario of this
  // function is when OOM happens but we have free regions and the sum of
  // sizes of free regions and unallocated bytes is larger than the requested
  // size, implying (external) memory fragmentation. Returns true if any free
  // regions are found and freed; false otherwise.
  bool DeallocateFreeRegions(size_t rounded_bytes);

  // Helper function to deallocate regions.
  void DeallocateRegions(const absl::flat_hash_set<void*>& region_ptrs)
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Returns a pointer to an underlying allocated chunk of size
  // 'rounded_bytes'.
  void* FindChunkPtr(BinNum bin_num, size_t rounded_bytes, size_t num_bytes,
                     uint64 freed_before) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Splits the chunk specified by 'h' into two chunks, one at least
  // of size 'num_bytes'.
  void SplitChunk(ChunkHandle h, size_t num_bytes)
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Merges the two chunk handles. Requires that the chunks are
  // contiguous in their allocation.
  void Merge(ChunkHandle h, ChunkHandle h2) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Adds the chunk 'h' to the proper free bin.
  void InsertFreeChunkIntoBin(ChunkHandle h) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Removes the free chunk pointed to by 'c' from the set free_chunks.
  void RemoveFreeChunkIterFromBin(Bin::FreeChunkSet* free_chunks,
                                  const Bin::FreeChunkSet::iterator& c)
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Removes a free chunk from the bin.
  void RemoveFreeChunkFromBin(ChunkHandle h) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
  void MaybeRemoveFreeChunkFromBin(ChunkHandle h)
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Removes the chunk metadata represented by 'h'.
  void DeleteChunk(ChunkHandle h) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  string RenderOccupancy() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
  void DumpMemoryLog(size_t num_bytes) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
  MemoryDump RecordMemoryMapInternal() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
  void MaybeWriteMemoryMap() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  ChunkHandle AllocateChunk() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
  void DeallocateChunk(ChunkHandle h) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  Chunk* ChunkFromHandle(ChunkHandle h) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
  const Chunk* ChunkFromHandle(ChunkHandle h) const
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  void MarkFree(ChunkHandle h) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  ChunkHandle TryToCoalesce(ChunkHandle h, bool ignore_freed_at)
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Fragmentation is calculated as one minus the ratio of the largest free
  // chunk size to total free memory; the result is a value within [0, 1].
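  // For example (assumed numbers): if total free memory is 256MB and the
  // largest single free chunk is 64MB, the reported fragmentation is
  // 1.0 - 64/256 = 0.75.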
  double GetFragmentation() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Information about a Bin that is useful for debugging.
  struct BinDebugInfo {
    size_t total_bytes_in_use = 0;
    size_t total_bytes_in_bin = 0;
    size_t total_requested_bytes_in_use = 0;
    size_t total_chunks_in_use = 0;
    size_t total_chunks_in_bin = 0;
  };

  // Computes and returns a BinDebugInfo for each Bin.
  std::array<BinDebugInfo, kNumBins> get_bin_debug_info()
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  AllocatorRetry retry_helper_;

  // Structures immutable after construction
  size_t memory_limit_ = 0;

  inline int Log2FloorNonZeroSlow(uint64 n) {
    int r = 0;
    while (n > 0) {
      r++;
      n >>= 1;
    }
    return r - 1;
  }

  // Returns floor(log2(n)).
  inline int Log2FloorNonZero(uint64 n) {
#if defined(__GNUC__)
    return 63 ^ __builtin_clzll(n);
#elif defined(PLATFORM_WINDOWS) && (_WIN64)
    unsigned long index;
    _BitScanReverse64(&index, n);
    return index;
#else
    return Log2FloorNonZeroSlow(n);
#endif
  }

  // Map from bin size to Bin
  Bin* BinFromIndex(BinNum index) {
    return reinterpret_cast<Bin*>(&(bins_space_[index * sizeof(Bin)]));
  }
  size_t BinNumToSize(BinNum index) {
    return static_cast<size_t>(256) << index;
  }
  BinNum BinNumForSize(size_t bytes) {
    uint64 v = std::max<size_t>(bytes, 256) >> kMinAllocationBits;
    int b = std::min(kNumBins - 1, Log2FloorNonZero(v));
    return b;
  }
  Bin* BinForSize(size_t bytes) { return BinFromIndex(BinNumForSize(bytes)); }
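
  // Worked example of the bin mapping above (illustrative sizes): bin i holds
  // free chunks of at least 256 << i bytes, so a 1000-byte chunk
  // (1000 >> kMinAllocationBits == 3, floor(log2(3)) == 1) lands in bin 1,
  // while anything of 256MB or more lands in the last bin, kNumBins - 1 = 20.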

  char bins_space_[sizeof(Bin) * kNumBins];

  // The size of the current region allocation.
  size_t curr_region_allocation_bytes_;

  // The total number of allocated bytes by the allocator.
  size_t total_region_allocated_bytes_ = 0;

  // An indicator that expansion of a region has hit the limits
  // of the available memory.
  bool started_backpedal_ = false;

  // Whether the allocator will deallocate free regions to avoid OOM due to
  // memory fragmentation.
  const bool garbage_collection_;

  // Whether the allocator will coalesce adjacent sub allocator provided
  // AllocationRegions. This may be disabled if discrete sub allocator
  // regions can't be treated as contiguous (e.g. if the allocation refers to
  // device visible memory which is not adjacent to the other region in the
  // device's address space).
  const bool coalesce_regions_;

  std::unique_ptr<SubAllocator> sub_allocator_;
  string name_;
  SharedCounter* timing_counter_ = nullptr;
  std::deque<ChunkHandle> timestamped_chunks_;

  double internal_fragmentation_fraction_ = {0.0};

  std::atomic<uint64> safe_frontier_ = {0};

  // Structures mutable after construction
  mutable mutex lock_;
  RegionManager region_manager_ TF_GUARDED_BY(lock_);

  std::vector<Chunk> chunks_ TF_GUARDED_BY(lock_);

  // Pointer to head of linked list of free Chunks
  ChunkHandle free_chunks_list_ TF_GUARDED_BY(lock_);

  // Counter containing the next unique identifier to assign to a
  // newly-created chunk.
  int64 next_allocation_id_ TF_GUARDED_BY(lock_);

  // Stats.
  AllocatorStats stats_ TF_GUARDED_BY(lock_);
#ifdef TENSORFLOW_MEM_DEBUG
  int64 action_counter_ TF_GUARDED_BY(lock_) = 0;
#define MEM_DEBUG_SIZE_HISTORY_SIZE 4096
  int64 size_history_[MEM_DEBUG_SIZE_HISTORY_SIZE];
#endif

  friend class GPUBFCAllocatorPrivateMethodsTest;
  friend class GPUBFCAllocatorPrivateMethodsTest_SubAllocatorSpecific;
  TF_DISALLOW_COPY_AND_ASSIGN(BFCAllocator);
};

}  // namespace tensorflow

#endif  // TENSORFLOW_CORE_COMMON_RUNTIME_BFC_ALLOCATOR_H_