/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_BFC_ALLOCATOR_H_
#define TENSORFLOW_CORE_COMMON_RUNTIME_BFC_ALLOCATOR_H_

#include <algorithm>
#include <array>
#include <atomic>
#include <cstdint>
#include <deque>
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <vector>

#include "absl/container/flat_hash_set.h"
#include "tensorflow/core/common_runtime/allocator_retry.h"
#include "tensorflow/core/common_runtime/shared_counter.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/lib/strings/numbers.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/thread_annotations.h"
#include "tensorflow/core/platform/types.h"

namespace tensorflow {

class MemoryDump;

// A memory allocator that implements a 'best-fit with coalescing'
// algorithm.  This is essentially a very simple version of Doug Lea's
// malloc (dlmalloc).
//
// The goal of this allocator is to support defragmentation via
// coalescing.  One assumption we make is that the process using this
// allocator owns pretty much all of the memory, and that nearly
// all requests to allocate memory go through this interface.
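//
// Illustrative usage sketch (not part of this header): `MySubAllocator`
// stands in for a concrete SubAllocator implementation, which the
// BFCAllocator takes ownership of.
//
//   BFCAllocator* allocator = new BFCAllocator(
//       new MySubAllocator(), /*total_memory=*/1 << 30,
//       /*allow_growth=*/true, /*name=*/"bfc_example");
//   void* buf = allocator->AllocateRaw(/*alignment=*/64, /*num_bytes=*/1024);
//   allocator->DeallocateRaw(buf);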
class BFCAllocator : public Allocator {
 public:
  // Takes ownership of sub_allocator.
  BFCAllocator(SubAllocator* sub_allocator, size_t total_memory,
               bool allow_growth, const string& name,
               bool garbage_collection = false);
  ~BFCAllocator() override;

  string Name() override { return name_; }

  void* AllocateRaw(size_t alignment, size_t num_bytes) override {
    return AllocateRaw(alignment, num_bytes, AllocationAttributes());
  }

  void* AllocateRaw(size_t alignment, size_t num_bytes,
                    const AllocationAttributes& allocation_attr) override;

  void DeallocateRaw(void* ptr) override;

  bool TracksAllocationSizes() const override;

  size_t RequestedSize(const void* ptr) const override;

  size_t AllocatedSize(const void* ptr) const override;

  int64 AllocationId(const void* ptr) const override;

  absl::optional<AllocatorStats> GetStats() override;

  bool ClearStats() override;

  void SetTimingCounter(SharedCounter* sc) { timing_counter_ = sc; }

  void SetSafeFrontier(uint64 count) override;

  bool ShouldRecordOpName() const { return true; }

  MemoryDump RecordMemoryMap();

 protected:
  // This setting controls when a chunk should be split, if its size exceeds the
  // requested allocation size. It is not expected to be changed after
  // initialization.
  void SetInternalFragmentationFraction(double fraction) {
    internal_fragmentation_fraction_ = fraction;
  }

 private:
  struct Bin;

  void* AllocateRawInternal(size_t alignment, size_t num_bytes,
                            bool dump_log_on_failure,
                            uint64 freed_before_count);

  void* AllocateRawInternalWithRetry(
      size_t alignment, size_t num_bytes,
      const AllocationAttributes& allocation_attr);

  void DeallocateRawInternal(void* ptr);

  // Chunks whose freed_at_count is later than the safe frontier value are kept
  // on a special list and not subject to merging immediately upon being freed.
  //
  // This function sweeps that list looking for Chunks whose timestamp is now
  // safe. When found their freed_at_count is set to 0 and we attempt to merge
  // them with their neighbors.
  //
  // If required_bytes > 0 then this function is being called in the context of
  // a need for this many bytes that could not be satisfied without merging
  // unsafe chunks, so we go ahead and merge the unsafe chunks too, just up to
  // the point that a free chunk of required_bytes is produced.  Note that
  // unsafe merged chunks adopt the most conservative timestamp from their
  // constituents so they're only useful for allocations not requiring a
  // particular timestamp.
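  //
  // For example, a chunk whose freed_at_count is 42 stays on that list and is
  // not merged while the safe frontier (see SetSafeFrontier()) is still below
  // 42; once the frontier reaches 42, a later sweep resets the chunk's
  // freed_at_count to 0 and tries to coalesce it with its neighbors.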
  bool MergeTimestampedChunks(size_t required_bytes)
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Returns, in constant time, the size in bytes of the largest free chunk in
  // the largest bin. Within a bin, free chunks are sorted by size and then by
  // address.
  int64 LargestFreeChunk() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Add TraceMe (in memory allocation and deallocation) for memory stats
  // profiling. The chunk_ptr is passed to get information such as address,
  // chunk size and requested_size.
  void AddTraceMe(absl::string_view traceme_name, const void* ptr)
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Overloaded AddTraceMe function with chunk information.
  void AddTraceMe(absl::string_view traceme_name, const void* chunk_ptr,
                  int64_t req_bytes, int64_t alloc_bytes)
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // A ChunkHandle is an index into the chunks_ vector in BFCAllocator
  // kInvalidChunkHandle means an invalid chunk
  typedef size_t ChunkHandle;
  static constexpr ChunkHandle kInvalidChunkHandle = SIZE_MAX;

  typedef int BinNum;
  static constexpr int kInvalidBinNum = -1;
  // The following means that the largest bin'd chunk size is 256 << 21 = 512MB.
  static constexpr int kNumBins = 21;
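  // For illustration, given BinNumToSize()/BinNumForSize() below
  // (bin_size = 256 << index): bin 0 holds free chunks of size [256B, 512B),
  // bin 1 holds [512B, 1KiB), ..., and the last bin, bin 20, holds everything
  // of at least 256 << 20 bytes (256MiB).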

  // A Chunk points to a piece of memory that's either entirely free or entirely
  // in use by one user memory allocation.
  //
  // An AllocationRegion's memory is split up into one or more disjoint Chunks,
  // which together cover the whole region without gaps.  Chunks participate in
  // a doubly-linked list, and the prev/next pointers point to the physically
  // adjacent chunks.
  //
  // Since a chunk cannot be partially in use, we may need to split a free chunk
  // in order to service a user allocation.  We always merge adjacent free
  // chunks.
  //
  // Chunks contain information about whether they are in use or whether they
  // are free, and contain a pointer to the bin they are in.
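  //
  // For example, a 1MB free chunk that services a 256KB allocation may be
  // split into a 256KB in-use chunk and a 768KB free chunk; the two remain
  // prev/next neighbors, and once the in-use chunk is freed again they can be
  // merged back into a single 1MB free chunk.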
  struct Chunk {
    size_t size = 0;  // Full size of buffer.

    // We sometimes give chunks that are larger than needed to reduce
    // fragmentation.  requested_size keeps track of what the client
    // actually wanted so we can understand whether our splitting
    // strategy is efficient.
    size_t requested_size = 0;

    // allocation_id is set to -1 when the chunk is not in use. It is assigned a
    // value greater than zero before the chunk is returned from
    // AllocateRaw, and this value is unique among values assigned by
    // the parent allocator.
    int64 allocation_id = -1;
    void* ptr = nullptr;  // pointer to granted subbuffer.

    // If not kInvalidChunkHandle, the memory referred to by 'prev' is directly
    // preceding the memory used by this chunk.  E.g., It should start
    // at 'ptr - prev->size'
    ChunkHandle prev = kInvalidChunkHandle;

    // If not kInvalidChunkHandle, the memory referred to by 'next' is directly
    // following the memory used by this chunk.  E.g., It should be at
    // 'ptr + size'
    ChunkHandle next = kInvalidChunkHandle;

    // What bin are we in?
    BinNum bin_num = kInvalidBinNum;

    // Optional count when this chunk was most recently made free.
    uint64 freed_at_count = 0;

    bool in_use() const { return allocation_id != -1; }

#ifdef TENSORFLOW_MEM_DEBUG
    // optional debugging info
    const char* op_name = nullptr;
    uint64 step_id = 0;
    int64 action_count = 0;
#endif

    string DebugString(BFCAllocator* a,
                       bool recurse) TF_NO_THREAD_SAFETY_ANALYSIS {
      string dbg;
      strings::StrAppend(
          &dbg, "  Size: ", strings::HumanReadableNumBytes(size),
          " | Requested Size: ", strings::HumanReadableNumBytes(requested_size),
          " | in_use: ", in_use(), " | bin_num: ", bin_num);
      if (recurse && prev != BFCAllocator::kInvalidChunkHandle) {
        Chunk* p = a->ChunkFromHandle(prev);
        strings::StrAppend(&dbg, ", prev: ", p->DebugString(a, false));
      }
      if (recurse && next != BFCAllocator::kInvalidChunkHandle) {
        Chunk* n = a->ChunkFromHandle(next);
        strings::StrAppend(&dbg, ", next: ", n->DebugString(a, false));
      }
#ifdef TENSORFLOW_MEM_DEBUG
      strings::StrAppend(&dbg, ", for: ", op_name ? op_name : "UNKNOWN",
                         ", stepid: ", step_id,
                         ", last_action: ", action_count);
#endif
      return dbg;
    }
  };

  // A Bin is a collection of similar-sized free chunks.
  // Allocated chunks are never in a Bin.
  struct Bin {
    // All chunks in this bin have >= bin_size memory.
    size_t bin_size = 0;

    class ChunkComparator {
     public:
      explicit ChunkComparator(BFCAllocator* allocator)
          : allocator_(allocator) {}
      // Sort first by size and then use pointer address as a tie breaker.
      bool operator()(const ChunkHandle ha,
                      const ChunkHandle hb) const TF_NO_THREAD_SAFETY_ANALYSIS {
        const Chunk* a = allocator_->ChunkFromHandle(ha);
        const Chunk* b = allocator_->ChunkFromHandle(hb);
        if (a->size != b->size) {
          return a->size < b->size;
        }
        return a->ptr < b->ptr;
      }

     private:
      BFCAllocator* allocator_;  // The parent allocator
    };

    typedef std::set<ChunkHandle, ChunkComparator> FreeChunkSet;
    // List of free chunks within the bin, sorted by chunk size.
    // Chunk * not owned.
    FreeChunkSet free_chunks;
    Bin(BFCAllocator* allocator, size_t bs)
        : bin_size(bs), free_chunks(ChunkComparator(allocator)) {}
  };

  static constexpr size_t kMinAllocationBits = 8;
  static constexpr size_t kMinAllocationSize = 1 << kMinAllocationBits;

  // BFCAllocator allocates memory into a collection of disjoint
  // AllocationRegions.  Each AllocationRegion corresponds to one call to
  // SubAllocator::Alloc().  (Actually, if a subsequent call to
  // SubAllocator::Alloc() returns another region immediately adjacent to the
  // last, it will be used to extend the first AllocationRegion, not create a
  // separate one.)
  //
  // An AllocationRegion contains one or more Chunks, covering all of its
  // memory.  Its primary job is to map pointers to ChunkHandles.
  //
  // This class is thread-compatible.
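  //
  // For illustration: with kMinAllocationSize == 256, a 1MB region keeps a
  // handles_ array of 4096 entries, and a pointer p inside the region maps to
  // entry (p - ptr_) >> kMinAllocationBits (see IndexFor() below).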
  class AllocationRegion {
   public:
    AllocationRegion(void* ptr, size_t memory_size)
        : ptr_(ptr),
          memory_size_(memory_size),
          end_ptr_(
              static_cast<void*>(static_cast<char*>(ptr_) + memory_size_)) {
      DCHECK_EQ(0, memory_size % kMinAllocationSize);
      const size_t n_handles =
          (memory_size + kMinAllocationSize - 1) / kMinAllocationSize;
      handles_.resize(n_handles, kInvalidChunkHandle);
    }

    AllocationRegion() = default;
    AllocationRegion(AllocationRegion&& other) { Swap(&other); }
    AllocationRegion& operator=(AllocationRegion&& other) {
      Swap(&other);
      return *this;
    }

    void* ptr() const { return ptr_; }
    void* end_ptr() const { return end_ptr_; }
    size_t memory_size() const { return memory_size_; }
    void extend(size_t size) {
      memory_size_ += size;
      DCHECK_EQ(0, memory_size_ % kMinAllocationSize);

      end_ptr_ = static_cast<void*>(static_cast<char*>(end_ptr_) + size);
      const size_t n_handles =
          (memory_size_ + kMinAllocationSize - 1) / kMinAllocationSize;
      handles_.resize(n_handles, kInvalidChunkHandle);
    }
    ChunkHandle get_handle(const void* p) const {
      return handles_[IndexFor(p)];
    }
    void set_handle(const void* p, ChunkHandle h) { handles_[IndexFor(p)] = h; }
    void erase(const void* p) { set_handle(p, kInvalidChunkHandle); }

   private:
    void Swap(AllocationRegion* other) {
      std::swap(ptr_, other->ptr_);
      std::swap(memory_size_, other->memory_size_);
      std::swap(end_ptr_, other->end_ptr_);
      std::swap(handles_, other->handles_);
    }

    size_t IndexFor(const void* p) const {
      std::uintptr_t p_int = reinterpret_cast<std::uintptr_t>(p);
      std::uintptr_t base_int = reinterpret_cast<std::uintptr_t>(ptr_);
      DCHECK_GE(p_int, base_int);
      DCHECK_LT(p_int, base_int + memory_size_);
      return static_cast<size_t>(((p_int - base_int) >> kMinAllocationBits));
    }

    // Metadata about the allocation region.
    void* ptr_ = nullptr;
    size_t memory_size_ = 0;
    void* end_ptr_ = nullptr;

    // Array of size "memory_size / kMinAllocationSize".  It is
    // indexed by (p-base) / kMinAllocationSize, contains ChunkHandle
    // for the memory allocation represented by "p"
    std::vector<ChunkHandle> handles_;

    TF_DISALLOW_COPY_AND_ASSIGN(AllocationRegion);
  };

  // RegionManager aggregates one or more "AllocationRegions" and provides
  // a layer of indirection from pointers to the underlying ChunkHandle,
  // allowing allocation across multiple discontiguous memory regions.
  //
  // This class is thread-compatible.
  class RegionManager {
   public:
    RegionManager() {}
    ~RegionManager() {}

    void AddAllocationRegion(void* ptr, size_t memory_size) {
      // Insert sorted by end_ptr.
      auto entry =
          std::upper_bound(regions_.begin(), regions_.end(), ptr, &Comparator);
      regions_.insert(entry, AllocationRegion(ptr, memory_size));
    }

    // Adds an allocation region for the given ptr and size, potentially
    // extending a region if ptr matches the end_ptr of an existing region.
    // If a region is extended, returns a pointer to the extended region so that
    // the BFC allocator can reason about chunkification.
    AllocationRegion* AddOrExtendAllocationRegion(void* ptr,
                                                  size_t memory_size) {
      // Insert sorted by end_ptr.
      auto entry =
          std::upper_bound(regions_.begin(), regions_.end(), ptr, &Comparator);
      // Check if can be coalesced with preceding region.
      if (entry != regions_.begin()) {
        auto preceding_region = entry - 1;
        if (preceding_region->end_ptr() == ptr) {
          if (VLOG_IS_ON(1)) {
            LOG(INFO) << "Extending region " << preceding_region->ptr()
                      << " of "
                      << strings::HumanReadableNumBytes(
                             preceding_region->memory_size())
                      << "  by " << strings::HumanReadableNumBytes(memory_size)
                      << " bytes";
          }
          preceding_region->extend(memory_size);
          return &*preceding_region;
        }
      }
      VLOG(1) << "Inserting new region " << ptr << " of "
              << strings::HumanReadableNumBytes(memory_size);
      regions_.insert(entry, AllocationRegion(ptr, memory_size));
      return nullptr;
    }

    std::vector<AllocationRegion>::iterator RemoveAllocationRegion(
        std::vector<AllocationRegion>::iterator it) {
      return regions_.erase(it);
    }

    ChunkHandle get_handle(const void* p) const {
      return RegionFor(p)->get_handle(p);
    }

    void set_handle(const void* p, ChunkHandle h) {
      return MutableRegionFor(p)->set_handle(p, h);
    }
    void erase(const void* p) { return MutableRegionFor(p)->erase(p); }

    const std::vector<AllocationRegion>& regions() const { return regions_; }

   private:
    static bool Comparator(const void* ptr, const AllocationRegion& other) {
      return ptr < other.end_ptr();
    }

    AllocationRegion* MutableRegionFor(const void* p) {
      return const_cast<AllocationRegion*>(RegionFor(p));
    }

    const AllocationRegion* RegionFor(const void* p) const {
      auto entry =
          std::upper_bound(regions_.begin(), regions_.end(), p, &Comparator);

      if (entry != regions_.end()) {
        return &(*entry);
      }

      LOG(FATAL) << "Could not find Region for " << p;
      return nullptr;
    }

   private:
    std::vector<AllocationRegion> regions_;
  };

  // Returns 'bytes' rounded up to the next multiple of kMinAllocationSize.
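  // For example, with kMinAllocationSize == 256: RoundedBytes(1) == 256,
  // RoundedBytes(256) == 256, and RoundedBytes(300) == 512.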
  static size_t RoundedBytes(size_t bytes);

  // Try to add a new memory region that can satisfy an allocation of
  // 'rounded_bytes' bytes.  Returns true on success and false on
  // failure.
  bool Extend(size_t alignment, size_t rounded_bytes)
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Deallocates free regions to return their memory to the sub-allocator, so
  // that we can re-allocate a larger region.  The main use scenario is when an
  // OOM occurs but there are free regions and the sum of the sizes of the free
  // regions and the unallocated bytes is larger than the requested size,
  // implying (external) memory fragmentation.  Returns true if any free
  // regions are found and freed; false otherwise.
  bool DeallocateFreeRegions(size_t rounded_bytes);

  // Helper function to deallocate regions.
  void DeallocateRegions(const absl::flat_hash_set<void*>& region_ptrs)
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Returns a pointer to an underlying allocated chunk of size
  // 'rounded_bytes'.
  void* FindChunkPtr(BinNum bin_num, size_t rounded_bytes, size_t num_bytes,
                     uint64 freed_before) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Splits the chunk specified by 'h' into two chunks, one at least
  // of size 'num_bytes'.
  void SplitChunk(ChunkHandle h, size_t num_bytes)
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Merges the two chunk handles.  Requires that the chunks are
  // contiguous in their allocation.
  void Merge(ChunkHandle h, ChunkHandle h2) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Adds the chunk 'h' to the proper free bin.
  void InsertFreeChunkIntoBin(ChunkHandle h) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Removes the free chunk pointed to by 'c' from the set free_chunks.
  void RemoveFreeChunkIterFromBin(Bin::FreeChunkSet* free_chunks,
                                  const Bin::FreeChunkSet::iterator& c)
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Removes a free chunk from the bin.
  void RemoveFreeChunkFromBin(ChunkHandle h) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
  void MaybeRemoveFreeChunkFromBin(ChunkHandle h)
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Removes the chunk metadata represented by 'h'.
  void DeleteChunk(ChunkHandle h) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  string RenderOccupancy() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
  void DumpMemoryLog(size_t num_bytes) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
  MemoryDump RecordMemoryMapInternal() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
  void MaybeWriteMemoryMap() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  ChunkHandle AllocateChunk() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
  void DeallocateChunk(ChunkHandle h) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  Chunk* ChunkFromHandle(ChunkHandle h) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);
  const Chunk* ChunkFromHandle(ChunkHandle h) const
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  void MarkFree(ChunkHandle h) TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  ChunkHandle TryToCoalesce(ChunkHandle h, bool ignore_freed_at)
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Fragmentation is calculated as one minus the ratio of the largest free
  // chunk size to total free memory; the result is a value within [0, 1].
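  // For example, if total free memory is 100MB and the largest free chunk is
  // 25MB, the reported fragmentation is 1 - 25/100 = 0.75.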
  double GetFragmentation() TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  // Information about a Bin that is useful for debugging.
  struct BinDebugInfo {
    size_t total_bytes_in_use = 0;
    size_t total_bytes_in_bin = 0;
    size_t total_requested_bytes_in_use = 0;
    size_t total_chunks_in_use = 0;
    size_t total_chunks_in_bin = 0;
  };

  // Computes and returns a BinDebugInfo for each Bin.
  std::array<BinDebugInfo, kNumBins> get_bin_debug_info()
      TF_EXCLUSIVE_LOCKS_REQUIRED(lock_);

  AllocatorRetry retry_helper_;

  // Structures immutable after construction
  size_t memory_limit_ = 0;

  inline int Log2FloorNonZeroSlow(uint64 n) {
    int r = 0;
    while (n > 0) {
      r++;
      n >>= 1;
    }
    return r - 1;
  }

  // Returns floor(log2(n)).
  inline int Log2FloorNonZero(uint64 n) {
#if defined(__GNUC__)
    return 63 ^ __builtin_clzll(n);
#elif defined(PLATFORM_WINDOWS) && (_WIN64)
    unsigned long index;
    _BitScanReverse64(&index, n);
    return index;
#else
    return Log2FloorNonZeroSlow(n);
#endif
  }

  // Map from bin size to Bin
  Bin* BinFromIndex(BinNum index) {
    return reinterpret_cast<Bin*>(&(bins_space_[index * sizeof(Bin)]));
  }
  size_t BinNumToSize(BinNum index) {
    return static_cast<size_t>(256) << index;
  }
  BinNum BinNumForSize(size_t bytes) {
    uint64 v = std::max<size_t>(bytes, 256) >> kMinAllocationBits;
    int b = std::min(kNumBins - 1, Log2FloorNonZero(v));
    return b;
  }
  Bin* BinForSize(size_t bytes) { return BinFromIndex(BinNumForSize(bytes)); }
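  // Worked example: for bytes = 1MiB, v = (1 << 20) >> 8 = 4096,
  // Log2FloorNonZero(4096) = 12, so the request maps to bin 12, whose
  // bin_size is 256 << 12 = 1MiB.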

  char bins_space_[sizeof(Bin) * kNumBins];

  // The size of the current region allocation.
  size_t curr_region_allocation_bytes_;

  // The total number of allocated bytes by the allocator.
  size_t total_region_allocated_bytes_ = 0;

  // An indicator that expansion of a region has hit the limits
  // of the available memory.
  bool started_backpedal_ = false;

  // Whether the allocator will deallocate free regions to avoid OOM due to
  // memory fragmentation.
  const bool garbage_collection_;

  // Whether the allocator will coalesce adjacent sub allocator provided
  // AllocationRegions. This may be disabled if discrete sub allocator
  // regions can't be treated as contiguous (e.g. if the allocation refers to
  // device visible memory which is not adjacent to the other region in the
  // device's address space).
  const bool coalesce_regions_;

  std::unique_ptr<SubAllocator> sub_allocator_;
  string name_;
  SharedCounter* timing_counter_ = nullptr;
  std::deque<ChunkHandle> timestamped_chunks_;

  double internal_fragmentation_fraction_ = {0.0};

  std::atomic<uint64> safe_frontier_ = {0};

  // Structures mutable after construction
  mutable mutex lock_;
  RegionManager region_manager_ TF_GUARDED_BY(lock_);

  std::vector<Chunk> chunks_ TF_GUARDED_BY(lock_);

  // Pointer to head of linked list of free Chunks
  ChunkHandle free_chunks_list_ TF_GUARDED_BY(lock_);

  // Counter containing the next unique identifier to assign to a
  // newly-created chunk.
  int64 next_allocation_id_ TF_GUARDED_BY(lock_);

  // Stats.
  AllocatorStats stats_ TF_GUARDED_BY(lock_);
#ifdef TENSORFLOW_MEM_DEBUG
  int64 action_counter_ TF_GUARDED_BY(lock_) = 0;
#define MEM_DEBUG_SIZE_HISTORY_SIZE 4096
  int64 size_history_[MEM_DEBUG_SIZE_HISTORY_SIZE];
#endif

  friend class GPUBFCAllocatorPrivateMethodsTest;
  friend class GPUBFCAllocatorPrivateMethodsTest_SubAllocatorSpecific;
  TF_DISALLOW_COPY_AND_ASSIGN(BFCAllocator);
};

}  // namespace tensorflow

#endif  // TENSORFLOW_CORE_COMMON_RUNTIME_BFC_ALLOCATOR_H_