/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <atomic>

#include "tensorflow/core/common_runtime/bfc_allocator.h"

#include "tensorflow/core/common_runtime/allocator_retry.h"
#include "tensorflow/core/framework/device_base.h"
#include "tensorflow/core/lib/core/bits.h"
#include "tensorflow/core/lib/gtl/stl_util.h"
#include "tensorflow/core/lib/strings/numbers.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/types.h"

namespace tensorflow {

BFCAllocator::BFCAllocator(SubAllocator* sub_allocator, size_t total_memory,
                           bool allow_growth, const string& name)
    : sub_allocator_(sub_allocator),
      name_(name),
      free_chunks_list_(kInvalidChunkHandle),
      next_allocation_id_(1) {
  if (allow_growth) {
    // 1MiB smallest initial allocation, unless total memory available
    // is less.
    curr_region_allocation_bytes_ =
        RoundedBytes(std::min(total_memory, size_t{1048576}));
  } else {
    curr_region_allocation_bytes_ = RoundedBytes(total_memory);
  }

  // Allocate the requested amount of memory.
  memory_limit_ = total_memory;
  stats_.bytes_limit = static_cast<int64>(total_memory);

  // Create a bunch of bins of various good sizes.

  // We create bins to fit all possible ranges that cover the
  // memory_limit_ starting from allocations up to 256 bytes to
  // allocations up to (and including) the memory limit.
  for (BinNum b = 0; b < kNumBins; b++) {
    size_t bin_size = BinNumToSize(b);
    VLOG(1) << "Creating bin of max chunk size "
            << strings::HumanReadableNumBytes(bin_size);
    new (BinFromIndex(b)) Bin(this, bin_size);
    CHECK_EQ(BinForSize(bin_size), BinFromIndex(b));
    CHECK_EQ(BinForSize(bin_size + 255), BinFromIndex(b));
    CHECK_EQ(BinForSize(bin_size * 2 - 1), BinFromIndex(b));
    if (b + 1 < kNumBins) {
      CHECK_NE(BinForSize(bin_size * 2), BinFromIndex(b));
    }
  }
}

BFCAllocator::~BFCAllocator() {
  // Return memory back.
  VLOG(2) << "Number of regions allocated: "
          << region_manager_.regions().size();
  for (const auto& region : region_manager_.regions()) {
    sub_allocator_->Free(region.ptr(), region.memory_size());
  }

  for (BinNum b = 0; b < kNumBins; b++) {
    BinFromIndex(b)->~Bin();
  }
}

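// Translates a ChunkHandle (an index into chunks_) into a pointer to that
// chunk's metadata.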
BFCAllocator::Chunk* BFCAllocator::ChunkFromHandle(ChunkHandle h) {
  DCHECK_GE(h, 0);
  DCHECK_LT(h, static_cast<int>(chunks_.size()));
  return &(chunks_[h]);
}

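// Tries to grow the allocator by requesting a new region from the
// sub-allocator while staying within memory_limit_. On success the region is
// registered with region_manager_ and seeded with a single free chunk that
// spans the entire region.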
bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) {
  size_t available_bytes = memory_limit_ - total_region_allocated_bytes_;
  // Rounds available_bytes down to the nearest multiple of kMinAllocationSize.
  available_bytes = (available_bytes / kMinAllocationSize) * kMinAllocationSize;

  // Do we have enough space to handle the client's request?
  // If not, fail immediately.
  if (rounded_bytes > available_bytes) {
    return false;
  }

  // If curr_region_allocation_bytes_ is not enough to satisfy the
  // allocation, keep multiplying by a power of two until that is
  // sufficient.
  bool increased_allocation = false;
  while (rounded_bytes > curr_region_allocation_bytes_) {
    curr_region_allocation_bytes_ *= 2;
    increased_allocation = true;
  }

  // Try allocating.
  size_t bytes = std::min(curr_region_allocation_bytes_, available_bytes);
  void* mem_addr = sub_allocator_->Alloc(alignment, bytes);
  if (mem_addr == nullptr && !started_backpedal_) {
    // Only backpedal once.
    started_backpedal_ = true;

    static constexpr float kBackpedalFactor = 0.9;

    // Try allocating less memory.
    while (mem_addr == nullptr) {
      bytes = RoundedBytes(bytes * kBackpedalFactor);
      if (bytes < rounded_bytes) break;
      mem_addr = sub_allocator_->Alloc(alignment, bytes);
    }
  }

  if (mem_addr == nullptr) {
    return false;
  }

  if (!increased_allocation) {
    // Increase the region size of the next required allocation.
    curr_region_allocation_bytes_ *= 2;
  }

  VLOG(1) << "Extending allocation by " << strings::HumanReadableNumBytes(bytes)
          << " bytes.";

  total_region_allocated_bytes_ += bytes;
  VLOG(1) << "Total allocated bytes: "
          << strings::HumanReadableNumBytes(total_region_allocated_bytes_);

  VLOG(1) << "Allocated memory at " << mem_addr << " to "
          << static_cast<void*>(static_cast<char*>(mem_addr) + bytes);
  region_manager_.AddAllocationRegion(mem_addr, bytes);

  // Create one large chunk for the whole memory space that will
  // be chunked later.
  ChunkHandle h = AllocateChunk();
  BFCAllocator::Chunk* c = ChunkFromHandle(h);
  c->ptr = mem_addr;
  c->size = bytes;
  c->allocation_id = -1;
  c->prev = kInvalidChunkHandle;
  c->next = kInvalidChunkHandle;
  c->freed_count = 0;

  region_manager_.set_handle(c->ptr, h);

  // Insert the chunk into the right bin.
  InsertFreeChunkIntoBin(h);

  return true;
}

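// Chunk metadata entries are recycled through a singly linked free list
// threaded through the 'next' field; chunks_ only grows when that list is
// empty.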
BFCAllocator::ChunkHandle BFCAllocator::AllocateChunk() {
  if (free_chunks_list_ != kInvalidChunkHandle) {
    ChunkHandle h = free_chunks_list_;
    Chunk* c = ChunkFromHandle(h);
    free_chunks_list_ = c->next;
    return h;
  } else {
    ChunkHandle h = chunks_.size();
    chunks_.resize(h + 1);
    return h;
  }
}

void BFCAllocator::DeallocateChunk(ChunkHandle h) {
  Chunk* c = ChunkFromHandle(h);
  c->next = free_chunks_list_;
  free_chunks_list_ = h;
}

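// Attempts the allocation once on the fast path; if that fails, falls back to
// retry_helper_, which keeps re-invoking AllocateRawInternal for up to
// kMaxMillisToWait milliseconds as DeallocateRaw signals that memory has been
// freed.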
void* BFCAllocator::AllocateRawInternalWithRetry(
    size_t unused_alignment, size_t num_bytes,
    const AllocationAttributes& allocation_attr) {
  // Fast path: Try once to allocate without getting the retry_helper_ involved
  uint64 freed_by_count = 0;
  if (allocation_attr.freed_by_func != nullptr) {
    freed_by_count = allocation_attr.freed_by_func();
  }
  void* r =
      AllocateRawInternal(unused_alignment, num_bytes, false, freed_by_count);
  if (r != nullptr) {
    return r;
  } else {
    static const int64 kMaxMillisToWait = 10000;  // 10 seconds
    r = retry_helper_.AllocateRaw(
        [this, &allocation_attr](size_t a, size_t nb, bool v) {
          uint64 freed_by_count = 0;
          if (allocation_attr.freed_by_func != nullptr) {
            freed_by_count = allocation_attr.freed_by_func();
          }
          return AllocateRawInternal(a, nb, v, freed_by_count);
        },
        kMaxMillisToWait, unused_alignment, num_bytes);
    return r;
  }
}

void* BFCAllocator::AllocateRaw(size_t unused_alignment, size_t num_bytes,
                                const AllocationAttributes& allocation_attr) {
  VLOG(1) << "AllocateRaw " << Name() << " " << num_bytes;
  if (allocation_attr.no_retry_on_failure) {
    // Return immediately upon the first failure if this is for allocating an
    // optional scratch space.
    bool dump_log_on_failure = VLOG_IS_ON(2);
    uint64 freed_by_count = 0;
    if (allocation_attr.freed_by_func != nullptr) {
      freed_by_count = allocation_attr.freed_by_func();
    }
    void* result = AllocateRawInternal(unused_alignment, num_bytes,
                                       dump_log_on_failure, freed_by_count);
    if (result == nullptr) {
      static std::atomic<int32> log_counter{0};
      int32 counter_value = log_counter.load(std::memory_order_relaxed);
      if (counter_value < 10) {
        log_counter.store(counter_value + 1, std::memory_order_relaxed);
        LOG(WARNING)
            << "Allocator (" << Name() << ") ran out of memory trying "
            << "to allocate " << strings::HumanReadableNumBytes(num_bytes)
            << ". The caller indicates that this is not a failure, but"
            << " may mean that there could be performance gains if more"
            << " memory were available.";
      }
    }
    return result;
  } else {
    return AllocateRawInternalWithRetry(unused_alignment, num_bytes,
                                        allocation_attr);
  }
}

// static
size_t BFCAllocator::RoundedBytes(size_t bytes) {
  size_t rounded_bytes =
      (kMinAllocationSize *
       ((bytes + kMinAllocationSize - 1) / kMinAllocationSize));
  DCHECK_EQ(size_t{0}, rounded_bytes % kMinAllocationSize);
  return rounded_bytes;
}

void* BFCAllocator::AllocateRawInternal(size_t unused_alignment,
                                        size_t num_bytes,
                                        bool dump_log_on_failure,
                                        uint64 freed_before) {
  if (num_bytes == 0) {
    LOG(ERROR) << "tried to allocate 0 bytes";
    return nullptr;
  }
  // First, always allocate memory of at least kMinAllocationSize
  // bytes, and always allocate multiples of kMinAllocationSize bytes
  // so all memory addresses are nicely byte aligned.
  size_t rounded_bytes = RoundedBytes(num_bytes);

  // The BFC allocator tries to find the best fit first.
  BinNum bin_num = BinNumForSize(rounded_bytes);

  mutex_lock l(lock_);
  void* ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes, freed_before);
  if (ptr != nullptr) {
    return ptr;
  }

  // Try to extend
  if (Extend(unused_alignment, rounded_bytes)) {
    ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes, freed_before);
    if (ptr != nullptr) {
      return ptr;
    }
  }

  // We searched all bins for an existing free chunk to use and
  // couldn't find one. This means we must have run out of memory.
  // Dump the memory log for analysis.
  if (dump_log_on_failure) {
    LOG(WARNING) << "Allocator (" << Name() << ") ran out of memory trying "
                 << "to allocate " << strings::HumanReadableNumBytes(num_bytes)
                 << ". Current allocation summary follows.";
    DumpMemoryLog(rounded_bytes);
    LOG(WARNING) << RenderOccupancy();
  }
  return nullptr;
}

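// Scans the bins starting at bin_num for the smallest free chunk of at least
// rounded_bytes (skipping chunks freed after 'freed_before' when a bound is
// given), splits it if it is much larger than needed, marks it in use, and
// returns its pointer. Returns nullptr if no suitable chunk exists.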
void* BFCAllocator::FindChunkPtr(BinNum bin_num, size_t rounded_bytes,
                                 size_t num_bytes, uint64 freed_before) {
  // First identify the first bin that could satisfy rounded_bytes.
  for (; bin_num < kNumBins; bin_num++) {
    // Start searching from the first bin for the smallest chunk that fits
    // rounded_bytes.
    Bin* b = BinFromIndex(bin_num);
    for (auto citer = b->free_chunks.begin(); citer != b->free_chunks.end();
         ++citer) {
      const BFCAllocator::ChunkHandle h = (*citer);
      BFCAllocator::Chunk* chunk = ChunkFromHandle(h);
      DCHECK(!chunk->in_use());
      if (freed_before > 0 && freed_before < chunk->freed_count) {
        continue;
      }
      if (chunk->size >= rounded_bytes) {
        // We found an existing chunk that fits us that wasn't in use, so remove
        // it from the free bin structure prior to using.
        RemoveFreeChunkIterFromBin(&b->free_chunks, citer);

        // If we can break the size of the chunk into two reasonably large
        // pieces, do so. In any case don't waste more than
        // kMaxInternalFragmentation bytes on padding this alloc.
        const int64 kMaxInternalFragmentation = 128 << 20;  // 128mb
        if (chunk->size >= rounded_bytes * 2 ||
            static_cast<int64>(chunk->size) - rounded_bytes >=
                kMaxInternalFragmentation) {
          SplitChunk(h, rounded_bytes);
          chunk = ChunkFromHandle(h);  // Update chunk pointer in case it moved
        }

        // The requested size of the returned chunk is what the user
        // has allocated.
        chunk->requested_size = num_bytes;
        // Assign a unique id and increment the id counter, marking the
        // chunk as being in use.
        chunk->allocation_id = next_allocation_id_++;

        // Update stats.
        ++stats_.num_allocs;
        stats_.bytes_in_use += chunk->size;
        stats_.peak_bytes_in_use =
            std::max(stats_.peak_bytes_in_use, stats_.bytes_in_use);
        stats_.largest_alloc_size =
            std::max<std::size_t>(stats_.largest_alloc_size, chunk->size);

        VLOG(4) << "Returning: " << chunk->ptr;
        if (VLOG_IS_ON(4)) {
          LOG(INFO) << "A: " << RenderOccupancy();
        }
        return chunk->ptr;
      }
    }
  }

  return nullptr;
}

void BFCAllocator::SplitChunk(BFCAllocator::ChunkHandle h, size_t num_bytes) {
  // Allocate the new chunk before we do any ChunkFromHandle
  ChunkHandle h_new_chunk = AllocateChunk();

  Chunk* c = ChunkFromHandle(h);
  CHECK(!c->in_use() && (c->bin_num == kInvalidBinNum));

  // Create a new chunk starting num_bytes after c
  BFCAllocator::Chunk* new_chunk = ChunkFromHandle(h_new_chunk);
  new_chunk->ptr = static_cast<void*>(static_cast<char*>(c->ptr) + num_bytes);
  region_manager_.set_handle(new_chunk->ptr, h_new_chunk);

  // Set the new sizes of the chunks.
  new_chunk->size = c->size - num_bytes;
  c->size = num_bytes;

  // The new chunk is not in use.
  new_chunk->allocation_id = -1;

  // It inherits the freed time.
  new_chunk->freed_count = c->freed_count;

  // Maintain the pointers.
  // c <-> c_neighbor becomes
  // c <-> new_chunk <-> c_neighbor
  BFCAllocator::ChunkHandle h_neighbor = c->next;
  new_chunk->prev = h;
  new_chunk->next = h_neighbor;
  c->next = h_new_chunk;
  if (h_neighbor != kInvalidChunkHandle) {
    Chunk* c_neighbor = ChunkFromHandle(h_neighbor);
    c_neighbor->prev = h_new_chunk;
  }

  // Add the newly free chunk to the free bin.
  InsertFreeChunkIntoBin(h_new_chunk);
}

void BFCAllocator::DeallocateRaw(void* ptr) {
  VLOG(1) << "DeallocateRaw " << Name() << " " << RequestedSize(ptr);
  DeallocateRawInternal(ptr);
  retry_helper_.NotifyDealloc();
}

void BFCAllocator::DeallocateRawInternal(void* ptr) {
  if (ptr == nullptr) {
    LOG(ERROR) << "tried to deallocate nullptr";
    return;
  }
  mutex_lock l(lock_);

  // Find the chunk from the ptr.
  BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
  CHECK(h != kInvalidChunkHandle);

  // Consider coalescing it.
  FreeAndMaybeCoalesce(h);

  if (VLOG_IS_ON(4)) {
    LOG(INFO) << "F: " << RenderOccupancy();
  }
}

// Merges h1 and h2 when Chunk(h1)->next is h2 and Chunk(h2)->prev is h1.
// We merge Chunk(h2) into Chunk(h1).
void BFCAllocator::Merge(BFCAllocator::ChunkHandle h1,
                         BFCAllocator::ChunkHandle h2) {
  Chunk* c1 = ChunkFromHandle(h1);
  Chunk* c2 = ChunkFromHandle(h2);
  // We can only merge chunks that are not in use.
  CHECK(!c1->in_use() && !c2->in_use());

  // c1's prev doesn't change, still points to the same ptr, and is
  // still not in use.

  // Fix up neighbor pointers
  //
  // c1 <-> c2 <-> c3 should become
  // c1 <-> c3

  BFCAllocator::ChunkHandle h3 = c2->next;
  c1->next = h3;
  CHECK(c2->prev == h1);
  if (h3 != kInvalidChunkHandle) {
    BFCAllocator::Chunk* c3 = ChunkFromHandle(h3);
    c3->prev = h1;
  }

  // Set the new size
  c1->size += c2->size;

  // Pick latest free time.
  c1->freed_count = std::max(c1->freed_count, c2->freed_count);

  DeleteChunk(h2);
}

void BFCAllocator::DeleteChunk(ChunkHandle h) {
  // Delete h and cleanup all state
  Chunk* c = ChunkFromHandle(h);
  // VLOG(4) << "Removing: " << c->ptr;
  region_manager_.erase(c->ptr);
  DeallocateChunk(h);
}

void BFCAllocator::InsertFreeChunkIntoBin(BFCAllocator::ChunkHandle h) {
  Chunk* c = ChunkFromHandle(h);
  CHECK(!c->in_use() && (c->bin_num == kInvalidBinNum));
  BinNum bin_num = BinNumForSize(c->size);
  Bin* new_bin = BinFromIndex(bin_num);
  c->bin_num = bin_num;
  new_bin->free_chunks.insert(h);
}

void BFCAllocator::RemoveFreeChunkIterFromBin(
    BFCAllocator::Bin::FreeChunkSet* free_chunks,
    const BFCAllocator::Bin::FreeChunkSet::iterator& citer) {
  ChunkHandle h = *citer;
  Chunk* c = ChunkFromHandle(h);
  CHECK(!c->in_use() && (c->bin_num != kInvalidBinNum));
  free_chunks->erase(citer);
  c->bin_num = kInvalidBinNum;
}

void BFCAllocator::RemoveFreeChunkFromBin(BFCAllocator::ChunkHandle h) {
  Chunk* c = ChunkFromHandle(h);
  CHECK(!c->in_use() && (c->bin_num != kInvalidBinNum));
  CHECK_GT(BinFromIndex(c->bin_num)->free_chunks.erase(h), 0)
      << "Could not find chunk in bin";
  c->bin_num = kInvalidBinNum;
}

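// Marks the chunk at h as free, merges it with any free neighbor on either
// side, and inserts the resulting (possibly larger) chunk into the
// appropriate bin.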
void BFCAllocator::FreeAndMaybeCoalesce(BFCAllocator::ChunkHandle h) {
  Chunk* c = ChunkFromHandle(h);
  CHECK(c->in_use() && (c->bin_num == kInvalidBinNum));

  // Mark the chunk as no longer in use.
  c->allocation_id = -1;

  // Optionally record the free time.
  if (timing_counter_) {
    c->freed_count = timing_counter_->next();
  }

  // Updates the stats.
  stats_.bytes_in_use -= c->size;

  ChunkHandle coalesced_chunk = h;

  // If the next chunk is free, merge it into c and delete it.
  if (c->next != kInvalidChunkHandle && !ChunkFromHandle(c->next)->in_use()) {
    // VLOG(8) << "Merging c->next " << ChunkFromHandle(c->next)->ptr
    //         << " with c " << c->ptr;
    RemoveFreeChunkFromBin(c->next);
    Merge(h, c->next);
  }

  // If the previous chunk is free, merge c into it and delete c.
  if (c->prev != kInvalidChunkHandle && !ChunkFromHandle(c->prev)->in_use()) {
    // VLOG(8) << "Merging c " << c->ptr << " into c->prev "
    //         << ChunkFromHandle(c->prev)->ptr;

    coalesced_chunk = c->prev;
    RemoveFreeChunkFromBin(c->prev);
    Merge(c->prev, h);
  }

  InsertFreeChunkIntoBin(coalesced_chunk);
}

bool BFCAllocator::TracksAllocationSizes() { return true; }

size_t BFCAllocator::RequestedSize(const void* ptr) {
  mutex_lock l(lock_);
  BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
  CHECK(h != kInvalidChunkHandle)
      << "Asked for requested size of pointer we never allocated: " << ptr;
  BFCAllocator::Chunk* c = ChunkFromHandle(h);
  return c->requested_size;
}

size_t BFCAllocator::AllocatedSize(const void* ptr) {
  mutex_lock l(lock_);
  BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
  CHECK(h != kInvalidChunkHandle)
      << "Asked for allocated size of pointer we never allocated: " << ptr;
  BFCAllocator::Chunk* c = ChunkFromHandle(h);
  return c->size;
}

int64 BFCAllocator::AllocationId(const void* ptr) {
  mutex_lock l(lock_);
  BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
  CHECK(h != kInvalidChunkHandle)
      << "Asked for allocation id of pointer we never allocated: " << ptr;
  BFCAllocator::Chunk* c = ChunkFromHandle(h);
  return c->allocation_id;
}

namespace {

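// Paints character 'c' over the positions of rendered[] that correspond to
// the byte range [ptr, ptr + size), scaling total_render_size bytes down to
// 'resolution' characters. 'offset' shifts the range so that multiple regions
// can be drawn into the same buffer.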
void RenderRegion(char* rendered, const size_t resolution,
                  const size_t total_render_size, const size_t offset,
                  const void* base_ptr, const void* ptr, const size_t size,
                  const char c) {
  const char* base_ptr_c = static_cast<const char*>(base_ptr);
  const char* ptr_c = static_cast<const char*>(ptr);

  size_t start_location =
      ((ptr_c - base_ptr_c + offset) * resolution) / total_render_size;
  CHECK_GE(start_location, 0);
  CHECK_LT(start_location, resolution);
  size_t end_location =
      ((ptr_c + size - 1 - base_ptr_c + offset) * resolution) /
      total_render_size;
  CHECK_GE(end_location, 0);
  CHECK_LT(end_location, resolution);

  for (size_t i = start_location; i <= end_location; ++i) {
    rendered[i] = c;
  }
}

}  // namespace

string BFCAllocator::RenderOccupancy() {
  // Make a buffer for the ASCII-art representation.
  const size_t resolution = 100;
  char rendered[resolution];

  // Compute the total region size to render over
  size_t total_region_size = 0;
  for (const auto& region : region_manager_.regions()) {
    total_region_size += region.memory_size();
  }

  if (total_region_size == 0) {
    return "<allocator contains no memory>";
  }

  // Start out with everything empty
  RenderRegion(rendered, resolution, total_region_size, 0, nullptr, nullptr,
               total_region_size, '_');

  size_t region_offset = 0;
  for (const auto& region : region_manager_.regions()) {
    ChunkHandle h = region_manager_.get_handle(region.ptr());
    // Then render each chunk left to right.
    while (h != kInvalidChunkHandle) {
      Chunk* c = ChunkFromHandle(h);
      if (c->in_use()) {
        // Render the wasted space
        size_t wasted = c->size - c->requested_size;
        if (wasted > 0) {
          RenderRegion(rendered, resolution, total_region_size,
                       region_offset + c->requested_size, region.ptr(), c->ptr,
                       wasted, 'x');
        }
        // Then the occupied space
        RenderRegion(rendered, resolution, total_region_size, region_offset,
                     region.ptr(), c->ptr, c->requested_size, '*');
      }
      h = c->next;
    }
    region_offset += region.memory_size();
  }

  return string(rendered, resolution);
}

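// Logs a detailed snapshot of the allocator's state: per-bin chunk counts and
// byte totals, the free chunks of the bin that would have served num_bytes,
// every chunk in every region, and a summary of in-use chunks grouped by size.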
void BFCAllocator::DumpMemoryLog(size_t num_bytes) {
  const std::array<BinDebugInfo, kNumBins> bin_infos = get_bin_debug_info();
  for (BinNum bin_num = 0; bin_num < kNumBins; bin_num++) {
    Bin* b = BinFromIndex(bin_num);
    const BinDebugInfo& bin_info = bin_infos[bin_num];
    CHECK_EQ(b->free_chunks.size(),
             bin_info.total_chunks_in_bin - bin_info.total_chunks_in_use);

    LOG(INFO) << "Bin (" << b->bin_size
              << "): \tTotal Chunks: " << bin_info.total_chunks_in_bin
              << ", Chunks in use: " << bin_info.total_chunks_in_use << ". "
              << strings::HumanReadableNumBytes(bin_info.total_bytes_in_bin)
              << " allocated for chunks. "
              << strings::HumanReadableNumBytes(bin_info.total_bytes_in_use)
              << " in use in bin. "
              << strings::HumanReadableNumBytes(
                     bin_info.total_requested_bytes_in_use)
              << " client-requested in use in bin.";
  }

  // Find the bin that we would have liked to allocate in, so we
  // can get some further analysis about fragmentation.
  Bin* b = BinForSize(num_bytes);

  LOG(INFO) << "Bin for " << strings::HumanReadableNumBytes(num_bytes)
            << " was " << strings::HumanReadableNumBytes(b->bin_size)
            << ", Chunk State: ";

  for (ChunkHandle h : b->free_chunks) {
    Chunk* c = ChunkFromHandle(h);
    LOG(INFO) << c->DebugString(this, true);
  }

  // Next show the chunks that are in use, and also summarize their
  // number by size.
  std::map<size_t, int> in_use_by_size;
  for (const auto& region : region_manager_.regions()) {
    ChunkHandle h = region_manager_.get_handle(region.ptr());
    while (h != kInvalidChunkHandle) {
      const Chunk* c = ChunkFromHandle(h);
      if (c->in_use()) {
        in_use_by_size[c->size]++;
      }
      LOG(INFO) << (c->in_use() ? "Chunk" : "Free ") << " at " << c->ptr
                << " of size " << c->size
                << (timing_counter_
                        ? strings::StrCat(" freed_count ", c->freed_count)
                        : "");
      h = c->next;
    }
  }

  LOG(INFO) << " Summary of in-use Chunks by size: ";
  size_t total_bytes = 0;
  for (auto& it : in_use_by_size) {
    LOG(INFO) << it.second << " Chunks of size " << it.first << " totalling "
              << strings::HumanReadableNumBytes(it.first * it.second);
    total_bytes += (it.first * it.second);
  }
  LOG(INFO) << "Sum Total of in-use chunks: "
            << strings::HumanReadableNumBytes(total_bytes);
  LOG(INFO) << "Stats: \n" << stats_.DebugString();
}

absl::optional<AllocatorStats> BFCAllocator::GetStats() {
  mutex_lock l(lock_);
  return stats_;
}

void BFCAllocator::ClearStats() {
  mutex_lock l(lock_);
  stats_.num_allocs = 0;
  stats_.peak_bytes_in_use = stats_.bytes_in_use;
  stats_.largest_alloc_size = 0;
}

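// Walks every chunk in every allocation region and accumulates per-bin chunk
// and byte counts, cross-checking that each free chunk is present in its
// bin's free set.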
std::array<BFCAllocator::BinDebugInfo, BFCAllocator::kNumBins>
BFCAllocator::get_bin_debug_info() {
  std::array<BinDebugInfo, kNumBins> bin_infos;
  for (const auto& region : region_manager_.regions()) {
    ChunkHandle h = region_manager_.get_handle(region.ptr());
    while (h != kInvalidChunkHandle) {
      const Chunk* c = ChunkFromHandle(h);
      BinNum bin_num = BinNumForSize(c->size);
      BinDebugInfo& bin_info = bin_infos[bin_num];
      bin_info.total_bytes_in_bin += c->size;
      bin_info.total_chunks_in_bin++;
      if (c->in_use()) {
        bin_info.total_bytes_in_use += c->size;
        bin_info.total_requested_bytes_in_use += c->requested_size;
        bin_info.total_chunks_in_use++;
      } else {
        Bin* bin = BinFromIndex(bin_num);
        CHECK_EQ(bin->free_chunks.count(h), 1);
        CHECK_EQ(c->bin_num, bin_num);
      }
      h = c->next;
    }
  }
  return bin_infos;
}

}  // namespace tensorflow