/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/common_runtime/bfc_allocator.h"

#include <atomic>

#include "absl/strings/string_view.h"
#include "tensorflow/core/common_runtime/allocator_retry.h"
#include "tensorflow/core/lib/core/bits.h"
#include "tensorflow/core/lib/strings/numbers.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/file_system.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/mutex.h"
#include "tensorflow/core/platform/stacktrace.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/platform/types.h"
#include "tensorflow/core/profiler/lib/traceme.h"
#include "tensorflow/core/protobuf/bfc_memory_map.pb.h"

namespace tensorflow {

constexpr BFCAllocator::ChunkHandle BFCAllocator::kInvalidChunkHandle;
constexpr uint64 BFCAllocator::kMemDebugHistorySize;

BFCAllocator::BFCAllocator(SubAllocator* sub_allocator, size_t total_memory,
                           bool allow_growth, const string& name,
                           bool garbage_collection)
    : garbage_collection_(garbage_collection),
      coalesce_regions_(sub_allocator->SupportsCoalescing()),
      sub_allocator_(sub_allocator),
      name_(name),
      free_chunks_list_(kInvalidChunkHandle),
      next_allocation_id_(1) {
  if (allow_growth) {
    // 2MiB smallest initial allocation, unless total memory available
    // is less.
    curr_region_allocation_bytes_ =
        RoundedBytes(std::min(total_memory, size_t{2 << 20}));
  } else {
    curr_region_allocation_bytes_ = RoundedBytes(total_memory);
  }

  // Allocate the requested amount of memory.
  memory_limit_ = total_memory;
  stats_.bytes_limit = static_cast<int64>(total_memory);

  // Create a bunch of bins of various good sizes.

  // We create bins to fit all possible ranges that cover the
  // memory_limit_ starting from allocations up to 256 bytes to
  // allocations up to (and including) the memory limit.
  for (BinNum b = 0; b < kNumBins; b++) {
    size_t bin_size = BinNumToSize(b);
    VLOG(1) << "Creating bin of max chunk size "
            << strings::HumanReadableNumBytes(bin_size);
    new (BinFromIndex(b)) Bin(this, bin_size);
    CHECK_EQ(BinForSize(bin_size), BinFromIndex(b));
    CHECK_EQ(BinForSize(bin_size + 255), BinFromIndex(b));
    CHECK_EQ(BinForSize(bin_size * 2 - 1), BinFromIndex(b));
    if (b + 1 < kNumBins) {
      CHECK_NE(BinForSize(bin_size * 2), BinFromIndex(b));
    }
  }
}

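// A rough sketch of the bin layout implied by the CHECKs above, assuming the
// smallest bin size is 256 bytes (per the comment in the constructor) and that
// bin sizes double from one bin to the next: bin b then accepts chunk sizes in
// [BinNumToSize(b), 2 * BinNumToSize(b)), e.g.
//   BinForSize(256)  -> bin 0   // sizes [256, 512)
//   BinForSize(511)  -> bin 0
//   BinForSize(512)  -> bin 1   // sizes [512, 1024)
//   BinForSize(1024) -> bin 2   // sizes [1024, 2048)
// FindChunkPtr() then scans that bin and, if necessary, larger bins for the
// smallest free chunk that fits the rounded request.
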
BFCAllocator::~BFCAllocator() {
  // Return memory back.
  VLOG(2) << "Number of regions allocated: "
          << region_manager_.regions().size();
  for (const auto& region : region_manager_.regions()) {
    sub_allocator_->Free(region.ptr(), region.memory_size());
  }

  for (BinNum b = 0; b < kNumBins; b++) {
    BinFromIndex(b)->~Bin();
  }
}

BFCAllocator::Chunk* BFCAllocator::ChunkFromHandle(ChunkHandle h) {
  DCHECK_GE(h, 0);
  DCHECK_LT(h, static_cast<int>(chunks_.size()));
  return &(chunks_[h]);
}

const BFCAllocator::Chunk* BFCAllocator::ChunkFromHandle(ChunkHandle h) const {
  DCHECK_GE(h, 0);
  DCHECK_LT(h, static_cast<int>(chunks_.size()));
  return &(chunks_[h]);
}

bool BFCAllocator::Extend(size_t alignment, size_t rounded_bytes) {
  size_t available_bytes = memory_limit_ - total_region_allocated_bytes_;
  // Rounds available_bytes down to the nearest multiple of kMinAllocationSize.
  available_bytes = (available_bytes / kMinAllocationSize) * kMinAllocationSize;

  // Do we have enough space to handle the client's request?
  // If not, fail immediately.
  if (rounded_bytes > available_bytes) {
    return false;
  }

  // If curr_region_allocation_bytes_ is not enough to satisfy the
  // allocation, keep multiplying by a power of two until that is
  // sufficient.
  bool increased_allocation = false;
  while (rounded_bytes > curr_region_allocation_bytes_) {
    curr_region_allocation_bytes_ *= 2;
    increased_allocation = true;
  }

  // Try allocating.
  size_t bytes = std::min(curr_region_allocation_bytes_, available_bytes);
  size_t bytes_received;
  void* mem_addr = sub_allocator_->Alloc(alignment, bytes, &bytes_received);
  if (mem_addr == nullptr && !started_backpedal_) {
    // Only backpedal once.
    started_backpedal_ = true;

    static constexpr float kBackpedalFactor = 0.9;

    // Try allocating less memory.
    while (mem_addr == nullptr) {
      bytes = RoundedBytes(bytes * kBackpedalFactor);
      if (bytes < rounded_bytes) break;
      mem_addr = sub_allocator_->Alloc(alignment, bytes, &bytes_received);
    }
  }

  if (mem_addr == nullptr) {
    return false;
  }

  if (!increased_allocation) {
    // Increase the region size of the next required allocation.
    curr_region_allocation_bytes_ *= 2;
  }

  VLOG(1) << "Extending allocation by "
          << strings::HumanReadableNumBytes(bytes_received) << " bytes.";

  total_region_allocated_bytes_ += bytes_received;
  VLOG(1) << "Total allocated bytes: "
          << strings::HumanReadableNumBytes(total_region_allocated_bytes_);

  VLOG(1) << "Allocated memory at " << mem_addr << " to "
          << static_cast<void*>(static_cast<char*>(mem_addr) + bytes_received);

  AllocationRegion* maybe_extended_region = nullptr;
  if (coalesce_regions_) {
    maybe_extended_region =
        region_manager_.AddOrExtendAllocationRegion(mem_addr, bytes_received);
  } else {
    region_manager_.AddAllocationRegion(mem_addr, bytes_received);
  }

  // Create one large chunk for the whole memory space that will
  // be chunked later.
  ChunkHandle h = AllocateChunk();
  BFCAllocator::Chunk* c = ChunkFromHandle(h);
  c->ptr = mem_addr;
  c->size = bytes_received;
  c->allocation_id = -1;
  c->prev = kInvalidChunkHandle;
  c->next = kInvalidChunkHandle;
  c->freed_at_count = 0;

  region_manager_.set_handle(c->ptr, h);

  // If the region was extended, then there exists a previous chunk that should
  // be linked to the new chunk.
  if (maybe_extended_region != nullptr) {
    ChunkHandle prev =
        maybe_extended_region->get_handle(maybe_extended_region->ptr());
    BFCAllocator::Chunk* prev_chunk = ChunkFromHandle(prev);
    // Find the last recorded chunk in the extended region.
    while (prev_chunk->next != kInvalidChunkHandle) {
      prev = prev_chunk->next;
      prev_chunk = ChunkFromHandle(prev);
    }
    c->prev = prev;
    prev_chunk->next = h;
  }

  // Maybe merge adjacent chunks and insert the chunk into the right bin.
  InsertFreeChunkIntoBin(TryToCoalesce(h, /*ignore_freed_at=*/false));

  return true;
}

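// A small worked example of the growth policy in Extend() (assuming
// allow_growth was passed to the constructor, so the initial region size is
// 2MiB): a successful extension that did not already have to scale up for the
// current request doubles curr_region_allocation_bytes_ afterwards, so later
// regions are requested at roughly 2MiB, 4MiB, 8MiB, ..., always capped by the
// remaining available_bytes.  This keeps the number of distinct regions small
// relative to the total memory eventually allocated.
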
BFCAllocator::ChunkHandle BFCAllocator::AllocateChunk() {
  if (free_chunks_list_ != kInvalidChunkHandle) {
    ChunkHandle h = free_chunks_list_;
    Chunk* c = ChunkFromHandle(h);
    free_chunks_list_ = c->next;
    return h;
  } else {
    ChunkHandle h = chunks_.size();
    chunks_.resize(h + 1);
    return h;
  }
}

void BFCAllocator::DeallocateChunk(ChunkHandle h) {
  Chunk* c = ChunkFromHandle(h);
  c->allocation_id = -1;
  c->bin_num = kInvalidBinNum;
  c->next = free_chunks_list_;
  free_chunks_list_ = h;
}

void* BFCAllocator::AllocateRawInternalWithRetry(
    size_t unused_alignment, size_t num_bytes,
    const AllocationAttributes& allocation_attr) {
  // Fast path: Try once to allocate without getting the retry_helper_ involved
  uint64 freed_by_count = 0;
  if (allocation_attr.freed_by_func != nullptr) {
    freed_by_count = (*allocation_attr.freed_by_func)();
  }
  void* r =
      AllocateRawInternal(unused_alignment, num_bytes, false, freed_by_count);
  if (r != nullptr) {
    return r;
  } else {
    static const int64 kMaxMillisToWait = 10000;  // 10 seconds
    r = retry_helper_.AllocateRaw(
        [this, &allocation_attr](size_t a, size_t nb, bool v) {
          uint64 freed_by_count = 0;
          if (allocation_attr.freed_by_func != nullptr) {
            freed_by_count = (*allocation_attr.freed_by_func)();
          }
          return AllocateRawInternal(a, nb, v, freed_by_count);
        },
        kMaxMillisToWait, unused_alignment, num_bytes);
    return r;
  }
}

void* BFCAllocator::AllocateRaw(size_t unused_alignment, size_t num_bytes,
                                const AllocationAttributes& allocation_attr) {
  VLOG(1) << "AllocateRaw " << Name() << " " << num_bytes;
  if (!allocation_attr.retry_on_failure) {
    // Return immediately upon the first failure if this is for allocating an
    // optional scratch space.
    bool dump_log_on_failure = VLOG_IS_ON(2);
    uint64 freed_by_count = 0;
    if (allocation_attr.freed_by_func != nullptr) {
      freed_by_count = (*allocation_attr.freed_by_func)();
    }
    void* result = AllocateRawInternal(unused_alignment, num_bytes,
                                       dump_log_on_failure, freed_by_count);
    if (result == nullptr) {
      static std::atomic<int32> log_counter{0};
      int32 counter_value = log_counter.load(std::memory_order_relaxed);
      if (counter_value < 10) {
        log_counter.store(counter_value + 1, std::memory_order_relaxed);
        LOG(WARNING)
            << "Allocator (" << Name() << ") ran out of memory trying "
            << "to allocate " << strings::HumanReadableNumBytes(num_bytes)
            << " with freed_by_count=" << freed_by_count
            << ". The caller indicates that this is not a failure, but"
            << " it may mean that there could be performance gains if more"
            << " memory were available.";
      }
    }
    return result;
  } else {
    return AllocateRawInternalWithRetry(unused_alignment, num_bytes,
                                        allocation_attr);
  }
}

// static
size_t BFCAllocator::RoundedBytes(size_t bytes) {
  size_t rounded_bytes =
      (kMinAllocationSize *
       ((bytes + kMinAllocationSize - 1) / kMinAllocationSize));
  DCHECK_EQ(size_t{0}, rounded_bytes % kMinAllocationSize);
  return rounded_bytes;
}

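// For example, assuming kMinAllocationSize is 256 bytes (the smallest bin size
// mentioned in the constructor), RoundedBytes() behaves as follows:
//   RoundedBytes(1)   == 256
//   RoundedBytes(256) == 256
//   RoundedBytes(257) == 512
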
bool BFCAllocator::DeallocateFreeRegions(size_t rounded_bytes)
    TF_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
  // Do nothing if garbage collection is off.
  if (!garbage_collection_) {
    return false;
  }

  // Searching for free regions.
  absl::flat_hash_set<void*> free_region_ptrs;
  size_t total_free_bytes = 0;
  for (const AllocationRegion& region : region_manager_.regions()) {
    ChunkHandle h = region_manager_.get_handle(region.ptr());
    bool any_use = false;
    while (h != kInvalidChunkHandle) {
      const Chunk* c = ChunkFromHandle(h);
      if (c->in_use()) {
        any_use = true;
        break;
      }
      h = c->next;
    }

    if (!any_use) {
      VLOG(2) << "Found free region with ptr = " << region.ptr();
      free_region_ptrs.insert(region.ptr());
      total_free_bytes += region.memory_size();
    }
  }

  if (total_free_bytes == 0) {
    return false;
  }

  // Rough estimation to check whether deallocation can help.
  size_t available_bytes =
      memory_limit_ - total_region_allocated_bytes_ + total_free_bytes;
  if (rounded_bytes > available_bytes) {
    return false;
  }

  LOG(WARNING) << "Garbage collection: deallocate free memory regions"
               << " (i.e., allocations) so that we can re-allocate a larger"
               << " region to avoid OOM due to memory fragmentation. If you"
               << " see this message frequently, you are running near the"
               << " threshold of the available device memory and re-allocation"
               << " may incur great performance overhead. You may try smaller"
               << " batch sizes to observe the performance impact."
               << " Set TF_ENABLE_GPU_GARBAGE_COLLECTION=false if you'd like to"
               << " disable this feature.";

  // Deallocate free regions.
  DeallocateRegions(free_region_ptrs);

  return true;
}

void BFCAllocator::DeallocateRegions(
    const absl::flat_hash_set<void*>& region_ptrs)
    TF_EXCLUSIVE_LOCKS_REQUIRED(lock_) {
  // Explicitly remove the const qualifier as some compilers disallow passing
  // const_iterator to std::vector::erase(), which is used in
  // RemoveAllocationRegion().
  auto regions =
      const_cast<std::vector<AllocationRegion>*>(&region_manager_.regions());
  auto it = regions->begin();
  while (it != regions->end()) {
    if (!region_ptrs.contains(it->ptr())) {
      ++it;
      continue;
    }

    VLOG(2) << "Deallocate region with ptr = " << it->ptr();
    // Remove all chunk registrations from Bins.
    ChunkHandle h = region_manager_.get_handle(it->ptr());
    while (h != kInvalidChunkHandle) {
      const Chunk* c = ChunkFromHandle(h);
      if (c->bin_num != kInvalidBinNum) {
        RemoveFreeChunkFromBin(h);
      }
      auto h_to_delete = h;
      h = c->next;
      DeleteChunk(h_to_delete);
    }

    // Deallocate the memory.
    sub_allocator_->Free(it->ptr(), it->memory_size());
    total_region_allocated_bytes_ -= it->memory_size();
    it = region_manager_.RemoveAllocationRegion(it);
  }
}

void* BFCAllocator::AllocateRawInternal(size_t unused_alignment,
                                        size_t num_bytes,
                                        bool dump_log_on_failure,
                                        uint64 freed_before) {
  if (num_bytes == 0) {
    VLOG(2) << "tried to allocate 0 bytes";
    return nullptr;
  }
  // First, always allocate memory of at least kMinAllocationSize
  // bytes, and always allocate multiples of kMinAllocationSize bytes
  // so all memory addresses are nicely byte aligned.
  size_t rounded_bytes = RoundedBytes(num_bytes);

  // The BFC allocator tries to find the best fit first.
  BinNum bin_num = BinNumForSize(rounded_bytes);

  mutex_lock l(lock_);
  if (!timestamped_chunks_.empty()) {
    // Merge timestamped chunks whose counts have become safe for general use.
    MergeTimestampedChunks(0);
  }
  void* ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes, freed_before);
  if (ptr != nullptr) {
    AddTraceMe("MemoryAllocation", ptr);
    return ptr;
  }

  // Try to extend.
  if (Extend(unused_alignment, rounded_bytes)) {
    ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes, freed_before);
    if (ptr != nullptr) {
      AddTraceMe("MemoryAllocation", ptr);
      return ptr;
    }
  }

  if ((freed_before == 0) && (!timestamped_chunks_.empty())) {
    // We're unable to satisfy an allocation request without a specific
    // timestamp requirement. Rather than fail, try merging any held-out
    // timestamped chunks more aggressively until a free chunk of the necessary
    // size is formed.
    if (MergeTimestampedChunks(rounded_bytes)) {
      ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes, freed_before);
      if (ptr != nullptr) {
        AddTraceMe("MemoryAllocation", ptr);
        return ptr;
      }
    }
  }

  // Reaching this point means that no chunks can satisfy the request. Also,
  // the unallocated bytes cannot satisfy the request. Before giving up, let's
  // try deallocating free regions so that the suballocator can combine them
  // with the unallocated bytes and form a larger region.
  if (DeallocateFreeRegions(rounded_bytes) &&
      Extend(unused_alignment, rounded_bytes)) {
    ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes, freed_before);
    if (ptr != nullptr) {
      AddTraceMe("MemoryAllocation", ptr);
      return ptr;
    }
  }

  // We searched all bins for an existing free chunk to use and
  // couldn't find one.  This means we must have run out of memory.
  // Dump the memory log for analysis.
  MaybeWriteMemoryMap();
  if (dump_log_on_failure) {
    LOG(WARNING)
        << "Allocator (" << Name() << ") ran out of memory trying "
        << "to allocate " << strings::HumanReadableNumBytes(num_bytes)
        << " (rounded to " << rounded_bytes << ")"
        << " requested by op "
        << ScopedMemoryDebugAnnotation::CurrentAnnotation().pending_op_name
        << "\nCurrent allocation summary follows.";
    DumpMemoryLog(rounded_bytes);
    LOG(WARNING) << RenderOccupancy();
  }
  return nullptr;
}

int64 BFCAllocator::LargestFreeChunk() {
  for (int i = kNumBins - 1; i >= 0; i--) {
    if (!BinFromIndex(i)->free_chunks.empty()) {
      return ChunkFromHandle(*BinFromIndex(i)->free_chunks.rbegin())->size;
    }
  }
  return 0;
}

double BFCAllocator::GetFragmentation() {
  int64 bytes_available = total_region_allocated_bytes_ - stats_.bytes_in_use;
  DCHECK_GT(bytes_available, 0);
  return static_cast<double>(bytes_available - LargestFreeChunk()) /
         bytes_available;
}

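// As a concrete reading of the metric above: if 1 GiB of region memory is
// currently free but the largest single free chunk is only 256 MiB, the
// fragmentation reported is (1024 - 256) / 1024 = 0.75, i.e. 75% of the free
// space cannot be served out of one contiguous chunk.
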
void BFCAllocator::AddTraceMe(absl::string_view traceme_name, const void* ptr) {
  BFCAllocator::Chunk* chunk = ChunkFromHandle(region_manager_.get_handle(ptr));
  AddTraceMe(traceme_name, chunk->ptr, chunk->requested_size, chunk->size);
}

void BFCAllocator::AddTraceMe(absl::string_view traceme_name,
                              const void* chunk_ptr, int64 req_bytes,
                              int64 alloc_bytes) {
  tensorflow::profiler::TraceMe::InstantActivity(
      [this, traceme_name, chunk_ptr, req_bytes, alloc_bytes]()
          TF_NO_THREAD_SAFETY_ANALYSIS {
            int64 bytes_available =
                memory_limit_ - stats_.bytes_reserved - stats_.bytes_in_use;
            const auto& annotation =
                ScopedMemoryDebugAnnotation::CurrentAnnotation();
            std::string tensor_shape;
            if (annotation.pending_shape) {
              tensor_shape = annotation.pending_shape->DebugString();
            }
            return tensorflow::profiler::TraceMeEncode(
                traceme_name, {{"allocator_name", name_},
                               {"bytes_reserved", stats_.bytes_reserved},
                               {"bytes_allocated", stats_.bytes_in_use},
                               {"bytes_available", bytes_available},
                               {"fragmentation", GetFragmentation()},
                               {"peak_bytes_in_use", stats_.peak_bytes_in_use},
                               {"requested_bytes", req_bytes},
                               {"allocation_bytes", alloc_bytes},
                               {"addr", reinterpret_cast<uint64>(chunk_ptr)},
                               {"tf_op", annotation.pending_op_name},
                               {"id", annotation.pending_step_id},
                               {"region_type", annotation.pending_region_type},
                               {"data_type", annotation.pending_data_type},
                               {"shape", tensor_shape}});
          },
      /*level=*/profiler::TraceMeLevel::kInfo);
}

void* BFCAllocator::FindChunkPtr(BinNum bin_num, size_t rounded_bytes,
                                 size_t num_bytes, uint64 freed_before) {
  // First identify the first bin that could satisfy rounded_bytes.
  for (; bin_num < kNumBins; bin_num++) {
    // Start searching from the first bin for the smallest chunk that fits
    // rounded_bytes.
    Bin* b = BinFromIndex(bin_num);
    for (auto citer = b->free_chunks.begin(); citer != b->free_chunks.end();
         ++citer) {
      const BFCAllocator::ChunkHandle h = (*citer);
      BFCAllocator::Chunk* chunk = ChunkFromHandle(h);
      DCHECK(!chunk->in_use());
      if (freed_before > 0 && freed_before < chunk->freed_at_count) {
        continue;
      }
      if (chunk->size >= rounded_bytes) {
        // We found an existing free chunk that is large enough, so remove it
        // from the free bin structure prior to using it.
        RemoveFreeChunkIterFromBin(&b->free_chunks, citer);

        // If we can break the size of the chunk into two reasonably large
        // pieces, do so.  In any case, don't waste more than
        // kMaxInternalFragmentation bytes on padding this alloc.
        const int64 kMaxInternalFragmentation = 128 << 20;  // 128 MiB
        if (chunk->size >= rounded_bytes * 2 ||
            static_cast<int64>(chunk->size) - rounded_bytes >=
                kMaxInternalFragmentation) {
          SplitChunk(h, rounded_bytes);
          chunk = ChunkFromHandle(h);  // Update chunk pointer in case it moved
        }

        // The requested size of the returned chunk is what the user
        // asked for.
        chunk->requested_size = num_bytes;
        // Assign a unique id and increment the id counter, marking the
        // chunk as being in use.
        chunk->allocation_id = next_allocation_id_++;

        // Update stats.
        ++stats_.num_allocs;
        stats_.bytes_in_use += chunk->size;
        stats_.peak_bytes_in_use =
            std::max(stats_.peak_bytes_in_use, stats_.bytes_in_use);
        stats_.largest_alloc_size =
            std::max<std::size_t>(stats_.largest_alloc_size, chunk->size);
        if (ShouldRecordOpName()) {
          const auto& annotation =
              ScopedMemoryDebugAnnotation::CurrentAnnotation();
          chunk->op_name = annotation.pending_op_name;
          if (!annotation.pending_op_name) {
            VLOG(2) << "missing pending_op_name for " << Name()
                    << " reading addr "
                    << static_cast<const void*>(&annotation.pending_op_name)
                    << "\n"
                    << CurrentStackTrace();
          }
          chunk->step_id = annotation.pending_step_id;
          chunk->action_count = ++action_counter_;
          uint64 slot = chunk->action_count % kMemDebugHistorySize;
          size_history_[slot] = stats_.bytes_in_use;
        }

        VLOG(4) << "Returning: " << chunk->ptr;
        if (VLOG_IS_ON(4)) {
          LOG(INFO) << "A: " << RenderOccupancy();
        }
        return chunk->ptr;
      }
    }
  }

  return nullptr;
}

void BFCAllocator::SplitChunk(BFCAllocator::ChunkHandle h, size_t num_bytes) {
  // Allocate the new chunk before we do any ChunkFromHandle
  ChunkHandle h_new_chunk = AllocateChunk();

  Chunk* c = ChunkFromHandle(h);
  CHECK(!c->in_use() && (c->bin_num == kInvalidBinNum));

  // Create a new chunk starting num_bytes after c
  BFCAllocator::Chunk* new_chunk = ChunkFromHandle(h_new_chunk);
  new_chunk->ptr = static_cast<void*>(static_cast<char*>(c->ptr) + num_bytes);
  region_manager_.set_handle(new_chunk->ptr, h_new_chunk);

  // Set the new sizes of the chunks.
  new_chunk->size = c->size - num_bytes;
  c->size = num_bytes;

  // The new chunk is not in use.
  new_chunk->allocation_id = -1;

  // It inherits the freed time.
  new_chunk->freed_at_count = c->freed_at_count;

  // Maintain the pointers.
  // c <-> c_neighbor becomes
  // c <-> new_chunk <-> c_neighbor
  BFCAllocator::ChunkHandle h_neighbor = c->next;
  new_chunk->prev = h;
  new_chunk->next = h_neighbor;
  c->next = h_new_chunk;
  if (h_neighbor != kInvalidChunkHandle) {
    Chunk* c_neighbor = ChunkFromHandle(h_neighbor);
    c_neighbor->prev = h_new_chunk;
  }

  // Add the newly free chunk to the free bin.
  InsertFreeChunkIntoBin(h_new_chunk);
}

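// A sketch of how FindChunkPtr() and SplitChunk() interact: if a rounded
// request of 300 KiB is served from a 1 MiB free chunk, the chunk is split
// (because 1 MiB >= 2 * 300 KiB) into a 300 KiB chunk handed to the caller and
// a 724 KiB remainder that is reinserted into the free bins and stays
// available for later requests or for coalescing with its neighbors.
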
void BFCAllocator::DeallocateRaw(void* ptr) {
  VLOG(1) << "DeallocateRaw " << Name() << " "
          << (ptr ? RequestedSize(ptr) : 0);
  DeallocateRawInternal(ptr);
  retry_helper_.NotifyDealloc();
}

void BFCAllocator::DeallocateRawInternal(void* ptr) {
  if (ptr == nullptr) {
    VLOG(2) << "tried to deallocate nullptr";
    return;
  }
  mutex_lock l(lock_);

  // Find the chunk from the ptr.
  BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
  CHECK(h != kInvalidChunkHandle);
  // Record chunk information before it's freed.
  Chunk* chunk = ChunkFromHandle(h);
  void* chunk_ptr = chunk->ptr;
  int64 req_bytes = chunk->requested_size;
  int64 alloc_bytes = chunk->size;

  MarkFree(h);

  // Consider coalescing it.
  if (timing_counter_) {
    InsertFreeChunkIntoBin(h);
    timestamped_chunks_.push_back(h);
  } else {
    InsertFreeChunkIntoBin(TryToCoalesce(h, false));
  }

  // TraceMe needs to be added after MarkFree and InsertFreeChunkIntoBin for
  // correct aggregation stats (bytes_in_use, fragmentation).
  AddTraceMe("MemoryDeallocation", chunk_ptr, req_bytes, alloc_bytes);

  if (VLOG_IS_ON(4)) {
    LOG(INFO) << "F: " << RenderOccupancy();
  }
}

// Merges h1 and h2 when Chunk(h1)->next is h2 and Chunk(h2)->prev is h1.
// We merge Chunk(h2) into Chunk(h1).
void BFCAllocator::Merge(BFCAllocator::ChunkHandle h1,
                         BFCAllocator::ChunkHandle h2) {
  Chunk* c1 = ChunkFromHandle(h1);
  Chunk* c2 = ChunkFromHandle(h2);
  // We can only merge chunks that are not in use.
  CHECK(!c1->in_use() && !c2->in_use());

  // c1's prev doesn't change, still points to the same ptr, and is
  // still not in use.

  // Fix up neighbor pointers
  //
  // c1 <-> c2 <-> c3 should become
  // c1 <-> c3

  BFCAllocator::ChunkHandle h3 = c2->next;
  c1->next = h3;
  CHECK(c2->prev == h1);
  if (h3 != kInvalidChunkHandle) {
    BFCAllocator::Chunk* c3 = ChunkFromHandle(h3);
    c3->prev = h1;
  }

  // Set the new size
  c1->size += c2->size;

  // Pick latest free time.
  c1->freed_at_count = std::max(c1->freed_at_count, c2->freed_at_count);

  DeleteChunk(h2);
}

void BFCAllocator::DeleteChunk(ChunkHandle h) {
  // Delete h and cleanup all state
  Chunk* c = ChunkFromHandle(h);
  // VLOG(4) << "Removing: " << c->ptr;
  region_manager_.erase(c->ptr);
  DeallocateChunk(h);
}

void BFCAllocator::InsertFreeChunkIntoBin(BFCAllocator::ChunkHandle h) {
  Chunk* c = ChunkFromHandle(h);
  CHECK(!c->in_use() && (c->bin_num == kInvalidBinNum));
  BinNum bin_num = BinNumForSize(c->size);
  Bin* new_bin = BinFromIndex(bin_num);
  c->bin_num = bin_num;
  new_bin->free_chunks.insert(h);
}

void BFCAllocator::RemoveFreeChunkIterFromBin(
    BFCAllocator::Bin::FreeChunkSet* free_chunks,
    const BFCAllocator::Bin::FreeChunkSet::iterator& citer) {
  ChunkHandle h = *citer;
  Chunk* c = ChunkFromHandle(h);
  CHECK(!c->in_use() && (c->bin_num != kInvalidBinNum));
  free_chunks->erase(citer);
  c->bin_num = kInvalidBinNum;
}

void BFCAllocator::RemoveFreeChunkFromBin(BFCAllocator::ChunkHandle h) {
  Chunk* c = ChunkFromHandle(h);
  CHECK(!c->in_use() && (c->bin_num != kInvalidBinNum));
  CHECK_GT(BinFromIndex(c->bin_num)->free_chunks.erase(h), 0)
      << "Could not find chunk in bin";
  c->bin_num = kInvalidBinNum;
}

void BFCAllocator::MarkFree(BFCAllocator::ChunkHandle h) {
  Chunk* c = ChunkFromHandle(h);
  CHECK(c->in_use() && (c->bin_num == kInvalidBinNum));

  // Mark the chunk as no longer in use.
  c->allocation_id = -1;

  // Optionally record the free time.
  if (timing_counter_) {
    c->freed_at_count = timing_counter_->next();
  }

  // Updates the stats.
  stats_.bytes_in_use -= c->size;

  if (ShouldRecordOpName()) {
    c->action_count = ++action_counter_;
    uint64 slot = c->action_count % kMemDebugHistorySize;
    size_history_[slot] = stats_.bytes_in_use;
  }
}

BFCAllocator::ChunkHandle BFCAllocator::TryToCoalesce(ChunkHandle h,
                                                      bool ignore_freed_at) {
  Chunk* c = ChunkFromHandle(h);
  if ((!ignore_freed_at) && c->freed_at_count > 0) return h;
  ChunkHandle coalesced_chunk = h;

  // If the next chunk is free, merge it into c and delete it.
  if (c->next != kInvalidChunkHandle && !ChunkFromHandle(c->next)->in_use()) {
    Chunk* n = ChunkFromHandle(c->next);
    if ((n->freed_at_count == 0) || ignore_freed_at) {
      VLOG(4) << "Merging c->next " << n->ptr << " with c " << c->ptr;
      RemoveFreeChunkFromBin(c->next);
      Merge(h, c->next);
    }
  }

  // If the previous chunk is free, merge c into it and delete c.
  if (c->prev != kInvalidChunkHandle && !ChunkFromHandle(c->prev)->in_use()) {
    Chunk* n = ChunkFromHandle(c->prev);
    if ((n->freed_at_count == 0) || ignore_freed_at) {
      VLOG(4) << "Merging c " << c->ptr << " into c->prev " << n->ptr;
      coalesced_chunk = c->prev;
      RemoveFreeChunkFromBin(c->prev);
      Merge(c->prev, h);
    }
  }

  return coalesced_chunk;
}

void BFCAllocator::SetSafeFrontier(uint64 count) {
  uint64 current = safe_frontier_.load(std::memory_order_relaxed);
  while (count > current) {
    if (safe_frontier_.compare_exchange_strong(current, count)) {
      retry_helper_.NotifyDealloc();
      return;
    } else {
      current = safe_frontier_.load(std::memory_order_relaxed);
    }
  }
}

bool BFCAllocator::MergeTimestampedChunks(size_t required_bytes) {
  VLOG(1) << "MergeTimestampedChunks queue_len=" << timestamped_chunks_.size()
          << " required_bytes=" << required_bytes;
  bool satisfied = (required_bytes == 0);
  std::vector<void*> to_merge;
  std::deque<ChunkHandle> new_ts_queue;
  while (!timestamped_chunks_.empty()) {
    ChunkHandle h = timestamped_chunks_.front();
    timestamped_chunks_.pop_front();
    DCHECK_NE(h, kInvalidChunkHandle);
    Chunk* c = ChunkFromHandle(h);
    // It's possible this chunk has already been merged, so refetch and retest
    // the handle.
    h = region_manager_.get_handle(c->ptr);
    if (h == kInvalidChunkHandle) {
      continue;
    }
    if (c->in_use() || (c->bin_num == kInvalidBinNum)) {
      // This chunk has already been reallocated.
      continue;
    }
    if (c->freed_at_count == 0) {
      to_merge.push_back(c->ptr);
      continue;
    }
    // Chunk should be free and assigned to a bin.
    DCHECK_NE(c->bin_num, kInvalidBinNum);
    if (c->freed_at_count < safe_frontier_) {
      c->freed_at_count = 0;
      to_merge.push_back(c->ptr);
    } else if (required_bytes > 0) {
      to_merge.push_back(c->ptr);
    } else {
      new_ts_queue.push_back(h);
    }
  }
  DCHECK(timestamped_chunks_.empty());
  std::swap(timestamped_chunks_, new_ts_queue);

  // At this point all candidate chunks have been moved from
  // timestamped_chunks_ to to_merge.  If this is a standard merge
  // (required_bytes == 0) then merge them all; otherwise merge just until a
  // Chunk of the required size is produced.
  for (int ci = 0, end = to_merge.size(); ci < end; ++ci) {
    void* ptr = to_merge[ci];
    // It's possible that the Chunk associated with this memory location got
    // merged and deallocated in a prior iteration, so refetch the handle and
    // retest.
    ChunkHandle h = region_manager_.get_handle(ptr);
    if (h == kInvalidChunkHandle) continue;
    if (required_bytes == 0 || !satisfied) {
      Chunk* c = ChunkFromHandle(h);
      DCHECK_NE(c->bin_num, kInvalidBinNum);
      DCHECK(!c->in_use());
      RemoveFreeChunkFromBin(h);
      ChunkHandle new_h = TryToCoalesce(h, (required_bytes > 0));
      InsertFreeChunkIntoBin(new_h);
      if (required_bytes > 0) {
        c = ChunkFromHandle(new_h);
        if (new_h != h && c->freed_at_count > 0) {
          timestamped_chunks_.push_back(new_h);
        }
        if (c->size >= required_bytes) {
          satisfied = true;
        }
      }
    } else {
      // We were force merging Chunks with unsafe timestamps, but managed
      // to create a satisfying Chunk, so just requeue the rest.
      timestamped_chunks_.push_back(h);
    }
  }
  return satisfied;
}

bool BFCAllocator::TracksAllocationSizes() const { return true; }

size_t BFCAllocator::RequestedSize(const void* ptr) const {
  CHECK(ptr);
  mutex_lock l(lock_);
  BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
  CHECK(h != kInvalidChunkHandle)
      << "Asked for requested size of pointer we never allocated: " << ptr;
  const BFCAllocator::Chunk* c = ChunkFromHandle(h);
  return c->requested_size;
}

size_t BFCAllocator::AllocatedSize(const void* ptr) const {
  mutex_lock l(lock_);
  BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
  CHECK(h != kInvalidChunkHandle)
      << "Asked for allocated size of pointer we never allocated: " << ptr;
  const BFCAllocator::Chunk* c = ChunkFromHandle(h);
  return c->size;
}

int64 BFCAllocator::AllocationId(const void* ptr) const {
  mutex_lock l(lock_);
  BFCAllocator::ChunkHandle h = region_manager_.get_handle(ptr);
  CHECK(h != kInvalidChunkHandle)
      << "Asked for allocation id of pointer we never allocated: " << ptr;
  const BFCAllocator::Chunk* c = ChunkFromHandle(h);
  return c->allocation_id;
}

namespace {

void RenderRegion(char* rendered, const size_t resolution,
                  const size_t total_render_size, const size_t offset,
                  const void* base_ptr, const void* ptr, const size_t size,
                  const char c) {
  const char* base_ptr_c = static_cast<const char*>(base_ptr);
  const char* ptr_c = static_cast<const char*>(ptr);

  size_t start_location =
      ((ptr_c - base_ptr_c + offset) * resolution) / total_render_size;
  CHECK_GE(start_location, 0);
  CHECK_LT(start_location, resolution);
  size_t end_location =
      ((ptr_c + size - 1 - base_ptr_c + offset) * resolution) /
      total_render_size;
  CHECK_GE(end_location, 0);
  CHECK_LT(end_location, resolution);

  for (size_t i = start_location; i <= end_location; ++i) {
    rendered[i] = c;
  }
}

}  // namespace

string BFCAllocator::RenderOccupancy() {
  // Make a buffer for the ASCII-art representation.
  const size_t resolution = 100;
  char rendered[resolution];

  // Compute the total region size to render over
  size_t total_region_size = 0;
  for (const auto& region : region_manager_.regions()) {
    total_region_size += region.memory_size();
  }

  if (total_region_size == 0) {
    return "<allocator contains no memory>";
  }

  // Start out with everything empty
  RenderRegion(rendered, resolution, total_region_size, 0, nullptr, nullptr,
               total_region_size, '_');

  size_t region_offset = 0;
  for (const auto& region : region_manager_.regions()) {
    ChunkHandle h = region_manager_.get_handle(region.ptr());
    // Then render each chunk left to right.
    while (h != kInvalidChunkHandle) {
      Chunk* c = ChunkFromHandle(h);
      if (c->in_use()) {
        // Render the wasted space
        size_t wasted = c->size - c->requested_size;
        if (wasted > 0) {
          RenderRegion(rendered, resolution, total_region_size,
                       region_offset + c->requested_size, region.ptr(), c->ptr,
                       wasted, 'x');
        }
        // Then the occupied space
        RenderRegion(rendered, resolution, total_region_size, region_offset,
                     region.ptr(), c->ptr, c->requested_size, '*');
      }
      h = c->next;
    }
    region_offset += region.memory_size();
  }

  return string(rendered, resolution);
}

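// The returned occupancy string is a fixed 100-character strip, for example
// something like "*****xx**____________________", where '*' marks
// client-requested bytes in use, 'x' marks padding inside in-use chunks
// (size - requested_size), and '_' marks free space, with each character
// covering roughly 1/100th of the total region bytes.
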
void BFCAllocator::DumpMemoryLog(size_t num_bytes) {
  const std::array<BinDebugInfo, kNumBins> bin_infos = get_bin_debug_info();
  LOG(INFO) << "BFCAllocator dump for " << Name();
  for (BinNum bin_num = 0; bin_num < kNumBins; bin_num++) {
    Bin* b = BinFromIndex(bin_num);
    const BinDebugInfo& bin_info = bin_infos[bin_num];
    CHECK_EQ(b->free_chunks.size(),
             bin_info.total_chunks_in_bin - bin_info.total_chunks_in_use);

    LOG(INFO) << "Bin (" << b->bin_size
              << "): \tTotal Chunks: " << bin_info.total_chunks_in_bin
              << ", Chunks in use: " << bin_info.total_chunks_in_use << ". "
              << strings::HumanReadableNumBytes(bin_info.total_bytes_in_bin)
              << " allocated for chunks. "
              << strings::HumanReadableNumBytes(bin_info.total_bytes_in_use)
              << " in use in bin. "
              << strings::HumanReadableNumBytes(
                     bin_info.total_requested_bytes_in_use)
              << " client-requested in use in bin.";
  }

  // Find the bin that we would have liked to allocate in, so we
  // can get some further analysis about fragmentation.
  Bin* b = BinForSize(num_bytes);

  LOG(INFO) << "Bin for " << strings::HumanReadableNumBytes(num_bytes)
            << " was " << strings::HumanReadableNumBytes(b->bin_size)
            << ", Chunk State: ";

  for (ChunkHandle h : b->free_chunks) {
    Chunk* c = ChunkFromHandle(h);
    LOG(INFO) << c->DebugString(this, true);
  }

  // Next show the chunks that are in use, and also summarize their
  // number by size.
  std::map<size_t, int> in_use_by_size;
  for (const auto& region : region_manager_.regions()) {
    LOG(INFO) << "Next region of size " << region.memory_size();
    ChunkHandle h = region_manager_.get_handle(region.ptr());
    while (h != kInvalidChunkHandle) {
      const Chunk* c = ChunkFromHandle(h);
      if (c->in_use()) {
        in_use_by_size[c->size]++;
      }
      string buf = strings::StrCat(
          (c->in_use() ? "InUse" : "Free "), " at ",
          strings::Hex(reinterpret_cast<uint64>(c->ptr)), " of size ", c->size);
      if (ShouldRecordOpName()) {
        strings::StrAppend(&buf, " by op ", c->GetDebugOpName(),
                           " action_count ", c->action_count, " step ",
                           c->step_id);
      }
      strings::StrAppend(&buf, " next ", c->next);
      if (timing_counter_) {
        strings::StrAppend(&buf, " freed_at_count ", c->freed_at_count);
      }
      LOG(INFO) << buf;
      h = c->next;
    }
  }

  LOG(INFO) << "     Summary of in-use Chunks by size: ";
  size_t total_bytes = 0;
  for (auto& it : in_use_by_size) {
    LOG(INFO) << it.second << " Chunks of size " << it.first << " totalling "
              << strings::HumanReadableNumBytes(it.first * it.second);
    total_bytes += (it.first * it.second);
  }
  LOG(INFO) << "Sum Total of in-use chunks: "
            << strings::HumanReadableNumBytes(total_bytes);
  LOG(INFO) << "total_region_allocated_bytes_: "
            << total_region_allocated_bytes_
            << " memory_limit_: " << memory_limit_ << " available bytes: "
            << (memory_limit_ - total_region_allocated_bytes_)
            << " curr_region_allocation_bytes_: "
            << curr_region_allocation_bytes_;
  LOG(INFO) << "Stats: \n" << stats_.DebugString();
}

void BFCAllocator::MaybeWriteMemoryMap() {
  const char* gpu_memory_map_file = std::getenv("TF_BFC_MEMORY_DUMP");
  if (gpu_memory_map_file != nullptr) {
    std::unique_ptr<WritableFile> dump_file;
    string file_name = strings::StrCat(gpu_memory_map_file, "_", Name(), ".",
                                       Env::Default()->NowMicros());
    Status status = Env::Default()->NewWritableFile(file_name, &dump_file);
    if (!status.ok()) {
      LOG(ERROR) << "Failed to open file " << file_name;
      return;
    }
    MemoryDump md = RecordMemoryMapInternal();
    status = dump_file->Append(md.SerializeAsString());
    if (!status.ok()) {
      LOG(ERROR) << "Error on writing to file " << gpu_memory_map_file << ": "
                 << status;
    }
  }
}

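// Usage note: if the TF_BFC_MEMORY_DUMP environment variable is set to a path
// prefix (e.g. a hypothetical TF_BFC_MEMORY_DUMP=/tmp/bfc_dump), a failed
// allocation in AllocateRawInternal() calls MaybeWriteMemoryMap() above, which
// writes a serialized MemoryDump proto to a file named
// "<prefix>_<allocator name>.<timestamp in microseconds>", using the message
// definitions from bfc_memory_map.pb.h included at the top of this file.
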
MemoryDump BFCAllocator::RecordMemoryMap() {
  mutex_lock l(lock_);
  return RecordMemoryMapInternal();
}

MemoryDump BFCAllocator::RecordMemoryMapInternal() {
  MemoryDump md;
  md.set_allocator_name(Name());

  // Record the general stats
  MemAllocatorStats* mas = md.mutable_stats();
  mas->set_num_allocs(stats_.num_allocs);
  mas->set_bytes_in_use(stats_.bytes_in_use);
  mas->set_peak_bytes_in_use(stats_.peak_bytes_in_use);
  mas->set_largest_alloc_size(stats_.largest_alloc_size);

  // Record summary data for every bin.
  const std::array<BinDebugInfo, kNumBins> bin_infos = get_bin_debug_info();
  for (BinNum bin_num = 0; bin_num < kNumBins; bin_num++) {
    Bin* b = BinFromIndex(bin_num);
    const BinDebugInfo& bin_info = bin_infos[bin_num];
    DCHECK_EQ(b->free_chunks.size(),
              bin_info.total_chunks_in_bin - bin_info.total_chunks_in_use);
    BinSummary* bs = md.add_bin_summary();
    bs->set_bin(bin_num);
    bs->set_total_bytes_in_use(bin_info.total_bytes_in_use);
    bs->set_total_bytes_in_bin(bin_info.total_bytes_in_bin);
    bs->set_total_chunks_in_use(bin_info.total_chunks_in_use);
    bs->set_total_chunks_in_bin(bin_info.total_chunks_in_bin);
  }

  // Record state of every defined Chunk.
  for (const auto& region : region_manager_.regions()) {
    ChunkHandle h = region_manager_.get_handle(region.ptr());
    while (h != kInvalidChunkHandle) {
      const Chunk* c = ChunkFromHandle(h);
      MemChunk* mc = md.add_chunk();
      mc->set_in_use(c->in_use());
      mc->set_address(reinterpret_cast<uint64>(c->ptr));
      mc->set_size(c->size);
      mc->set_requested_size(c->requested_size);
      mc->set_bin(c->bin_num);
      mc->set_op_name(c->GetDebugOpName());
      mc->set_step_id(c->step_id);
      mc->set_action_count(c->action_count);
      if (timing_counter_) {
        mc->set_freed_at_count(c->in_use() ? 0 : c->freed_at_count);
      }
      h = c->next;
    }
  }

  mas->set_fragmentation_metric(GetFragmentation());

  // Record the recent size history
  uint64 history_len = std::min(action_counter_, kMemDebugHistorySize);
  for (uint64 i = action_counter_ - history_len; i < action_counter_; ++i) {
    SnapShot* ss = md.add_snap_shot();
    ss->set_action_count(i);
    uint64 slot = i % kMemDebugHistorySize;
    ss->set_size(size_history_[slot]);
  }

  return md;
}

absl::optional<AllocatorStats> BFCAllocator::GetStats() {
  mutex_lock l(lock_);
  return stats_;
}

void BFCAllocator::ClearStats() {
  mutex_lock l(lock_);
  stats_.num_allocs = 0;
  stats_.peak_bytes_in_use = stats_.bytes_in_use;
  stats_.largest_alloc_size = 0;
}

std::array<BFCAllocator::BinDebugInfo, BFCAllocator::kNumBins>
BFCAllocator::get_bin_debug_info() {
  std::array<BinDebugInfo, kNumBins> bin_infos;
  for (const auto& region : region_manager_.regions()) {
    ChunkHandle h = region_manager_.get_handle(region.ptr());
    while (h != kInvalidChunkHandle) {
      const Chunk* c = ChunkFromHandle(h);
      BinNum bin_num = BinNumForSize(c->size);
      BinDebugInfo& bin_info = bin_infos[bin_num];
      bin_info.total_bytes_in_bin += c->size;
      bin_info.total_chunks_in_bin++;
      if (c->in_use()) {
        bin_info.total_bytes_in_use += c->size;
        bin_info.total_requested_bytes_in_use += c->requested_size;
        bin_info.total_chunks_in_use++;
      } else {
        Bin* bin = BinFromIndex(bin_num);
        CHECK_EQ(bin->free_chunks.count(h), 1);
        CHECK_EQ(c->bin_num, bin_num);
      }
      h = c->next;
    }
  }
  return bin_infos;
}

}  // namespace tensorflow