1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include <atomic>
17
18 #include "tensorflow/core/framework/allocator.h"
19 #include "tensorflow/core/framework/allocator_registry.h"
20 #include "tensorflow/core/framework/tracking_allocator.h"
21 #include "tensorflow/core/lib/strings/strcat.h"
22 #include "tensorflow/core/lib/strings/stringprintf.h"
23 #include "tensorflow/core/platform/mem.h"
24 #include "tensorflow/core/platform/mutex.h"
25 #include "tensorflow/core/platform/types.h"
26
27 namespace tensorflow {
28
// If true, cpu allocator collects more stats.
// Atomic because it is flipped by the functions below while being read
// concurrently by CPUAllocator::AllocateRaw/DeallocateRaw on other threads;
// a plain bool would be a data race.
static std::atomic<bool> cpu_allocator_collect_stats{false};

void EnableCPUAllocatorStats() { cpu_allocator_collect_stats = true; }
void DisableCPUAllocatorStats() { cpu_allocator_collect_stats = false; }
bool CPUAllocatorStatsEnabled() { return cpu_allocator_collect_stats; }
35
// Cap on how many "total allocated memory" warnings are ever logged.
static constexpr int kMaxTotalAllocationWarnings = 1;

// Cap on how many "single large allocation" warnings are ever logged.
static constexpr int kMaxSingleAllocationWarnings = 5;

// If cpu_allocator_collect_stats is true, warn when the total allocated memory
// exceeds this threshold (expressed as a fraction of available RAM).
static constexpr double kTotalAllocationWarningThreshold = 0.5;

// Individual allocations larger than this fraction of available RAM will
// trigger a warning.
static constexpr double kLargeAllocationWarningThreshold = 0.1;
46
47 // Cache first invocation to port::AvailableRam, as it can be expensive.
LargeAllocationWarningBytes()48 static int64_t LargeAllocationWarningBytes() {
49 static int64_t value = static_cast<int64>(port::AvailableRam() *
50 kLargeAllocationWarningThreshold);
51 return value;
52 }
53
TotalAllocationWarningBytes()54 static int64_t TotalAllocationWarningBytes() {
55 static int64_t value = static_cast<int64>(port::AvailableRam() *
56 kTotalAllocationWarningThreshold);
57 return value;
58 }
59
60 namespace {
61
62 // A default Allocator for CPU devices. ProcessState::GetCPUAllocator() will
63 // return a different version that may perform better, but may also lack the
64 // optional stats triggered by the functions above. TODO(tucker): migrate all
65 // uses of cpu_allocator() except tests to use ProcessState instead.
66 class CPUAllocator : public Allocator {
67 public:
CPUAllocator()68 CPUAllocator()
69 : single_allocation_warning_count_(0),
70 total_allocation_warning_count_(0) {}
71
~CPUAllocator()72 ~CPUAllocator() override {}
73
Name()74 string Name() override { return "cpu"; }
75
AllocateRaw(size_t alignment,size_t num_bytes)76 void* AllocateRaw(size_t alignment, size_t num_bytes) override {
77 if (num_bytes > static_cast<size_t>(LargeAllocationWarningBytes()) &&
78 single_allocation_warning_count_ < kMaxSingleAllocationWarnings) {
79 ++single_allocation_warning_count_;
80 LOG(WARNING) << "Allocation of " << num_bytes << " exceeds "
81 << 100 * kLargeAllocationWarningThreshold
82 << "% of free system memory.";
83 }
84
85 void* p = port::AlignedMalloc(num_bytes, alignment);
86 if (cpu_allocator_collect_stats) {
87 const std::size_t alloc_size = port::MallocExtension_GetAllocatedSize(p);
88 mutex_lock l(mu_);
89 ++stats_.num_allocs;
90 stats_.bytes_in_use += alloc_size;
91 stats_.peak_bytes_in_use =
92 std::max<int64>(stats_.peak_bytes_in_use, stats_.bytes_in_use);
93 stats_.largest_alloc_size =
94 std::max<int64>(stats_.largest_alloc_size, alloc_size);
95
96 if (stats_.bytes_in_use > TotalAllocationWarningBytes() &&
97 total_allocation_warning_count_ < kMaxTotalAllocationWarnings) {
98 ++total_allocation_warning_count_;
99 LOG(WARNING) << "Total allocated memory " << stats_.bytes_in_use
100 << "exceeds " << 100 * kTotalAllocationWarningThreshold
101 << "% of free system memory";
102 }
103 }
104 return p;
105 }
106
DeallocateRaw(void * ptr)107 void DeallocateRaw(void* ptr) override {
108 if (cpu_allocator_collect_stats) {
109 const std::size_t alloc_size =
110 port::MallocExtension_GetAllocatedSize(ptr);
111 mutex_lock l(mu_);
112 stats_.bytes_in_use -= alloc_size;
113 }
114 port::AlignedFree(ptr);
115 }
116
GetStats()117 absl::optional<AllocatorStats> GetStats() override {
118 mutex_lock l(mu_);
119 return stats_;
120 }
121
ClearStats()122 void ClearStats() override {
123 mutex_lock l(mu_);
124 stats_.num_allocs = 0;
125 stats_.peak_bytes_in_use = stats_.bytes_in_use;
126 stats_.largest_alloc_size = 0;
127 }
128
AllocatedSizeSlow(const void * ptr) const129 size_t AllocatedSizeSlow(const void* ptr) const override {
130 return port::MallocExtension_GetAllocatedSize(ptr);
131 }
132
133 private:
134 mutex mu_;
135 AllocatorStats stats_ TF_GUARDED_BY(mu_);
136
137 // Use <atomic> for single allocations to avoid mutex contention when
138 // statistics are disabled.
139 std::atomic<int> single_allocation_warning_count_;
140 int total_allocation_warning_count_ TF_GUARDED_BY(mu_);
141
142 TF_DISALLOW_COPY_AND_ASSIGN(CPUAllocator);
143 };
144
145 class CPUAllocatorFactory : public AllocatorFactory {
146 public:
CreateAllocator()147 Allocator* CreateAllocator() override { return new CPUAllocator; }
148
CreateSubAllocator(int numa_node)149 SubAllocator* CreateSubAllocator(int numa_node) override {
150 return new CPUSubAllocator(new CPUAllocator);
151 }
152
153 private:
154 class CPUSubAllocator : public SubAllocator {
155 public:
CPUSubAllocator(CPUAllocator * cpu_allocator)156 explicit CPUSubAllocator(CPUAllocator* cpu_allocator)
157 : SubAllocator({}, {}), cpu_allocator_(cpu_allocator) {}
158
Alloc(size_t alignment,size_t num_bytes,size_t * bytes_received)159 void* Alloc(size_t alignment, size_t num_bytes,
160 size_t* bytes_received) override {
161 *bytes_received = num_bytes;
162 return cpu_allocator_->AllocateRaw(alignment, num_bytes);
163 }
164
Free(void * ptr,size_t num_bytes)165 void Free(void* ptr, size_t num_bytes) override {
166 cpu_allocator_->DeallocateRaw(ptr);
167 }
168
SupportsCoalescing() const169 bool SupportsCoalescing() const override { return false; }
170
171 private:
172 CPUAllocator* cpu_allocator_;
173 };
174 };
175
// Registers this factory as the default CPU allocator at priority 100, so
// cpu_allocator() resolves to a CPUAllocator unless a higher-priority
// registration exists.
REGISTER_MEM_ALLOCATOR("DefaultCPUAllocator", 100, CPUAllocatorFactory);
177 } // namespace
178
179 } // namespace tensorflow
180