1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include <atomic>
17
18 #include "tensorflow/core/framework/allocator.h"
19 #include "tensorflow/core/framework/allocator_registry.h"
20 #include "tensorflow/core/framework/tracking_allocator.h"
21 #include "tensorflow/core/lib/strings/strcat.h"
22 #include "tensorflow/core/lib/strings/stringprintf.h"
23 #include "tensorflow/core/platform/mem.h"
24 #include "tensorflow/core/platform/mutex.h"
25 #include "tensorflow/core/platform/types.h"
26
27 namespace tensorflow {
28
// If true, cpu allocator collects more stats.
// Held in a std::atomic because it is written by Enable/DisableCPUAllocatorStats
// while being read concurrently by CPUAllocator::AllocateRaw/DeallocateRaw on
// other threads; a plain bool would be a data race. Relaxed ordering suffices:
// the flag only gates best-effort statistics collection, so no other memory
// needs to be ordered with it. std::atomic<bool> converts implicitly to bool,
// so existing `if (cpu_allocator_collect_stats)` readers are unaffected.
static std::atomic<bool> cpu_allocator_collect_stats{false};

void EnableCPUAllocatorStats() {
  cpu_allocator_collect_stats.store(true, std::memory_order_relaxed);
}
void DisableCPUAllocatorStats() {
  cpu_allocator_collect_stats.store(false, std::memory_order_relaxed);
}
bool CPUAllocatorStatsEnabled() {
  return cpu_allocator_collect_stats.load(std::memory_order_relaxed);
}
35
// Emit at most this many warnings about the *total* allocated memory
// exceeding TotalAllocationWarningBytes() (checked in AllocateRaw when
// stats collection is enabled).
static const int kMaxTotalAllocationWarnings = 1;

// Emit at most this many warnings about a *single* allocation exceeding
// LargeAllocationWarningBytes().
static const int kMaxSingleAllocationWarnings = 5;

// If cpu_allocator_collect_stats is true, warn when the total allocated memory
// exceeds this threshold (expressed as a fraction of port::AvailableRam()).
static const double kTotalAllocationWarningThreshold = 0.5;

// Individual allocations larger than this amount (as a fraction of
// port::AvailableRam()) will trigger a warning.
static const double kLargeAllocationWarningThreshold = 0.1;
46
47 // Cache first invocation to port::AvailableRam, as it can be expensive.
LargeAllocationWarningBytes()48 static int64_t LargeAllocationWarningBytes() {
49 static int64_t value = static_cast<int64>(port::AvailableRam() *
50 kLargeAllocationWarningThreshold);
51 return value;
52 }
53
TotalAllocationWarningBytes()54 static int64_t TotalAllocationWarningBytes() {
55 static int64_t value = static_cast<int64>(port::AvailableRam() *
56 kTotalAllocationWarningThreshold);
57 return value;
58 }
59
60 namespace {
61
62 // A default Allocator for CPU devices. ProcessState::GetCPUAllocator() will
63 // return a different version that may perform better, but may also lack the
64 // optional stats triggered by the functions above. TODO(tucker): migrate all
65 // uses of cpu_allocator() except tests to use ProcessState instead.
66 class CPUAllocator : public Allocator {
67 public:
CPUAllocator()68 CPUAllocator()
69 : single_allocation_warning_count_(0),
70 total_allocation_warning_count_(0) {}
71
~CPUAllocator()72 ~CPUAllocator() override {}
73
Name()74 string Name() override { return "cpu"; }
75
AllocateRaw(size_t alignment,size_t num_bytes)76 void* AllocateRaw(size_t alignment, size_t num_bytes) override {
77 if (num_bytes > static_cast<size_t>(LargeAllocationWarningBytes()) &&
78 single_allocation_warning_count_ < kMaxSingleAllocationWarnings) {
79 ++single_allocation_warning_count_;
80 LOG(WARNING) << "Allocation of " << num_bytes << " exceeds "
81 << 100 * kLargeAllocationWarningThreshold
82 << "% of free system memory.";
83 }
84
85 void* p = port::AlignedMalloc(num_bytes, alignment);
86 if (cpu_allocator_collect_stats) {
87 const std::size_t alloc_size = port::MallocExtension_GetAllocatedSize(p);
88 mutex_lock l(mu_);
89 ++stats_.num_allocs;
90 stats_.bytes_in_use += alloc_size;
91 stats_.peak_bytes_in_use =
92 std::max<int64>(stats_.peak_bytes_in_use, stats_.bytes_in_use);
93 stats_.largest_alloc_size =
94 std::max<int64>(stats_.largest_alloc_size, alloc_size);
95
96 if (stats_.bytes_in_use > TotalAllocationWarningBytes() &&
97 total_allocation_warning_count_ < kMaxTotalAllocationWarnings) {
98 ++total_allocation_warning_count_;
99 LOG(WARNING) << "Total allocated memory " << stats_.bytes_in_use
100 << "exceeds " << 100 * kTotalAllocationWarningThreshold
101 << "% of free system memory";
102 }
103 }
104 return p;
105 }
106
DeallocateRaw(void * ptr)107 void DeallocateRaw(void* ptr) override {
108 if (cpu_allocator_collect_stats) {
109 const std::size_t alloc_size =
110 port::MallocExtension_GetAllocatedSize(ptr);
111 mutex_lock l(mu_);
112 stats_.bytes_in_use -= alloc_size;
113 }
114 port::AlignedFree(ptr);
115 }
116
GetStats()117 absl::optional<AllocatorStats> GetStats() override {
118 if (!cpu_allocator_collect_stats) return absl::nullopt;
119 mutex_lock l(mu_);
120 return stats_;
121 }
122
ClearStats()123 bool ClearStats() override {
124 if (!cpu_allocator_collect_stats) return false;
125 mutex_lock l(mu_);
126 stats_.num_allocs = 0;
127 stats_.peak_bytes_in_use = stats_.bytes_in_use;
128 stats_.largest_alloc_size = 0;
129 return true;
130 }
131
AllocatedSizeSlow(const void * ptr) const132 size_t AllocatedSizeSlow(const void* ptr) const override {
133 return port::MallocExtension_GetAllocatedSize(ptr);
134 }
135
136 private:
137 mutex mu_;
138 AllocatorStats stats_ TF_GUARDED_BY(mu_);
139
140 // Use <atomic> for single allocations to avoid mutex contention when
141 // statistics are disabled.
142 std::atomic<int> single_allocation_warning_count_;
143 int total_allocation_warning_count_ TF_GUARDED_BY(mu_);
144
145 TF_DISALLOW_COPY_AND_ASSIGN(CPUAllocator);
146 };
147
148 class CPUAllocatorFactory : public AllocatorFactory {
149 public:
CreateAllocator()150 Allocator* CreateAllocator() override { return new CPUAllocator; }
151
CreateSubAllocator(int numa_node)152 SubAllocator* CreateSubAllocator(int numa_node) override {
153 return new CPUSubAllocator(new CPUAllocator);
154 }
155
156 private:
157 class CPUSubAllocator : public SubAllocator {
158 public:
CPUSubAllocator(CPUAllocator * cpu_allocator)159 explicit CPUSubAllocator(CPUAllocator* cpu_allocator)
160 : SubAllocator({}, {}), cpu_allocator_(cpu_allocator) {}
161
Alloc(size_t alignment,size_t num_bytes,size_t * bytes_received)162 void* Alloc(size_t alignment, size_t num_bytes,
163 size_t* bytes_received) override {
164 *bytes_received = num_bytes;
165 return cpu_allocator_->AllocateRaw(alignment, num_bytes);
166 }
167
Free(void * ptr,size_t num_bytes)168 void Free(void* ptr, size_t num_bytes) override {
169 cpu_allocator_->DeallocateRaw(ptr);
170 }
171
SupportsCoalescing() const172 bool SupportsCoalescing() const override { return false; }
173
174 private:
175 CPUAllocator* cpu_allocator_;
176 };
177 };
178
179 REGISTER_MEM_ALLOCATOR("DefaultCPUAllocator", 100, CPUAllocatorFactory);
180 } // namespace
181
182 } // namespace tensorflow
183