• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include <atomic>
17 
18 #include "tensorflow/core/framework/allocator.h"
19 #include "tensorflow/core/framework/allocator_registry.h"
20 #include "tensorflow/core/framework/tracking_allocator.h"
21 #include "tensorflow/core/lib/strings/strcat.h"
22 #include "tensorflow/core/lib/strings/stringprintf.h"
23 #include "tensorflow/core/platform/mem.h"
24 #include "tensorflow/core/platform/mutex.h"
25 #include "tensorflow/core/platform/types.h"
26 
27 namespace tensorflow {
28 
// Process-wide flag: when true, the default CPU allocator records allocation
// statistics (counts, bytes in use, peaks) at some runtime cost.
static bool cpu_allocator_collect_stats = false;

// Turn detailed CPU-allocator statistics collection on.
void EnableCPUAllocatorStats() { cpu_allocator_collect_stats = true; }
// Turn detailed CPU-allocator statistics collection off.
void DisableCPUAllocatorStats() { cpu_allocator_collect_stats = false; }
// Report whether detailed CPU-allocator statistics collection is enabled.
bool CPUAllocatorStatsEnabled() { return cpu_allocator_collect_stats; }
35 
// Warn at most this many times about total allocated memory exceeding
// kTotalAllocationWarningThreshold.
static const int kMaxTotalAllocationWarnings = 1;

// Warn at most this many times about individual allocations exceeding
// kLargeAllocationWarningThreshold.
static const int kMaxSingleAllocationWarnings = 5;

// If cpu_allocator_collect_stats is true, warn when the total allocated memory
// exceeds this fraction of available system RAM.
static const double kTotalAllocationWarningThreshold = 0.5;

// Individual allocations larger than this fraction of available system RAM
// will trigger a warning.
static const double kLargeAllocationWarningThreshold = 0.1;
46 
47 // Cache first invocation to port::AvailableRam, as it can be expensive.
LargeAllocationWarningBytes()48 static int64_t LargeAllocationWarningBytes() {
49   static int64_t value = static_cast<int64>(port::AvailableRam() *
50                                             kLargeAllocationWarningThreshold);
51   return value;
52 }
53 
TotalAllocationWarningBytes()54 static int64_t TotalAllocationWarningBytes() {
55   static int64_t value = static_cast<int64>(port::AvailableRam() *
56                                             kTotalAllocationWarningThreshold);
57   return value;
58 }
59 
60 namespace {
61 
62 // A default Allocator for CPU devices.  ProcessState::GetCPUAllocator() will
63 // return a different version that may perform better, but may also lack the
64 // optional stats triggered by the functions above.  TODO(tucker): migrate all
65 // uses of cpu_allocator() except tests to use ProcessState instead.
66 class CPUAllocator : public Allocator {
67  public:
CPUAllocator()68   CPUAllocator()
69       : single_allocation_warning_count_(0),
70         total_allocation_warning_count_(0) {}
71 
~CPUAllocator()72   ~CPUAllocator() override {}
73 
Name()74   string Name() override { return "cpu"; }
75 
AllocateRaw(size_t alignment,size_t num_bytes)76   void* AllocateRaw(size_t alignment, size_t num_bytes) override {
77     if (num_bytes > static_cast<size_t>(LargeAllocationWarningBytes()) &&
78         single_allocation_warning_count_ < kMaxSingleAllocationWarnings) {
79       ++single_allocation_warning_count_;
80       LOG(WARNING) << "Allocation of " << num_bytes << " exceeds "
81                    << 100 * kLargeAllocationWarningThreshold
82                    << "% of free system memory.";
83     }
84 
85     void* p = port::AlignedMalloc(num_bytes, alignment);
86     if (cpu_allocator_collect_stats) {
87       const std::size_t alloc_size = port::MallocExtension_GetAllocatedSize(p);
88       mutex_lock l(mu_);
89       ++stats_.num_allocs;
90       stats_.bytes_in_use += alloc_size;
91       stats_.peak_bytes_in_use =
92           std::max<int64>(stats_.peak_bytes_in_use, stats_.bytes_in_use);
93       stats_.largest_alloc_size =
94           std::max<int64>(stats_.largest_alloc_size, alloc_size);
95 
96       if (stats_.bytes_in_use > TotalAllocationWarningBytes() &&
97           total_allocation_warning_count_ < kMaxTotalAllocationWarnings) {
98         ++total_allocation_warning_count_;
99         LOG(WARNING) << "Total allocated memory " << stats_.bytes_in_use
100                      << "exceeds " << 100 * kTotalAllocationWarningThreshold
101                      << "% of free system memory";
102       }
103     }
104     return p;
105   }
106 
DeallocateRaw(void * ptr)107   void DeallocateRaw(void* ptr) override {
108     if (cpu_allocator_collect_stats) {
109       const std::size_t alloc_size =
110           port::MallocExtension_GetAllocatedSize(ptr);
111       mutex_lock l(mu_);
112       stats_.bytes_in_use -= alloc_size;
113     }
114     port::AlignedFree(ptr);
115   }
116 
GetStats()117   absl::optional<AllocatorStats> GetStats() override {
118     if (!cpu_allocator_collect_stats) return absl::nullopt;
119     mutex_lock l(mu_);
120     return stats_;
121   }
122 
ClearStats()123   bool ClearStats() override {
124     if (!cpu_allocator_collect_stats) return false;
125     mutex_lock l(mu_);
126     stats_.num_allocs = 0;
127     stats_.peak_bytes_in_use = stats_.bytes_in_use;
128     stats_.largest_alloc_size = 0;
129     return true;
130   }
131 
AllocatedSizeSlow(const void * ptr) const132   size_t AllocatedSizeSlow(const void* ptr) const override {
133     return port::MallocExtension_GetAllocatedSize(ptr);
134   }
135 
136  private:
137   mutex mu_;
138   AllocatorStats stats_ TF_GUARDED_BY(mu_);
139 
140   // Use <atomic> for single allocations to avoid mutex contention when
141   // statistics are disabled.
142   std::atomic<int> single_allocation_warning_count_;
143   int total_allocation_warning_count_ TF_GUARDED_BY(mu_);
144 
145   TF_DISALLOW_COPY_AND_ASSIGN(CPUAllocator);
146 };
147 
148 class CPUAllocatorFactory : public AllocatorFactory {
149  public:
CreateAllocator()150   Allocator* CreateAllocator() override { return new CPUAllocator; }
151 
CreateSubAllocator(int numa_node)152   SubAllocator* CreateSubAllocator(int numa_node) override {
153     return new CPUSubAllocator(new CPUAllocator);
154   }
155 
156  private:
157   class CPUSubAllocator : public SubAllocator {
158    public:
CPUSubAllocator(CPUAllocator * cpu_allocator)159     explicit CPUSubAllocator(CPUAllocator* cpu_allocator)
160         : SubAllocator({}, {}), cpu_allocator_(cpu_allocator) {}
161 
Alloc(size_t alignment,size_t num_bytes,size_t * bytes_received)162     void* Alloc(size_t alignment, size_t num_bytes,
163                 size_t* bytes_received) override {
164       *bytes_received = num_bytes;
165       return cpu_allocator_->AllocateRaw(alignment, num_bytes);
166     }
167 
Free(void * ptr,size_t num_bytes)168     void Free(void* ptr, size_t num_bytes) override {
169       cpu_allocator_->DeallocateRaw(ptr);
170     }
171 
SupportsCoalescing() const172     bool SupportsCoalescing() const override { return false; }
173 
174    private:
175     CPUAllocator* cpu_allocator_;
176   };
177 };
178 
179 REGISTER_MEM_ALLOCATOR("DefaultCPUAllocator", 100, CPUAllocatorFactory);
180 }  // namespace
181 
182 }  // namespace tensorflow
183