/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_CORE_FRAMEWORK_ALLOCATOR_H_
#define TENSORFLOW_CORE_FRAMEWORK_ALLOCATOR_H_

#include <stdlib.h>

#include <functional>
#include <limits>
#include <vector>

#include "absl/strings/string_view.h"
#include "absl/types/optional.h"
#include "tensorflow/core/framework/numeric_types.h"
#include "tensorflow/core/framework/type_traits.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/platform/numa.h"
#include "tensorflow/core/platform/types.h"

namespace tensorflow {

// Attributes for a single allocation call. Different calls to the same
// allocator could potentially have different allocation attributes.
struct AllocationAttributes {
  AllocationAttributes() = default;

  AllocationAttributes(bool no_retry_on_failure, bool allocation_will_be_logged,
                       std::function<uint64()>* freed_by_func)
      : no_retry_on_failure(no_retry_on_failure),
        allocation_will_be_logged(allocation_will_be_logged),
        freed_by_func(freed_by_func) {}

  // If the first attempt to allocate the memory fails, the allocation should
  // return immediately without retrying.  An example use case is an optional
  // scratch space, where a failure has only a performance impact.
  bool no_retry_on_failure = false;
  // If a Tensor is allocated without the following set to true, then
  // it is logged as an unknown allocation. During execution Tensors
  // should be allocated through the OpKernelContext, which records
  // which Op is performing the allocation and sets this flag to true.
  bool allocation_will_be_logged = false;
  // EXPERIMENTAL: If provided, then evaluates to a timing count such that only
  // a memory chunk whose freed_at_count is at this value or earlier may be
  // returned.
  std::function<uint64()>* freed_by_func = nullptr;  // Not owned.

  TF_DISALLOW_COPY_AND_ASSIGN(AllocationAttributes);
};
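
// Example use (a minimal sketch; `allocator` and `num_bytes` stand in for an
// Allocator* and a size obtained elsewhere):
//   // Attributes for an optional scratch buffer: if the first allocation
//   // attempt fails, give up immediately rather than retrying, since the
//   // failure only costs performance.
//   AllocationAttributes scratch_attr(/*no_retry_on_failure=*/true,
//                                     /*allocation_will_be_logged=*/false,
//                                     /*freed_by_func=*/nullptr);
//   void* scratch = allocator->AllocateRaw(Allocator::kAllocatorAlignment,
//                                          num_bytes, scratch_attr);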

// If defined, the runtime will cache Op names in thread-local memory
// and some allocators will try to tag allocations with the requesting Op.
#ifdef TENSORFLOW_MEM_DEBUG
extern thread_local const char* pending_op_name;
extern thread_local uint64 pending_step_id;
#define MEMDEBUG_CACHE_OP(N) \
  do {                       \
    pending_op_name = (N);   \
  } while (0)
#define MEMDEBUG_CACHE_STEPID(N) \
  do {                           \
    pending_step_id = (N);       \
  } while (0)
#define MEMDEBUG_CACHE_VAL pending_op_name
#else
#define MEMDEBUG_CACHE_OP(N) \
  do {                       \
  } while (0)
#define MEMDEBUG_CACHE_STEPID(N) \
  do {                           \
  } while (0)
#define MEMDEBUG_CACHE_VAL nullptr
#endif
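
// Example use (sketch; these macros are no-ops unless TENSORFLOW_MEM_DEBUG is
// defined, and the op name and step id below are placeholders):
//   MEMDEBUG_CACHE_OP("MyOp");
//   MEMDEBUG_CACHE_STEPID(step_id);
//   // Subsequent allocations made by participating allocators on this thread
//   // may be tagged with MEMDEBUG_CACHE_VAL.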

// Runtime statistics collected by an allocator. Exactly the same as
// stream_executor::AllocatorStats, but independently defined to preserve the
// mutual independence of StreamExecutor and TensorFlow.
struct AllocatorStats {
  int64 num_allocs;          // Number of allocations.
  int64 bytes_in_use;        // Number of bytes in use.
  int64 peak_bytes_in_use;   // The peak bytes in use.
  int64 largest_alloc_size;  // The largest single allocation seen.

  // The upper limit of bytes of user-allocatable device memory, if such a
  // limit is known.
  absl::optional<int64> bytes_limit;

  // Stats for reserved memory usage.
  int64 bytes_reserved;       // Number of bytes reserved.
  int64 peak_bytes_reserved;  // The peak number of bytes reserved.
  // The upper limit on the number of bytes of reservable memory,
  // if such a limit is known.
  absl::optional<int64> bytes_reservable_limit;

  AllocatorStats()
      : num_allocs(0),
        bytes_in_use(0),
        peak_bytes_in_use(0),
        largest_alloc_size(0),
        bytes_reserved(0),
        peak_bytes_reserved(0) {}

  string DebugString() const;
};

// Allocator is an abstract interface for allocating and deallocating
// device memory.
class Allocator {
 public:
  // Align to a 64-byte boundary.
  static constexpr size_t kAllocatorAlignment = 64;

  virtual ~Allocator();

  // Returns a string identifying this allocator.
  virtual string Name() = 0;

  // Returns an uninitialized block of memory that is "num_bytes" bytes
  // in size.  The returned pointer is guaranteed to be aligned to a
  // multiple of "alignment" bytes.
  // REQUIRES: "alignment" is a power of 2.
  virtual void* AllocateRaw(size_t alignment, size_t num_bytes) = 0;

  // Returns an uninitialized block of memory that is "num_bytes" bytes
  // in size with the specified allocation attributes.  The returned pointer is
  // guaranteed to be aligned to a multiple of "alignment" bytes.
  // REQUIRES: "alignment" is a power of 2.
  virtual void* AllocateRaw(size_t alignment, size_t num_bytes,
                            const AllocationAttributes& allocation_attr) {
    // The default behavior is to use the implementation without any allocation
    // attributes.
    return AllocateRaw(alignment, num_bytes);
  }

  // Deallocates a block of memory pointed to by "ptr".
  // REQUIRES: "ptr" was previously returned by a call to AllocateRaw.
  virtual void DeallocateRaw(void* ptr) = 0;

  // Returns true if this allocator tracks the sizes of allocations.
  // RequestedSize and AllocatedSize must be overridden if
  // TracksAllocationSizes is overridden to return true.
  virtual bool TracksAllocationSizes() const { return false; }

  // Returns true if this allocator allocates an opaque handle rather than the
  // requested number of bytes.
  //
  // This method returns false for most allocators, but may be used by
  // special-case allocators that track tensor usage. If this method returns
  // true, AllocateRaw() should be invoked for all values of `num_bytes`,
  // including 0.
  //
  // NOTE: It is the caller's responsibility to track whether an allocated
  // object is a buffer or an opaque handle. In particular, when this method
  // returns `true`, users of this allocator must not run any constructors or
  // destructors for complex objects, since there is no backing store for the
  // tensor in which to place their outputs.
  virtual bool AllocatesOpaqueHandle() const { return false; }

  // Returns the user-requested size of the data allocated at
  // 'ptr'.  Note that the actual buffer allocated might be larger
  // than requested, but this function returns the size requested by
  // the user.
  //
  // REQUIRES: TracksAllocationSizes() is true.
  //
  // REQUIRES: 'ptr!=nullptr' and points to a buffer previously
  // allocated by this allocator.
  virtual size_t RequestedSize(const void* ptr) const {
    CHECK(false) << "allocator doesn't track sizes";
    return size_t(0);
  }

  // Returns the allocated size of the buffer at 'ptr' if known,
  // otherwise returns RequestedSize(ptr). AllocatedSize(ptr) is
  // guaranteed to be >= RequestedSize(ptr).
  //
  // REQUIRES: TracksAllocationSizes() is true.
  //
  // REQUIRES: 'ptr!=nullptr' and points to a buffer previously
  // allocated by this allocator.
  virtual size_t AllocatedSize(const void* ptr) const {
    return RequestedSize(ptr);
  }

  // Returns either 0 or an identifier assigned to the buffer at 'ptr'
  // when the buffer was returned by AllocateRaw. If non-zero, the
  // identifier differs from every other ID assigned by this
  // allocator.
  //
  // REQUIRES: TracksAllocationSizes() is true.
  //
  // REQUIRES: 'ptr!=nullptr' and points to a buffer previously
  // allocated by this allocator.
  virtual int64 AllocationId(const void* ptr) const { return 0; }

  // Returns the allocated size of the buffer at 'ptr' if known,
  // otherwise returns 0. This method can be called when
  // TracksAllocationSizes() is false, but can be extremely slow.
  //
  // REQUIRES: 'ptr!=nullptr' and points to a buffer previously
  // allocated by this allocator.
  virtual size_t AllocatedSizeSlow(const void* ptr) const {
    if (TracksAllocationSizes()) {
      return AllocatedSize(ptr);
    }
    return 0;
  }

  // Returns statistics collected by this allocator, if any are available.
  virtual absl::optional<AllocatorStats> GetStats() { return absl::nullopt; }

  // Clears the internal stats except for the `in_use` field.
  virtual void ClearStats() {}

  virtual void SetSafeFrontier(uint64 count) {}
};
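
// Example use of the raw interface (a minimal sketch; `a` stands in for an
// Allocator* obtained elsewhere, e.g. from cpu_allocator() declared below):
//   void* buf = a->AllocateRaw(Allocator::kAllocatorAlignment, 1024);
//   if (buf != nullptr) {
//     // ... use the 1024-byte buffer ...
//     a->DeallocateRaw(buf);
//   }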

// An implementation of Allocator that delegates all calls to another
// Allocator.
//
// Useful to clients who want to override part of the functionality of another
// allocator.
class AllocatorWrapper : public Allocator {
 public:
  explicit AllocatorWrapper(Allocator* wrapped) : wrapped_(wrapped) {}

  ~AllocatorWrapper() override {}

  // Returns the wrapped allocator to which all calls are delegated.
  Allocator* wrapped() const { return wrapped_; }

  string Name() override { return wrapped_->Name(); }

  void* AllocateRaw(size_t alignment, size_t num_bytes) override {
    return wrapped_->AllocateRaw(alignment, num_bytes);
  }

  void* AllocateRaw(size_t alignment, size_t num_bytes,
                    const AllocationAttributes& allocation_attr) override {
    return wrapped_->AllocateRaw(alignment, num_bytes, allocation_attr);
  }

  void DeallocateRaw(void* ptr) override { wrapped_->DeallocateRaw(ptr); }

  bool TracksAllocationSizes() const override {
    return wrapped_->TracksAllocationSizes();
  }

  bool AllocatesOpaqueHandle() const override {
    return wrapped_->AllocatesOpaqueHandle();
  }

  size_t RequestedSize(const void* ptr) const override {
    return wrapped_->RequestedSize(ptr);
  }

  size_t AllocatedSize(const void* ptr) const override {
    return wrapped_->AllocatedSize(ptr);
  }

  int64 AllocationId(const void* ptr) const override {
    return wrapped_->AllocationId(ptr);
  }

  size_t AllocatedSizeSlow(const void* ptr) const override {
    return wrapped_->AllocatedSizeSlow(ptr);
  }

 private:
  Allocator* const wrapped_;
};
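
// Example (a minimal sketch of a hypothetical wrapper that only changes the
// reported name and delegates everything else to the wrapped allocator):
//   class RenamingAllocator : public AllocatorWrapper {
//    public:
//     explicit RenamingAllocator(Allocator* wrapped)
//         : AllocatorWrapper(wrapped) {}
//     string Name() override { return "renamed_" + wrapped()->Name(); }
//   };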

// A tensorflow Op may need access to different kinds of memory that
// are not simply a function of the device to which the Op has been
// assigned.  For example, an Op executing on a GPU may still need
// to allocate CPU RAM for some purpose.  Internal to the tensorflow
// runtime we may choose to allocate CPU RAM from special regions
// that have been prepared for higher performance in some use
// contexts, e.g. doing DMA with particular devices.  For these
// reasons, the Device interface does not expose just one memory
// Allocator, but instead provides an accessor that takes a
// specification of the desired memory attributes in order to select
// an Allocator.
//
// Example use:
//  // Allocator for ordinary device memory:
//  Allocator* a = allocator(AllocatorAttributes());
// ...
//  // Allocator for CPU RAM, regardless of where Op is executing:
//  AllocatorAttributes attr;
//  attr.set_on_host(true);
//  Allocator* a = allocator(attr);
struct AllocatorAttributes {
  void set_on_host(bool v) { value |= (static_cast<int>(v)); }
  bool on_host() const { return value & 0x1; }
  void set_nic_compatible(bool v) { value |= (static_cast<int>(v) << 1); }
  bool nic_compatible() const { return value & (0x1 << 1); }
  void set_gpu_compatible(bool v) { value |= (static_cast<int>(v) << 2); }
  bool gpu_compatible() const { return value & (0x1 << 2); }
  void Merge(AllocatorAttributes other) {
    value |= other.value;
    if (scope_id != other.scope_id) {
      CHECK(scope_id == 0 || other.scope_id == 0)
          << "At least one scope_id should be zero to merge "
             "AllocatorAttributes but found this.scope_id="
          << scope_id << " and other.scope_id=" << other.scope_id;
      scope_id = scope_id == 0 ? other.scope_id : scope_id;
    }
  }
  // Returns true if the fields set in *this are a subset of, or equal to,
  // those set in other.
  bool IsEqualOrLessRestrictiveThan(const AllocatorAttributes& other) const {
    return (value | other.value) == other.value;
  }

  // NOTE: The upper 8 bits of the value are reserved for
  // device-specific uses.  Implementors of a device can interpret these
  // upper 8 bits in device-specific ways, and ops implemented for those
  // devices are responsible for setting those 8 bits appropriately.
  uint32 value = 0;
  // EXPERIMENTAL: If this is greater than zero, then allocation is delegated to
  // a named special-purpose allocator on the same device.
  int32 scope_id = 0;

  // Returns a human readable representation of this.
  string DebugString() const;
};
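
// Example of combining attributes (a minimal sketch):
//   AllocatorAttributes host_attr;
//   host_attr.set_on_host(true);
//   AllocatorAttributes merged;
//   merged.set_gpu_compatible(true);
//   merged.Merge(host_attr);  // merged is now both on_host and gpu_compatible.
//   CHECK(host_attr.IsEqualOrLessRestrictiveThan(merged));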

// Returns a trivial implementation of Allocator, which is a process singleton.
// Access through this function is only intended for use by restricted parts
// of the infrastructure.
Allocator* cpu_allocator_base();

// If available, calls ProcessState::GetCPUAllocator(numa_node).
// If not, falls back to cpu_allocator_base().
// Intended for use in contexts where ProcessState is not visible at
// compile time. Where ProcessState is visible, it's preferable to
// call it directly.
Allocator* cpu_allocator(int numa_node = port::kNUMANoAffinity);

// If 'enable' is true, the default CPU allocator implementation will collect
// AllocatorStats. By default, it's disabled.
void EnableCPUAllocatorStats(bool enable);
bool CPUAllocatorStatsEnabled();

// If 'enable' is true, the default CPU allocator implementation will collect
// full statistics. By default, it's disabled.
void EnableCPUAllocatorFullStats(bool enable);
bool CPUAllocatorFullStatsEnabled();
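
// Example (a minimal sketch; stats collection must be enabled before the
// allocations of interest are made):
//   EnableCPUAllocatorStats(true);
//   Allocator* a = cpu_allocator();
//   void* p = a->AllocateRaw(Allocator::kAllocatorAlignment, 4096);
//   absl::optional<AllocatorStats> stats = a->GetStats();
//   if (stats) LOG(INFO) << stats->DebugString();
//   a->DeallocateRaw(p);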

// An object that does the underlying suballoc/free of memory for a
// higher-level allocator.  The expectation is that the higher-level allocator
// is doing some kind of cache or pool management so that it will call
// SubAllocator::Alloc and Free relatively infrequently, compared to the number
// of times its own AllocateRaw and Free methods are called.
class SubAllocator {
 public:
  // Visitor gets called with a pointer to a memory area and its
  // size in bytes.  The index value will be numa_node for a CPU
  // allocator and GPU id for a GPU allocator.
  typedef std::function<void(void*, int index, size_t)> Visitor;

  SubAllocator(const std::vector<Visitor>& alloc_visitors,
               const std::vector<Visitor>& free_visitors);

  virtual ~SubAllocator() {}
  virtual void* Alloc(size_t alignment, size_t num_bytes) = 0;
  virtual void Free(void* ptr, size_t num_bytes) = 0;

 protected:
  // Implementations of the Alloc() method must call this on the newly
  // allocated value.
  void VisitAlloc(void* ptr, int index, size_t num_bytes);

  // Implementations of the Free() method must call this on the value to be
  // freed immediately before deallocation.
  void VisitFree(void* ptr, int index, size_t num_bytes);

  const std::vector<Visitor> alloc_visitors_;
  const std::vector<Visitor> free_visitors_;
};
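
// Example (a minimal sketch of a host-memory SubAllocator with no visitors;
// it assumes port::AlignedMalloc/AlignedFree from
// tensorflow/core/platform/mem.h, and a real implementation would typically
// carve out larger regions for a pooling allocator to manage):
//   class BasicCPUSubAllocator : public SubAllocator {
//    public:
//     BasicCPUSubAllocator() : SubAllocator({}, {}) {}
//     void* Alloc(size_t alignment, size_t num_bytes) override {
//       void* ptr = port::AlignedMalloc(num_bytes, alignment);
//       VisitAlloc(ptr, /*index=*/0, num_bytes);
//       return ptr;
//     }
//     void Free(void* ptr, size_t num_bytes) override {
//       VisitFree(ptr, /*index=*/0, num_bytes);
//       port::AlignedFree(ptr);
//     }
//   };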

}  // namespace tensorflow

#endif  // TENSORFLOW_CORE_FRAMEWORK_ALLOCATOR_H_