• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_CORE_COMMON_RUNTIME_PROCESS_STATE_H_
17 #define TENSORFLOW_CORE_COMMON_RUNTIME_PROCESS_STATE_H_
18 
19 #include <functional>
20 #include <map>
21 #include <unordered_map>
22 #include <vector>
23 
24 #include "tensorflow/core/framework/allocator.h"
25 #include "tensorflow/core/framework/allocator_registry.h"
26 #include "tensorflow/core/platform/mutex.h"
27 #include "tensorflow/core/platform/thread_annotations.h"
28 #include "tensorflow/core/platform/types.h"
29 #include "tensorflow/core/protobuf/config.pb.h"
30 
31 namespace tensorflow {
32 
33 class Allocator;
34 class PoolAllocator;
35 
36 // Singleton that manages per-process state, e.g. allocation of
37 // shared resources.
38 class ProcessState : public ProcessStateInterface {
39  public:
40   static ProcessState* singleton();
41 
42   // Descriptor for memory allocation attributes, used by optional
43   // runtime correctness analysis logic.
44   struct MemDesc {
45     enum MemLoc { CPU, GPU };
46     MemLoc loc;
47     int dev_index;
48     bool gpu_registered;
49     bool nic_registered;
MemDescMemDesc50     MemDesc()
51         : loc(CPU),
52           dev_index(0),
53           gpu_registered(false),
54           nic_registered(false) {}
55     string DebugString();
56   };
57 
58   // If NUMA Allocators are desired, call this before calling any
59   // Allocator accessor.
EnableNUMA()60   void EnableNUMA() { numa_enabled_ = true; }
61 
62   // Returns what we know about the memory at ptr.
63   // If we know nothing, it's called CPU 0 with no other attributes.
64   MemDesc PtrType(const void* ptr);
65 
66   // Returns the one CPUAllocator used for the given numa_node.
67   // Treats numa_node == kNUMANoAffinity as numa_node == 0.
68   Allocator* GetCPUAllocator(int numa_node) override;
69 
70   // Registers alloc visitor for the CPU allocator(s).
71   // REQUIRES: must be called before GetCPUAllocator.
72   void AddCPUAllocVisitor(SubAllocator::Visitor v);
73 
74   // Registers free visitor for the CPU allocator(s).
75   // REQUIRES: must be called before GetCPUAllocator.
76   void AddCPUFreeVisitor(SubAllocator::Visitor v);
77 
78   typedef std::unordered_map<const void*, MemDesc> MDMap;
79 
80  protected:
81   ProcessState();
~ProcessState()82   virtual ~ProcessState() {}
83   friend class GPUProcessState;
84 
85   // If these flags need to be runtime configurable consider adding
86   // them to ConfigProto.
87   static const bool FLAGS_brain_mem_reg_gpu_dma = true;
88   static const bool FLAGS_brain_gpu_record_mem_types = false;
89 
90   // Helper method for unit tests to reset the ProcessState singleton by
91   // cleaning up everything. Never use in production.
92   void TestOnlyReset();
93 
94   static ProcessState* instance_;
95   bool numa_enabled_;
96 
97   mutex mu_;
98 
99   // Indexed by numa_node.  If we want numa-specific allocators AND a
100   // non-specific allocator, maybe should index by numa_node+1.
101   std::vector<Allocator*> cpu_allocators_ GUARDED_BY(mu_);
102   std::vector<SubAllocator::Visitor> cpu_alloc_visitors_ GUARDED_BY(mu_);
103   std::vector<SubAllocator::Visitor> cpu_free_visitors_ GUARDED_BY(mu_);
104 
105   // Optional RecordingAllocators that wrap the corresponding
106   // Allocators for runtime attribute use analysis.
107   MDMap mem_desc_map_;
108   std::vector<Allocator*> cpu_al_ GUARDED_BY(mu_);
109 };
110 
111 namespace internal {
112 class RecordingAllocator : public Allocator {
113  public:
RecordingAllocator(ProcessState::MDMap * mm,Allocator * a,ProcessState::MemDesc md,mutex * mu)114   RecordingAllocator(ProcessState::MDMap* mm, Allocator* a,
115                      ProcessState::MemDesc md, mutex* mu)
116       : mm_(mm), a_(a), md_(md), mu_(mu) {}
117 
Name()118   string Name() override { return a_->Name(); }
AllocateRaw(size_t alignment,size_t num_bytes)119   void* AllocateRaw(size_t alignment, size_t num_bytes) override {
120     void* p = a_->AllocateRaw(alignment, num_bytes);
121     mutex_lock l(*mu_);
122     (*mm_)[p] = md_;
123     return p;
124   }
DeallocateRaw(void * p)125   void DeallocateRaw(void* p) override {
126     mutex_lock l(*mu_);
127     auto iter = mm_->find(p);
128     mm_->erase(iter);
129     a_->DeallocateRaw(p);
130   }
TracksAllocationSizes()131   bool TracksAllocationSizes() const override {
132     return a_->TracksAllocationSizes();
133   }
RequestedSize(const void * p)134   size_t RequestedSize(const void* p) const override {
135     return a_->RequestedSize(p);
136   }
AllocatedSize(const void * p)137   size_t AllocatedSize(const void* p) const override {
138     return a_->AllocatedSize(p);
139   }
GetStats()140   absl::optional<AllocatorStats> GetStats() override { return a_->GetStats(); }
ClearStats()141   void ClearStats() override { a_->ClearStats(); }
142   ProcessState::MDMap* mm_;  // not owned
143   Allocator* a_;             // not owned
144   ProcessState::MemDesc md_;
145   mutex* mu_;
146 };
147 }  // namespace internal
148 }  // namespace tensorflow
149 #endif  // TENSORFLOW_CORE_COMMON_RUNTIME_PROCESS_STATE_H_
150