/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/common_runtime/gpu/gpu_debug_allocator.h"

#include <cstddef>
#include <vector>

#include "tensorflow/core/common_runtime/gpu/gpu_id.h"
#include "tensorflow/core/common_runtime/gpu/gpu_id_utils.h"
#include "tensorflow/core/common_runtime/gpu/gpu_init.h"
#include "tensorflow/core/platform/stream_executor.h"

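// Guard configuration: MASK_WORDS words (MASK_BYTES bytes) of a fixed pattern
// are written immediately before and after each allocation and verified on
// deallocation.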
#define MASK_WORDS 2
#define MASK_BYTES (MASK_WORDS * sizeof(int64))

namespace tensorflow {
namespace {

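// Returns a newly allocated mask filled with MASK_WORDS copies of |word|.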
int64* NewMask(int64 word) {
  int64* m = new int64[MASK_WORDS];
  for (int i = 0; i < MASK_WORDS; ++i) {
    m[i] = word;
  }
  return m;
}

int64* before_mask = NewMask(0xabababababababab);
int64* after_mask = NewMask(0xcdcdcdcdcdcdcdcd);

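// Copies the guard mask at device address |ptr| back to the host and compares
// it word-by-word against |mask|. Returns true iff every word matches.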
bool CheckMask(se::StreamExecutor* exec, void* ptr, int64* mask) {
  se::DeviceMemory<int64> gpu_ptr{se::DeviceMemoryBase{ptr, MASK_BYTES}};
  int64 tmp[MASK_WORDS];

  Status result = exec->SynchronousMemcpyD2H(gpu_ptr, MASK_BYTES, tmp);
  if (!result.ok()) {
    LOG(FATAL) << "Could not copy debug mask, " << result;
  }

  bool ok = true;
  for (int i = 0; i < MASK_WORDS; ++i) {
    ok &= (mask[i] == tmp[i]);
    if (!ok) {
      LOG(ERROR) << "i=" << i
                 << " mask=" << reinterpret_cast<const void*>(mask[i])
                 << " field=" << reinterpret_cast<const void*>(tmp[i]);
    }
  }

  return ok;
}

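// Writes the MASK_BYTES-long |mask| to device memory at |ptr|.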
void InitMask(se::StreamExecutor* exec, void* ptr, int64* mask) {
  se::DeviceMemory<int64> gpu_ptr{se::DeviceMemoryBase{ptr, MASK_BYTES}};
  Status result = exec->SynchronousMemcpyH2D(mask, MASK_BYTES, &gpu_ptr);
  if (!result.ok()) {
    LOG(FATAL) << "Could not copy debug mask, " << result;
  }
}

}  // namespace

// -----------------------------------------------------------------------------
// GPUDebugAllocator
// -----------------------------------------------------------------------------
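// Wraps a base allocator and brackets every allocation with before/after guard
// masks so that out-of-bounds writes are caught when the buffer is freed.
//
// Minimal usage sketch (hypothetical; the base allocator and GPU id below are
// placeholders, not defined in this file):
//
//   Allocator* base = /* some GPU allocator */;
//   GPUDebugAllocator debug_alloc(base, platform_gpu_id);
//   void* p = debug_alloc.AllocateRaw(/*alignment=*/64, /*num_bytes=*/1024);
//   // ... write at most 1024 bytes through p ...
//   debug_alloc.DeallocateRaw(p);  // CHECK-fails if either mask was clobbered.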
GPUDebugAllocator::GPUDebugAllocator(Allocator* allocator,
                                     PlatformGpuId platform_gpu_id)
    : base_allocator_(allocator) {
  stream_exec_ =
      GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie();
}

GPUDebugAllocator::~GPUDebugAllocator() { delete base_allocator_; }

void* GPUDebugAllocator::AllocateRaw(size_t alignment, size_t num_bytes) {
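  // Grow the request so the underlying allocation has room for the header and
  // footer masks in addition to the bytes the client asked for.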
  num_bytes += (2 * MASK_BYTES);
  void* allocated_ptr = base_allocator_->AllocateRaw(alignment, num_bytes);
  if (allocated_ptr == nullptr) return allocated_ptr;

  // Return the pointer after the header.
  void* rv = static_cast<char*>(allocated_ptr) + MASK_BYTES;

  // Write the header at allocated_ptr.
  InitMask(stream_exec_, allocated_ptr, before_mask);

  // Write the footer at the end.
  size_t req_size = base_allocator_->RequestedSize(allocated_ptr);
  InitMask(stream_exec_,
           static_cast<char*>(allocated_ptr) + req_size - MASK_BYTES,
           after_mask);
  return rv;
}

void GPUDebugAllocator::DeallocateRaw(void* ptr) {
  if (ptr != nullptr) {
    CHECK(CheckHeader(ptr)) << "before_mask has been overwritten";
    CHECK(CheckFooter(ptr)) << "after_mask has been overwritten";

    // Backtrack to the beginning of the header.
    ptr = static_cast<void*>(static_cast<char*>(ptr) - MASK_BYTES);
  }
  // Deallocate the memory.
  base_allocator_->DeallocateRaw(ptr);
}

bool GPUDebugAllocator::TracksAllocationSizes() { return true; }

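// Size queries compensate for the guard masks: the client's |ptr| sits
// MASK_BYTES past the start of the underlying allocation.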
size_t GPUDebugAllocator::RequestedSize(const void* ptr) {
  auto req_size = base_allocator_->RequestedSize(static_cast<const char*>(ptr) -
                                                 MASK_BYTES);
  return req_size - 2 * MASK_BYTES;
}

size_t GPUDebugAllocator::AllocatedSize(const void* ptr) {
  return base_allocator_->AllocatedSize(static_cast<const char*>(ptr) -
                                        MASK_BYTES);
}

int64 GPUDebugAllocator::AllocationId(const void* ptr) {
  return base_allocator_->AllocationId(static_cast<const char*>(ptr) -
                                       MASK_BYTES);
}

absl::optional<AllocatorStats> GPUDebugAllocator::GetStats() {
  return base_allocator_->GetStats();
}

void GPUDebugAllocator::ClearStats() { base_allocator_->ClearStats(); }

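// Returns true if the header guard immediately before |ptr| is intact.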
bool GPUDebugAllocator::CheckHeader(void* ptr) {
  return CheckMask(stream_exec_, static_cast<char*>(ptr) - MASK_BYTES,
                   before_mask);
}

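// Returns true if the footer guard at the end of the underlying allocation is
// intact.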
bool GPUDebugAllocator::CheckFooter(void* ptr) {
  char* original_ptr = static_cast<char*>(ptr) - MASK_BYTES;
  size_t req_size = base_allocator_->RequestedSize(original_ptr);
  return CheckMask(stream_exec_, original_ptr + req_size - MASK_BYTES,
                   after_mask);
}

// -----------------------------------------------------------------------------
// GPUNanResetAllocator
// -----------------------------------------------------------------------------
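// Wraps a base allocator and fills each buffer with NaNs on allocation and
// again on deallocation, so reads of uninitialized or freed GPU memory surface
// as NaNs instead of silently reusing stale values.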
GPUNanResetAllocator::GPUNanResetAllocator(Allocator* allocator,
                                           PlatformGpuId platform_gpu_id)
    : base_allocator_(allocator) {
  stream_exec_ =
      GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie();
}

GPUNanResetAllocator::~GPUNanResetAllocator() { delete base_allocator_; }

void* GPUNanResetAllocator::AllocateRaw(size_t alignment, size_t num_bytes) {
  void* allocated_ptr = base_allocator_->AllocateRaw(alignment, num_bytes);
  if (allocated_ptr == nullptr) return allocated_ptr;

  // Initialize the buffer to NaNs.
  size_t req_size = base_allocator_->RequestedSize(allocated_ptr);
  std::vector<float> nans((req_size + sizeof(float) - 1) / sizeof(float),
                          std::nanf(""));
  se::DeviceMemory<float> nan_ptr{
      se::DeviceMemoryBase{static_cast<float*>(allocated_ptr), req_size}};

  Status result =
      stream_exec_->SynchronousMemcpyH2D(&nans[0], req_size, &nan_ptr);
  if (!result.ok()) {
    LOG(ERROR) << "Could not initialize to NaNs, " << result;
  }

  return allocated_ptr;
}

void GPUNanResetAllocator::DeallocateRaw(void* ptr) {
  if (ptr != nullptr) {
    // Reset the buffer to NaNs.
    size_t req_size = base_allocator_->RequestedSize(ptr);
    std::vector<float> nans((req_size + sizeof(float) - 1) / sizeof(float),
                            std::nanf(""));
    se::DeviceMemory<float> nan_ptr{
        se::DeviceMemoryBase{static_cast<float*>(ptr), req_size}};
    Status result =
        stream_exec_->SynchronousMemcpyH2D(&nans[0], req_size, &nan_ptr);
    if (!result.ok()) {
      LOG(ERROR) << "Could not initialize to NaNs, " << result;
    }
  }

  // Deallocate the memory.
  base_allocator_->DeallocateRaw(ptr);
}

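// GPUNanResetAllocator adds no padding, so size queries forward directly to
// the base allocator.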
size_t GPUNanResetAllocator::RequestedSize(const void* ptr) {
  return base_allocator_->RequestedSize(ptr);
}

size_t GPUNanResetAllocator::AllocatedSize(const void* ptr) {
  return base_allocator_->AllocatedSize(ptr);
}

absl::optional<AllocatorStats> GPUNanResetAllocator::GetStats() {
  return base_allocator_->GetStats();
}

void GPUNanResetAllocator::ClearStats() { base_allocator_->ClearStats(); }

}  // namespace tensorflow