/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM

#include "tensorflow/core/common_runtime/pool_allocator.h"

#include "tensorflow/core/common_runtime/device/device_host_allocator.h"
#include "tensorflow/core/common_runtime/gpu/gpu_init.h"
#include "tensorflow/core/platform/stream_executor.h"
#include "tensorflow/core/platform/test.h"

namespace tensorflow {
namespace {

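// Allocating zero bytes should return nullptr, and deallocating nullptr
// should be a harmless no-op that leaves every pool counter at zero.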
TEST(PoolAllocatorTest, ZeroSizeBuffers) {
  se::Platform* platform =
      se::MultiPlatformManager::PlatformWithName(GpuPlatformName())
          .ValueOrDie();
  PoolAllocator pool(
      2 /*pool_size_limit*/, false /*auto_resize*/,
      new DeviceHostAllocator(
          platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
              .ValueOrDie(),
          0 /*numa_node*/, {}, {}),
      new NoopRounder, "pool");

  EXPECT_EQ(nullptr, pool.AllocateRaw(4 /*alignment*/, 0 /*num_bytes*/));
  pool.DeallocateRaw(nullptr);  // Should not crash.
  EXPECT_EQ(0, pool.get_from_pool_count());
  EXPECT_EQ(0, pool.put_count());
  EXPECT_EQ(0, pool.allocated_count());
  EXPECT_EQ(0, pool.evicted_count());
}

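// With pool_size_limit == 0 the pool is effectively disabled: every
// allocation goes straight to the underlying allocator, and none of the
// pool counters should ever advance.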
TEST(PoolAllocatorTest, ZeroSizePool) {
  se::Platform* platform =
      se::MultiPlatformManager::PlatformWithName(GpuPlatformName())
          .ValueOrDie();
  PoolAllocator pool(
      0 /*pool_size_limit*/, false /*auto_resize*/,
      new DeviceHostAllocator(
          platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
              .ValueOrDie(),
          0 /*numa_node*/, {}, {}),
      new NoopRounder, "pool");

  EXPECT_EQ(0, pool.get_from_pool_count());
  EXPECT_EQ(0, pool.put_count());
  EXPECT_EQ(0, pool.allocated_count());
  EXPECT_EQ(0, pool.evicted_count());

  // All allocations should bypass the pool and return valid pointers.
  for (int i = 0; i < 3; ++i) {
    void* p0 = pool.AllocateRaw(4, 0);
    void* p4 = pool.AllocateRaw(4, 4);
    void* p12 = pool.AllocateRaw(4, 12);
    EXPECT_EQ(nullptr, p0);
    EXPECT_NE(nullptr, p4);
    EXPECT_NE(nullptr, p12);
    pool.DeallocateRaw(p0);
    pool.DeallocateRaw(p4);
    pool.DeallocateRaw(p12);
  }
  EXPECT_EQ(0, pool.get_from_pool_count());
  EXPECT_EQ(0, pool.put_count());
  EXPECT_EQ(0, pool.allocated_count());
  EXPECT_EQ(0, pool.evicted_count());
}

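// Allocations must honor every requested power-of-two alignment from 1
// up to 2^15.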
TEST(PoolAllocatorTest, Alignment) {
  se::Platform* platform =
      se::MultiPlatformManager::PlatformWithName(GpuPlatformName())
          .ValueOrDie();
  PoolAllocator pool(
      0 /*pool_size_limit*/, false /*auto_resize*/,
      new DeviceHostAllocator(
          platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
              .ValueOrDie(),
          0 /*numa_node*/, {}, {}),
      new NoopRounder, "pool");
  for (int i = 0; i < 16; ++i) {
    size_t alignment = 1 << i;
    void* p = pool.AllocateRaw(alignment, 111);
    EXPECT_TRUE(p != nullptr);
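    // A pointer aligned to a power-of-two `alignment` has all of its low
    // log2(alignment) bits clear, so masking with (alignment - 1) must
    // yield zero.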
    EXPECT_EQ(0, reinterpret_cast<int64>(p) & (alignment - 1))
        << "ptr: " << p << " alignment " << alignment;
    // Intentionally don't deallocate, to test that destruction of
    // the PoolAllocator frees all pending memory.
  }
}

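// With auto_resize enabled, sustained traffic across more distinct buffer
// sizes than the pool can hold should raise the pool's size limit
// automatically.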
TEST(PoolAllocatorTest, AutoResize) {
  PoolAllocator pool(2 /*pool_size_limit*/, true /*auto_resize*/,
                     new BasicCPUAllocator(0 /*numa_node*/, {}, {}),
                     new NoopRounder, "pool");

  // Alloc/dealloc 10 sizes just a few times, confirming pool size
  // stays at 2.
  for (int i = 0; i < 10; ++i) {
    void* p = pool.AllocateRaw(4, 64 << i);
    pool.DeallocateRaw(p);
  }
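  // NoopRounder keeps all 10 sizes distinct, so nothing is ever served from
  // the pool; with a size limit of 2, the pool retains the last two buffers
  // put back and evicts the other 8.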
  EXPECT_EQ(0, pool.get_from_pool_count());
  EXPECT_EQ(10, pool.allocated_count());
  EXPECT_EQ(10, pool.put_count());
  EXPECT_EQ(8, pool.evicted_count());
  EXPECT_EQ(2, pool.size_limit());

  // Then repeat 1200 times.  Pool size limit should jump to 100.
  for (int j = 0; j < 120; ++j) {
    for (int i = 0; i < 10; ++i) {
      void* p = pool.AllocateRaw(4, 64 << i);
      pool.DeallocateRaw(p);
    }
  }
  EXPECT_EQ(100, pool.size_limit());
}

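// End-to-end exercise of a PoolAllocator backed by a DeviceHostAllocator,
// using SubAllocator visitors to observe the underlying allocations and
// frees.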
TEST(PoolAllocatorTest, CudaHostAllocator) {
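  // The visitors record every call into the underlying suballocator so the
  // test can verify how many real allocations occur and their total size.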
  int alloc_count = 0;
  int64 alloc_size = 0;
  SubAllocator::Visitor alloc_visitor =
      [&alloc_count, &alloc_size](void* ptr, int numa_node, int64 size) {
        ++alloc_count;
        alloc_size += size;
      };
  int free_count = 0;
  int64 free_size = 0;
  SubAllocator::Visitor free_visitor =
      [&free_count, &free_size](void* ptr, int numa_node, int64 size) {
        ++free_count;
        free_size += size;
      };
  se::Platform* platform =
      se::MultiPlatformManager::PlatformWithName(GpuPlatformName())
          .ValueOrDie();
  DeviceHostAllocator* sub_allocator = new DeviceHostAllocator(
      platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
          .ValueOrDie(),
      0 /*numa_node*/, {alloc_visitor}, {free_visitor});
  PoolAllocator pool(2 /*pool_size_limit*/, false /*auto_resize*/,
                     sub_allocator, new NoopRounder, "pool");
  EXPECT_EQ(0, alloc_count);
  EXPECT_EQ(0, alloc_size);
  EXPECT_EQ(0, free_count);
  EXPECT_EQ(0, free_size);

  // Repeatedly Get a 16-byte value, confirming that there's only
  // one real allocation.
  void* p1_16 = pool.AllocateRaw(4, 16);
  EXPECT_EQ(0, pool.get_from_pool_count());
  EXPECT_EQ(1, pool.allocated_count());
  EXPECT_NE(nullptr, p1_16);
  EXPECT_EQ(1, alloc_count);  // Underlying suballoc of 16 bytes
  // Each suballocation includes a 16B ChunkPrefix.
  static const int kChunkPrefixSize = 16;
  EXPECT_EQ(16 + (alloc_count * kChunkPrefixSize), alloc_size);
  pool.DeallocateRaw(p1_16);
  // Pool contents {16}
  EXPECT_EQ(1, pool.put_count());
  void* p2_16 = pool.AllocateRaw(4, 16);  // Get it again.
  EXPECT_EQ(1, pool.get_from_pool_count());
  EXPECT_EQ(1, pool.allocated_count());
  EXPECT_EQ(p1_16, p2_16);    // Same pointer value
  pool.DeallocateRaw(p2_16);  // Put it back.
  // Pool contents {16}
  EXPECT_EQ(2, pool.put_count());
  EXPECT_EQ(1, alloc_count);  // Underlying suballoc of 16 bytes
  EXPECT_EQ(16 + (alloc_count * kChunkPrefixSize), alloc_size);
  EXPECT_EQ(0, free_count);

  // Get two more values of different sizes.
  void* p3_4 = pool.AllocateRaw(4, 4);
  EXPECT_EQ(2, pool.allocated_count());
  EXPECT_NE(p1_16, p3_4);  // Different pointer value
  EXPECT_NE(nullptr, p3_4);
  pool.DeallocateRaw(p3_4);  // Put it back. Pool is now full.
  // Pool contents {4, 16}
  EXPECT_EQ(3, pool.put_count());
  void* p4_2 = pool.AllocateRaw(4, 2);  // Get a third size buffer.
  EXPECT_NE(nullptr, p4_2);
  EXPECT_EQ(0, pool.evicted_count());
  EXPECT_EQ(3, alloc_count);
  EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size);
  EXPECT_EQ(0, free_count);

  // The pool is full: when we put back p4_2, the 16-byte buffer
  // should be evicted since it was least recently inserted.
  pool.DeallocateRaw(p4_2);
  // Pool contents {2, 4}
  EXPECT_EQ(4, pool.put_count());
  EXPECT_EQ(1, pool.evicted_count());
  EXPECT_EQ(3, alloc_count);
  EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size);
  EXPECT_EQ(1, free_count);
  EXPECT_EQ(16 + (free_count * kChunkPrefixSize), free_size);

  // Re-getting and putting size 2 or 4 should not alter pool size or
  // num-evicted.
  void* p5_4 = pool.AllocateRaw(4, 4);
  EXPECT_NE(nullptr, p5_4);
  pool.DeallocateRaw(p5_4);
  void* p6_2 = pool.AllocateRaw(4, 2);
  EXPECT_NE(nullptr, p6_2);
  pool.DeallocateRaw(p6_2);
  EXPECT_EQ(3, pool.get_from_pool_count());
  EXPECT_EQ(6, pool.put_count());
  EXPECT_EQ(3, pool.allocated_count());
  EXPECT_EQ(1, pool.evicted_count());
  EXPECT_EQ(3, alloc_count);
  EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size);
  EXPECT_EQ(1, free_count);
  EXPECT_EQ(16 + (free_count * kChunkPrefixSize), free_size);

  pool.Clear();
  EXPECT_EQ(0, pool.get_from_pool_count());
  EXPECT_EQ(0, pool.put_count());
  EXPECT_EQ(0, pool.allocated_count());
  EXPECT_EQ(0, pool.evicted_count());
  EXPECT_EQ(3, alloc_count);
  EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size);
  EXPECT_EQ(3, free_count);
  EXPECT_EQ(16 + 4 + 2 + (free_count * kChunkPrefixSize), free_size);
}

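// Pow2Rounder::RoundUp should return the smallest power of two that is
// greater than or equal to its argument.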
TEST(PoolAllocatorTest, Pow2Rounder) {
  Pow2Rounder rounder;
  EXPECT_EQ(1, rounder.RoundUp(1));
  EXPECT_EQ(2, rounder.RoundUp(2));
  EXPECT_EQ(16, rounder.RoundUp(9));
  EXPECT_EQ(16, rounder.RoundUp(16));
  EXPECT_EQ(65536, rounder.RoundUp(41234));
  EXPECT_EQ(65536, rounder.RoundUp(65535));
  EXPECT_EQ(65536, rounder.RoundUp(65536));
}

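// Name() should return the name supplied at construction.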
TEST(PoolAllocatorTest, Name) {
  se::Platform* platform =
      se::MultiPlatformManager::PlatformWithName(GpuPlatformName())
          .ValueOrDie();
  PoolAllocator pool(
      2 /*pool_size_limit*/, false /*auto_resize*/,
      new DeviceHostAllocator(
          platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
              .ValueOrDie(),
          0 /*numa_node*/, {}, {}),
      new NoopRounder, "pool");
  EXPECT_EQ("pool", pool.Name());
}

}  // namespace
}  // namespace tensorflow

#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM