/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#if GOOGLE_CUDA

#include "tensorflow/core/common_runtime/pool_allocator.h"

#include "tensorflow/core/common_runtime/gpu/gpu_host_allocator.h"
#include "tensorflow/core/platform/stream_executor.h"
#include "tensorflow/core/platform/test.h"

namespace tensorflow {
namespace {

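// PoolAllocator maintains a bounded pool of previously freed buffers on top
// of a SubAllocator, evicting the least recently inserted buffer once
// pool_size_limit is reached. The tests below pin down that behavior through
// the statistics accessors: get_from_pool_count, put_count, allocated_count,
// and evicted_count.
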
TEST(PoolAllocatorTest, ZeroSizeBuffers) {
  se::Platform* platform =
      se::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie();
  PoolAllocator pool(
      2 /*pool_size_limit*/, false /*auto_resize*/,
      new GpuHostAllocator(
          platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
              .ValueOrDie(),
          0 /*numa_node*/, {}, {}),
      new NoopRounder, "pool");

  EXPECT_EQ(nullptr, pool.AllocateRaw(4 /*alignment*/, 0 /*num_bytes*/));
  pool.DeallocateRaw(nullptr);  // Should not crash.
  EXPECT_EQ(0, pool.get_from_pool_count());
  EXPECT_EQ(0, pool.put_count());
  EXPECT_EQ(0, pool.allocated_count());
  EXPECT_EQ(0, pool.evicted_count());
}

TEST(PoolAllocatorTest, ZeroSizePool) {
  se::Platform* platform =
      se::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie();
  PoolAllocator pool(
      0 /*pool_size_limit*/, false /*auto_resize*/,
      new GpuHostAllocator(
          platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
              .ValueOrDie(),
          0 /*numa_node*/, {}, {}),
      new NoopRounder, "pool");

  EXPECT_EQ(0, pool.get_from_pool_count());
  EXPECT_EQ(0, pool.put_count());
  EXPECT_EQ(0, pool.allocated_count());
  EXPECT_EQ(0, pool.evicted_count());

  // With a zero pool size limit, every allocation bypasses the pool and goes
  // straight to the underlying allocator; nonzero requests return valid
  // pointers, while zero-byte requests return nullptr.
  for (int i = 0; i < 3; ++i) {
    void* p0 = pool.AllocateRaw(4, 0);
    void* p4 = pool.AllocateRaw(4, 4);
    void* p12 = pool.AllocateRaw(4, 12);
    EXPECT_EQ(nullptr, p0);
    EXPECT_NE(nullptr, p4);
    EXPECT_NE(nullptr, p12);
    pool.DeallocateRaw(p0);
    pool.DeallocateRaw(p4);
    pool.DeallocateRaw(p12);
  }
  EXPECT_EQ(0, pool.get_from_pool_count());
  EXPECT_EQ(0, pool.put_count());
  EXPECT_EQ(0, pool.allocated_count());
  EXPECT_EQ(0, pool.evicted_count());
}

TEST(PoolAllocatorTest, Alignment) {
  se::Platform* platform =
      se::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie();
  PoolAllocator pool(
      0 /*pool_size_limit*/, false /*auto_resize*/,
      new GpuHostAllocator(
          platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
              .ValueOrDie(),
          0 /*numa_node*/, {}, {}),
      new NoopRounder, "pool");
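  // Alignments from 1 up to 1 << 15 must all be honored, regardless of the
  // rounder. Presumably (an implementation detail of pool_allocator.cc)
  // alignments larger than the pool's natural chunk alignment are satisfied
  // by over-allocating and advancing the returned pointer.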
  for (int i = 0; i < 16; ++i) {
    size_t alignment = 1 << i;
    void* p = pool.AllocateRaw(alignment, 111);
    EXPECT_TRUE(p != nullptr);
    EXPECT_EQ(0, reinterpret_cast<int64>(p) & (alignment - 1))
        << "ptr: " << p << " alignment " << alignment;
    // Intentionally don't deallocate, to test that destruction of
    // the PoolAllocator frees all pending memory.
  }
}

TEST(PoolAllocatorTest, AutoResize) {
  PoolAllocator pool(2 /*pool_size_limit*/, true /*auto_resize*/,
                     new BasicCPUAllocator(0 /*numa_node*/, {}, {}),
                     new NoopRounder, "pool");

  // Alloc/dealloc 10 sizes just a few times, confirming pool size
  // stays at 2.
  for (int i = 0; i < 10; ++i) {
    void* p = pool.AllocateRaw(4, 64 << i);
    pool.DeallocateRaw(p);
  }
  EXPECT_EQ(0, pool.get_from_pool_count());
  EXPECT_EQ(10, pool.allocated_count());
  EXPECT_EQ(10, pool.put_count());
  EXPECT_EQ(8, pool.evicted_count());
  EXPECT_EQ(2, pool.size_limit());
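
  // The growth policy of auto_resize is an internal heuristic of
  // PoolAllocator; the assertion below pins only its observable outcome.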
  // Then repeat 1200 times. Pool size limit should jump to 100.
  for (int j = 0; j < 120; ++j) {
    for (int i = 0; i < 10; ++i) {
      void* p = pool.AllocateRaw(4, 64 << i);
      pool.DeallocateRaw(p);
    }
  }
  EXPECT_EQ(100, pool.size_limit());
}

TEST(PoolAllocatorTest, CudaHostAllocator) {
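  // SubAllocator visitors are callbacks invoked on each underlying
  // allocation and free with the pointer, NUMA node, and byte count; these
  // just tally call counts and total bytes.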
  int alloc_count = 0;
  int64 alloc_size = 0;
  SubAllocator::Visitor alloc_visitor =
      [&alloc_count, &alloc_size](void* ptr, int numa_node, int64 size) {
        ++alloc_count;
        alloc_size += size;
      };
  int free_count = 0;
  int64 free_size = 0;
  SubAllocator::Visitor free_visitor =
      [&free_count, &free_size](void* ptr, int numa_node, int64 size) {
        ++free_count;
        free_size += size;
      };
  se::Platform* platform =
      se::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie();
  GpuHostAllocator* sub_allocator = new GpuHostAllocator(
      platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
          .ValueOrDie(),
      0 /*numa_node*/, {alloc_visitor}, {free_visitor});
  PoolAllocator pool(2 /*pool_size_limit*/, false /*auto_resize*/,
                     sub_allocator, new NoopRounder, "pool");
  EXPECT_EQ(0, alloc_count);
  EXPECT_EQ(0, alloc_size);
  EXPECT_EQ(0, free_count);
  EXPECT_EQ(0, free_size);

  // Repeatedly Get a 16-byte value, confirming that there's only
  // one real allocation.
  void* p1_16 = pool.AllocateRaw(4, 16);
  EXPECT_EQ(0, pool.get_from_pool_count());
  EXPECT_EQ(1, pool.allocated_count());
  EXPECT_NE(nullptr, p1_16);
  EXPECT_EQ(1, alloc_count);  // Underlying suballoc of 16 bytes
  // Each suballocation includes a 16B ChunkPrefix.
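  // The prefix layout is an implementation detail of pool_allocator.cc;
  // presumably it records bookkeeping (such as the original chunk start) so
  // that DeallocateRaw can recover the full chunk. Its effect here is that
  // the visitor sees each request enlarged by kChunkPrefixSize bytes.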
  static const int kChunkPrefixSize = 16;
  EXPECT_EQ(16 + (alloc_count * kChunkPrefixSize), alloc_size);
  pool.DeallocateRaw(p1_16);
  // Pool contents {16}
  EXPECT_EQ(1, pool.put_count());
  void* p2_16 = pool.AllocateRaw(4, 16);  // Get it again.
  EXPECT_EQ(1, pool.get_from_pool_count());
  EXPECT_EQ(1, pool.allocated_count());
  EXPECT_EQ(p1_16, p2_16);    // Same pointer value
  pool.DeallocateRaw(p2_16);  // Put it back.
  // Pool contents {16}
  EXPECT_EQ(2, pool.put_count());
  EXPECT_EQ(1, alloc_count);  // Underlying suballoc of 16 bytes
  EXPECT_EQ(16 + (alloc_count * kChunkPrefixSize), alloc_size);
  EXPECT_EQ(0, free_count);

  // Get two more values of different sizes.
  void* p3_4 = pool.AllocateRaw(4, 4);
  EXPECT_EQ(2, pool.allocated_count());
  EXPECT_NE(p1_16, p3_4);  // Different pointer value
  EXPECT_NE(nullptr, p3_4);
  pool.DeallocateRaw(p3_4);  // Put it back. Pool is now full.
  // Pool contents {4, 16}
  EXPECT_EQ(3, pool.put_count());
  void* p4_2 = pool.AllocateRaw(4, 2);  // Get a third size buffer.
  EXPECT_NE(nullptr, p4_2);
  EXPECT_EQ(0, pool.evicted_count());
  EXPECT_EQ(3, alloc_count);
  EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size);
  EXPECT_EQ(0, free_count);

  // The pool is full: when we put back p4_2, the 16-byte buffer
  // should be evicted since it was least recently inserted.
  pool.DeallocateRaw(p4_2);
  // Pool contents {2, 4}
  EXPECT_EQ(4, pool.put_count());
  EXPECT_EQ(1, pool.evicted_count());
  EXPECT_EQ(3, alloc_count);
  EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size);
  EXPECT_EQ(1, free_count);
  EXPECT_EQ(16 + (free_count * kChunkPrefixSize), free_size);

  // Re-getting and putting size 2 or 4 should not alter pool size or
  // num-evicted.
  void* p5_4 = pool.AllocateRaw(4, 4);
  EXPECT_NE(nullptr, p5_4);
  pool.DeallocateRaw(p5_4);
  void* p6_2 = pool.AllocateRaw(4, 2);
  EXPECT_NE(nullptr, p6_2);
  pool.DeallocateRaw(p6_2);
  EXPECT_EQ(3, pool.get_from_pool_count());
  EXPECT_EQ(6, pool.put_count());
  EXPECT_EQ(3, pool.allocated_count());
  EXPECT_EQ(1, pool.evicted_count());
  EXPECT_EQ(3, alloc_count);
  EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size);
  EXPECT_EQ(1, free_count);
  EXPECT_EQ(16 + (free_count * kChunkPrefixSize), free_size);

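  // Clear() frees everything still cached in the pool through the
  // sub-allocator (hence free_count rises to 3) and resets the pool's
  // statistics, while the visitor totals keep their cumulative values.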
  pool.Clear();
  EXPECT_EQ(0, pool.get_from_pool_count());
  EXPECT_EQ(0, pool.put_count());
  EXPECT_EQ(0, pool.allocated_count());
  EXPECT_EQ(0, pool.evicted_count());
  EXPECT_EQ(3, alloc_count);
  EXPECT_EQ(16 + 4 + 2 + (alloc_count * kChunkPrefixSize), alloc_size);
  EXPECT_EQ(3, free_count);
  EXPECT_EQ(16 + 4 + 2 + (free_count * kChunkPrefixSize), free_size);
}

TEST(PoolAllocatorTest, Pow2Rounder) {
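  // Pow2Rounder rounds each request up to the nearest power of two;
  // values that are already powers of two are unchanged.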
  Pow2Rounder rounder;
  EXPECT_EQ(1, rounder.RoundUp(1));
  EXPECT_EQ(2, rounder.RoundUp(2));
  EXPECT_EQ(16, rounder.RoundUp(9));
  EXPECT_EQ(16, rounder.RoundUp(16));
  EXPECT_EQ(65536, rounder.RoundUp(41234));
  EXPECT_EQ(65536, rounder.RoundUp(65535));
  EXPECT_EQ(65536, rounder.RoundUp(65536));
}

TEST(PoolAllocatorTest, Name) {
  se::Platform* platform =
      se::MultiPlatformManager::PlatformWithName("cuda").ValueOrDie();
  PoolAllocator pool(
      2 /*pool_size_limit*/, false /*auto_resize*/,
      new GpuHostAllocator(
          platform->GetExecutor(se::StreamExecutorConfig(/*ordinal=*/0))
              .ValueOrDie(),
          0 /*numa_node*/, {}, {}),
      new NoopRounder, "pool");
  EXPECT_EQ("pool", pool.Name());
}

}  // namespace
}  // namespace tensorflow

#endif  // GOOGLE_CUDA