//
// Copyright (c) 2017-2020 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
//

#include "Tests.h"
#include "VmaUsage.h"
#include "Common.h"
#include <atomic>
#include <thread>
#include <mutex>
#include <functional>
#include <numeric> // for std::accumulate (used in MainTest)

#ifdef _WIN32

static const char* CODE_DESCRIPTION = "Foo";

extern VkCommandBuffer g_hTemporaryCommandBuffer;
extern const VkAllocationCallbacks* g_Allocs;
extern bool g_BufferDeviceAddressEnabled;
extern PFN_vkGetBufferDeviceAddressEXT g_vkGetBufferDeviceAddressEXT;
void BeginSingleTimeCommands();
void EndSingleTimeCommands();

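// VMA_DEBUG_MARGIN mirrors the allocator's configuration macro: a nonzero value
// makes VMA keep that many bytes of free space around every allocation (used for
// overrun detection). Tests that depend on exact packing, e.g. the
// overlapping-memmove defragmentation case below, run only when it is 0.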
#ifndef VMA_DEBUG_MARGIN
#define VMA_DEBUG_MARGIN 0
#endif

enum CONFIG_TYPE {
    CONFIG_TYPE_MINIMUM,
    CONFIG_TYPE_SMALL,
    CONFIG_TYPE_AVERAGE,
    CONFIG_TYPE_LARGE,
    CONFIG_TYPE_MAXIMUM,
    CONFIG_TYPE_COUNT
};

static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_SMALL;
//static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_LARGE;

enum class FREE_ORDER { FORWARD, BACKWARD, RANDOM, COUNT };

static const char* FREE_ORDER_NAMES[] = {
    "FORWARD",
    "BACKWARD",
    "RANDOM",
};

// Copy of internal VmaAlgorithmToStr.
static const char* AlgorithmToStr(uint32_t algorithm)
{
    switch(algorithm)
    {
    case VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT:
        return "Linear";
    case VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT:
        return "Buddy";
    case 0:
        return "Default";
    default:
        assert(0);
        return "";
    }
}

struct AllocationSize
{
    uint32_t Probability;
    VkDeviceSize BufferSizeMin, BufferSizeMax;
    uint32_t ImageSizeMin, ImageSizeMax;
};

struct Config
{
    uint32_t RandSeed;
    VkDeviceSize BeginBytesToAllocate;
    uint32_t AdditionalOperationCount;
    VkDeviceSize MaxBytesToAllocate;
    uint32_t MemUsageProbability[4]; // For VMA_MEMORY_USAGE_*
    std::vector<AllocationSize> AllocationSizes;
    uint32_t ThreadCount;
    uint32_t ThreadsUsingCommonAllocationsProbabilityPercent;
    FREE_ORDER FreeOrder;
    VmaAllocationCreateFlags AllocationStrategy; // For VMA_ALLOCATION_CREATE_STRATEGY_*
};

struct Result
{
    duration TotalTime;
    duration AllocationTimeMin, AllocationTimeAvg, AllocationTimeMax;
    duration DeallocationTimeMin, DeallocationTimeAvg, DeallocationTimeMax;
    VkDeviceSize TotalMemoryAllocated;
    VkDeviceSize FreeRangeSizeAvg, FreeRangeSizeMax;
};

void TestDefragmentationSimple();
void TestDefragmentationFull();

struct PoolTestConfig
{
    uint32_t RandSeed;
    uint32_t ThreadCount;
    VkDeviceSize PoolSize;
    uint32_t FrameCount;
    uint32_t TotalItemCount;
    // Range for number of items used in each frame.
    uint32_t UsedItemCountMin, UsedItemCountMax;
    // Percent of items to make unused, and possibly make some others used in each frame.
    uint32_t ItemsToMakeUnusedPercent;
    std::vector<AllocationSize> AllocationSizes;

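    // Weighted average over AllocationSizes, treating images as RGBA8 (4 bytes/pixel).
    // Worked example (illustrative): entries {Probability=3, BufferSize 16...48 KB} and
    // {Probability=1, ImageSize 256x256} give (32768*3 + 256*256*4) / (3+1) = 90112 bytes.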
    VkDeviceSize CalcAvgResourceSize() const
    {
        uint32_t probabilitySum = 0;
        VkDeviceSize sizeSum = 0;
        for(size_t i = 0; i < AllocationSizes.size(); ++i)
        {
            const AllocationSize& allocSize = AllocationSizes[i];
            if(allocSize.BufferSizeMax > 0)
                sizeSum += (allocSize.BufferSizeMin + allocSize.BufferSizeMax) / 2 * allocSize.Probability;
            else
            {
                const VkDeviceSize avgDimension = (allocSize.ImageSizeMin + allocSize.ImageSizeMax) / 2;
                sizeSum += avgDimension * avgDimension * 4 * allocSize.Probability;
            }
            probabilitySum += allocSize.Probability;
        }
        return sizeSum / probabilitySum;
    }

    bool UsesBuffers() const
    {
        for(size_t i = 0; i < AllocationSizes.size(); ++i)
            if(AllocationSizes[i].BufferSizeMax > 0)
                return true;
        return false;
    }

    bool UsesImages() const
    {
        for(size_t i = 0; i < AllocationSizes.size(); ++i)
            if(AllocationSizes[i].ImageSizeMax > 0)
                return true;
        return false;
    }
};

struct PoolTestResult
{
    duration TotalTime;
    duration AllocationTimeMin, AllocationTimeAvg, AllocationTimeMax;
    duration DeallocationTimeMin, DeallocationTimeAvg, DeallocationTimeMax;
    size_t LostAllocationCount, LostAllocationTotalSize;
    size_t FailedAllocationCount, FailedAllocationTotalSize;
};

static const uint32_t IMAGE_BYTES_PER_PIXEL = 1;

uint32_t g_FrameIndex = 0;

struct BufferInfo
{
    VkBuffer Buffer = VK_NULL_HANDLE;
    VmaAllocation Allocation = VK_NULL_HANDLE;
};

static uint32_t MemoryTypeToHeap(uint32_t memoryTypeIndex)
{
    const VkPhysicalDeviceMemoryProperties* props;
    vmaGetMemoryProperties(g_hAllocator, &props);
    return props->memoryTypes[memoryTypeIndex].heapIndex;
}

static uint32_t GetAllocationStrategyCount()
{
    uint32_t strategyCount = 0;
    switch(ConfigType)
    {
    case CONFIG_TYPE_MINIMUM: strategyCount = 1; break;
    case CONFIG_TYPE_SMALL:   strategyCount = 1; break;
    case CONFIG_TYPE_AVERAGE: strategyCount = 2; break;
    case CONFIG_TYPE_LARGE:   strategyCount = 2; break;
    case CONFIG_TYPE_MAXIMUM: strategyCount = 3; break;
    default: assert(0);
    }
    return strategyCount;
}

static const char* GetAllocationStrategyName(VmaAllocationCreateFlags allocStrategy)
{
    switch(allocStrategy)
    {
    case VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT:  return "BEST_FIT";
    case VMA_ALLOCATION_CREATE_STRATEGY_WORST_FIT_BIT: return "WORST_FIT";
    case VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT: return "FIRST_FIT";
    case 0: return "Default";
    default: assert(0); return "";
    }
}

static void InitResult(Result& outResult)
{
    outResult.TotalTime = duration::zero();
    // Min/max start at opposite extremes so the first recorded sample always overwrites them.
    outResult.AllocationTimeMin = duration::max();
    outResult.AllocationTimeAvg = duration::zero();
    outResult.AllocationTimeMax = duration::min();
    outResult.DeallocationTimeMin = duration::max();
    outResult.DeallocationTimeAvg = duration::zero();
    outResult.DeallocationTimeMax = duration::min();
    outResult.TotalMemoryAllocated = 0;
    outResult.FreeRangeSizeAvg = 0;
    outResult.FreeRangeSizeMax = 0;
}

class TimeRegisterObj
{
public:
    TimeRegisterObj(duration& min, duration& sum, duration& max) :
        m_Min(min),
        m_Sum(sum),
        m_Max(max),
        m_TimeBeg(std::chrono::high_resolution_clock::now())
    {
    }

    ~TimeRegisterObj()
    {
        duration d = std::chrono::high_resolution_clock::now() - m_TimeBeg;
        m_Sum += d;
        if(d < m_Min) m_Min = d;
        if(d > m_Max) m_Max = d;
    }

private:
    duration& m_Min;
    duration& m_Sum;
    duration& m_Max;
    time_point m_TimeBeg;
};
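
// Usage sketch for the RAII timer above (illustrative; DoMeasuredWork is hypothetical):
//
//   duration min = duration::max(), sum = duration::zero(), max = duration::min();
//   {
//       TimeRegisterObj timer{min, sum, max};
//       DoMeasuredWork();
//   } // Destructor runs here: elapsed time is added to sum and folded into min/max.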

struct PoolTestThreadResult
{
    duration AllocationTimeMin, AllocationTimeSum, AllocationTimeMax;
    duration DeallocationTimeMin, DeallocationTimeSum, DeallocationTimeMax;
    size_t AllocationCount, DeallocationCount;
    size_t LostAllocationCount, LostAllocationTotalSize;
    size_t FailedAllocationCount, FailedAllocationTotalSize;
};

class AllocationTimeRegisterObj : public TimeRegisterObj
{
public:
    AllocationTimeRegisterObj(Result& result) :
        TimeRegisterObj(result.AllocationTimeMin, result.AllocationTimeAvg, result.AllocationTimeMax)
    {
    }
};

class DeallocationTimeRegisterObj : public TimeRegisterObj
{
public:
    DeallocationTimeRegisterObj(Result& result) :
        TimeRegisterObj(result.DeallocationTimeMin, result.DeallocationTimeAvg, result.DeallocationTimeMax)
    {
    }
};

class PoolAllocationTimeRegisterObj : public TimeRegisterObj
{
public:
    PoolAllocationTimeRegisterObj(PoolTestThreadResult& result) :
        TimeRegisterObj(result.AllocationTimeMin, result.AllocationTimeSum, result.AllocationTimeMax)
    {
    }
};

class PoolDeallocationTimeRegisterObj : public TimeRegisterObj
{
public:
    PoolDeallocationTimeRegisterObj(PoolTestThreadResult& result) :
        TimeRegisterObj(result.DeallocationTimeMin, result.DeallocationTimeSum, result.DeallocationTimeMax)
    {
    }
};

static void CurrentTimeToStr(std::string& out)
{
    time_t rawTime; time(&rawTime);
    struct tm timeInfo; localtime_s(&timeInfo, &rawTime);
    char timeStr[128];
    strftime(timeStr, _countof(timeStr), "%c", &timeInfo);
    out = timeStr;
}

VkResult MainTest(Result& outResult, const Config& config)
{
    assert(config.ThreadCount > 0);

    InitResult(outResult);

    RandomNumberGenerator mainRand{config.RandSeed};

    time_point timeBeg = std::chrono::high_resolution_clock::now();

    std::atomic<size_t> allocationCount = 0;
    VkResult res = VK_SUCCESS;

    uint32_t memUsageProbabilitySum =
        config.MemUsageProbability[0] + config.MemUsageProbability[1] +
        config.MemUsageProbability[2] + config.MemUsageProbability[3];
    assert(memUsageProbabilitySum > 0);

    uint32_t allocationSizeProbabilitySum = std::accumulate(
        config.AllocationSizes.begin(),
        config.AllocationSizes.end(),
        0u,
        [](uint32_t sum, const AllocationSize& allocSize) {
            return sum + allocSize.Probability;
        });
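    // Both probability sums drive the roulette-wheel selection used below: draw
    // r in [0, sum), then subtract each entry's Probability until r falls inside
    // an entry's bucket. E.g. with probabilities {2, 1, 1} (sum 4): r == 0 or 1
    // picks entry 0, r == 2 picks entry 1, and r == 3 picks entry 2.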

    struct Allocation
    {
        VkBuffer Buffer;
        VkImage Image;
        VmaAllocation Alloc;
    };

    std::vector<Allocation> commonAllocations;
    std::mutex commonAllocationsMutex;

    auto Allocate = [&](
        VkDeviceSize bufferSize,
        const VkExtent2D imageExtent,
        RandomNumberGenerator& localRand,
        VkDeviceSize& totalAllocatedBytes,
        std::vector<Allocation>& allocations) -> VkResult
    {
        assert((bufferSize == 0) != (imageExtent.width == 0 && imageExtent.height == 0));

        uint32_t memUsageIndex = 0;
        uint32_t memUsageRand = localRand.Generate() % memUsageProbabilitySum;
        while(memUsageRand >= config.MemUsageProbability[memUsageIndex])
            memUsageRand -= config.MemUsageProbability[memUsageIndex++];

        VmaAllocationCreateInfo memReq = {};
        memReq.usage = (VmaMemoryUsage)(VMA_MEMORY_USAGE_GPU_ONLY + memUsageIndex);
        memReq.flags |= config.AllocationStrategy;

        Allocation allocation = {};
        VmaAllocationInfo allocationInfo;

        // Buffer
        if(bufferSize > 0)
        {
            assert(imageExtent.width == 0);
            VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
            bufferInfo.size = bufferSize;
            bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;

            {
                AllocationTimeRegisterObj timeRegisterObj{outResult};
                res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &memReq, &allocation.Buffer, &allocation.Alloc, &allocationInfo);
            }
        }
        // Image
        else
        {
            VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
            imageInfo.imageType = VK_IMAGE_TYPE_2D;
            imageInfo.extent.width = imageExtent.width;
            imageInfo.extent.height = imageExtent.height;
            imageInfo.extent.depth = 1;
            imageInfo.mipLevels = 1;
            imageInfo.arrayLayers = 1;
            imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
            imageInfo.tiling = memReq.usage == VMA_MEMORY_USAGE_GPU_ONLY ?
                VK_IMAGE_TILING_OPTIMAL :
                VK_IMAGE_TILING_LINEAR;
            imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
            switch(memReq.usage)
            {
            case VMA_MEMORY_USAGE_GPU_ONLY:
                switch(localRand.Generate() % 3)
                {
                case 0:
                    imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
                    break;
                case 1:
                    imageInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
                    break;
                case 2:
                    imageInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
                    break;
                }
                break;
            case VMA_MEMORY_USAGE_CPU_ONLY:
            case VMA_MEMORY_USAGE_CPU_TO_GPU:
                imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
                break;
            case VMA_MEMORY_USAGE_GPU_TO_CPU:
                imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT;
                break;
            }
            imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
            imageInfo.flags = 0;

            {
                AllocationTimeRegisterObj timeRegisterObj{outResult};
                res = vmaCreateImage(g_hAllocator, &imageInfo, &memReq, &allocation.Image, &allocation.Alloc, &allocationInfo);
            }
        }

        if(res == VK_SUCCESS)
        {
            ++allocationCount;
            totalAllocatedBytes += allocationInfo.size;
            bool useCommonAllocations = localRand.Generate() % 100 < config.ThreadsUsingCommonAllocationsProbabilityPercent;
            if(useCommonAllocations)
            {
                std::unique_lock<std::mutex> lock(commonAllocationsMutex);
                commonAllocations.push_back(allocation);
            }
            else
                allocations.push_back(allocation);
        }
        else
        {
            TEST(0);
        }
        return res;
    };

    auto GetNextAllocationSize = [&](
        VkDeviceSize& outBufSize,
        VkExtent2D& outImageSize,
        RandomNumberGenerator& localRand)
    {
        outBufSize = 0;
        outImageSize = {0, 0};

        uint32_t allocSizeIndex = 0;
        uint32_t r = localRand.Generate() % allocationSizeProbabilitySum;
        while(r >= config.AllocationSizes[allocSizeIndex].Probability)
            r -= config.AllocationSizes[allocSizeIndex++].Probability;

        const AllocationSize& allocSize = config.AllocationSizes[allocSizeIndex];
        if(allocSize.BufferSizeMax > 0)
        {
            assert(allocSize.ImageSizeMax == 0);
            if(allocSize.BufferSizeMax == allocSize.BufferSizeMin)
                outBufSize = allocSize.BufferSizeMin;
            else
            {
                outBufSize = allocSize.BufferSizeMin + localRand.Generate() % (allocSize.BufferSizeMax - allocSize.BufferSizeMin);
                outBufSize = outBufSize / 16 * 16; // Round down to a multiple of 16.
            }
        }
        else
        {
            if(allocSize.ImageSizeMax == allocSize.ImageSizeMin)
                outImageSize.width = outImageSize.height = allocSize.ImageSizeMax;
            else
            {
                outImageSize.width = allocSize.ImageSizeMin + localRand.Generate() % (allocSize.ImageSizeMax - allocSize.ImageSizeMin);
                outImageSize.height = allocSize.ImageSizeMin + localRand.Generate() % (allocSize.ImageSizeMax - allocSize.ImageSizeMin);
            }
        }
    };
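
    // Worker thread lifecycle: (1) allocate up to its share of BeginBytesToAllocate,
    // (2) perform its share of AdditionalOperationCount random allocations/frees,
    // capped by MaxBytesToAllocate, (3) report completion via
    // numThreadsReachedMaxAllocations, (4) block on threadsFinishEvent until the
    // main thread has captured statistics, then (5) free its remaining
    // thread-local allocations in the configured FreeOrder.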

    std::atomic<uint32_t> numThreadsReachedMaxAllocations = 0;
    HANDLE threadsFinishEvent = CreateEvent(NULL, TRUE, FALSE, NULL);

    auto ThreadProc = [&](uint32_t randSeed) -> void
    {
        RandomNumberGenerator threadRand(randSeed);
        VkDeviceSize threadTotalAllocatedBytes = 0;
        std::vector<Allocation> threadAllocations;
        VkDeviceSize threadBeginBytesToAllocate = config.BeginBytesToAllocate / config.ThreadCount;
        VkDeviceSize threadMaxBytesToAllocate = config.MaxBytesToAllocate / config.ThreadCount;
        uint32_t threadAdditionalOperationCount = config.AdditionalOperationCount / config.ThreadCount;

        // BEGIN ALLOCATIONS
        for(;;)
        {
            VkDeviceSize bufferSize = 0;
            VkExtent2D imageExtent = {};
            GetNextAllocationSize(bufferSize, imageExtent, threadRand);
            if(threadTotalAllocatedBytes + bufferSize + imageExtent.width * imageExtent.height * IMAGE_BYTES_PER_PIXEL <
                threadBeginBytesToAllocate)
            {
                if(Allocate(bufferSize, imageExtent, threadRand, threadTotalAllocatedBytes, threadAllocations) != VK_SUCCESS)
                    break;
            }
            else
                break;
        }

        // ADDITIONAL ALLOCATIONS AND FREES
        for(size_t i = 0; i < threadAdditionalOperationCount; ++i)
        {
            VkDeviceSize bufferSize = 0;
            VkExtent2D imageExtent = {};
            GetNextAllocationSize(bufferSize, imageExtent, threadRand);

            // true = allocate, false = free
            bool allocate = threadRand.Generate() % 2 != 0;

            if(allocate)
            {
                if(threadTotalAllocatedBytes +
                    bufferSize +
                    imageExtent.width * imageExtent.height * IMAGE_BYTES_PER_PIXEL <
                    threadMaxBytesToAllocate)
                {
                    if(Allocate(bufferSize, imageExtent, threadRand, threadTotalAllocatedBytes, threadAllocations) != VK_SUCCESS)
                        break;
                }
            }
            else
            {
                bool useCommonAllocations = threadRand.Generate() % 100 < config.ThreadsUsingCommonAllocationsProbabilityPercent;
                if(useCommonAllocations)
                {
                    std::unique_lock<std::mutex> lock(commonAllocationsMutex);
                    if(!commonAllocations.empty())
                    {
                        size_t indexToFree = threadRand.Generate() % commonAllocations.size();
                        VmaAllocationInfo allocationInfo;
                        vmaGetAllocationInfo(g_hAllocator, commonAllocations[indexToFree].Alloc, &allocationInfo);
                        if(threadTotalAllocatedBytes >= allocationInfo.size)
                        {
                            DeallocationTimeRegisterObj timeRegisterObj{outResult};
                            if(commonAllocations[indexToFree].Buffer != VK_NULL_HANDLE)
                                vmaDestroyBuffer(g_hAllocator, commonAllocations[indexToFree].Buffer, commonAllocations[indexToFree].Alloc);
                            else
                                vmaDestroyImage(g_hAllocator, commonAllocations[indexToFree].Image, commonAllocations[indexToFree].Alloc);
                            threadTotalAllocatedBytes -= allocationInfo.size;
                            commonAllocations.erase(commonAllocations.begin() + indexToFree);
                        }
                    }
                }
                else
                {
                    if(!threadAllocations.empty())
                    {
                        size_t indexToFree = threadRand.Generate() % threadAllocations.size();
                        VmaAllocationInfo allocationInfo;
                        vmaGetAllocationInfo(g_hAllocator, threadAllocations[indexToFree].Alloc, &allocationInfo);
                        if(threadTotalAllocatedBytes >= allocationInfo.size)
                        {
                            DeallocationTimeRegisterObj timeRegisterObj{outResult};
                            if(threadAllocations[indexToFree].Buffer != VK_NULL_HANDLE)
                                vmaDestroyBuffer(g_hAllocator, threadAllocations[indexToFree].Buffer, threadAllocations[indexToFree].Alloc);
                            else
                                vmaDestroyImage(g_hAllocator, threadAllocations[indexToFree].Image, threadAllocations[indexToFree].Alloc);
                            threadTotalAllocatedBytes -= allocationInfo.size;
                            threadAllocations.erase(threadAllocations.begin() + indexToFree);
                        }
                    }
                }
            }
        }

        ++numThreadsReachedMaxAllocations;

        WaitForSingleObject(threadsFinishEvent, INFINITE);

        // DEALLOCATION
        while(!threadAllocations.empty())
        {
            size_t indexToFree = 0;
            switch(config.FreeOrder)
            {
            case FREE_ORDER::FORWARD:
                indexToFree = 0;
                break;
            case FREE_ORDER::BACKWARD:
                indexToFree = threadAllocations.size() - 1;
                break;
            case FREE_ORDER::RANDOM:
                indexToFree = mainRand.Generate() % threadAllocations.size();
                break;
            }

            {
                DeallocationTimeRegisterObj timeRegisterObj{outResult};
                if(threadAllocations[indexToFree].Buffer != VK_NULL_HANDLE)
                    vmaDestroyBuffer(g_hAllocator, threadAllocations[indexToFree].Buffer, threadAllocations[indexToFree].Alloc);
                else
                    vmaDestroyImage(g_hAllocator, threadAllocations[indexToFree].Image, threadAllocations[indexToFree].Alloc);
            }
            threadAllocations.erase(threadAllocations.begin() + indexToFree);
        }
    };

    uint32_t threadRandSeed = mainRand.Generate();
    std::vector<std::thread> bkgThreads;
    for(size_t i = 0; i < config.ThreadCount; ++i)
    {
        bkgThreads.emplace_back(std::bind(ThreadProc, threadRandSeed + (uint32_t)i));
    }

    // Wait until all threads have reached their maximum number of allocations.
    while(numThreadsReachedMaxAllocations < config.ThreadCount)
        Sleep(0);

    // CALCULATE MEMORY STATISTICS ON FINAL USAGE
    VmaStats vmaStats = {};
    vmaCalculateStats(g_hAllocator, &vmaStats);
    outResult.TotalMemoryAllocated = vmaStats.total.usedBytes + vmaStats.total.unusedBytes;
    outResult.FreeRangeSizeMax = vmaStats.total.unusedRangeSizeMax;
    outResult.FreeRangeSizeAvg = vmaStats.total.unusedRangeSizeAvg;

    // Signal threads to deallocate.
    SetEvent(threadsFinishEvent);

    // Wait for threads to finish.
    for(size_t i = 0; i < bkgThreads.size(); ++i)
        bkgThreads[i].join();
    bkgThreads.clear();

    CloseHandle(threadsFinishEvent);

    // Deallocate remaining common resources.
    while(!commonAllocations.empty())
    {
        size_t indexToFree = 0;
        switch(config.FreeOrder)
        {
        case FREE_ORDER::FORWARD:
            indexToFree = 0;
            break;
        case FREE_ORDER::BACKWARD:
            indexToFree = commonAllocations.size() - 1;
            break;
        case FREE_ORDER::RANDOM:
            indexToFree = mainRand.Generate() % commonAllocations.size();
            break;
        }

        {
            DeallocationTimeRegisterObj timeRegisterObj{outResult};
            if(commonAllocations[indexToFree].Buffer != VK_NULL_HANDLE)
                vmaDestroyBuffer(g_hAllocator, commonAllocations[indexToFree].Buffer, commonAllocations[indexToFree].Alloc);
            else
                vmaDestroyImage(g_hAllocator, commonAllocations[indexToFree].Image, commonAllocations[indexToFree].Alloc);
        }
        commonAllocations.erase(commonAllocations.begin() + indexToFree);
    }

    if(allocationCount)
    {
        // The *TimeAvg fields accumulated sums of all samples; convert them to averages.
        outResult.AllocationTimeAvg /= allocationCount;
        outResult.DeallocationTimeAvg /= allocationCount;
    }

    outResult.TotalTime = std::chrono::high_resolution_clock::now() - timeBeg;

    return res;
}

void SaveAllocatorStatsToFile(const wchar_t* filePath)
{
    wprintf(L"Saving JSON dump to file \"%s\"\n", filePath);
    char* stats;
    vmaBuildStatsString(g_hAllocator, &stats, VK_TRUE);
    SaveFile(filePath, stats, strlen(stats));
    vmaFreeStatsString(g_hAllocator, stats);
}

struct AllocInfo
{
    VmaAllocation m_Allocation = VK_NULL_HANDLE;
    VkBuffer m_Buffer = VK_NULL_HANDLE;
    VkImage m_Image = VK_NULL_HANDLE;
    VkImageLayout m_ImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
    uint32_t m_StartValue = 0;
    union
    {
        VkBufferCreateInfo m_BufferInfo;
        VkImageCreateInfo m_ImageInfo;
    };

    // After defragmentation.
    VkBuffer m_NewBuffer = VK_NULL_HANDLE;
    VkImage m_NewImage = VK_NULL_HANDLE;

    void CreateBuffer(
        const VkBufferCreateInfo& bufCreateInfo,
        const VmaAllocationCreateInfo& allocCreateInfo);
    void CreateImage(
        const VkImageCreateInfo& imageCreateInfo,
        const VmaAllocationCreateInfo& allocCreateInfo,
        VkImageLayout layout);
    void Destroy();
};

void AllocInfo::CreateBuffer(
    const VkBufferCreateInfo& bufCreateInfo,
    const VmaAllocationCreateInfo& allocCreateInfo)
{
    m_BufferInfo = bufCreateInfo;
    VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &m_Buffer, &m_Allocation, nullptr);
    TEST(res == VK_SUCCESS);
}
void AllocInfo::CreateImage(
    const VkImageCreateInfo& imageCreateInfo,
    const VmaAllocationCreateInfo& allocCreateInfo,
    VkImageLayout layout)
{
    m_ImageInfo = imageCreateInfo;
    m_ImageLayout = layout;
    VkResult res = vmaCreateImage(g_hAllocator, &imageCreateInfo, &allocCreateInfo, &m_Image, &m_Allocation, nullptr);
    TEST(res == VK_SUCCESS);
}

void AllocInfo::Destroy()
{
    if(m_Image)
    {
        assert(!m_Buffer);
        vkDestroyImage(g_hDevice, m_Image, g_Allocs);
        m_Image = VK_NULL_HANDLE;
    }
    if(m_Buffer)
    {
        assert(!m_Image);
        vkDestroyBuffer(g_hDevice, m_Buffer, g_Allocs);
        m_Buffer = VK_NULL_HANDLE;
    }
    if(m_Allocation)
    {
        vmaFreeMemory(g_hAllocator, m_Allocation);
        m_Allocation = VK_NULL_HANDLE;
    }
}
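
// Note: AllocInfo::Destroy() destroys the resource and frees its memory in two
// steps (vkDestroyBuffer/vkDestroyImage + vmaFreeMemory), which is equivalent to
// a single vmaDestroyBuffer()/vmaDestroyImage() call.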

class StagingBufferCollection
{
public:
    StagingBufferCollection() { }
    ~StagingBufferCollection();
    // Returns false if maximum total size of buffers would be exceeded.
    bool AcquireBuffer(VkDeviceSize size, VkBuffer& outBuffer, void*& outMappedPtr);
    void ReleaseAllBuffers();

private:
    static const VkDeviceSize MAX_TOTAL_SIZE = 256ull * 1024 * 1024;
    struct BufInfo
    {
        VmaAllocation Allocation = VK_NULL_HANDLE;
        VkBuffer Buffer = VK_NULL_HANDLE;
        VkDeviceSize Size = VK_WHOLE_SIZE;
        void* MappedPtr = nullptr;
        bool Used = false;
    };
    std::vector<BufInfo> m_Bufs;
    // Including both used and unused.
    VkDeviceSize m_TotalSize = 0;
};

StagingBufferCollection::~StagingBufferCollection()
{
    for(size_t i = m_Bufs.size(); i--; )
    {
        vmaDestroyBuffer(g_hAllocator, m_Bufs[i].Buffer, m_Bufs[i].Allocation);
    }
}

bool StagingBufferCollection::AcquireBuffer(VkDeviceSize size, VkBuffer& outBuffer, void*& outMappedPtr)
{
    assert(size <= MAX_TOTAL_SIZE);

    // Try to find existing unused buffer with best size.
    size_t bestIndex = SIZE_MAX;
    for(size_t i = 0, count = m_Bufs.size(); i < count; ++i)
    {
        BufInfo& currBufInfo = m_Bufs[i];
        if(!currBufInfo.Used && currBufInfo.Size >= size &&
            (bestIndex == SIZE_MAX || currBufInfo.Size < m_Bufs[bestIndex].Size))
        {
            bestIndex = i;
        }
    }

    if(bestIndex != SIZE_MAX)
    {
        m_Bufs[bestIndex].Used = true;
        outBuffer = m_Bufs[bestIndex].Buffer;
        outMappedPtr = m_Bufs[bestIndex].MappedPtr;
        return true;
    }

    // Allocate new buffer with requested size.
    if(m_TotalSize + size <= MAX_TOTAL_SIZE)
    {
        BufInfo bufInfo;
        bufInfo.Size = size;
        bufInfo.Used = true;

        VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
        bufCreateInfo.size = size;
        bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;

        VmaAllocationCreateInfo allocCreateInfo = {};
        allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
        allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;

        VmaAllocationInfo allocInfo;
        VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &bufInfo.Buffer, &bufInfo.Allocation, &allocInfo);
        bufInfo.MappedPtr = allocInfo.pMappedData;
        TEST(res == VK_SUCCESS && bufInfo.MappedPtr);

        outBuffer = bufInfo.Buffer;
        outMappedPtr = bufInfo.MappedPtr;

        m_Bufs.push_back(std::move(bufInfo));

        m_TotalSize += size;

        return true;
    }
    // If there are some unused but smaller buffers, free them and try again.
    bool hasUnused = false;
    for(size_t i = 0, count = m_Bufs.size(); i < count; ++i)
    {
        if(!m_Bufs[i].Used)
        {
            hasUnused = true;
            break;
        }
    }
    if(hasUnused)
    {
        for(size_t i = m_Bufs.size(); i--; )
        {
            if(!m_Bufs[i].Used)
            {
                m_TotalSize -= m_Bufs[i].Size;
                vmaDestroyBuffer(g_hAllocator, m_Bufs[i].Buffer, m_Bufs[i].Allocation);
                m_Bufs.erase(m_Bufs.begin() + i);
            }
        }

        return AcquireBuffer(size, outBuffer, outMappedPtr);
    }

    return false;
}
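
// Usage sketch (illustrative; srcData/dataSize are hypothetical):
//
//   StagingBufferCollection stagingBufs;
//   VkBuffer stagingBuf; void* mapped;
//   if(stagingBufs.AcquireBuffer(dataSize, stagingBuf, mapped))
//   {
//       memcpy(mapped, srcData, (size_t)dataSize);
//       // ... record a copy command reading from stagingBuf ...
//   }
//   stagingBufs.ReleaseAllBuffers(); // once the GPU has finished with the copies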

void StagingBufferCollection::ReleaseAllBuffers()
{
    for(size_t i = 0, count = m_Bufs.size(); i < count; ++i)
    {
        m_Bufs[i].Used = false;
    }
}

static void UploadGpuData(const AllocInfo* allocInfo, size_t allocInfoCount)
{
    StagingBufferCollection stagingBufs;

    bool cmdBufferStarted = false;
    for(size_t allocInfoIndex = 0; allocInfoIndex < allocInfoCount; ++allocInfoIndex)
    {
        const AllocInfo& currAllocInfo = allocInfo[allocInfoIndex];
        if(currAllocInfo.m_Buffer)
        {
            const VkDeviceSize size = currAllocInfo.m_BufferInfo.size;

            VkBuffer stagingBuf = VK_NULL_HANDLE;
            void* stagingBufMappedPtr = nullptr;
            if(!stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr))
            {
                TEST(cmdBufferStarted);
                EndSingleTimeCommands();
                stagingBufs.ReleaseAllBuffers();
                cmdBufferStarted = false;

                bool ok = stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr);
                TEST(ok);
            }

            // Fill staging buffer.
            {
                assert(size % sizeof(uint32_t) == 0);
                uint32_t* stagingValPtr = (uint32_t*)stagingBufMappedPtr;
                uint32_t val = currAllocInfo.m_StartValue;
                for(size_t i = 0; i < size / sizeof(uint32_t); ++i)
                {
                    *stagingValPtr = val;
                    ++stagingValPtr;
                    ++val;
                }
            }

            // Issue copy command from staging buffer to destination buffer.
            if(!cmdBufferStarted)
            {
                cmdBufferStarted = true;
                BeginSingleTimeCommands();
            }

            VkBufferCopy copy = {};
            copy.srcOffset = 0;
            copy.dstOffset = 0;
            copy.size = size;
            vkCmdCopyBuffer(g_hTemporaryCommandBuffer, stagingBuf, currAllocInfo.m_Buffer, 1, &copy);
        }
        else
        {
            TEST(currAllocInfo.m_ImageInfo.format == VK_FORMAT_R8G8B8A8_UNORM && "Only RGBA8 images are currently supported.");
            TEST(currAllocInfo.m_ImageInfo.mipLevels == 1 && "Only single mip images are currently supported.");

            const VkDeviceSize size = (VkDeviceSize)currAllocInfo.m_ImageInfo.extent.width * currAllocInfo.m_ImageInfo.extent.height * sizeof(uint32_t);

            VkBuffer stagingBuf = VK_NULL_HANDLE;
            void* stagingBufMappedPtr = nullptr;
            if(!stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr))
            {
                TEST(cmdBufferStarted);
                EndSingleTimeCommands();
                stagingBufs.ReleaseAllBuffers();
                cmdBufferStarted = false;

                bool ok = stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr);
                TEST(ok);
            }

            // Fill staging buffer.
            {
                assert(size % sizeof(uint32_t) == 0);
                uint32_t* stagingValPtr = (uint32_t*)stagingBufMappedPtr;
                uint32_t val = currAllocInfo.m_StartValue;
                for(size_t i = 0; i < size / sizeof(uint32_t); ++i)
                {
                    *stagingValPtr = val;
                    ++stagingValPtr;
                    ++val;
                }
            }

            // Issue copy command from staging buffer to destination image.
            if(!cmdBufferStarted)
            {
                cmdBufferStarted = true;
                BeginSingleTimeCommands();
            }

            // Transition to TRANSFER_DST layout before the copy.
            VkImageSubresourceRange subresourceRange = {
                VK_IMAGE_ASPECT_COLOR_BIT,
                0, VK_REMAINING_MIP_LEVELS,
                0, VK_REMAINING_ARRAY_LAYERS
            };

            VkImageMemoryBarrier barrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER };
            barrier.srcAccessMask = 0;
            barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
            barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
            barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
            barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
            barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
            barrier.image = currAllocInfo.m_Image;
            barrier.subresourceRange = subresourceRange;

            vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
                0, nullptr,
                0, nullptr,
                1, &barrier);

            // Copy image data.
            VkBufferImageCopy copy = {};
            copy.bufferOffset = 0;
            copy.bufferRowLength = 0;
            copy.bufferImageHeight = 0;
            copy.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
            copy.imageSubresource.layerCount = 1;
            copy.imageExtent = currAllocInfo.m_ImageInfo.extent;

            vkCmdCopyBufferToImage(g_hTemporaryCommandBuffer, stagingBuf, currAllocInfo.m_Image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy);

            // Transition to the desired final layout after the copy.
            barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
            barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
            barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
            barrier.newLayout = currAllocInfo.m_ImageLayout;

            vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0,
                0, nullptr,
                0, nullptr,
                1, &barrier);
        }
    }

    if(cmdBufferStarted)
    {
        EndSingleTimeCommands();
        stagingBufs.ReleaseAllBuffers();
    }
}

static void ValidateGpuData(const AllocInfo* allocInfo, size_t allocInfoCount)
{
    StagingBufferCollection stagingBufs;

    bool cmdBufferStarted = false;
    size_t validateAllocIndexOffset = 0;
    std::vector<void*> validateStagingBuffers;
    for(size_t allocInfoIndex = 0; allocInfoIndex < allocInfoCount; ++allocInfoIndex)
    {
        const AllocInfo& currAllocInfo = allocInfo[allocInfoIndex];
        if(currAllocInfo.m_Buffer)
        {
            const VkDeviceSize size = currAllocInfo.m_BufferInfo.size;

            VkBuffer stagingBuf = VK_NULL_HANDLE;
            void* stagingBufMappedPtr = nullptr;
            if(!stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr))
            {
                TEST(cmdBufferStarted);
                EndSingleTimeCommands();
                cmdBufferStarted = false;

                for(size_t validateIndex = 0;
                    validateIndex < validateStagingBuffers.size();
                    ++validateIndex)
                {
                    const size_t validateAllocIndex = validateIndex + validateAllocIndexOffset;
                    const VkDeviceSize validateSize = allocInfo[validateAllocIndex].m_BufferInfo.size;
                    TEST(validateSize % sizeof(uint32_t) == 0);
                    const uint32_t* stagingValPtr = (const uint32_t*)validateStagingBuffers[validateIndex];
                    uint32_t val = allocInfo[validateAllocIndex].m_StartValue;
                    bool valid = true;
                    for(size_t i = 0; i < validateSize / sizeof(uint32_t); ++i)
                    {
                        if(*stagingValPtr != val)
                        {
                            valid = false;
                            break;
                        }
                        ++stagingValPtr;
                        ++val;
                    }
                    TEST(valid);
                }

                stagingBufs.ReleaseAllBuffers();

                validateAllocIndexOffset = allocInfoIndex;
                validateStagingBuffers.clear();

                bool ok = stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr);
                TEST(ok);
            }

            // Issue copy command from destination buffer to staging buffer.
            if(!cmdBufferStarted)
            {
                cmdBufferStarted = true;
                BeginSingleTimeCommands();
            }

            VkBufferCopy copy = {};
            copy.srcOffset = 0;
            copy.dstOffset = 0;
            copy.size = size;
            vkCmdCopyBuffer(g_hTemporaryCommandBuffer, currAllocInfo.m_Buffer, stagingBuf, 1, &copy);

            // Save mapped pointer for later validation.
            validateStagingBuffers.push_back(stagingBufMappedPtr);
        }
        else
        {
            TEST(0 && "Images not currently supported.");
        }
    }

    if(cmdBufferStarted)
    {
        EndSingleTimeCommands();

        for(size_t validateIndex = 0;
            validateIndex < validateStagingBuffers.size();
            ++validateIndex)
        {
            const size_t validateAllocIndex = validateIndex + validateAllocIndexOffset;
            const VkDeviceSize validateSize = allocInfo[validateAllocIndex].m_BufferInfo.size;
            TEST(validateSize % sizeof(uint32_t) == 0);
            const uint32_t* stagingValPtr = (const uint32_t*)validateStagingBuffers[validateIndex];
            uint32_t val = allocInfo[validateAllocIndex].m_StartValue;
            bool valid = true;
            for(size_t i = 0; i < validateSize / sizeof(uint32_t); ++i)
            {
                if(*stagingValPtr != val)
                {
                    valid = false;
                    break;
                }
                ++stagingValPtr;
                ++val;
            }
            TEST(valid);
        }

        stagingBufs.ReleaseAllBuffers();
    }
}

static void GetMemReq(VmaAllocationCreateInfo& outMemReq)
{
    outMemReq = {};
    outMemReq.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
    //outMemReq.flags = VMA_ALLOCATION_CREATE_PERSISTENT_MAP_BIT;
}

static void CreateBuffer(
    VmaPool pool,
    const VkBufferCreateInfo& bufCreateInfo,
    bool persistentlyMapped,
    AllocInfo& outAllocInfo)
{
    outAllocInfo = {};
    outAllocInfo.m_BufferInfo = bufCreateInfo;

    VmaAllocationCreateInfo allocCreateInfo = {};
    allocCreateInfo.pool = pool;
    if(persistentlyMapped)
        allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;

    VmaAllocationInfo vmaAllocInfo = {};
    ERR_GUARD_VULKAN( vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &outAllocInfo.m_Buffer, &outAllocInfo.m_Allocation, &vmaAllocInfo) );

    // Setup StartValue and fill.
    {
        outAllocInfo.m_StartValue = (uint32_t)rand();
        uint32_t* data = (uint32_t*)vmaAllocInfo.pMappedData;
        TEST((data != nullptr) == persistentlyMapped);
        if(!persistentlyMapped)
        {
            ERR_GUARD_VULKAN( vmaMapMemory(g_hAllocator, outAllocInfo.m_Allocation, (void**)&data) );
        }

        uint32_t value = outAllocInfo.m_StartValue;
        TEST(bufCreateInfo.size % 4 == 0);
        for(size_t i = 0; i < bufCreateInfo.size / sizeof(uint32_t); ++i)
            data[i] = value++;

        if(!persistentlyMapped)
            vmaUnmapMemory(g_hAllocator, outAllocInfo.m_Allocation);
    }
}

static void CreateAllocation(AllocInfo& outAllocation)
{
    outAllocation.m_Allocation = nullptr;
    outAllocation.m_Buffer = nullptr;
    outAllocation.m_Image = nullptr;
    outAllocation.m_StartValue = (uint32_t)rand();

    VmaAllocationCreateInfo vmaMemReq;
    GetMemReq(vmaMemReq);

    VmaAllocationInfo allocInfo;

    const bool isBuffer = true;//(rand() & 0x1) != 0;
    const bool isLarge = (rand() % 16) == 0;
    if(isBuffer)
    {
        const uint32_t bufferSize = isLarge ?
            (rand() % 10 + 1) * (1024 * 1024) : // 1 MB ... 10 MB
            (rand() % 1024 + 1) * 1024; // 1 KB ... 1 MB

        VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
        bufferInfo.size = bufferSize;
        bufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;

        VkResult res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &vmaMemReq, &outAllocation.m_Buffer, &outAllocation.m_Allocation, &allocInfo);
        outAllocation.m_BufferInfo = bufferInfo;
        TEST(res == VK_SUCCESS);
    }
    else
    {
        const uint32_t imageSizeX = isLarge ?
            1024 + rand() % (4096 - 1024) : // 1024 ... 4096
            rand() % 1024 + 1; // 1 ... 1024
        const uint32_t imageSizeY = isLarge ?
            1024 + rand() % (4096 - 1024) : // 1024 ... 4096
            rand() % 1024 + 1; // 1 ... 1024

        VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
        imageInfo.imageType = VK_IMAGE_TYPE_2D;
        imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
        imageInfo.extent.width = imageSizeX;
        imageInfo.extent.height = imageSizeY;
        imageInfo.extent.depth = 1;
        imageInfo.mipLevels = 1;
        imageInfo.arrayLayers = 1;
        imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
        imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
        imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
        imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT;

        VkResult res = vmaCreateImage(g_hAllocator, &imageInfo, &vmaMemReq, &outAllocation.m_Image, &outAllocation.m_Allocation, &allocInfo);
        outAllocation.m_ImageInfo = imageInfo;
        TEST(res == VK_SUCCESS);
    }

    uint32_t* data = (uint32_t*)allocInfo.pMappedData;
    if(allocInfo.pMappedData == nullptr)
    {
        VkResult res = vmaMapMemory(g_hAllocator, outAllocation.m_Allocation, (void**)&data);
        TEST(res == VK_SUCCESS);
    }

    uint32_t value = outAllocation.m_StartValue;
    TEST(allocInfo.size % 4 == 0);
    for(size_t i = 0; i < allocInfo.size / sizeof(uint32_t); ++i)
        data[i] = value++;

    if(allocInfo.pMappedData == nullptr)
        vmaUnmapMemory(g_hAllocator, outAllocation.m_Allocation);
}

static void DestroyAllocation(const AllocInfo& allocation)
{
    if(allocation.m_Buffer)
        vmaDestroyBuffer(g_hAllocator, allocation.m_Buffer, allocation.m_Allocation);
    else
        vmaDestroyImage(g_hAllocator, allocation.m_Image, allocation.m_Allocation);
}

static void DestroyAllAllocations(std::vector<AllocInfo>& allocations)
{
    for(size_t i = allocations.size(); i--; )
        DestroyAllocation(allocations[i]);
    allocations.clear();
}

static void ValidateAllocationData(const AllocInfo& allocation)
{
    VmaAllocationInfo allocInfo;
    vmaGetAllocationInfo(g_hAllocator, allocation.m_Allocation, &allocInfo);

    uint32_t* data = (uint32_t*)allocInfo.pMappedData;
    if(allocInfo.pMappedData == nullptr)
    {
        VkResult res = vmaMapMemory(g_hAllocator, allocation.m_Allocation, (void**)&data);
        TEST(res == VK_SUCCESS);
    }

    uint32_t value = allocation.m_StartValue;
    bool ok = true;
    size_t i;
    TEST(allocInfo.size % 4 == 0);
    for(i = 0; i < allocInfo.size / sizeof(uint32_t); ++i)
    {
        if(data[i] != value++)
        {
            ok = false;
            break;
        }
    }
    TEST(ok);

    if(allocInfo.pMappedData == nullptr)
        vmaUnmapMemory(g_hAllocator, allocation.m_Allocation);
}
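
// Fill/validate protocol: each resource is filled with consecutive uint32_t
// values starting at m_StartValue, so validation needs only the allocation and
// its start value. E.g. a 16-byte buffer with m_StartValue == 7 must read back
// {7, 8, 9, 10}.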

static void RecreateAllocationResource(AllocInfo& allocation)
{
    VmaAllocationInfo allocInfo;
    vmaGetAllocationInfo(g_hAllocator, allocation.m_Allocation, &allocInfo);

    if(allocation.m_Buffer)
    {
        vkDestroyBuffer(g_hDevice, allocation.m_Buffer, g_Allocs);

        VkResult res = vkCreateBuffer(g_hDevice, &allocation.m_BufferInfo, g_Allocs, &allocation.m_Buffer);
        TEST(res == VK_SUCCESS);

        // Just to silence validation layer warnings.
        VkMemoryRequirements vkMemReq;
        vkGetBufferMemoryRequirements(g_hDevice, allocation.m_Buffer, &vkMemReq);
        TEST(vkMemReq.size >= allocation.m_BufferInfo.size);

        res = vmaBindBufferMemory(g_hAllocator, allocation.m_Allocation, allocation.m_Buffer);
        TEST(res == VK_SUCCESS);
    }
    else
    {
        vkDestroyImage(g_hDevice, allocation.m_Image, g_Allocs);

        VkResult res = vkCreateImage(g_hDevice, &allocation.m_ImageInfo, g_Allocs, &allocation.m_Image);
        TEST(res == VK_SUCCESS);

        // Just to silence validation layer warnings.
        VkMemoryRequirements vkMemReq;
        vkGetImageMemoryRequirements(g_hDevice, allocation.m_Image, &vkMemReq);

        res = vmaBindImageMemory(g_hAllocator, allocation.m_Allocation, allocation.m_Image);
        TEST(res == VK_SUCCESS);
    }
}
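
// After defragmentation moves an allocation, any VkBuffer/VkImage bound to it
// still references the old memory, so the test recreates the resource and
// rebinds it with vmaBindBufferMemory/vmaBindImageMemory, as above, before use.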

static void Defragment(AllocInfo* allocs, size_t allocCount,
    const VmaDefragmentationInfo* defragmentationInfo = nullptr,
    VmaDefragmentationStats* defragmentationStats = nullptr)
{
    std::vector<VmaAllocation> vmaAllocs(allocCount);
    for(size_t i = 0; i < allocCount; ++i)
        vmaAllocs[i] = allocs[i].m_Allocation;

    std::vector<VkBool32> allocChanged(allocCount);

    ERR_GUARD_VULKAN( vmaDefragment(g_hAllocator, vmaAllocs.data(), allocCount, allocChanged.data(),
        defragmentationInfo, defragmentationStats) );

    for(size_t i = 0; i < allocCount; ++i)
    {
        if(allocChanged[i])
        {
            RecreateAllocationResource(allocs[i]);
        }
    }
}
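
// Illustrative use (matches the tests below):
//
//   VmaDefragmentationStats defragStats;
//   Defragment(allocations.data(), allocations.size(), nullptr, &defragStats);
//   ValidateAllocationsData(allocations.data(), allocations.size());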

static void ValidateAllocationsData(const AllocInfo* allocs, size_t allocCount)
{
    std::for_each(allocs, allocs + allocCount, [](const AllocInfo& allocInfo) {
        ValidateAllocationData(allocInfo);
    });
}

void TestDefragmentationSimple()
{
    wprintf(L"Test defragmentation simple\n");

    RandomNumberGenerator rand(667);

    const VkDeviceSize BUF_SIZE = 0x10000;
    const VkDeviceSize BLOCK_SIZE = BUF_SIZE * 8;

    const VkDeviceSize MIN_BUF_SIZE = 32;
    const VkDeviceSize MAX_BUF_SIZE = BUF_SIZE * 4;
    auto RandomBufSize = [&]() -> VkDeviceSize {
        return align_up<VkDeviceSize>(rand.Generate() % (MAX_BUF_SIZE - MIN_BUF_SIZE + 1) + MIN_BUF_SIZE, 32);
    };

    VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
    bufCreateInfo.size = BUF_SIZE;
    bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;

    VmaAllocationCreateInfo exampleAllocCreateInfo = {};
    exampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;

    uint32_t memTypeIndex = UINT32_MAX;
    vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &bufCreateInfo, &exampleAllocCreateInfo, &memTypeIndex);

    VmaPoolCreateInfo poolCreateInfo = {};
    poolCreateInfo.blockSize = BLOCK_SIZE;
    poolCreateInfo.memoryTypeIndex = memTypeIndex;

    VmaPool pool;
    ERR_GUARD_VULKAN( vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool) );

    // Defragmentation of empty pool.
    {
        VmaDefragmentationInfo2 defragInfo = {};
        defragInfo.maxCpuBytesToMove = VK_WHOLE_SIZE;
        defragInfo.maxCpuAllocationsToMove = UINT32_MAX;
        defragInfo.poolCount = 1;
        defragInfo.pPools = &pool;

        VmaDefragmentationStats defragStats = {};
        VmaDefragmentationContext defragCtx = nullptr;
        VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &defragStats, &defragCtx);
        TEST(res >= VK_SUCCESS);
        vmaDefragmentationEnd(g_hAllocator, defragCtx);
        TEST(defragStats.allocationsMoved == 0 && defragStats.bytesFreed == 0 &&
            defragStats.bytesMoved == 0 && defragStats.deviceMemoryBlocksFreed == 0);
    }

    std::vector<AllocInfo> allocations;

    // persistentlyMappedOption = 0 - not persistently mapped.
    // persistentlyMappedOption = 1 - persistently mapped.
    for(uint32_t persistentlyMappedOption = 0; persistentlyMappedOption < 2; ++persistentlyMappedOption)
    {
        wprintf(L" Persistently mapped option = %u\n", persistentlyMappedOption);
        const bool persistentlyMapped = persistentlyMappedOption != 0;

        // # Test 1
        // Buffers of fixed size.
        // Fill 2 blocks. Remove odd buffers. Defragment everything.
        // Expected result: at least 1 block freed.
        {
            for(size_t i = 0; i < BLOCK_SIZE / BUF_SIZE * 2; ++i)
            {
                AllocInfo allocInfo;
                CreateBuffer(pool, bufCreateInfo, persistentlyMapped, allocInfo);
                allocations.push_back(allocInfo);
            }

            for(size_t i = 1; i < allocations.size(); ++i)
            {
                DestroyAllocation(allocations[i]);
                allocations.erase(allocations.begin() + i);
            }

            VmaDefragmentationStats defragStats;
            Defragment(allocations.data(), allocations.size(), nullptr, &defragStats);
            TEST(defragStats.allocationsMoved > 0 && defragStats.bytesMoved > 0);
            TEST(defragStats.deviceMemoryBlocksFreed >= 1);

            ValidateAllocationsData(allocations.data(), allocations.size());

            DestroyAllAllocations(allocations);
        }

        // # Test 2
        // Buffers of fixed size.
        // Fill 2 blocks. Remove odd buffers. Defragment one buffer at a time.
        // Expected result: Each of 4 iterations makes some progress.
        {
            for(size_t i = 0; i < BLOCK_SIZE / BUF_SIZE * 2; ++i)
            {
                AllocInfo allocInfo;
                CreateBuffer(pool, bufCreateInfo, persistentlyMapped, allocInfo);
                allocations.push_back(allocInfo);
            }

            for(size_t i = 1; i < allocations.size(); ++i)
            {
                DestroyAllocation(allocations[i]);
                allocations.erase(allocations.begin() + i);
            }

            VmaDefragmentationInfo defragInfo = {};
            defragInfo.maxAllocationsToMove = 1;
            defragInfo.maxBytesToMove = BUF_SIZE;

            for(size_t i = 0; i < BLOCK_SIZE / BUF_SIZE / 2; ++i)
            {
                VmaDefragmentationStats defragStats;
                Defragment(allocations.data(), allocations.size(), &defragInfo, &defragStats);
                TEST(defragStats.allocationsMoved > 0 && defragStats.bytesMoved > 0);
            }

            ValidateAllocationsData(allocations.data(), allocations.size());

            DestroyAllAllocations(allocations);
        }

        // # Test 3
        // Buffers of variable size.
        // Create a number of buffers. Remove some percent of them.
        // Defragment while having some percent of them unmovable.
        // Expected result: Just simple validation.
        {
            for(size_t i = 0; i < 100; ++i)
            {
                VkBufferCreateInfo localBufCreateInfo = bufCreateInfo;
                localBufCreateInfo.size = RandomBufSize();

                AllocInfo allocInfo;
                CreateBuffer(pool, localBufCreateInfo, persistentlyMapped, allocInfo);
                allocations.push_back(allocInfo);
            }

            const uint32_t percentToDelete = 60;
            const size_t numberToDelete = allocations.size() * percentToDelete / 100;
            for(size_t i = 0; i < numberToDelete; ++i)
            {
                size_t indexToDelete = rand.Generate() % (uint32_t)allocations.size();
                DestroyAllocation(allocations[indexToDelete]);
                allocations.erase(allocations.begin() + indexToDelete);
            }

            // Non-movable allocations will be at the beginning of allocations array.
            const uint32_t percentNonMovable = 20;
            const size_t numberNonMovable = allocations.size() * percentNonMovable / 100;
            for(size_t i = 0; i < numberNonMovable; ++i)
            {
                size_t indexNonMovable = i + rand.Generate() % (uint32_t)(allocations.size() - i);
                if(indexNonMovable != i)
                    std::swap(allocations[i], allocations[indexNonMovable]);
            }

            VmaDefragmentationStats defragStats;
            Defragment(
                allocations.data() + numberNonMovable,
                allocations.size() - numberNonMovable,
                nullptr, &defragStats);

            ValidateAllocationsData(allocations.data(), allocations.size());

            DestroyAllAllocations(allocations);
        }
    }

    /*
    Allocation that must be moved to an overlapping place using memmove().
    Create 2 buffers, the second slightly bigger than the first. Delete the first. Then defragment.
    */
    if(VMA_DEBUG_MARGIN == 0) // FAST algorithm works only when DEBUG_MARGIN is disabled.
    {
        AllocInfo allocInfo[2];

        bufCreateInfo.size = BUF_SIZE;
        CreateBuffer(pool, bufCreateInfo, false, allocInfo[0]);
        const VkDeviceSize biggerBufSize = BUF_SIZE + BUF_SIZE / 256;
        bufCreateInfo.size = biggerBufSize;
        CreateBuffer(pool, bufCreateInfo, false, allocInfo[1]);

        DestroyAllocation(allocInfo[0]);

        VmaDefragmentationStats defragStats;
        Defragment(&allocInfo[1], 1, nullptr, &defragStats);
        // If this fails, it means we couldn't do memmove with overlapping regions.
        TEST(defragStats.allocationsMoved == 1 && defragStats.bytesMoved > 0);

        ValidateAllocationsData(&allocInfo[1], 1);
        DestroyAllocation(allocInfo[1]);
    }

    vmaDestroyPool(g_hAllocator, pool);
}

void TestDefragmentationWholePool()
{
    wprintf(L"Test defragmentation whole pool\n");

    RandomNumberGenerator rand(668);

    const VkDeviceSize BUF_SIZE = 0x10000;
    const VkDeviceSize BLOCK_SIZE = BUF_SIZE * 8;

    VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
    bufCreateInfo.size = BUF_SIZE;
    bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;

    VmaAllocationCreateInfo exampleAllocCreateInfo = {};
    exampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;

    uint32_t memTypeIndex = UINT32_MAX;
    vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &bufCreateInfo, &exampleAllocCreateInfo, &memTypeIndex);

    VmaPoolCreateInfo poolCreateInfo = {};
    poolCreateInfo.blockSize = BLOCK_SIZE;
    poolCreateInfo.memoryTypeIndex = memTypeIndex;

    VmaDefragmentationStats defragStats[2];
    for(size_t caseIndex = 0; caseIndex < 2; ++caseIndex)
    {
        VmaPool pool;
        ERR_GUARD_VULKAN( vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool) );

        std::vector<AllocInfo> allocations;

        // Buffers of fixed size.
        // Fill 2 blocks. Remove odd buffers. Defragment all of them.
        for(size_t i = 0; i < BLOCK_SIZE / BUF_SIZE * 2; ++i)
        {
            AllocInfo allocInfo;
            CreateBuffer(pool, bufCreateInfo, false, allocInfo);
            allocations.push_back(allocInfo);
        }

        for(size_t i = 1; i < allocations.size(); ++i)
        {
            DestroyAllocation(allocations[i]);
            allocations.erase(allocations.begin() + i);
        }

        VmaDefragmentationInfo2 defragInfo = {};
        defragInfo.maxCpuAllocationsToMove = UINT32_MAX;
        defragInfo.maxCpuBytesToMove = VK_WHOLE_SIZE;
        std::vector<VmaAllocation> allocationsToDefrag;
        if(caseIndex == 0)
        {
            defragInfo.poolCount = 1;
            defragInfo.pPools = &pool;
        }
        else
        {
            const size_t allocCount = allocations.size();
            allocationsToDefrag.resize(allocCount);
            std::transform(
                allocations.begin(), allocations.end(),
                allocationsToDefrag.begin(),
                [](const AllocInfo& allocInfo) { return allocInfo.m_Allocation; });
            defragInfo.allocationCount = (uint32_t)allocCount;
            defragInfo.pAllocations = allocationsToDefrag.data();
        }

        VmaDefragmentationContext defragCtx = VK_NULL_HANDLE;
        VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &defragStats[caseIndex], &defragCtx);
        TEST(res >= VK_SUCCESS);
        vmaDefragmentationEnd(g_hAllocator, defragCtx);

        TEST(defragStats[caseIndex].allocationsMoved > 0 && defragStats[caseIndex].bytesMoved > 0);

        ValidateAllocationsData(allocations.data(), allocations.size());

        DestroyAllAllocations(allocations);

        vmaDestroyPool(g_hAllocator, pool);
    }

    // Both cases (whole pool vs explicit allocation list) must produce identical results.
    TEST(defragStats[0].bytesMoved == defragStats[1].bytesMoved);
    TEST(defragStats[0].allocationsMoved == defragStats[1].allocationsMoved);
    TEST(defragStats[0].bytesFreed == defragStats[1].bytesFreed);
    TEST(defragStats[0].deviceMemoryBlocksFreed == defragStats[1].deviceMemoryBlocksFreed);
}
1635
TestDefragmentationFull()1636 void TestDefragmentationFull()
1637 {
1638 std::vector<AllocInfo> allocations;
1639
1640 // Create initial allocations.
1641 for(size_t i = 0; i < 400; ++i)
1642 {
1643 AllocInfo allocation;
1644 CreateAllocation(allocation);
1645 allocations.push_back(allocation);
1646 }
1647
1648 // Delete random allocations
1649 const size_t allocationsToDeletePercent = 80;
1650 size_t allocationsToDelete = allocations.size() * allocationsToDeletePercent / 100;
1651 for(size_t i = 0; i < allocationsToDelete; ++i)
1652 {
1653 size_t index = (size_t)rand() % allocations.size();
1654 DestroyAllocation(allocations[index]);
1655 allocations.erase(allocations.begin() + index);
1656 }
1657
1658 for(size_t i = 0; i < allocations.size(); ++i)
1659 ValidateAllocationData(allocations[i]);
1660
1661 //SaveAllocatorStatsToFile(L"Before.csv");
1662
1663 {
1664 std::vector<VmaAllocation> vmaAllocations(allocations.size());
1665 for(size_t i = 0; i < allocations.size(); ++i)
1666 vmaAllocations[i] = allocations[i].m_Allocation;
1667
1668 const size_t nonMovablePercent = 0;
1669 size_t nonMovableCount = vmaAllocations.size() * nonMovablePercent / 100;
1670 for(size_t i = 0; i < nonMovableCount; ++i)
1671 {
1672 size_t index = (size_t)rand() % vmaAllocations.size();
1673 vmaAllocations.erase(vmaAllocations.begin() + index);
1674 }
1675
1676 const uint32_t defragCount = 1;
1677 for(uint32_t defragIndex = 0; defragIndex < defragCount; ++defragIndex)
1678 {
1679 std::vector<VkBool32> allocationsChanged(vmaAllocations.size());
1680
1681 VmaDefragmentationInfo defragmentationInfo;
1682 defragmentationInfo.maxAllocationsToMove = UINT_MAX;
1683 defragmentationInfo.maxBytesToMove = SIZE_MAX;
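// vmaDefragment() is the older, CPU-only entry point. allocationsChanged
// receives VK_TRUE for every allocation that was moved, so the buffers bound
// to those allocations must be recreated and rebound afterwards.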
1684
1685 wprintf(L"Defragmentation #%u\n", defragIndex);
1686
1687 time_point begTime = std::chrono::high_resolution_clock::now();
1688
1689 VmaDefragmentationStats stats;
1690 VkResult res = vmaDefragment(g_hAllocator, vmaAllocations.data(), vmaAllocations.size(), allocationsChanged.data(), &defragmentationInfo, &stats);
1691 TEST(res >= VK_SUCCESS);
1692
1693 float defragmentDuration = ToFloatSeconds(std::chrono::high_resolution_clock::now() - begTime);
1694
1695 wprintf(L"Moved allocations %u, bytes %llu\n", stats.allocationsMoved, stats.bytesMoved);
1696 wprintf(L"Freed blocks %u, bytes %llu\n", stats.deviceMemoryBlocksFreed, stats.bytesFreed);
1697 wprintf(L"Time: %.2f s\n", defragmentDuration);
1698
1699 for(size_t i = 0; i < vmaAllocations.size(); ++i)
1700 {
1701 if(allocationsChanged[i])
1702 {
1703 RecreateAllocationResource(allocations[i]);
1704 }
1705 }
1706
1707 for(size_t i = 0; i < allocations.size(); ++i)
1708 ValidateAllocationData(allocations[i]);
1709
1710 //wchar_t fileName[MAX_PATH];
1711 //swprintf(fileName, MAX_PATH, L"After_%02u.csv", defragIndex);
1712 //SaveAllocatorStatsToFile(fileName);
1713 }
1714 }
1715
1716 // Destroy all remaining allocations.
1717 DestroyAllAllocations(allocations);
1718 }
1719
1720 static void TestDefragmentationGpu()
1721 {
1722 wprintf(L"Test defragmentation GPU\n");
1723 g_MemoryAliasingWarningEnabled = false;
1724
1725 std::vector<AllocInfo> allocations;
1726
1727 // Create enough allocations to reliably fill 3 new 256 MB blocks.
1728 const VkDeviceSize bufSizeMin = 5ull * 1024 * 1024;
1729 const VkDeviceSize bufSizeMax = 10ull * 1024 * 1024;
1730 const VkDeviceSize totalSize = 3ull * 256 * 1024 * 1024;
1731 const size_t bufCount = (size_t)(totalSize / bufSizeMin);
1732 const size_t percentToLeave = 30;
1733 const size_t percentNonMovable = 3;
1734 RandomNumberGenerator rand = { 234522 };
1735
1736 VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
1737
1738 VmaAllocationCreateInfo allocCreateInfo = {};
1739 allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
1740 allocCreateInfo.flags = 0;
1741
1742 // Create all intended buffers.
1743 for(size_t i = 0; i < bufCount; ++i)
1744 {
1745 bufCreateInfo.size = align_up(rand.Generate() % (bufSizeMax - bufSizeMin) + bufSizeMin, 32ull);
1746
1747 if(rand.Generate() % 100 < percentNonMovable)
1748 {
1749 bufCreateInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
1750 VK_BUFFER_USAGE_TRANSFER_DST_BIT |
1751 VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
1752 allocCreateInfo.pUserData = (void*)(uintptr_t)2;
1753 }
1754 else
1755 {
1756 // Different usage just to see different color in output from VmaDumpVis.
1757 bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
1758 VK_BUFFER_USAGE_TRANSFER_DST_BIT |
1759 VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
1760 // And in JSON dump.
1761 allocCreateInfo.pUserData = (void*)(uintptr_t)1;
1762 }
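// pUserData doubles as a movability marker here: 1 = movable, 2 = non-movable.
// The defragmentation setup below only submits allocations tagged with 1.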
1763
1764 AllocInfo alloc;
1765 alloc.CreateBuffer(bufCreateInfo, allocCreateInfo);
1766 alloc.m_StartValue = rand.Generate();
1767 allocations.push_back(alloc);
1768 }
1769
1770 // Destroy some percentage of them.
1771 {
1772 const size_t buffersToDestroy = round_div<size_t>(bufCount * (100 - percentToLeave), 100);
1773 for(size_t i = 0; i < buffersToDestroy; ++i)
1774 {
1775 const size_t index = rand.Generate() % allocations.size();
1776 allocations[index].Destroy();
1777 allocations.erase(allocations.begin() + index);
1778 }
1779 }
1780
1781 // Fill them with meaningful data.
1782 UploadGpuData(allocations.data(), allocations.size());
1783
1784 wchar_t fileName[MAX_PATH];
1785 swprintf_s(fileName, L"GPU_defragmentation_A_before.json");
1786 SaveAllocatorStatsToFile(fileName);
1787
1788 // Defragment using GPU only.
1789 {
1790 const size_t allocCount = allocations.size();
1791
1792 std::vector<VmaAllocation> allocationPtrs;
1793 std::vector<VkBool32> allocationChanged;
1794 std::vector<size_t> allocationOriginalIndex;
1795
1796 for(size_t i = 0; i < allocCount; ++i)
1797 {
1798 VmaAllocationInfo allocInfo = {};
1799 vmaGetAllocationInfo(g_hAllocator, allocations[i].m_Allocation, &allocInfo);
1800 if((uintptr_t)allocInfo.pUserData == 1) // Movable
1801 {
1802 allocationPtrs.push_back(allocations[i].m_Allocation);
1803 allocationChanged.push_back(VK_FALSE);
1804 allocationOriginalIndex.push_back(i);
1805 }
1806 }
1807
1808 const size_t movableAllocCount = allocationPtrs.size();
1809
1810 BeginSingleTimeCommands();
1811
1812 VmaDefragmentationInfo2 defragInfo = {};
1813 defragInfo.flags = 0;
1814 defragInfo.allocationCount = (uint32_t)movableAllocCount;
1815 defragInfo.pAllocations = allocationPtrs.data();
1816 defragInfo.pAllocationsChanged = allocationChanged.data();
1817 defragInfo.maxGpuBytesToMove = VK_WHOLE_SIZE;
1818 defragInfo.maxGpuAllocationsToMove = UINT32_MAX;
1819 defragInfo.commandBuffer = g_hTemporaryCommandBuffer;
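// Passing a command buffer lets VMA record GPU-side copy commands into it.
// It must be submitted and completed (EndSingleTimeCommands) before
// vmaDefragmentationEnd() is called.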
1820
1821 VmaDefragmentationStats stats = {};
1822 VmaDefragmentationContext ctx = VK_NULL_HANDLE;
1823 VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &stats, &ctx);
1824 TEST(res >= VK_SUCCESS);
1825
1826 EndSingleTimeCommands();
1827
1828 vmaDefragmentationEnd(g_hAllocator, ctx);
1829
1830 for(size_t i = 0; i < movableAllocCount; ++i)
1831 {
1832 if(allocationChanged[i])
1833 {
1834 const size_t origAllocIndex = allocationOriginalIndex[i];
1835 RecreateAllocationResource(allocations[origAllocIndex]);
1836 }
1837 }
1838
1839 // If corruption detection is enabled, GPU defragmentation may not work on
1840 // memory types that have this detection active, e.g. on Intel.
1841 #if !defined(VMA_DEBUG_DETECT_CORRUPTION) || VMA_DEBUG_DETECT_CORRUPTION == 0
1842 TEST(stats.allocationsMoved > 0 && stats.bytesMoved > 0);
1843 TEST(stats.deviceMemoryBlocksFreed > 0 && stats.bytesFreed > 0);
1844 #endif
1845 }
1846
1847 ValidateGpuData(allocations.data(), allocations.size());
1848
1849 swprintf_s(fileName, L"GPU_defragmentation_B_after.json");
1850 SaveAllocatorStatsToFile(fileName);
1851
1852 // Destroy all remaining buffers.
1853 for(size_t i = allocations.size(); i--; )
1854 {
1855 allocations[i].Destroy();
1856 }
1857
1858 g_MemoryAliasingWarningEnabled = true;
1859 }
1860
1861 static void ProcessDefragmentationStepInfo(VmaDefragmentationPassInfo& stepInfo)
1862 {
1863 std::vector<VkImageMemoryBarrier> beginImageBarriers;
1864 std::vector<VkImageMemoryBarrier> finalizeImageBarriers;
1865
1866 VkPipelineStageFlags beginSrcStageMask = 0;
1867 VkPipelineStageFlags beginDstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
1868
1869 VkPipelineStageFlags finalizeSrcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
1870 VkPipelineStageFlags finalizeDstStageMask = 0;
1871
1872 bool wantsMemoryBarrier = false;
1873
1874 VkMemoryBarrier beginMemoryBarrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER };
1875 VkMemoryBarrier finalizeMemoryBarrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER };
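// Two barrier batches: the "begin" barriers transition resources into
// TRANSFER_SRC/TRANSFER_DST layouts before the copies, and the "finalize"
// barriers restore the original layouts and visibility afterwards.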
1876
1877 for(uint32_t i = 0; i < stepInfo.moveCount; ++i)
1878 {
1879 VmaAllocationInfo info;
1880 vmaGetAllocationInfo(g_hAllocator, stepInfo.pMoves[i].allocation, &info);
1881
1882 AllocInfo *allocInfo = (AllocInfo *)info.pUserData;
1883
1884 if(allocInfo->m_Image)
1885 {
1886 VkImage newImage;
1887
1888 const VkResult result = vkCreateImage(g_hDevice, &allocInfo->m_ImageInfo, g_Allocs, &newImage);
1889 TEST(result >= VK_SUCCESS);
1890
1891 vkBindImageMemory(g_hDevice, newImage, stepInfo.pMoves[i].memory, stepInfo.pMoves[i].offset);
1892 allocInfo->m_NewImage = newImage;
1893
1894 // Keep track of our pipeline stages that we need to wait/signal on
1895 beginSrcStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
1896 finalizeDstStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
1897
1898 // We need one pipeline barrier and two image layout transitions here.
1899 // First, transition the newly created image to VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL.
1900 // Second, transition the old image to VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL.
1901
1902 VkImageSubresourceRange subresourceRange = {
1903 VK_IMAGE_ASPECT_COLOR_BIT,
1904 0, VK_REMAINING_MIP_LEVELS,
1905 0, VK_REMAINING_ARRAY_LAYERS
1906 };
1907
1908 VkImageMemoryBarrier barrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER };
1909 barrier.srcAccessMask = 0;
1910 barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; // The copy destination needs transfer-write access.
1911 barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
1912 barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
1913 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1914 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1915 barrier.image = newImage;
1916 barrier.subresourceRange = subresourceRange;
1917
1918 beginImageBarriers.push_back(barrier);
1919
1920 // Second barrier to convert the existing image. This one actually needs a real barrier
1921 barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
1922 barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
1923 barrier.oldLayout = allocInfo->m_ImageLayout;
1924 barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
1925 barrier.image = allocInfo->m_Image;
1926
1927 beginImageBarriers.push_back(barrier);
1928
1929 // And lastly we need a barrier that turns our new image into the layout of the old one
1930 barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
1931 barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
1932 barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
1933 barrier.newLayout = allocInfo->m_ImageLayout;
1934 barrier.image = newImage;
1935
1936 finalizeImageBarriers.push_back(barrier);
1937 }
1938 else if(allocInfo->m_Buffer)
1939 {
1940 VkBuffer newBuffer;
1941
1942 const VkResult result = vkCreateBuffer(g_hDevice, &allocInfo->m_BufferInfo, g_Allocs, &newBuffer);
1943 TEST(result >= VK_SUCCESS);
1944
1945 vkBindBufferMemory(g_hDevice, newBuffer, stepInfo.pMoves[i].memory, stepInfo.pMoves[i].offset);
1946 allocInfo->m_NewBuffer = newBuffer;
1947
1948 // Keep track of our pipeline stages that we need to wait/signal on
1949 beginSrcStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
1950 finalizeDstStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
1951
1952 beginMemoryBarrier.srcAccessMask |= VK_ACCESS_MEMORY_WRITE_BIT;
1953 beginMemoryBarrier.dstAccessMask |= VK_ACCESS_TRANSFER_READ_BIT;
1954
1955 finalizeMemoryBarrier.srcAccessMask |= VK_ACCESS_TRANSFER_WRITE_BIT;
1956 finalizeMemoryBarrier.dstAccessMask |= VK_ACCESS_MEMORY_READ_BIT;
1957
1958 wantsMemoryBarrier = true;
1959 }
1960 }
1961
1962 if(!beginImageBarriers.empty() || wantsMemoryBarrier)
1963 {
1964 const uint32_t memoryBarrierCount = wantsMemoryBarrier ? 1 : 0;
1965
1966 vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, beginSrcStageMask, beginDstStageMask, 0,
1967 memoryBarrierCount, &beginMemoryBarrier,
1968 0, nullptr,
1969 (uint32_t)beginImageBarriers.size(), beginImageBarriers.data());
1970 }
1971
1972 for(uint32_t i = 0; i < stepInfo.moveCount; ++i)
1973 {
1974 VmaAllocationInfo info;
1975 vmaGetAllocationInfo(g_hAllocator, stepInfo.pMoves[i].allocation, &info);
1976
1977 AllocInfo *allocInfo = (AllocInfo *)info.pUserData;
1978
1979 if(allocInfo->m_Image)
1980 {
1981 std::vector<VkImageCopy> imageCopies;
1982
1983 // Copy all mips of the source image into the target image
1984 VkOffset3D offset = { 0, 0, 0 };
1985 VkExtent3D extent = allocInfo->m_ImageInfo.extent;
1986
1987 VkImageSubresourceLayers subresourceLayers = {
1988 VK_IMAGE_ASPECT_COLOR_BIT,
1989 0,
1990 0, 1
1991 };
1992
1993 for(uint32_t mip = 0; mip < allocInfo->m_ImageInfo.mipLevels; ++mip)
1994 {
1995 subresourceLayers.mipLevel = mip;
1996
1997 VkImageCopy imageCopy{
1998 subresourceLayers,
1999 offset,
2000 subresourceLayers,
2001 offset,
2002 extent
2003 };
2004
2005 imageCopies.push_back(imageCopy);
2006
2007 extent.width = std::max(uint32_t(1), extent.width >> 1);
2008 extent.height = std::max(uint32_t(1), extent.height >> 1);
2009 extent.depth = std::max(uint32_t(1), extent.depth >> 1);
2010 }
2011
2012 vkCmdCopyImage(
2013 g_hTemporaryCommandBuffer,
2014 allocInfo->m_Image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
2015 allocInfo->m_NewImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
2016 (uint32_t)imageCopies.size(), imageCopies.data());
2017 }
2018 else if(allocInfo->m_Buffer)
2019 {
2020 VkBufferCopy region = {
2021 0,
2022 0,
2023 allocInfo->m_BufferInfo.size };
2024
2025 vkCmdCopyBuffer(g_hTemporaryCommandBuffer,
2026 allocInfo->m_Buffer, allocInfo->m_NewBuffer,
2027 1, &region);
2028 }
2029 }
2030
2031 if(!finalizeImageBarriers.empty() || wantsMemoryBarrier)
2032 {
2033 const uint32_t memoryBarrierCount = wantsMemoryBarrier ? 1 : 0;
2034
2035 vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, finalizeSrcStageMask, finalizeDstStageMask, 0,
2036 memoryBarrierCount, &finalizeMemoryBarrier,
2037 0, nullptr,
2038 (uint32_t)finalizeImageBarriers.size(), finalizeImageBarriers.data());
2039 }
2040 }
2041
2042
2043 static void TestDefragmentationIncrementalBasic()
2044 {
2045 wprintf(L"Test defragmentation incremental basic\n");
2046 g_MemoryAliasingWarningEnabled = false;
2047
2048 std::vector<AllocInfo> allocations;
2049
2050 // Create enough allocations to reliably fill 3 new 256 MB blocks.
2051 const std::array<uint32_t, 3> imageSizes = { 256, 512, 1024 };
2052 const VkDeviceSize bufSizeMin = 5ull * 1024 * 1024;
2053 const VkDeviceSize bufSizeMax = 10ull * 1024 * 1024;
2054 const VkDeviceSize totalSize = 3ull * 256 * 1024 * 1024;
2055 const size_t imageCount = totalSize / ((size_t)imageSizes[0] * imageSizes[0] * 4) / 2;
2056 const size_t bufCount = (size_t)(totalSize / bufSizeMin) / 2;
2057 const size_t percentToLeave = 30;
2058 RandomNumberGenerator rand = { 234522 };
2059
2060 VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
2061 imageInfo.imageType = VK_IMAGE_TYPE_2D;
2062 imageInfo.extent.depth = 1;
2063 imageInfo.mipLevels = 1;
2064 imageInfo.arrayLayers = 1;
2065 imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
2066 imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
2067 imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
2068 imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
2069 imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
2070
2071 VmaAllocationCreateInfo allocCreateInfo = {};
2072 allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
2073 allocCreateInfo.flags = 0;
2074
2075 // Create all intended images.
2076 for(size_t i = 0; i < imageCount; ++i)
2077 {
2078 const uint32_t size = imageSizes[rand.Generate() % 3];
2079
2080 imageInfo.extent.width = size;
2081 imageInfo.extent.height = size;
2082
2083 AllocInfo alloc;
2084 alloc.CreateImage(imageInfo, allocCreateInfo, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
2085 alloc.m_StartValue = 0;
2086
2087 allocations.push_back(alloc);
2088 }
2089
2090 // And all buffers
2091 VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2092
2093 for(size_t i = 0; i < bufCount; ++i)
2094 {
2095 bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
2096 bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
2097
2098 AllocInfo alloc;
2099 alloc.CreateBuffer(bufCreateInfo, allocCreateInfo);
2100 alloc.m_StartValue = 0;
2101
2102 allocations.push_back(alloc);
2103 }
2104
2105 // Destroy some percentage of them.
2106 {
2107 const size_t allocationsToDestroy = round_div<size_t>((imageCount + bufCount) * (100 - percentToLeave), 100);
2108 for(size_t i = 0; i < allocationsToDestroy; ++i)
2109 {
2110 const size_t index = rand.Generate() % allocations.size();
2111 allocations[index].Destroy();
2112 allocations.erase(allocations.begin() + index);
2113 }
2114 }
2115
2116 {
2117 // Set our user data pointers. A real application should probably be more clever here
2118 const size_t allocationCount = allocations.size();
2119 for(size_t i = 0; i < allocationCount; ++i)
2120 {
2121 AllocInfo &alloc = allocations[i];
2122 vmaSetAllocationUserData(g_hAllocator, alloc.m_Allocation, &alloc);
2123 }
2124 }
2125
2126 // Fill them with meaningful data.
2127 UploadGpuData(allocations.data(), allocations.size());
2128
2129 wchar_t fileName[MAX_PATH];
2130 swprintf_s(fileName, L"GPU_defragmentation_incremental_basic_A_before.json");
2131 SaveAllocatorStatsToFile(fileName);
2132
2133 // Defragment using GPU only.
2134 {
2135 const size_t allocCount = allocations.size();
2136
2137 std::vector<VmaAllocation> allocationPtrs;
2138
2139 for(size_t i = 0; i < allocCount; ++i)
2140 {
2141 allocationPtrs.push_back(allocations[i].m_Allocation);
2142 }
2143
2144 const size_t movableAllocCount = allocationPtrs.size();
2145
2146 VmaDefragmentationInfo2 defragInfo = {};
2147 defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_INCREMENTAL;
2148 defragInfo.allocationCount = (uint32_t)movableAllocCount;
2149 defragInfo.pAllocations = allocationPtrs.data();
2150 defragInfo.maxGpuBytesToMove = VK_WHOLE_SIZE;
2151 defragInfo.maxGpuAllocationsToMove = UINT32_MAX;
2152
2153 VmaDefragmentationStats stats = {};
2154 VmaDefragmentationContext ctx = VK_NULL_HANDLE;
2155 VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &stats, &ctx);
2156 TEST(res >= VK_SUCCESS);
2157
2158 res = VK_NOT_READY;
2159
2160 std::vector<VmaDefragmentationPassMoveInfo> moveInfo;
2161 moveInfo.resize(movableAllocCount);
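// Incremental protocol: vmaBeginDefragmentationPass() fills pMoves with the
// moves to perform, the application records and submits its own copy
// commands, then vmaEndDefragmentationPass() commits them. VK_NOT_READY
// means more passes are needed; VK_SUCCESS means defragmentation is done.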
2162
2163 while(res == VK_NOT_READY)
2164 {
2165 VmaDefragmentationPassInfo stepInfo = {};
2166 stepInfo.pMoves = moveInfo.data();
2167 stepInfo.moveCount = (uint32_t)moveInfo.size();
2168
2169 res = vmaBeginDefragmentationPass(g_hAllocator, ctx, &stepInfo);
2170 TEST(res >= VK_SUCCESS);
2171
2172 BeginSingleTimeCommands();
2174 ProcessDefragmentationStepInfo(stepInfo);
2175 EndSingleTimeCommands();
2176
2177 res = vmaEndDefragmentationPass(g_hAllocator, ctx);
2178
2179 // Destroy old buffers/images and replace them with new handles.
2180 for(size_t i = 0; i < stepInfo.moveCount; ++i)
2181 {
2182 VmaAllocation const alloc = stepInfo.pMoves[i].allocation;
2183 VmaAllocationInfo vmaAllocInfo;
2184 vmaGetAllocationInfo(g_hAllocator, alloc, &vmaAllocInfo);
2185 AllocInfo* allocInfo = (AllocInfo*)vmaAllocInfo.pUserData;
2186 if(allocInfo->m_Buffer)
2187 {
2188 assert(allocInfo->m_NewBuffer && !allocInfo->m_Image && !allocInfo->m_NewImage);
2189 vkDestroyBuffer(g_hDevice, allocInfo->m_Buffer, g_Allocs);
2190 allocInfo->m_Buffer = allocInfo->m_NewBuffer;
2191 allocInfo->m_NewBuffer = VK_NULL_HANDLE;
2192 }
2193 else if(allocInfo->m_Image)
2194 {
2195 assert(allocInfo->m_NewImage && !allocInfo->m_Buffer && !allocInfo->m_NewBuffer);
2196 vkDestroyImage(g_hDevice, allocInfo->m_Image, g_Allocs);
2197 allocInfo->m_Image = allocInfo->m_NewImage;
2198 allocInfo->m_NewImage = VK_NULL_HANDLE;
2199 }
2200 else
2201 assert(0);
2202 }
2203 }
2204
2205 TEST(res >= VK_SUCCESS);
2206 vmaDefragmentationEnd(g_hAllocator, ctx);
2207
2208 // If corruption detection is enabled, GPU defragmentation may not work on
2209 // memory types that have this detection active, e.g. on Intel.
2210 #if !defined(VMA_DEBUG_DETECT_CORRUPTION) || VMA_DEBUG_DETECT_CORRUPTION == 0
2211 TEST(stats.allocationsMoved > 0 && stats.bytesMoved > 0);
2212 TEST(stats.deviceMemoryBlocksFreed > 0 && stats.bytesFreed > 0);
2213 #endif
2214 }
2215
2216 //ValidateGpuData(allocations.data(), allocations.size());
2217
2218 swprintf_s(fileName, L"GPU_defragmentation_incremental_basic_B_after.json");
2219 SaveAllocatorStatsToFile(fileName);
2220
2221 // Destroy all remaining buffers and images.
2222 for(size_t i = allocations.size(); i--; )
2223 {
2224 allocations[i].Destroy();
2225 }
2226
2227 g_MemoryAliasingWarningEnabled = true;
2228 }
2229
2230 void TestDefragmentationIncrementalComplex()
2231 {
2232 wprintf(L"Test defragmentation incremental complex\n");
2233 g_MemoryAliasingWarningEnabled = false;
2234
2235 std::vector<AllocInfo> allocations;
2236
2237 // Create enough allocations to reliably fill 3 new 256 MB blocks.
2238 const std::array<uint32_t, 3> imageSizes = { 256, 512, 1024 };
2239 const VkDeviceSize bufSizeMin = 5ull * 1024 * 1024;
2240 const VkDeviceSize bufSizeMax = 10ull * 1024 * 1024;
2241 const VkDeviceSize totalSize = 3ull * 256 * 1024 * 1024;
2242 const size_t imageCount = (size_t)(totalSize / (imageSizes[0] * imageSizes[0] * 4)) / 2;
2243 const size_t bufCount = (size_t)(totalSize / bufSizeMin) / 2;
2244 const size_t percentToLeave = 30;
2245 RandomNumberGenerator rand = { 234522 };
2246
2247 VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
2248 imageInfo.imageType = VK_IMAGE_TYPE_2D;
2249 imageInfo.extent.depth = 1;
2250 imageInfo.mipLevels = 1;
2251 imageInfo.arrayLayers = 1;
2252 imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
2253 imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
2254 imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
2255 imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
2256 imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
2257
2258 VmaAllocationCreateInfo allocCreateInfo = {};
2259 allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
2260 allocCreateInfo.flags = 0;
2261
2262 // Create all intended images.
2263 for(size_t i = 0; i < imageCount; ++i)
2264 {
2265 const uint32_t size = imageSizes[rand.Generate() % 3];
2266
2267 imageInfo.extent.width = size;
2268 imageInfo.extent.height = size;
2269
2270 AllocInfo alloc;
2271 alloc.CreateImage(imageInfo, allocCreateInfo, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
2272 alloc.m_StartValue = 0;
2273
2274 allocations.push_back(alloc);
2275 }
2276
2277 // And all buffers
2278 VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2279
2280 for(size_t i = 0; i < bufCount; ++i)
2281 {
2282 bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
2283 bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
2284
2285 AllocInfo alloc;
2286 alloc.CreateBuffer(bufCreateInfo, allocCreateInfo);
2287 alloc.m_StartValue = 0;
2288
2289 allocations.push_back(alloc);
2290 }
2291
2292 // Destroy some percentage of them.
2293 {
2294 const size_t allocationsToDestroy = round_div<size_t>((imageCount + bufCount) * (100 - percentToLeave), 100);
2295 for(size_t i = 0; i < allocationsToDestroy; ++i)
2296 {
2297 const size_t index = rand.Generate() % allocations.size();
2298 allocations[index].Destroy();
2299 allocations.erase(allocations.begin() + index);
2300 }
2301 }
2302
2303 {
2304 // Set our user data pointers. A real application should probably be more clever here
2305 const size_t allocationCount = allocations.size();
2306 for(size_t i = 0; i < allocationCount; ++i)
2307 {
2308 AllocInfo &alloc = allocations[i];
2309 vmaSetAllocationUserData(g_hAllocator, alloc.m_Allocation, &alloc);
2310 }
2311 }
2312
2313 // Fill them with meaningful data.
2314 UploadGpuData(allocations.data(), allocations.size());
2315
2316 wchar_t fileName[MAX_PATH];
2317 swprintf_s(fileName, L"GPU_defragmentation_incremental_complex_A_before.json");
2318 SaveAllocatorStatsToFile(fileName);
2319
2320 std::vector<AllocInfo> additionalAllocations;
2321
2322 #define MakeAdditionalAllocation() \
2323 do { \
2324 { \
2325 bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16); \
2326 bufCreateInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT; \
2327 \
2328 AllocInfo alloc; \
2329 alloc.CreateBuffer(bufCreateInfo, allocCreateInfo); \
2330 \
2331 additionalAllocations.push_back(alloc); \
2332 } \
2333 } while(0)
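// The macro above simulates an application that keeps allocating while an
// incremental defragmentation is in progress - the "complex" part of this test.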
2334
2335 // Defragment using GPU only.
2336 {
2337 const size_t allocCount = allocations.size();
2338
2339 std::vector<VmaAllocation> allocationPtrs;
2340
2341 for(size_t i = 0; i < allocCount; ++i)
2342 {
2346 allocationPtrs.push_back(allocations[i].m_Allocation);
2347 }
2348
2349 const size_t movableAllocCount = allocationPtrs.size();
2350
2351 VmaDefragmentationInfo2 defragInfo = {};
2352 defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_INCREMENTAL;
2353 defragInfo.allocationCount = (uint32_t)movableAllocCount;
2354 defragInfo.pAllocations = allocationPtrs.data();
2355 defragInfo.maxGpuBytesToMove = VK_WHOLE_SIZE;
2356 defragInfo.maxGpuAllocationsToMove = UINT32_MAX;
2357
2358 VmaDefragmentationStats stats = {};
2359 VmaDefragmentationContext ctx = VK_NULL_HANDLE;
2360 VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &stats, &ctx);
2361 TEST(res >= VK_SUCCESS);
2362
2363 res = VK_NOT_READY;
2364
2365 std::vector<VmaDefragmentationPassMoveInfo> moveInfo;
2366 moveInfo.resize(movableAllocCount);
2367
2368 MakeAdditionalAllocation();
2369
2370 while(res == VK_NOT_READY)
2371 {
2372 VmaDefragmentationPassInfo stepInfo = {};
2373 stepInfo.pMoves = moveInfo.data();
2374 stepInfo.moveCount = (uint32_t)moveInfo.size();
2375
2376 res = vmaBeginDefragmentationPass(g_hAllocator, ctx, &stepInfo);
2377 TEST(res >= VK_SUCCESS);
2378
2379 MakeAdditionalAllocation();
2380
2381 BeginSingleTimeCommands();
2382 ProcessDefragmentationStepInfo(stepInfo);
2383 EndSingleTimeCommands();
2384
2385 res = vmaEndDefragmentationPass(g_hAllocator, ctx);
2386
2387 // Destroy old buffers/images and replace them with new handles.
2388 for(size_t i = 0; i < stepInfo.moveCount; ++i)
2389 {
2390 VmaAllocation const alloc = stepInfo.pMoves[i].allocation;
2391 VmaAllocationInfo vmaAllocInfo;
2392 vmaGetAllocationInfo(g_hAllocator, alloc, &vmaAllocInfo);
2393 AllocInfo* allocInfo = (AllocInfo*)vmaAllocInfo.pUserData;
2394 if(allocInfo->m_Buffer)
2395 {
2396 assert(allocInfo->m_NewBuffer && !allocInfo->m_Image && !allocInfo->m_NewImage);
2397 vkDestroyBuffer(g_hDevice, allocInfo->m_Buffer, g_Allocs);
2398 allocInfo->m_Buffer = allocInfo->m_NewBuffer;
2399 allocInfo->m_NewBuffer = VK_NULL_HANDLE;
2400 }
2401 else if(allocInfo->m_Image)
2402 {
2403 assert(allocInfo->m_NewImage && !allocInfo->m_Buffer && !allocInfo->m_NewBuffer);
2404 vkDestroyImage(g_hDevice, allocInfo->m_Image, g_Allocs);
2405 allocInfo->m_Image = allocInfo->m_NewImage;
2406 allocInfo->m_NewImage = VK_NULL_HANDLE;
2407 }
2408 else
2409 assert(0);
2410 }
2411
2412 MakeAdditionalAllocation();
2413 }
2414
2415 TEST(res >= VK_SUCCESS);
2416 vmaDefragmentationEnd(g_hAllocator, ctx);
2417
2418 // If corruption detection is enabled, GPU defragmentation may not work on
2419 // memory types that have this detection active, e.g. on Intel.
2420 #if !defined(VMA_DEBUG_DETECT_CORRUPTION) || VMA_DEBUG_DETECT_CORRUPTION == 0
2421 TEST(stats.allocationsMoved > 0 && stats.bytesMoved > 0);
2422 TEST(stats.deviceMemoryBlocksFreed > 0 && stats.bytesFreed > 0);
2423 #endif
2424 }
2425
2426 //ValidateGpuData(allocations.data(), allocations.size());
2427
2428 swprintf_s(fileName, L"GPU_defragmentation_incremental_complex_B_after.json");
2429 SaveAllocatorStatsToFile(fileName);
2430
2431 // Destroy all remaining buffers.
2432 for(size_t i = allocations.size(); i--; )
2433 {
2434 allocations[i].Destroy();
2435 }
2436
2437 for(size_t i = additionalAllocations.size(); i--; )
2438 {
2439 additionalAllocations[i].Destroy();
2440 }
2441
2442 g_MemoryAliasingWarningEnabled = true;
2443 }
2444
2445
2446 static void TestUserData()
2447 {
2448 VkResult res;
2449
2450 VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2451 bufCreateInfo.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
2452 bufCreateInfo.size = 0x10000;
2453
2454 for(uint32_t testIndex = 0; testIndex < 2; ++testIndex)
2455 {
2456 // Opaque pointer
2457 {
2458
2459 void* numberAsPointer = (void*)(size_t)0xC2501FF3u;
2460 void* pointerToSomething = &res;
2461
2462 VmaAllocationCreateInfo allocCreateInfo = {};
2463 allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
2464 allocCreateInfo.pUserData = numberAsPointer;
2465 if(testIndex == 1)
2466 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
2467
2468 VkBuffer buf; VmaAllocation alloc; VmaAllocationInfo allocInfo;
2469 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2470 TEST(res == VK_SUCCESS);
2471 TEST(allocInfo.pUserData == numberAsPointer);
2472
2473 vmaGetAllocationInfo(g_hAllocator, alloc, &allocInfo);
2474 TEST(allocInfo.pUserData == numberAsPointer);
2475
2476 vmaSetAllocationUserData(g_hAllocator, alloc, pointerToSomething);
2477 vmaGetAllocationInfo(g_hAllocator, alloc, &allocInfo);
2478 TEST(allocInfo.pUserData == pointerToSomething);
2479
2480 vmaDestroyBuffer(g_hAllocator, buf, alloc);
2481 }
2482
2483 // String
2484 {
2485 const char* name1 = "Buffer name \\\"\'<>&% \nSecond line .,;=";
2486 const char* name2 = "2";
2487 const size_t name1Len = strlen(name1);
2488
2489 char* name1Buf = new char[name1Len + 1];
2490 strcpy_s(name1Buf, name1Len + 1, name1);
2491
2492 VmaAllocationCreateInfo allocCreateInfo = {};
2493 allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
2494 allocCreateInfo.flags = VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT;
2495 allocCreateInfo.pUserData = name1Buf;
2496 if(testIndex == 1)
2497 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
2498
2499 VkBuffer buf; VmaAllocation alloc; VmaAllocationInfo allocInfo;
2500 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2501 TEST(res == VK_SUCCESS);
2502 TEST(allocInfo.pUserData != nullptr && allocInfo.pUserData != name1Buf);
2503 TEST(strcmp(name1, (const char*)allocInfo.pUserData) == 0);
2504
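// With USER_DATA_COPY_STRING_BIT, VMA stores its own copy of the string,
// so the caller's buffer can be freed right away.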
2505 delete[] name1Buf;
2506
2507 vmaGetAllocationInfo(g_hAllocator, alloc, &allocInfo);
2508 TEST(strcmp(name1, (const char*)allocInfo.pUserData) == 0);
2509
2510 vmaSetAllocationUserData(g_hAllocator, alloc, (void*)name2);
2511 vmaGetAllocationInfo(g_hAllocator, alloc, &allocInfo);
2512 TEST(strcmp(name2, (const char*)allocInfo.pUserData) == 0);
2513
2514 vmaSetAllocationUserData(g_hAllocator, alloc, nullptr);
2515 vmaGetAllocationInfo(g_hAllocator, alloc, &allocInfo);
2516 TEST(allocInfo.pUserData == nullptr);
2517
2518 vmaDestroyBuffer(g_hAllocator, buf, alloc);
2519 }
2520 }
2521 }
2522
2523 static void TestInvalidAllocations()
2524 {
2525 VkResult res;
2526
2527 VmaAllocationCreateInfo allocCreateInfo = {};
2528 allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
2529
2530 // Try to allocate 0 bytes.
2531 {
2532 VkMemoryRequirements memReq = {};
2533 memReq.size = 0; // !!!
2534 memReq.alignment = 4;
2535 memReq.memoryTypeBits = UINT32_MAX;
2536 VmaAllocation alloc = VK_NULL_HANDLE;
2537 res = vmaAllocateMemory(g_hAllocator, &memReq, &allocCreateInfo, &alloc, nullptr);
2538 TEST(res == VK_ERROR_VALIDATION_FAILED_EXT && alloc == VK_NULL_HANDLE);
2539 }
2540
2541 // Try to create buffer with size = 0.
2542 {
2543 VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2544 bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
2545 bufCreateInfo.size = 0; // !!!
2546 VkBuffer buf = VK_NULL_HANDLE;
2547 VmaAllocation alloc = VK_NULL_HANDLE;
2548 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr);
2549 TEST(res == VK_ERROR_VALIDATION_FAILED_EXT && buf == VK_NULL_HANDLE && alloc == VK_NULL_HANDLE);
2550 }
2551
2552 // Try to create image with one dimension = 0.
2553 {
2554 VkImageCreateInfo imageCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
2555 imageCreateInfo.imageType = VK_IMAGE_TYPE_2D;
2556 imageCreateInfo.format = VK_FORMAT_B8G8R8A8_UNORM;
2557 imageCreateInfo.extent.width = 128;
2558 imageCreateInfo.extent.height = 0; // !!!
2559 imageCreateInfo.extent.depth = 1;
2560 imageCreateInfo.mipLevels = 1;
2561 imageCreateInfo.arrayLayers = 1;
2562 imageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
2563 imageCreateInfo.tiling = VK_IMAGE_TILING_LINEAR;
2564 imageCreateInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
2565 imageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
2566 VkImage image = VK_NULL_HANDLE;
2567 VmaAllocation alloc = VK_NULL_HANDLE;
2568 res = vmaCreateImage(g_hAllocator, &imageCreateInfo, &allocCreateInfo, &image, &alloc, nullptr);
2569 TEST(res == VK_ERROR_VALIDATION_FAILED_EXT && image == VK_NULL_HANDLE && alloc == VK_NULL_HANDLE);
2570 }
2571 }
2572
2573 static void TestMemoryRequirements()
2574 {
2575 VkResult res;
2576 VkBuffer buf;
2577 VmaAllocation alloc;
2578 VmaAllocationInfo allocInfo;
2579
2580 const VkPhysicalDeviceMemoryProperties* memProps;
2581 vmaGetMemoryProperties(g_hAllocator, &memProps);
2582
2583 VkBufferCreateInfo bufInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2584 bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
2585 bufInfo.size = 128;
2586
2587 VmaAllocationCreateInfo allocCreateInfo = {};
2588
2589 // No requirements.
2590 res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2591 TEST(res == VK_SUCCESS);
2592 vmaDestroyBuffer(g_hAllocator, buf, alloc);
2593
2594 // Usage.
2595 allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
2596 allocCreateInfo.requiredFlags = 0;
2597 allocCreateInfo.preferredFlags = 0;
2598 allocCreateInfo.memoryTypeBits = UINT32_MAX;
2599
2600 res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2601 TEST(res == VK_SUCCESS);
2602 TEST(memProps->memoryTypes[allocInfo.memoryType].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
2603 vmaDestroyBuffer(g_hAllocator, buf, alloc);
2604
2605 // Required flags, preferred flags.
2606 allocCreateInfo.usage = VMA_MEMORY_USAGE_UNKNOWN;
2607 allocCreateInfo.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
2608 allocCreateInfo.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
2609 allocCreateInfo.memoryTypeBits = 0;
2610
2611 res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2612 TEST(res == VK_SUCCESS);
2613 TEST(memProps->memoryTypes[allocInfo.memoryType].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
2614 TEST(memProps->memoryTypes[allocInfo.memoryType].propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
2615 vmaDestroyBuffer(g_hAllocator, buf, alloc);
2616
2617 // memoryTypeBits.
2618 const uint32_t memType = allocInfo.memoryType;
2619 allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
2620 allocCreateInfo.requiredFlags = 0;
2621 allocCreateInfo.preferredFlags = 0;
2622 allocCreateInfo.memoryTypeBits = 1u << memType;
2623
2624 res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2625 TEST(res == VK_SUCCESS);
2626 TEST(allocInfo.memoryType == memType);
2627 vmaDestroyBuffer(g_hAllocator, buf, alloc);
2628
2629 }
2630
2631 static void TestGetAllocatorInfo()
2632 {
2633 wprintf(L"Test vnaGetAllocatorInfo\n");
2634
2635 VmaAllocatorInfo allocInfo = {};
2636 vmaGetAllocatorInfo(g_hAllocator, &allocInfo);
2637 TEST(allocInfo.instance == g_hVulkanInstance);
2638 TEST(allocInfo.physicalDevice == g_hPhysicalDevice);
2639 TEST(allocInfo.device == g_hDevice);
2640 }
2641
2642 static void TestBasics()
2643 {
2644 VkResult res;
2645
2646 TestGetAllocatorInfo();
2647
2648 TestMemoryRequirements();
2649
2650 // Lost allocation
2651 {
2652 VmaAllocation alloc = VK_NULL_HANDLE;
2653 vmaCreateLostAllocation(g_hAllocator, &alloc);
2654 TEST(alloc != VK_NULL_HANDLE);
2655
2656 VmaAllocationInfo allocInfo;
2657 vmaGetAllocationInfo(g_hAllocator, alloc, &allocInfo);
2658 TEST(allocInfo.deviceMemory == VK_NULL_HANDLE);
2659 TEST(allocInfo.size == 0);
2660
2661 vmaFreeMemory(g_hAllocator, alloc);
2662 }
2663
2664 // Allocation that is MAPPED and not necessarily HOST_VISIBLE.
2665 {
2666 VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2667 bufCreateInfo.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
2668 bufCreateInfo.size = 128;
2669
2670 VmaAllocationCreateInfo allocCreateInfo = {};
2671 allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
2672 allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
2673
2674 VkBuffer buf; VmaAllocation alloc; VmaAllocationInfo allocInfo;
2675 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2676 TEST(res == VK_SUCCESS);
2677
2678 vmaDestroyBuffer(g_hAllocator, buf, alloc);
2679
2680 // Same with DEDICATED_MEMORY.
2681 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
2682
2683 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2684 TEST(res == VK_SUCCESS);
2685
2686 vmaDestroyBuffer(g_hAllocator, buf, alloc);
2687 }
2688
2689 TestUserData();
2690
2691 TestInvalidAllocations();
2692 }
2693
2694 static void TestPool_MinBlockCount()
2695 {
2696 #if defined(VMA_DEBUG_MARGIN) && VMA_DEBUG_MARGIN > 0
2697 return;
2698 #endif
2699
2700 wprintf(L"Test Pool MinBlockCount\n");
2701 VkResult res;
2702
2703 static const VkDeviceSize ALLOC_SIZE = 512ull * 1024;
2704 static const VkDeviceSize BLOCK_SIZE = ALLOC_SIZE * 2; // Each block can fit 2 allocations.
2705
2706 VmaAllocationCreateInfo allocCreateInfo = {};
2707 allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_COPY;
2708
2709 VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2710 bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
2711 bufCreateInfo.size = ALLOC_SIZE;
2712
2713 VmaPoolCreateInfo poolCreateInfo = {};
2714 poolCreateInfo.blockSize = BLOCK_SIZE;
2715 poolCreateInfo.minBlockCount = 2; // At least 2 blocks always present.
2716 res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &poolCreateInfo.memoryTypeIndex);
2717 TEST(res == VK_SUCCESS);
2718
2719 VmaPool pool = VK_NULL_HANDLE;
2720 res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
2721 TEST(res == VK_SUCCESS && pool != VK_NULL_HANDLE);
2722
2723 // Check that there are 2 blocks preallocated as requested.
2724 VmaPoolStats begPoolStats = {};
2725 vmaGetPoolStats(g_hAllocator, pool, &begPoolStats);
2726 TEST(begPoolStats.blockCount == 2 && begPoolStats.allocationCount == 0 && begPoolStats.size == BLOCK_SIZE * 2);
2727
2728 // Allocate 5 buffers to create 3 blocks.
2729 static const uint32_t BUF_COUNT = 5;
2730 allocCreateInfo.pool = pool;
2731 std::vector<AllocInfo> allocs(BUF_COUNT);
2732 for(uint32_t i = 0; i < BUF_COUNT; ++i)
2733 {
2734 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &allocs[i].m_Buffer, &allocs[i].m_Allocation, nullptr);
2735 TEST(res == VK_SUCCESS && allocs[i].m_Buffer != VK_NULL_HANDLE && allocs[i].m_Allocation != VK_NULL_HANDLE);
2736 }
2737
2738 // Check that there are really 3 blocks.
2739 VmaPoolStats poolStats2 = {};
2740 vmaGetPoolStats(g_hAllocator, pool, &poolStats2);
2741 TEST(poolStats2.blockCount == 3 && poolStats2.allocationCount == BUF_COUNT && poolStats2.size == BLOCK_SIZE * 3);
2742
2743 // Free the first two allocations to make one block empty.
2744 allocs[0].Destroy();
2745 allocs[1].Destroy();
2746
2747 // Check that there are still 3 blocks due to hysteresis.
2748 VmaPoolStats poolStats3 = {};
2749 vmaGetPoolStats(g_hAllocator, pool, &poolStats3);
2750 TEST(poolStats3.blockCount == 3 && poolStats3.allocationCount == BUF_COUNT - 2 && poolStats3.size == BLOCK_SIZE * 3);
2751 
2752 // Free the last allocation to make the second block empty.
2753 allocs[BUF_COUNT - 1].Destroy();
2754
2755 // Check that there are now 2 blocks only.
2756 VmaPoolStats poolStats4 = {};
2757 vmaGetPoolStats(g_hAllocator, pool, &poolStats4);
2758 TEST(poolStats4.blockCount == 2 && poolStats4.allocationCount == BUF_COUNT - 3 && poolStats4.size == BLOCK_SIZE * 2);
2759
2760 // Cleanup.
2761 for(size_t i = allocs.size(); i--; )
2762 {
2763 allocs[i].Destroy();
2764 }
2765 vmaDestroyPool(g_hAllocator, pool);
2766 }
2767
2768 void TestHeapSizeLimit()
2769 {
2770 const VkDeviceSize HEAP_SIZE_LIMIT = 100ull * 1024 * 1024; // 100 MB
2771 const VkDeviceSize BLOCK_SIZE = 10ull * 1024 * 1024; // 10 MB
2772
2773 VkDeviceSize heapSizeLimit[VK_MAX_MEMORY_HEAPS];
2774 for(uint32_t i = 0; i < VK_MAX_MEMORY_HEAPS; ++i)
2775 {
2776 heapSizeLimit[i] = HEAP_SIZE_LIMIT;
2777 }
2778
2779 VmaAllocatorCreateInfo allocatorCreateInfo = {};
2780 allocatorCreateInfo.physicalDevice = g_hPhysicalDevice;
2781 allocatorCreateInfo.device = g_hDevice;
2782 allocatorCreateInfo.instance = g_hVulkanInstance;
2783 allocatorCreateInfo.pHeapSizeLimit = heapSizeLimit;
2784
2785 VmaAllocator hAllocator;
2786 VkResult res = vmaCreateAllocator(&allocatorCreateInfo, &hAllocator);
2787 TEST(res == VK_SUCCESS);
2788
2789 struct Item
2790 {
2791 VkBuffer hBuf;
2792 VmaAllocation hAlloc;
2793 };
2794 std::vector<Item> items;
2795
2796 VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2797 bufCreateInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
2798
2799 // 1. Allocate two blocks of dedicated memory, half the size of BLOCK_SIZE.
2800 VmaAllocationInfo dedicatedAllocInfo;
2801 {
2802 VmaAllocationCreateInfo allocCreateInfo = {};
2803 allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
2804 allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
2805
2806 bufCreateInfo.size = BLOCK_SIZE / 2;
2807
2808 for(size_t i = 0; i < 2; ++i)
2809 {
2810 Item item;
2811 res = vmaCreateBuffer(hAllocator, &bufCreateInfo, &allocCreateInfo, &item.hBuf, &item.hAlloc, &dedicatedAllocInfo);
2812 TEST(res == VK_SUCCESS);
2813 items.push_back(item);
2814 }
2815 }
2816
2817 // Create a pool to force further allocations into this same memory type.
2818 VmaPoolCreateInfo poolCreateInfo = {};
2819 poolCreateInfo.memoryTypeIndex = dedicatedAllocInfo.memoryType;
2820 poolCreateInfo.blockSize = BLOCK_SIZE;
2821
2822 VmaPool hPool;
2823 res = vmaCreatePool(hAllocator, &poolCreateInfo, &hPool);
2824 TEST(res == VK_SUCCESS);
2825
2826 // 2. Allocate normal buffers from all the remaining memory.
2827 {
2828 VmaAllocationCreateInfo allocCreateInfo = {};
2829 allocCreateInfo.pool = hPool;
2830
2831 bufCreateInfo.size = BLOCK_SIZE / 2;
2832
2833 const size_t bufCount = ((HEAP_SIZE_LIMIT / BLOCK_SIZE) - 1) * 2;
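// The two dedicated buffers above already consumed one block's worth
// (2 * BLOCK_SIZE / 2) of the heap budget, so (HEAP_SIZE_LIMIT / BLOCK_SIZE - 1)
// blocks remain, each fitting two buffers of half a block.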
2834 for(size_t i = 0; i < bufCount; ++i)
2835 {
2836 Item item;
2837 res = vmaCreateBuffer(hAllocator, &bufCreateInfo, &allocCreateInfo, &item.hBuf, &item.hAlloc, nullptr);
2838 TEST(res == VK_SUCCESS);
2839 items.push_back(item);
2840 }
2841 }
2842
2843 // 3. Allocation of one more (even small) buffer should fail.
2844 {
2845 VmaAllocationCreateInfo allocCreateInfo = {};
2846 allocCreateInfo.pool = hPool;
2847
2848 bufCreateInfo.size = 128;
2849
2850 VkBuffer hBuf;
2851 VmaAllocation hAlloc;
2852 res = vmaCreateBuffer(hAllocator, &bufCreateInfo, &allocCreateInfo, &hBuf, &hAlloc, nullptr);
2853 TEST(res == VK_ERROR_OUT_OF_DEVICE_MEMORY);
2854 }
2855
2856 // Destroy everything.
2857 for(size_t i = items.size(); i--; )
2858 {
2859 vmaDestroyBuffer(hAllocator, items[i].hBuf, items[i].hAlloc);
2860 }
2861
2862 vmaDestroyPool(hAllocator, hPool);
2863
2864 vmaDestroyAllocator(hAllocator);
2865 }
2866
2867 #if VMA_DEBUG_MARGIN
2868 static void TestDebugMargin()
2869 {
2870 if(VMA_DEBUG_MARGIN == 0)
2871 {
2872 return;
2873 }
2874
2875 VkBufferCreateInfo bufInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2876 bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
2877
2878 VmaAllocationCreateInfo allocCreateInfo = {};
2879 allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
2880
2881 // Create a few buffers of different sizes.
2882 const size_t BUF_COUNT = 10;
2883 BufferInfo buffers[BUF_COUNT];
2884 VmaAllocationInfo allocInfo[BUF_COUNT];
2885 for(size_t i = 0; i < BUF_COUNT; ++i)
2886 {
2887 bufInfo.size = (VkDeviceSize)(i + 1) * 64;
2888 // Last one will be mapped.
2889 allocCreateInfo.flags = (i == BUF_COUNT - 1) ? VMA_ALLOCATION_CREATE_MAPPED_BIT : 0;
2890
2891 VkResult res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo, &buffers[i].Buffer, &buffers[i].Allocation, &allocInfo[i]);
2892 TEST(res == VK_SUCCESS);
2893 // Margin is preserved also at the beginning of a block.
2894 TEST(allocInfo[i].offset >= VMA_DEBUG_MARGIN);
2895
2896 if(i == BUF_COUNT - 1)
2897 {
2898 // Fill with data.
2899 TEST(allocInfo[i].pMappedData != nullptr);
2900 // Uncomment this "+ 1" to overwrite past end of allocation and check corruption detection.
2901 memset(allocInfo[i].pMappedData, 0xFF, bufInfo.size /* + 1 */);
2902 }
2903 }
2904
2905 // Check if their offsets preserve margin between them.
2906 std::sort(allocInfo, allocInfo + BUF_COUNT, [](const VmaAllocationInfo& lhs, const VmaAllocationInfo& rhs) -> bool
2907 {
2908 if(lhs.deviceMemory != rhs.deviceMemory)
2909 {
2910 return lhs.deviceMemory < rhs.deviceMemory;
2911 }
2912 return lhs.offset < rhs.offset;
2913 });
2914 for(size_t i = 1; i < BUF_COUNT; ++i)
2915 {
2916 if(allocInfo[i].deviceMemory == allocInfo[i - 1].deviceMemory)
2917 {
2918 TEST(allocInfo[i].offset >= allocInfo[i - 1].offset + VMA_DEBUG_MARGIN);
2919 }
2920 }
2921
2922 VkResult res = vmaCheckCorruption(g_hAllocator, UINT32_MAX);
2923 TEST(res == VK_SUCCESS);
2924
2925 // Destroy all buffers.
2926 for(size_t i = BUF_COUNT; i--; )
2927 {
2928 vmaDestroyBuffer(g_hAllocator, buffers[i].Buffer, buffers[i].Allocation);
2929 }
2930 }
2931 #endif
2932
2933 static void TestLinearAllocator()
2934 {
2935 wprintf(L"Test linear allocator\n");
2936
2937 RandomNumberGenerator rand{645332};
2938
2939 VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2940 sampleBufCreateInfo.size = 1024; // Whatever.
2941 sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
2942
2943 VmaAllocationCreateInfo sampleAllocCreateInfo = {};
2944 sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
2945
2946 VmaPoolCreateInfo poolCreateInfo = {};
2947 VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
2948 TEST(res == VK_SUCCESS);
2949
2950 poolCreateInfo.blockSize = 1024 * 300;
2951 poolCreateInfo.flags = VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT;
2952 poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1;
2953
2954 VmaPool pool = nullptr;
2955 res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
2956 TEST(res == VK_SUCCESS);
2957
2958 VkBufferCreateInfo bufCreateInfo = sampleBufCreateInfo;
2959
2960 VmaAllocationCreateInfo allocCreateInfo = {};
2961 allocCreateInfo.pool = pool;
2962
2963 constexpr size_t maxBufCount = 100;
2964 std::vector<BufferInfo> bufInfo;
2965
2966 constexpr VkDeviceSize bufSizeMin = 16;
2967 constexpr VkDeviceSize bufSizeMax = 1024;
2968 VmaAllocationInfo allocInfo;
2969 VkDeviceSize prevOffset = 0;
2970
2971 // Test one-time free.
2972 for(size_t i = 0; i < 2; ++i)
2973 {
2974 // Allocate a number of buffers of varying size that surely fit into this block.
2975 VkDeviceSize bufSumSize = 0;
2976 for(size_t i = 0; i < maxBufCount; ++i)
2977 {
2978 bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
2979 BufferInfo newBufInfo;
2980 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
2981 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
2982 TEST(res == VK_SUCCESS);
2983 TEST(i == 0 || allocInfo.offset > prevOffset);
2984 bufInfo.push_back(newBufInfo);
2985 prevOffset = allocInfo.offset;
2986 bufSumSize += bufCreateInfo.size;
2987 }
2988
2989 // Validate pool stats.
2990 VmaPoolStats stats;
2991 vmaGetPoolStats(g_hAllocator, pool, &stats);
2992 TEST(stats.size == poolCreateInfo.blockSize);
2993 TEST(stats.unusedSize == poolCreateInfo.blockSize - bufSumSize);
2994 TEST(stats.allocationCount == bufInfo.size());
2995
2996 // Destroy the buffers in random order.
2997 while(!bufInfo.empty())
2998 {
2999 const size_t indexToDestroy = rand.Generate() % bufInfo.size();
3000 const BufferInfo& currBufInfo = bufInfo[indexToDestroy];
3001 vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3002 bufInfo.erase(bufInfo.begin() + indexToDestroy);
3003 }
3004 }
3005
3006 // Test stack.
3007 {
3008 // Allocate a number of buffers of varying size that surely fit into this block.
3009 for(size_t i = 0; i < maxBufCount; ++i)
3010 {
3011 bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3012 BufferInfo newBufInfo;
3013 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3014 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3015 TEST(res == VK_SUCCESS);
3016 TEST(i == 0 || allocInfo.offset > prevOffset);
3017 bufInfo.push_back(newBufInfo);
3018 prevOffset = allocInfo.offset;
3019 }
3020
3021 // Destroy a few buffers from the top of the stack.
3022 for(size_t i = 0; i < maxBufCount / 5; ++i)
3023 {
3024 const BufferInfo& currBufInfo = bufInfo.back();
3025 vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3026 bufInfo.pop_back();
3027 }
3028
3029 // Create some more
3030 for(size_t i = 0; i < maxBufCount / 5; ++i)
3031 {
3032 bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3033 BufferInfo newBufInfo;
3034 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3035 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3036 TEST(res == VK_SUCCESS);
3037 TEST(i == 0 || allocInfo.offset > prevOffset);
3038 bufInfo.push_back(newBufInfo);
3039 prevOffset = allocInfo.offset;
3040 }
3041
3042 // Destroy the buffers in reverse order.
3043 while(!bufInfo.empty())
3044 {
3045 const BufferInfo& currBufInfo = bufInfo.back();
3046 vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3047 bufInfo.pop_back();
3048 }
3049 }
3050
3051 // Test ring buffer.
3052 {
3053 // Allocate a number of buffers that surely fit into this block.
3054 bufCreateInfo.size = bufSizeMax;
3055 for(size_t i = 0; i < maxBufCount; ++i)
3056 {
3057 BufferInfo newBufInfo;
3058 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3059 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3060 TEST(res == VK_SUCCESS);
3061 TEST(i == 0 || allocInfo.offset > prevOffset);
3062 bufInfo.push_back(newBufInfo);
3063 prevOffset = allocInfo.offset;
3064 }
3065
3066 // Free and allocate new buffers enough times to make sure we wrap around at least once.
3067 const size_t buffersPerIter = maxBufCount / 10 - 1;
3068 const size_t iterCount = poolCreateInfo.blockSize / bufCreateInfo.size / buffersPerIter * 2;
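// Each block holds blockSize / bufSize buffers. Replacing buffersPerIter
// buffers per iteration for iterCount iterations cycles through roughly
// twice the block's capacity, guaranteeing at least one wrap-around.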
3069 for(size_t iter = 0; iter < iterCount; ++iter)
3070 {
3071 for(size_t bufPerIter = 0; bufPerIter < buffersPerIter; ++bufPerIter)
3072 {
3073 const BufferInfo& currBufInfo = bufInfo.front();
3074 vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3075 bufInfo.erase(bufInfo.begin());
3076 }
3077 for(size_t bufPerIter = 0; bufPerIter < buffersPerIter; ++bufPerIter)
3078 {
3079 BufferInfo newBufInfo;
3080 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3081 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3082 TEST(res == VK_SUCCESS);
3083 bufInfo.push_back(newBufInfo);
3084 }
3085 }
3086
3087 // Allocate buffers until we reach out-of-memory.
3088 uint32_t debugIndex = 0;
3089 while(res == VK_SUCCESS)
3090 {
3091 BufferInfo newBufInfo;
3092 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3093 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3094 if(res == VK_SUCCESS)
3095 {
3096 bufInfo.push_back(newBufInfo);
3097 }
3098 else
3099 {
3100 TEST(res == VK_ERROR_OUT_OF_DEVICE_MEMORY);
3101 }
3102 ++debugIndex;
3103 }
3104
3105 // Destroy the buffers in random order.
3106 while(!bufInfo.empty())
3107 {
3108 const size_t indexToDestroy = rand.Generate() % bufInfo.size();
3109 const BufferInfo& currBufInfo = bufInfo[indexToDestroy];
3110 vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3111 bufInfo.erase(bufInfo.begin() + indexToDestroy);
3112 }
3113 }
3114
3115 // Test double stack.
3116 {
3117 // Allocate a number of buffers of varying size that surely fit into this block, alternating between bottom and top.
3118 VkDeviceSize prevOffsetLower = 0;
3119 VkDeviceSize prevOffsetUpper = poolCreateInfo.blockSize;
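// Double stack: default allocations grow from offset 0 upward, while
// UPPER_ADDRESS allocations grow from the end of the block downward.
// The two stacks must never cross.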
3120 for(size_t i = 0; i < maxBufCount; ++i)
3121 {
3122 const bool upperAddress = (i % 2) != 0;
3123 if(upperAddress)
3124 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3125 else
3126 allocCreateInfo.flags &= ~VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3127 bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3128 BufferInfo newBufInfo;
3129 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3130 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3131 TEST(res == VK_SUCCESS);
3132 if(upperAddress)
3133 {
3134 TEST(allocInfo.offset < prevOffsetUpper);
3135 prevOffsetUpper = allocInfo.offset;
3136 }
3137 else
3138 {
3139 TEST(allocInfo.offset >= prevOffsetLower);
3140 prevOffsetLower = allocInfo.offset;
3141 }
3142 TEST(prevOffsetLower < prevOffsetUpper);
3143 bufInfo.push_back(newBufInfo);
3144 }
3145
3146 // Destroy a few buffers from the top of the stack.
3147 for(size_t i = 0; i < maxBufCount / 5; ++i)
3148 {
3149 const BufferInfo& currBufInfo = bufInfo.back();
3150 vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3151 bufInfo.pop_back();
3152 }
3153
3154 // Create some more
3155 for(size_t i = 0; i < maxBufCount / 5; ++i)
3156 {
3157 const bool upperAddress = (i % 2) != 0;
3158 if(upperAddress)
3159 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3160 else
3161 allocCreateInfo.flags &= ~VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3162 bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3163 BufferInfo newBufInfo;
3164 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3165 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3166 TEST(res == VK_SUCCESS);
3167 bufInfo.push_back(newBufInfo);
3168 }
3169
3170 // Destroy the buffers in reverse order.
3171 while(!bufInfo.empty())
3172 {
3173 const BufferInfo& currBufInfo = bufInfo.back();
3174 vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3175 bufInfo.pop_back();
3176 }
3177
3178 // Create buffers on both sides until we reach out-of-memory.
3179 prevOffsetLower = 0;
3180 prevOffsetUpper = poolCreateInfo.blockSize;
3181 res = VK_SUCCESS;
3182 for(size_t i = 0; res == VK_SUCCESS; ++i)
3183 {
3184 const bool upperAddress = (i % 2) != 0;
3185 if(upperAddress)
3186 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3187 else
3188 allocCreateInfo.flags &= ~VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3189 bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3190 BufferInfo newBufInfo;
3191 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3192 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3193 if(res == VK_SUCCESS)
3194 {
3195 if(upperAddress)
3196 {
3197 TEST(allocInfo.offset < prevOffsetUpper);
3198 prevOffsetUpper = allocInfo.offset;
3199 }
3200 else
3201 {
3202 TEST(allocInfo.offset >= prevOffsetLower);
3203 prevOffsetLower = allocInfo.offset;
3204 }
3205 TEST(prevOffsetLower < prevOffsetUpper);
3206 bufInfo.push_back(newBufInfo);
3207 }
3208 }
3209
3210 // Destroy the buffers in random order.
3211 while(!bufInfo.empty())
3212 {
3213 const size_t indexToDestroy = rand.Generate() % bufInfo.size();
3214 const BufferInfo& currBufInfo = bufInfo[indexToDestroy];
3215 vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3216 bufInfo.erase(bufInfo.begin() + indexToDestroy);
3217 }
3218
3219 // Create buffers on the upper side only, constant size, until we reach out-of-memory.
3220 prevOffsetUpper = poolCreateInfo.blockSize;
3221 res = VK_SUCCESS;
3222 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3223 bufCreateInfo.size = bufSizeMax;
3224 for(size_t i = 0; res == VK_SUCCESS; ++i)
3225 {
3226 BufferInfo newBufInfo;
3227 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3228 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3229 if(res == VK_SUCCESS)
3230 {
3231 TEST(allocInfo.offset < prevOffsetUpper);
3232 prevOffsetUpper = allocInfo.offset;
3233 bufInfo.push_back(newBufInfo);
3234 }
3235 }
3236
3237 // Destroy the buffers in reverse order.
3238 while(!bufInfo.empty())
3239 {
3240 const BufferInfo& currBufInfo = bufInfo.back();
3241 vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3242 bufInfo.pop_back();
3243 }
3244 }
3245
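// Background for the next sub-test, paraphrased from the VMA documentation
// (a hedged summary, not normative): an allocation created with
// VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT may be invalidated ("lost") when a
// later allocation created with VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT
// needs its space and the allocation was last used more than the pool's
// frameInUseCount frames ago. A lost allocation reports
// deviceMemory == VK_NULL_HANDLE from vmaGetAllocationInfo and must still be
// destroyed by its owner, e.g.:
//
//     vmaSetCurrentFrameIndex(g_hAllocator, frameIndex);
//     VmaAllocationInfo info;
//     vmaGetAllocationInfo(g_hAllocator, alloc, &info); // also "touches" alloc
//     if(info.deviceMemory == VK_NULL_HANDLE)
//         vmaDestroyBuffer(g_hAllocator, buf, alloc);   // lost - clean up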
3246 // Test ring buffer with lost allocations.
3247 {
3248 // Allocate buffers until the pool is full.
3249 // Notice the CAN_BECOME_LOST flag and the call to vmaSetCurrentFrameIndex.
3250 allocCreateInfo.flags = VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT;
3251 res = VK_SUCCESS;
3252 for(size_t i = 0; res == VK_SUCCESS; ++i)
3253 {
3254 vmaSetCurrentFrameIndex(g_hAllocator, ++g_FrameIndex);
3255
3256 bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3257
3258 BufferInfo newBufInfo;
3259 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3260 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3261 if(res == VK_SUCCESS)
3262 bufInfo.push_back(newBufInfo);
3263 }
3264
3265 // Free the first half of them.
3266 {
3267 const size_t buffersToDelete = bufInfo.size() / 2;
3268 for(size_t i = 0; i < buffersToDelete; ++i)
3269 {
3270 vmaDestroyBuffer(g_hAllocator, bufInfo[i].Buffer, bufInfo[i].Allocation);
3271 }
3272 bufInfo.erase(bufInfo.begin(), bufInfo.begin() + buffersToDelete);
3273 }
3274
3275 // Allocate buffers until the pool is full again.
3276 // This way we make sure the ring buffer wraps around, with its front now in the middle.
3277 res = VK_SUCCESS;
3278 for(size_t i = 0; res == VK_SUCCESS; ++i)
3279 {
3280 vmaSetCurrentFrameIndex(g_hAllocator, ++g_FrameIndex);
3281
3282 bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3283
3284 BufferInfo newBufInfo;
3285 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3286 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3287 if(res == VK_SUCCESS)
3288 bufInfo.push_back(newBufInfo);
3289 }
3290
3291 VkDeviceSize firstNewOffset;
3292 {
3293 vmaSetCurrentFrameIndex(g_hAllocator, ++g_FrameIndex);
3294
3295 // Allocate a large buffer with CAN_MAKE_OTHER_LOST.
3296 allocCreateInfo.flags = VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT;
3297 bufCreateInfo.size = bufSizeMax;
3298
3299 BufferInfo newBufInfo;
3300 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3301 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3302 TEST(res == VK_SUCCESS);
3303 bufInfo.push_back(newBufInfo);
3304 firstNewOffset = allocInfo.offset;
3305
3306 // Make sure at least one buffer from the beginning became lost.
3307 vmaGetAllocationInfo(g_hAllocator, bufInfo[0].Allocation, &allocInfo);
3308 TEST(allocInfo.deviceMemory == VK_NULL_HANDLE);
3309 }
3310
3311 #if 0 // TODO Fix and uncomment. Failing on Intel.
3312 // Allocate more buffers with CAN_MAKE_OTHER_LOST until this too wraps around.
3313 size_t newCount = 1;
3314 for(;;)
3315 {
3316 vmaSetCurrentFrameIndex(g_hAllocator, ++g_FrameIndex);
3317
3318 bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3319
3320 BufferInfo newBufInfo;
3321 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3322 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3323
3324 TEST(res == VK_SUCCESS);
3325 bufInfo.push_back(newBufInfo);
3326 ++newCount;
3327 if(allocInfo.offset < firstNewOffset)
3328 break;
3329 }
3330 #endif
3331
3332 // Delete buffers that are lost.
3333 for(size_t i = bufInfo.size(); i--; )
3334 {
3335 vmaGetAllocationInfo(g_hAllocator, bufInfo[i].Allocation, &allocInfo);
3336 if(allocInfo.deviceMemory == VK_NULL_HANDLE)
3337 {
3338 vmaDestroyBuffer(g_hAllocator, bufInfo[i].Buffer, bufInfo[i].Allocation);
3339 bufInfo.erase(bufInfo.begin() + i);
3340 }
3341 }
3342
3343 // Test vmaMakePoolAllocationsLost
3344 {
3345 vmaSetCurrentFrameIndex(g_hAllocator, ++g_FrameIndex);
3346
3347 size_t lostAllocCount = 0;
3348 vmaMakePoolAllocationsLost(g_hAllocator, pool, &lostAllocCount);
3349 TEST(lostAllocCount > 0);
3350
3351 size_t realLostAllocCount = 0;
3352 for(size_t i = 0; i < bufInfo.size(); ++i)
3353 {
3354 vmaGetAllocationInfo(g_hAllocator, bufInfo[i].Allocation, &allocInfo);
3355 if(allocInfo.deviceMemory == VK_NULL_HANDLE)
3356 ++realLostAllocCount;
3357 }
3358 TEST(realLostAllocCount == lostAllocCount);
3359 }
3360
3361 // Destroy all the buffers in forward order.
3362 for(size_t i = 0; i < bufInfo.size(); ++i)
3363 vmaDestroyBuffer(g_hAllocator, bufInfo[i].Buffer, bufInfo[i].Allocation);
3364 bufInfo.clear();
3365 }
3366
3367 vmaDestroyPool(g_hAllocator, pool);
3368 }
3369
3370 static void TestLinearAllocatorMultiBlock()
3371 {
3372 wprintf(L"Test linear allocator multi block\n");
3373
3374 RandomNumberGenerator rand{345673};
3375
3376 VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
3377 sampleBufCreateInfo.size = 1024 * 1024;
3378 sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
3379
3380 VmaAllocationCreateInfo sampleAllocCreateInfo = {};
3381 sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
3382
3383 VmaPoolCreateInfo poolCreateInfo = {};
3384 poolCreateInfo.flags = VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT;
3385 VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
3386 TEST(res == VK_SUCCESS);
3387
3388 VmaPool pool = nullptr;
3389 res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
3390 TEST(res == VK_SUCCESS);
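// Note: poolCreateInfo.blockSize was left at 0 above, so VMA chooses a default
// block size for this pool. The loops below therefore allocate 1 MB buffers
// until allocations spill over into a second VkDeviceMemory block, however
// large that default happens to be.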
3391
3392 VkBufferCreateInfo bufCreateInfo = sampleBufCreateInfo;
3393
3394 VmaAllocationCreateInfo allocCreateInfo = {};
3395 allocCreateInfo.pool = pool;
3396
3397 std::vector<BufferInfo> bufInfo;
3398 VmaAllocationInfo allocInfo;
3399
3400 // Test one-time free.
3401 {
3402 // Allocate buffers until we move to a second block.
3403 VkDeviceMemory lastMem = VK_NULL_HANDLE;
3404 for(uint32_t i = 0; ; ++i)
3405 {
3406 BufferInfo newBufInfo;
3407 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3408 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3409 TEST(res == VK_SUCCESS);
3410 bufInfo.push_back(newBufInfo);
3411 if(lastMem && allocInfo.deviceMemory != lastMem)
3412 {
3413 break;
3414 }
3415 lastMem = allocInfo.deviceMemory;
3416 }
3417
3418 TEST(bufInfo.size() > 2);
3419
3420 // Make sure the pool now has two blocks.
3421 VmaPoolStats poolStats = {};
3422 vmaGetPoolStats(g_hAllocator, pool, &poolStats);
3423 TEST(poolStats.blockCount == 2);
3424
3425 // Destroy all the buffers in random order.
3426 while(!bufInfo.empty())
3427 {
3428 const size_t indexToDestroy = rand.Generate() % bufInfo.size();
3429 const BufferInfo& currBufInfo = bufInfo[indexToDestroy];
3430 vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3431 bufInfo.erase(bufInfo.begin() + indexToDestroy);
3432 }
3433
3434 // Make sure the pool now has at most one block.
3435 vmaGetPoolStats(g_hAllocator, pool, &poolStats);
3436 TEST(poolStats.blockCount <= 1);
3437 }
3438
3439 // Test stack.
3440 {
3441 // Allocate buffers until we move to a second block.
3442 VkDeviceMemory lastMem = VK_NULL_HANDLE;
3443 for(uint32_t i = 0; ; ++i)
3444 {
3445 BufferInfo newBufInfo;
3446 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3447 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3448 TEST(res == VK_SUCCESS);
3449 bufInfo.push_back(newBufInfo);
3450 if(lastMem && allocInfo.deviceMemory != lastMem)
3451 {
3452 break;
3453 }
3454 lastMem = allocInfo.deviceMemory;
3455 }
3456
3457 TEST(bufInfo.size() > 2);
3458
3459 // Add a few more buffers.
3460 for(uint32_t i = 0; i < 5; ++i)
3461 {
3462 BufferInfo newBufInfo;
3463 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3464 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3465 TEST(res == VK_SUCCESS);
3466 bufInfo.push_back(newBufInfo);
3467 }
3468
3469 // Make sure the pool now has two blocks.
3470 VmaPoolStats poolStats = {};
3471 vmaGetPoolStats(g_hAllocator, pool, &poolStats);
3472 TEST(poolStats.blockCount == 2);
3473
3474 // Delete half of the buffers, LIFO.
3475 for(size_t i = 0, countToDelete = bufInfo.size() / 2; i < countToDelete; ++i)
3476 {
3477 const BufferInfo& currBufInfo = bufInfo.back();
3478 vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3479 bufInfo.pop_back();
3480 }
3481
3482 // Add one more buffer.
3483 BufferInfo newBufInfo;
3484 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3485 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3486 TEST(res == VK_SUCCESS);
3487 bufInfo.push_back(newBufInfo);
3488
3489 // Make sure the pool now has one block.
3490 vmaGetPoolStats(g_hAllocator, pool, &poolStats);
3491 TEST(poolStats.blockCount == 1);
3492
3493 // Delete all the remaining buffers, LIFO.
3494 while(!bufInfo.empty())
3495 {
3496 const BufferInfo& currBufInfo = bufInfo.back();
3497 vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3498 bufInfo.pop_back();
3499 }
3500 }
3501
3502 vmaDestroyPool(g_hAllocator, pool);
3503 }
3504
3505 static void ManuallyTestLinearAllocator()
3506 {
3507 VmaStats origStats;
3508 vmaCalculateStats(g_hAllocator, &origStats);
3509
3510 wprintf(L"Manually test linear allocator\n");
3511
3512 RandomNumberGenerator rand{645332};
3513
3514 VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
3515 sampleBufCreateInfo.size = 1024; // Whatever.
3516 sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
3517
3518 VmaAllocationCreateInfo sampleAllocCreateInfo = {};
3519 sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
3520
3521 VmaPoolCreateInfo poolCreateInfo = {};
3522 VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
3523 TEST(res == VK_SUCCESS);
3524
3525 poolCreateInfo.blockSize = 10 * 1024;
3526 poolCreateInfo.flags = VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT;
3527 poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1;
3528
3529 VmaPool pool = nullptr;
3530 res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
3531 TEST(res == VK_SUCCESS);
3532
3533 VkBufferCreateInfo bufCreateInfo = sampleBufCreateInfo;
3534
3535 VmaAllocationCreateInfo allocCreateInfo = {};
3536 allocCreateInfo.pool = pool;
3537
3538 std::vector<BufferInfo> bufInfo;
3539 VmaAllocationInfo allocInfo;
3540 BufferInfo newBufInfo;
3541
3542 // Test double stack.
3543 {
3544 /*
3545 Lower: Buffer 32 B, Buffer 1024 B, Buffer 32 B
3546 Upper: Buffer 16 B, Buffer 1024 B, Buffer 128 B
3547
3548 In total:
3549 1 block allocated
3550 10240 Vulkan bytes
3551 6 new allocations
3552 2256 bytes in allocations
3553 */
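// Expected layout of the single 10240-B block, ignoring any alignment padding
// (a double stack: the lower stack grows up from offset 0, the upper stack
// grows down from blockSize):
//
//     low | 32 | 1024 | 32 | ...free... | 16 | 1024 | 128 | high
//           lower stack -->              <-- upper stack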
3554
3555 bufCreateInfo.size = 32;
3556 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3557 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3558 TEST(res == VK_SUCCESS);
3559 bufInfo.push_back(newBufInfo);
3560
3561 bufCreateInfo.size = 1024;
3562 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3563 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3564 TEST(res == VK_SUCCESS);
3565 bufInfo.push_back(newBufInfo);
3566
3567 bufCreateInfo.size = 32;
3568 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3569 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3570 TEST(res == VK_SUCCESS);
3571 bufInfo.push_back(newBufInfo);
3572
3573 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3574
3575 bufCreateInfo.size = 128;
3576 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3577 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3578 TEST(res == VK_SUCCESS);
3579 bufInfo.push_back(newBufInfo);
3580
3581 bufCreateInfo.size = 1024;
3582 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3583 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3584 TEST(res == VK_SUCCESS);
3585 bufInfo.push_back(newBufInfo);
3586
3587 bufCreateInfo.size = 16;
3588 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3589 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3590 TEST(res == VK_SUCCESS);
3591 bufInfo.push_back(newBufInfo);
3592
3593 VmaStats currStats;
3594 vmaCalculateStats(g_hAllocator, &currStats);
3595 VmaPoolStats poolStats;
3596 vmaGetPoolStats(g_hAllocator, pool, &poolStats);
3597
3598 char* statsStr = nullptr;
3599 vmaBuildStatsString(g_hAllocator, &statsStr, VK_TRUE);
3600
3601 // PUT BREAKPOINT HERE TO CHECK.
3602 // Inspect: currStats versus origStats, poolStats, statsStr.
3603 int I = 0; (void)I; // Dummy statement to hold the breakpoint; the cast silences unused-variable warnings.
3604
3605 vmaFreeStatsString(g_hAllocator, statsStr);
3606
3607 // Destroy the buffers in reverse order.
3608 while(!bufInfo.empty())
3609 {
3610 const BufferInfo& currBufInfo = bufInfo.back();
3611 vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3612 bufInfo.pop_back();
3613 }
3614 }
3615
3616 vmaDestroyPool(g_hAllocator, pool);
3617 }
3618
3619 static void BenchmarkAlgorithmsCase(FILE* file,
3620 uint32_t algorithm,
3621 bool empty,
3622 VmaAllocationCreateFlags allocStrategy,
3623 FREE_ORDER freeOrder)
3624 {
3625 RandomNumberGenerator rand{16223};
3626
3627 const VkDeviceSize bufSizeMin = 32;
3628 const VkDeviceSize bufSizeMax = 1024;
3629 const size_t maxBufCapacity = 10000;
3630 const uint32_t iterationCount = 10;
3631
3632 VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
3633 sampleBufCreateInfo.size = bufSizeMax;
3634 sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
3635
3636 VmaAllocationCreateInfo sampleAllocCreateInfo = {};
3637 sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
3638
3639 VmaPoolCreateInfo poolCreateInfo = {};
3640 VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
3641 TEST(res == VK_SUCCESS);
3642
3643 poolCreateInfo.blockSize = bufSizeMax * maxBufCapacity;
3644 poolCreateInfo.flags |= algorithm;
3645 poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1;
3646
3647 VmaPool pool = nullptr;
3648 res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
3649 TEST(res == VK_SUCCESS);
3650
3651 // Buffer created just to get memory requirements. Never bound to any memory.
3652 VkBuffer dummyBuffer = VK_NULL_HANDLE;
3653 res = vkCreateBuffer(g_hDevice, &sampleBufCreateInfo, g_Allocs, &dummyBuffer);
3654 TEST(res == VK_SUCCESS && dummyBuffer);
3655
3656 VkMemoryRequirements memReq = {};
3657 vkGetBufferMemoryRequirements(g_hDevice, dummyBuffer, &memReq);
3658
3659 vkDestroyBuffer(g_hDevice, dummyBuffer, g_Allocs);
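// From here on the benchmark calls vmaAllocateMemory directly with these
// memory requirements instead of creating real buffers. This isolates the
// allocator's own bookkeeping cost from vkCreateBuffer/vkBindBufferMemory
// overhead, which is what we want to measure per algorithm and strategy.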
3660
3661 VmaAllocationCreateInfo allocCreateInfo = {};
3662 allocCreateInfo.pool = pool;
3663 allocCreateInfo.flags = allocStrategy;
3664
3665 VmaAllocation alloc;
3666 std::vector<VmaAllocation> baseAllocations;
3667
3668 if(!empty)
3669 {
3670 // Make allocations up to 1/3 of pool size.
3671 VkDeviceSize totalSize = 0;
3672 while(totalSize < poolCreateInfo.blockSize / 3)
3673 {
3674 // This test intentionally allows sizes that are aligned to 4 or 16 bytes.
3675 // This is theoretically allowed and already uncovered one bug.
3676 memReq.size = bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin);
3677 res = vmaAllocateMemory(g_hAllocator, &memReq, &allocCreateInfo, &alloc, nullptr);
3678 TEST(res == VK_SUCCESS);
3679 baseAllocations.push_back(alloc);
3680 totalSize += memReq.size;
3681 }
3682
3683 // Delete half of them, chosen at random.
3684 size_t allocsToDelete = baseAllocations.size() / 2;
3685 for(size_t i = 0; i < allocsToDelete; ++i)
3686 {
3687 const size_t index = (size_t)rand.Generate() % baseAllocations.size();
3688 vmaFreeMemory(g_hAllocator, baseAllocations[index]);
3689 baseAllocations.erase(baseAllocations.begin() + index);
3690 }
3691 }
3692
3693 // BENCHMARK
3694 const size_t allocCount = maxBufCapacity / 3;
3695 std::vector<VmaAllocation> testAllocations;
3696 testAllocations.reserve(allocCount);
3697 duration allocTotalDuration = duration::zero();
3698 duration freeTotalDuration = duration::zero();
3699 for(uint32_t iterationIndex = 0; iterationIndex < iterationCount; ++iterationIndex)
3700 {
3701 // Allocations
3702 time_point allocTimeBeg = std::chrono::high_resolution_clock::now();
3703 for(size_t i = 0; i < allocCount; ++i)
3704 {
3705 memReq.size = bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin);
3706 res = vmaAllocateMemory(g_hAllocator, &memReq, &allocCreateInfo, &alloc, nullptr);
3707 TEST(res == VK_SUCCESS);
3708 testAllocations.push_back(alloc);
3709 }
3710 allocTotalDuration += std::chrono::high_resolution_clock::now() - allocTimeBeg;
3711
3712 // Deallocations
3713 switch(freeOrder)
3714 {
3715 case FREE_ORDER::FORWARD:
3716 // Leave testAllocations unchanged.
3717 break;
3718 case FREE_ORDER::BACKWARD:
3719 std::reverse(testAllocations.begin(), testAllocations.end());
3720 break;
3721 case FREE_ORDER::RANDOM:
3722 std::shuffle(testAllocations.begin(), testAllocations.end(), MyUniformRandomNumberGenerator(rand));
3723 break;
3724 default: assert(0);
3725 }
3726
3727 time_point freeTimeBeg = std::chrono::high_resolution_clock::now();
3728 for(size_t i = 0; i < allocCount; ++i)
3729 vmaFreeMemory(g_hAllocator, testAllocations[i]);
3730 freeTotalDuration += std::chrono::high_resolution_clock::now() - freeTimeBeg;
3731
3732 testAllocations.clear();
3733 }
3734
3735 // Delete baseAllocations
3736 while(!baseAllocations.empty())
3737 {
3738 vmaFreeMemory(g_hAllocator, baseAllocations.back());
3739 baseAllocations.pop_back();
3740 }
3741
3742 vmaDestroyPool(g_hAllocator, pool);
3743
3744 const float allocTotalSeconds = ToFloatSeconds(allocTotalDuration);
3745 const float freeTotalSeconds = ToFloatSeconds(freeTotalDuration);
3746
3747 printf(" Algorithm=%s %s Allocation=%s FreeOrder=%s: allocations %g s, free %g s\n",
3748 AlgorithmToStr(algorithm),
3749 empty ? "Empty" : "Not empty",
3750 GetAllocationStrategyName(allocStrategy),
3751 FREE_ORDER_NAMES[(size_t)freeOrder],
3752 allocTotalSeconds,
3753 freeTotalSeconds);
3754
3755 if(file)
3756 {
3757 std::string currTime;
3758 CurrentTimeToStr(currTime);
3759
3760 fprintf(file, "%s,%s,%s,%u,%s,%s,%g,%g\n",
3761 CODE_DESCRIPTION, currTime.c_str(),
3762 AlgorithmToStr(algorithm),
3763 empty ? 1 : 0,
3764 GetAllocationStrategyName(allocStrategy),
3765 FREE_ORDER_NAMES[(uint32_t)freeOrder],
3766 allocTotalSeconds,
3767 freeTotalSeconds);
3768 }
3769 }
3770
3771 static void TestBufferDeviceAddress()
3772 {
3773 wprintf(L"Test buffer device address\n");
3774
3775 assert(g_BufferDeviceAddressEnabled);
3776
3777 VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
3778 bufCreateInfo.size = 0x10000;
3779 bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
3780 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; // !!!
3781
3782 VmaAllocationCreateInfo allocCreateInfo = {};
3783 allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
3784
3785 for(uint32_t testIndex = 0; testIndex < 2; ++testIndex)
3786 {
3787 // 1st is placed, 2nd is dedicated.
3788 if(testIndex == 1)
3789 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
3790
3791 BufferInfo bufInfo = {};
3792 VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3793 &bufInfo.Buffer, &bufInfo.Allocation, nullptr);
3794 TEST(res == VK_SUCCESS);
3795
3796 VkBufferDeviceAddressInfoEXT bufferDeviceAddressInfo = { VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_EXT };
3797 bufferDeviceAddressInfo.buffer = bufInfo.Buffer;
3798 //assert(g_vkGetBufferDeviceAddressEXT != nullptr);
3799 if(g_vkGetBufferDeviceAddressEXT != nullptr)
3800 {
3801 VkDeviceAddress addr = g_vkGetBufferDeviceAddressEXT(g_hDevice, &bufferDeviceAddressInfo);
3802 TEST(addr != 0);
3803 }
3804
3805 vmaDestroyBuffer(g_hAllocator, bufInfo.Buffer, bufInfo.Allocation);
3806 }
3807 }
3808
3809 static void BenchmarkAlgorithms(FILE* file)
3810 {
3811 wprintf(L"Benchmark algorithms\n");
3812
3813 if(file)
3814 {
3815 fprintf(file,
3816 "Code,Time,"
3817 "Algorithm,Empty,Allocation strategy,Free order,"
3818 "Allocation time (s),Deallocation time (s)\n");
3819 }
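// Each BenchmarkAlgorithmsCase call below appends one data row in the same
// column order, e.g. (values illustrative only; the exact strategy names and
// timestamp format come from GetAllocationStrategyName and CurrentTimeToStr):
//
//     Foo,2020-03-13 12:34:56,Linear,0,BestFit,BACKWARD,0.0123,0.0045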
3820
3821 uint32_t freeOrderCount = 1;
3822 if(ConfigType >= CONFIG_TYPE::CONFIG_TYPE_LARGE)
3823 freeOrderCount = 3;
3824 else if(ConfigType >= CONFIG_TYPE::CONFIG_TYPE_SMALL)
3825 freeOrderCount = 2;
3826
3827 const uint32_t emptyCount = ConfigType >= CONFIG_TYPE::CONFIG_TYPE_SMALL ? 2 : 1;
3828 const uint32_t allocStrategyCount = GetAllocationStrategyCount();
3829
3830 for(uint32_t freeOrderIndex = 0; freeOrderIndex < freeOrderCount; ++freeOrderIndex)
3831 {
3832 FREE_ORDER freeOrder = FREE_ORDER::COUNT;
3833 switch(freeOrderIndex)
3834 {
3835 case 0: freeOrder = FREE_ORDER::BACKWARD; break;
3836 case 1: freeOrder = FREE_ORDER::FORWARD; break;
3837 case 2: freeOrder = FREE_ORDER::RANDOM; break;
3838 default: assert(0);
3839 }
3840
3841 for(uint32_t emptyIndex = 0; emptyIndex < emptyCount; ++emptyIndex)
3842 {
3843 for(uint32_t algorithmIndex = 0; algorithmIndex < 3; ++algorithmIndex)
3844 {
3845 uint32_t algorithm = 0;
3846 switch(algorithmIndex)
3847 {
3848 case 0:
3849 break;
3850 case 1:
3851 algorithm = VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT;
3852 break;
3853 case 2:
3854 algorithm = VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT;
3855 break;
3856 default:
3857 assert(0);
3858 }
3859
3860 uint32_t currAllocStrategyCount = algorithm != 0 ? 1 : allocStrategyCount;
3861 for(uint32_t allocStrategyIndex = 0; allocStrategyIndex < currAllocStrategyCount; ++allocStrategyIndex)
3862 {
3863 VmaAllocationCreateFlags strategy = 0;
3864 if(currAllocStrategyCount > 1)
3865 {
3866 switch(allocStrategyIndex)
3867 {
3868 case 0: strategy = VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT; break;
3869 case 1: strategy = VMA_ALLOCATION_CREATE_STRATEGY_WORST_FIT_BIT; break;
3870 case 2: strategy = VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT; break;
3871 default: assert(0);
3872 }
3873 }
3874
3875 BenchmarkAlgorithmsCase(
3876 file,
3877 algorithm,
3878 (emptyIndex == 0), // empty
3879 strategy,
3880 freeOrder); // freeOrder
3881 }
3882 }
3883 }
3884 }
3885 }
3886
3887 static void TestPool_SameSize()
3888 {
3889 const VkDeviceSize BUF_SIZE = 1024 * 1024;
3890 const size_t BUF_COUNT = 100;
3891 VkResult res;
3892
3893 RandomNumberGenerator rand{123};
3894
3895 VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
3896 bufferInfo.size = BUF_SIZE;
3897 bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
3898
3899 uint32_t memoryTypeBits = UINT32_MAX;
3900 {
3901 VkBuffer dummyBuffer;
3902 res = vkCreateBuffer(g_hDevice, &bufferInfo, g_Allocs, &dummyBuffer);
3903 TEST(res == VK_SUCCESS);
3904
3905 VkMemoryRequirements memReq;
3906 vkGetBufferMemoryRequirements(g_hDevice, dummyBuffer, &memReq);
3907 memoryTypeBits = memReq.memoryTypeBits;
3908
3909 vkDestroyBuffer(g_hDevice, dummyBuffer, g_Allocs);
3910 }
3911
3912 VmaAllocationCreateInfo poolAllocInfo = {};
3913 poolAllocInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
3914 uint32_t memTypeIndex;
3915 res = vmaFindMemoryTypeIndex(
3916 g_hAllocator,
3917 memoryTypeBits,
3918 &poolAllocInfo,
3919 &memTypeIndex);
TEST(res == VK_SUCCESS);
3920
3921 VmaPoolCreateInfo poolCreateInfo = {};
3922 poolCreateInfo.memoryTypeIndex = memTypeIndex;
3923 poolCreateInfo.blockSize = BUF_SIZE * BUF_COUNT / 4;
3924 poolCreateInfo.minBlockCount = 1;
3925 poolCreateInfo.maxBlockCount = 4;
3926 poolCreateInfo.frameInUseCount = 0;
3927
3928 VmaPool pool;
3929 res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
3930 TEST(res == VK_SUCCESS);
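// Pool geometry: maxBlockCount(4) * blockSize(BUF_SIZE * BUF_COUNT / 4) holds
// exactly BUF_COUNT buffers of BUF_SIZE each, since all buffers share the same
// size and alignment. That is what makes the "one more allocation must fail"
// check below deterministic.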
3931
3932 // Test pool name
3933 {
3934 static const char* const POOL_NAME = "Pool name";
3935 vmaSetPoolName(g_hAllocator, pool, POOL_NAME);
3936
3937 const char* fetchedPoolName = nullptr;
3938 vmaGetPoolName(g_hAllocator, pool, &fetchedPoolName);
3939 TEST(strcmp(fetchedPoolName, POOL_NAME) == 0);
3940
3941 vmaSetPoolName(g_hAllocator, pool, nullptr);
3942 }
3943
3944 vmaSetCurrentFrameIndex(g_hAllocator, 1);
3945
3946 VmaAllocationCreateInfo allocInfo = {};
3947 allocInfo.pool = pool;
3948 allocInfo.flags = VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT |
3949 VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT;
3950
3951 struct BufItem
3952 {
3953 VkBuffer Buf;
3954 VmaAllocation Alloc;
3955 };
3956 std::vector<BufItem> items;
3957
3958 // Fill entire pool.
3959 for(size_t i = 0; i < BUF_COUNT; ++i)
3960 {
3961 BufItem item;
3962 res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocInfo, &item.Buf, &item.Alloc, nullptr);
3963 TEST(res == VK_SUCCESS);
3964 items.push_back(item);
3965 }
3966
3967 // Make sure that another allocation would fail.
3968 {
3969 BufItem item;
3970 res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocInfo, &item.Buf, &item.Alloc, nullptr);
3971 TEST(res == VK_ERROR_OUT_OF_DEVICE_MEMORY);
3972 }
3973
3974 // Validate that no buffer is lost. Also check that they are not mapped.
3975 for(size_t i = 0; i < items.size(); ++i)
3976 {
3977 VmaAllocationInfo allocInfo;
3978 vmaGetAllocationInfo(g_hAllocator, items[i].Alloc, &allocInfo);
3979 TEST(allocInfo.deviceMemory != VK_NULL_HANDLE);
3980 TEST(allocInfo.pMappedData == nullptr);
3981 }
3982
3983 // Free some percent of random items.
3984 {
3985 const size_t PERCENT_TO_FREE = 10;
3986 size_t itemsToFree = items.size() * PERCENT_TO_FREE / 100;
3987 for(size_t i = 0; i < itemsToFree; ++i)
3988 {
3989 size_t index = (size_t)rand.Generate() % items.size();
3990 vmaDestroyBuffer(g_hAllocator, items[index].Buf, items[index].Alloc);
3991 items.erase(items.begin() + index);
3992 }
3993 }
3994
3995 // Randomly allocate and free items.
3996 {
3997 const size_t OPERATION_COUNT = BUF_COUNT;
3998 for(size_t i = 0; i < OPERATION_COUNT; ++i)
3999 {
4000 bool allocate = rand.Generate() % 2 != 0;
4001 if(allocate)
4002 {
4003 if(items.size() < BUF_COUNT)
4004 {
4005 BufItem item;
4006 res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocInfo, &item.Buf, &item.Alloc, nullptr);
4007 TEST(res == VK_SUCCESS);
4008 items.push_back(item);
4009 }
4010 }
4011 else // Free
4012 {
4013 if(!items.empty())
4014 {
4015 size_t index = (size_t)rand.Generate() % items.size();
4016 vmaDestroyBuffer(g_hAllocator, items[index].Buf, items[index].Alloc);
4017 items.erase(items.begin() + index);
4018 }
4019 }
4020 }
4021 }
4022
4023 // Allocate up to maximum.
4024 while(items.size() < BUF_COUNT)
4025 {
4026 BufItem item;
4027 res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocInfo, &item.Buf, &item.Alloc, nullptr);
4028 TEST(res == VK_SUCCESS);
4029 items.push_back(item);
4030 }
4031
4032 // Validate that no buffer is lost.
4033 for(size_t i = 0; i < items.size(); ++i)
4034 {
4035 VmaAllocationInfo allocInfo;
4036 vmaGetAllocationInfo(g_hAllocator, items[i].Alloc, &allocInfo);
4037 TEST(allocInfo.deviceMemory != VK_NULL_HANDLE);
4038 }
4039
4040 // Next frame.
4041 vmaSetCurrentFrameIndex(g_hAllocator, 2);
4042
4043 // Allocate another BUF_COUNT buffers.
4044 for(size_t i = 0; i < BUF_COUNT; ++i)
4045 {
4046 BufItem item;
4047 res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocInfo, &item.Buf, &item.Alloc, nullptr);
4048 TEST(res == VK_SUCCESS);
4049 items.push_back(item);
4050 }
4051
4052 // Make sure the first BUF_COUNT buffers are lost. Delete them.
4053 for(size_t i = 0; i < BUF_COUNT; ++i)
4054 {
4055 VmaAllocationInfo allocInfo;
4056 vmaGetAllocationInfo(g_hAllocator, items[i].Alloc, &allocInfo);
4057 TEST(allocInfo.deviceMemory == VK_NULL_HANDLE);
4058 vmaDestroyBuffer(g_hAllocator, items[i].Buf, items[i].Alloc);
4059 }
4060 items.erase(items.begin(), items.begin() + BUF_COUNT);
4061
4062 // Validate that no buffer is lost.
4063 for(size_t i = 0; i < items.size(); ++i)
4064 {
4065 VmaAllocationInfo allocInfo;
4066 vmaGetAllocationInfo(g_hAllocator, items[i].Alloc, &allocInfo);
4067 TEST(allocInfo.deviceMemory != VK_NULL_HANDLE);
4068 }
4069
4070 // Free one item.
4071 vmaDestroyBuffer(g_hAllocator, items.back().Buf, items.back().Alloc);
4072 items.pop_back();
4073
4074 // Validate statistics.
4075 {
4076 VmaPoolStats poolStats = {};
4077 vmaGetPoolStats(g_hAllocator, pool, &poolStats);
4078 TEST(poolStats.allocationCount == items.size());
4079 TEST(poolStats.size == BUF_COUNT * BUF_SIZE);
4080 TEST(poolStats.unusedRangeCount == 1);
4081 TEST(poolStats.unusedRangeSizeMax == BUF_SIZE);
4082 TEST(poolStats.unusedSize == BUF_SIZE);
4083 }
4084
4085 // Free all remaining items.
4086 for(size_t i = items.size(); i--; )
4087 vmaDestroyBuffer(g_hAllocator, items[i].Buf, items[i].Alloc);
4088 items.clear();
4089
4090 // Allocate maximum items again.
4091 for(size_t i = 0; i < BUF_COUNT; ++i)
4092 {
4093 BufItem item;
4094 res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocInfo, &item.Buf, &item.Alloc, nullptr);
4095 TEST(res == VK_SUCCESS);
4096 items.push_back(item);
4097 }
4098
4099 // Delete every other item.
4100 for(size_t i = 0; i < BUF_COUNT / 2; ++i)
4101 {
4102 vmaDestroyBuffer(g_hAllocator, items[i].Buf, items[i].Alloc);
4103 items.erase(items.begin() + i);
4104 }
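// At this point the surviving 50 buffers are spread roughly evenly across the
// pool's 4 blocks, each about half empty. Compacting them should pack all
// survivors into 2 fully used blocks and release the other 2, which is why
// deviceMemoryBlocksFreed is expected to be exactly 2 below.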
4105
4106 // Defragment!
4107 {
4108 std::vector<VmaAllocation> allocationsToDefragment(items.size());
4109 for(size_t i = 0; i < items.size(); ++i)
4110 allocationsToDefragment[i] = items[i].Alloc;
4111
4112 VmaDefragmentationStats defragmentationStats;
4113 res = vmaDefragment(g_hAllocator, allocationsToDefragment.data(), items.size(), nullptr, nullptr, &defragmentationStats);
4114 TEST(res == VK_SUCCESS);
4115 TEST(defragmentationStats.deviceMemoryBlocksFreed == 2);
4116 }
4117
4118 // Free all remaining items.
4119 for(size_t i = items.size(); i--; )
4120 vmaDestroyBuffer(g_hAllocator, items[i].Buf, items[i].Alloc);
4121 items.clear();
4122
4123 ////////////////////////////////////////////////////////////////////////////////
4124 // Test for vmaMakePoolAllocationsLost
4125
4126 // Allocate 4 buffers on frame 10.
4127 vmaSetCurrentFrameIndex(g_hAllocator, 10);
4128 for(size_t i = 0; i < 4; ++i)
4129 {
4130 BufItem item;
4131 res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocInfo, &item.Buf, &item.Alloc, nullptr);
4132 TEST(res == VK_SUCCESS);
4133 items.push_back(item);
4134 }
4135
4136 // Touch first 2 of them on frame 11.
4137 vmaSetCurrentFrameIndex(g_hAllocator, 11);
4138 for(size_t i = 0; i < 2; ++i)
4139 {
4140 VmaAllocationInfo allocInfo;
4141 vmaGetAllocationInfo(g_hAllocator, items[i].Alloc, &allocInfo);
4142 }
4143
4144 // Call vmaMakePoolAllocationsLost. Only the remaining 2 should become lost.
4145 size_t lostCount = 0xDEADC0DE;
4146 vmaMakePoolAllocationsLost(g_hAllocator, pool, &lostCount);
4147 TEST(lostCount == 2);
4148
4149 // Make another call. Now 0 should be lost.
4150 vmaMakePoolAllocationsLost(g_hAllocator, pool, &lostCount);
4151 TEST(lostCount == 0);
4152
4153 // Make another call, with null count. Should not crash.
4154 vmaMakePoolAllocationsLost(g_hAllocator, pool, nullptr);
4155
4156 // END: Free all remaining items.
4157 for(size_t i = items.size(); i--; )
4158 vmaDestroyBuffer(g_hAllocator, items[i].Buf, items[i].Alloc);
4159
4160 items.clear();
4161
4162 ////////////////////////////////////////////////////////////////////////////////
4163 // Test for allocation too large for pool
4164
4165 {
4166 VmaAllocationCreateInfo allocCreateInfo = {};
4167 allocCreateInfo.pool = pool;
4168
4169 VkMemoryRequirements memReq;
4170 memReq.memoryTypeBits = UINT32_MAX;
4171 memReq.alignment = 1;
4172 memReq.size = poolCreateInfo.blockSize + 4;
4173
4174 VmaAllocation alloc = nullptr;
4175 res = vmaAllocateMemory(g_hAllocator, &memReq, &allocCreateInfo, &alloc, nullptr);
4176 TEST(res == VK_ERROR_OUT_OF_DEVICE_MEMORY && alloc == nullptr);
4177 }
4178
4179 vmaDestroyPool(g_hAllocator, pool);
4180 }
4181
4182 static bool ValidatePattern(const void* pMemory, size_t size, uint8_t pattern)
4183 {
4184 const uint8_t* pBytes = (const uint8_t*)pMemory;
4185 for(size_t i = 0; i < size; ++i)
4186 {
4187 if(pBytes[i] != pattern)
4188 {
4189 return false;
4190 }
4191 }
4192 return true;
4193 }
4194
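// This test relies on VMA_DEBUG_INITIALIZE_ALLOCATIONS: when that macro is
// defined to 1, VMA fills newly allocated host-visible memory with 0xDC and
// overwrites it with 0xEF on destruction, which is exactly what
// ValidatePattern checks below. With the default configuration (macro = 0)
// the memory content is undefined and these checks would fail.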
4195 static void TestAllocationsInitialization()
4196 {
4197 VkResult res;
4198
4199 const size_t BUF_SIZE = 1024;
4200
4201 // Create pool.
4202
4203 VkBufferCreateInfo bufInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
4204 bufInfo.size = BUF_SIZE;
4205 bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
4206
4207 VmaAllocationCreateInfo dummyBufAllocCreateInfo = {};
4208 dummyBufAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
4209
4210 VmaPoolCreateInfo poolCreateInfo = {};
4211 poolCreateInfo.blockSize = BUF_SIZE * 10;
4212 poolCreateInfo.minBlockCount = 1; // To keep memory alive while pool exists.
4213 poolCreateInfo.maxBlockCount = 1;
4214 res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &bufInfo, &dummyBufAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
4215 TEST(res == VK_SUCCESS);
4216
4217 VmaAllocationCreateInfo bufAllocCreateInfo = {};
4218 res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &bufAllocCreateInfo.pool);
4219 TEST(res == VK_SUCCESS);
4220
4221 // Create one persistently mapped buffer to keep the memory of this block mapped,
4222 // so that the pointer to mapped data remains (more or less...) valid even
4223 // after the destruction of other allocations.
4224
4225 bufAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
4226 VkBuffer firstBuf;
4227 VmaAllocation firstAlloc;
4228 res = vmaCreateBuffer(g_hAllocator, &bufInfo, &bufAllocCreateInfo, &firstBuf, &firstAlloc, nullptr);
4229 TEST(res == VK_SUCCESS);
4230
4231 // Test buffers.
4232
4233 for(uint32_t i = 0; i < 2; ++i)
4234 {
4235 const bool persistentlyMapped = i == 0;
4236 bufAllocCreateInfo.flags = persistentlyMapped ? VMA_ALLOCATION_CREATE_MAPPED_BIT : 0;
4237 VkBuffer buf;
4238 VmaAllocation alloc;
4239 VmaAllocationInfo allocInfo;
4240 res = vmaCreateBuffer(g_hAllocator, &bufInfo, &bufAllocCreateInfo, &buf, &alloc, &allocInfo);
4241 TEST(res == VK_SUCCESS);
4242
4243 void* pMappedData;
4244 if(!persistentlyMapped)
4245 {
4246 res = vmaMapMemory(g_hAllocator, alloc, &pMappedData);
4247 TEST(res == VK_SUCCESS);
4248 }
4249 else
4250 {
4251 pMappedData = allocInfo.pMappedData;
4252 }
4253
4254 // Validate initialized content
4255 bool valid = ValidatePattern(pMappedData, BUF_SIZE, 0xDC);
4256 TEST(valid);
4257
4258 if(!persistentlyMapped)
4259 {
4260 vmaUnmapMemory(g_hAllocator, alloc);
4261 }
4262
4263 vmaDestroyBuffer(g_hAllocator, buf, alloc);
4264
4265 // Validate freed content
4266 valid = ValidatePattern(pMappedData, BUF_SIZE, 0xEF);
4267 TEST(valid);
4268 }
4269
4270 vmaDestroyBuffer(g_hAllocator, firstBuf, firstAlloc);
4271 vmaDestroyPool(g_hAllocator, bufAllocCreateInfo.pool);
4272 }
4273
4274 static void TestPool_Benchmark(
4275 PoolTestResult& outResult,
4276 const PoolTestConfig& config)
4277 {
4278 TEST(config.ThreadCount > 0);
4279
4280 RandomNumberGenerator mainRand{config.RandSeed};
4281
4282 uint32_t allocationSizeProbabilitySum = std::accumulate(
4283 config.AllocationSizes.begin(),
4284 config.AllocationSizes.end(),
4285 0u,
4286 [](uint32_t sum, const AllocationSize& allocSize) {
4287 return sum + allocSize.Probability;
4288 });
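// config.AllocationSizes acts as a discrete probability distribution: each
// entry is picked with weight Probability / allocationSizeProbabilitySum via
// the roulette-wheel selection inside ThreadProc below.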
4289
4290 VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
4291 bufferInfo.size = 256; // Whatever.
4292 bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
4293
4294 VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
4295 imageInfo.imageType = VK_IMAGE_TYPE_2D;
4296 imageInfo.extent.width = 256; // Whatever.
4297 imageInfo.extent.height = 256; // Whatever.
4298 imageInfo.extent.depth = 1;
4299 imageInfo.mipLevels = 1;
4300 imageInfo.arrayLayers = 1;
4301 imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
4302 imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL; // LINEAR if CPU memory.
4303 imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
4304 imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; // TRANSFER_SRC if CPU memory.
4305 imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
4306
4307 uint32_t bufferMemoryTypeBits = UINT32_MAX;
4308 {
4309 VkBuffer dummyBuffer;
4310 VkResult res = vkCreateBuffer(g_hDevice, &bufferInfo, g_Allocs, &dummyBuffer);
4311 TEST(res == VK_SUCCESS);
4312
4313 VkMemoryRequirements memReq;
4314 vkGetBufferMemoryRequirements(g_hDevice, dummyBuffer, &memReq);
4315 bufferMemoryTypeBits = memReq.memoryTypeBits;
4316
4317 vkDestroyBuffer(g_hDevice, dummyBuffer, g_Allocs);
4318 }
4319
4320 uint32_t imageMemoryTypeBits = UINT32_MAX;
4321 {
4322 VkImage dummyImage;
4323 VkResult res = vkCreateImage(g_hDevice, &imageInfo, g_Allocs, &dummyImage);
4324 TEST(res == VK_SUCCESS);
4325
4326 VkMemoryRequirements memReq;
4327 vkGetImageMemoryRequirements(g_hDevice, dummyImage, &memReq);
4328 imageMemoryTypeBits = memReq.memoryTypeBits;
4329
4330 vkDestroyImage(g_hDevice, dummyImage, g_Allocs);
4331 }
4332
4333 uint32_t memoryTypeBits = 0;
4334 if(config.UsesBuffers() && config.UsesImages())
4335 {
4336 memoryTypeBits = bufferMemoryTypeBits & imageMemoryTypeBits;
4337 if(memoryTypeBits == 0)
4338 {
4339 PrintWarning(L"Cannot test buffers + images in the same memory pool on this GPU.");
4340 return;
4341 }
4342 }
4343 else if(config.UsesBuffers())
4344 memoryTypeBits = bufferMemoryTypeBits;
4345 else if(config.UsesImages())
4346 memoryTypeBits = imageMemoryTypeBits;
4347 else
4348 TEST(0);
4349
4350 VmaPoolCreateInfo poolCreateInfo = {};
4351 poolCreateInfo.memoryTypeIndex = 0;
4352 poolCreateInfo.minBlockCount = 1;
4353 poolCreateInfo.maxBlockCount = 1;
4354 poolCreateInfo.blockSize = config.PoolSize;
4355 poolCreateInfo.frameInUseCount = 1;
4356
4357 VmaAllocationCreateInfo dummyAllocCreateInfo = {};
4358 dummyAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
4359 TEST(vmaFindMemoryTypeIndex(g_hAllocator, memoryTypeBits, &dummyAllocCreateInfo, &poolCreateInfo.memoryTypeIndex) == VK_SUCCESS);
4360
4361 VmaPool pool;
4362 VkResult res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
4363 TEST(res == VK_SUCCESS);
4364
4365 // Start time measurement - after creating pool and initializing data structures.
4366 time_point timeBeg = std::chrono::high_resolution_clock::now();
4367
4368 ////////////////////////////////////////////////////////////////////////////////
4369 // ThreadProc
4370 auto ThreadProc = [&](
4371 PoolTestThreadResult* outThreadResult,
4372 uint32_t randSeed,
4373 HANDLE frameStartEvent,
4374 HANDLE frameEndEvent) -> void
4375 {
4376 RandomNumberGenerator threadRand{randSeed};
4377
4378 outThreadResult->AllocationTimeMin = duration::max();
4379 outThreadResult->AllocationTimeSum = duration::zero();
4380 outThreadResult->AllocationTimeMax = duration::min();
4381 outThreadResult->DeallocationTimeMin = duration::max();
4382 outThreadResult->DeallocationTimeSum = duration::zero();
4383 outThreadResult->DeallocationTimeMax = duration::min();
4384 outThreadResult->AllocationCount = 0;
4385 outThreadResult->DeallocationCount = 0;
4386 outThreadResult->LostAllocationCount = 0;
4387 outThreadResult->LostAllocationTotalSize = 0;
4388 outThreadResult->FailedAllocationCount = 0;
4389 outThreadResult->FailedAllocationTotalSize = 0;
4390
4391 struct Item
4392 {
4393 VkDeviceSize BufferSize;
4394 VkExtent2D ImageSize;
4395 VkBuffer Buf;
4396 VkImage Image;
4397 VmaAllocation Alloc;
4398
4399 VkDeviceSize CalcSizeBytes() const
4400 {
4401 return BufferSize +
4402 ImageSize.width * ImageSize.height * 4;
4403 }
4404 };
4405 std::vector<Item> unusedItems, usedItems;
4406
4407 const size_t threadTotalItemCount = config.TotalItemCount / config.ThreadCount;
4408
4409 // Create all items - all unused, not yet allocated.
4410 for(size_t i = 0; i < threadTotalItemCount; ++i)
4411 {
4412 Item item = {};
4413
4414 uint32_t allocSizeIndex = 0;
4415 uint32_t r = threadRand.Generate() % allocationSizeProbabilitySum;
4416 while(r >= config.AllocationSizes[allocSizeIndex].Probability)
4417 r -= config.AllocationSizes[allocSizeIndex++].Probability;
4418
4419 const AllocationSize& allocSize = config.AllocationSizes[allocSizeIndex];
4420 if(allocSize.BufferSizeMax > 0)
4421 {
4422 TEST(allocSize.BufferSizeMin > 0);
4423 TEST(allocSize.ImageSizeMin == 0 && allocSize.ImageSizeMax == 0);
4424 if(allocSize.BufferSizeMax == allocSize.BufferSizeMin)
4425 item.BufferSize = allocSize.BufferSizeMin;
4426 else
4427 {
4428 item.BufferSize = allocSize.BufferSizeMin + threadRand.Generate() % (allocSize.BufferSizeMax - allocSize.BufferSizeMin);
4429 item.BufferSize = item.BufferSize / 16 * 16;
4430 }
4431 }
4432 else
4433 {
4434 TEST(allocSize.ImageSizeMin > 0 && allocSize.ImageSizeMax > 0);
4435 if(allocSize.ImageSizeMax == allocSize.ImageSizeMin)
4436 item.ImageSize.width = item.ImageSize.height = allocSize.ImageSizeMax;
4437 else
4438 {
4439 item.ImageSize.width = allocSize.ImageSizeMin + threadRand.Generate() % (allocSize.ImageSizeMax - allocSize.ImageSizeMin);
4440 item.ImageSize.height = allocSize.ImageSizeMin + threadRand.Generate() % (allocSize.ImageSizeMax - allocSize.ImageSizeMin);
4441 }
4442 }
4443
4444 unusedItems.push_back(item);
4445 }
4446
4447 auto Allocate = [&](Item& item) -> VkResult
4448 {
4449 VmaAllocationCreateInfo allocCreateInfo = {};
4450 allocCreateInfo.pool = pool;
4451 allocCreateInfo.flags = VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT |
4452 VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT;
4453
4454 if(item.BufferSize)
4455 {
4456 bufferInfo.size = item.BufferSize;
4457 PoolAllocationTimeRegisterObj timeRegisterObj(*outThreadResult);
4458 return vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocCreateInfo, &item.Buf, &item.Alloc, nullptr);
4459 }
4460 else
4461 {
4462 TEST(item.ImageSize.width && item.ImageSize.height);
4463
4464 imageInfo.extent.width = item.ImageSize.width;
4465 imageInfo.extent.height = item.ImageSize.height;
4466 PoolAllocationTimeRegisterObj timeRegisterObj(*outThreadResult);
4467 return vmaCreateImage(g_hAllocator, &imageInfo, &allocCreateInfo, &item.Image, &item.Alloc, nullptr);
4468 }
4469 };
4470
4471 ////////////////////////////////////////////////////////////////////////////////
4472 // Frames
4473 for(uint32_t frameIndex = 0; frameIndex < config.FrameCount; ++frameIndex)
4474 {
4475 WaitForSingleObject(frameStartEvent, INFINITE);
4476
4477 // Always move some percentage of used items back to unused, so that different items get chosen as used each frame.
4478 const size_t bufsToMakeUnused = usedItems.size() * config.ItemsToMakeUnusedPercent / 100;
4479 for(size_t i = 0; i < bufsToMakeUnused; ++i)
4480 {
4481 size_t index = threadRand.Generate() % usedItems.size();
4482 unusedItems.push_back(usedItems[index]);
4483 usedItems.erase(usedItems.begin() + index);
4484 }
4485
4486 // Determine which bufs we want to use in this frame.
4487 const size_t usedBufCount = (threadRand.Generate() % (config.UsedItemCountMax - config.UsedItemCountMin) + config.UsedItemCountMin)
4488 / config.ThreadCount;
4489 TEST(usedBufCount < usedItems.size() + unusedItems.size());
4490 // Move some used to unused.
4491 while(usedBufCount < usedItems.size())
4492 {
4493 size_t index = threadRand.Generate() % usedItems.size();
4494 unusedItems.push_back(usedItems[index]);
4495 usedItems.erase(usedItems.begin() + index);
4496 }
4497 // Move some unused to used.
4498 while(usedBufCount > usedItems.size())
4499 {
4500 size_t index = threadRand.Generate() % unusedItems.size();
4501 usedItems.push_back(unusedItems[index]);
4502 unusedItems.erase(unusedItems.begin() + index);
4503 }
4504
4505 uint32_t touchExistingCount = 0;
4506 uint32_t touchLostCount = 0;
4507 uint32_t createSucceededCount = 0;
4508 uint32_t createFailedCount = 0;
4509
4510 // Touch all used items. (Re)allocate any that are not yet created or became lost.
4511 for(size_t i = 0; i < usedItems.size(); ++i)
4512 {
4513 Item& item = usedItems[i];
4514 // Not yet created.
4515 if(item.Alloc == VK_NULL_HANDLE)
4516 {
4517 res = Allocate(item);
4518 ++outThreadResult->AllocationCount;
4519 if(res != VK_SUCCESS)
4520 {
4521 item.Alloc = VK_NULL_HANDLE;
4522 item.Buf = VK_NULL_HANDLE;
4523 ++outThreadResult->FailedAllocationCount;
4524 outThreadResult->FailedAllocationTotalSize += item.CalcSizeBytes();
4525 ++createFailedCount;
4526 }
4527 else
4528 ++createSucceededCount;
4529 }
4530 else
4531 {
4532 // Touch.
4533 VmaAllocationInfo allocInfo;
4534 vmaGetAllocationInfo(g_hAllocator, item.Alloc, &allocInfo);
4535 // Lost.
4536 if(allocInfo.deviceMemory == VK_NULL_HANDLE)
4537 {
4538 ++touchLostCount;
4539
4540 // Destroy.
4541 {
4542 PoolDeallocationTimeRegisterObj timeRegisterObj(*outThreadResult);
4543 if(item.Buf)
4544 vmaDestroyBuffer(g_hAllocator, item.Buf, item.Alloc);
4545 else
4546 vmaDestroyImage(g_hAllocator, item.Image, item.Alloc);
4547 ++outThreadResult->DeallocationCount;
4548 }
4549 item.Alloc = VK_NULL_HANDLE;
4550 item.Buf = VK_NULL_HANDLE;
4551
4552 ++outThreadResult->LostAllocationCount;
4553 outThreadResult->LostAllocationTotalSize += item.CalcSizeBytes();
4554
4555 // Recreate.
4556 res = Allocate(item);
4557 ++outThreadResult->AllocationCount;
4558 // Creation failed.
4559 if(res != VK_SUCCESS)
4560 {
4561 ++outThreadResult->FailedAllocationCount;
4562 outThreadResult->FailedAllocationTotalSize += item.CalcSizeBytes();
4563 ++createFailedCount;
4564 }
4565 else
4566 ++createSucceededCount;
4567 }
4568 else
4569 ++touchExistingCount;
4570 }
4571 }
4572
4573 /*
4574 printf("Thread %u frame %u: Touch existing %u lost %u, create succeeded %u failed %u\n",
4575 randSeed, frameIndex,
4576 touchExistingCount, touchLostCount,
4577 createSucceededCount, createFailedCount);
4578 */
4579
4580 SetEvent(frameEndEvent);
4581 }
4582
4583 // Free all remaining items.
4584 for(size_t i = usedItems.size(); i--; )
4585 {
4586 PoolDeallocationTimeRegisterObj timeRegisterObj(*outThreadResult);
4587 if(usedItems[i].Buf)
4588 vmaDestroyBuffer(g_hAllocator, usedItems[i].Buf, usedItems[i].Alloc);
4589 else
4590 vmaDestroyImage(g_hAllocator, usedItems[i].Image, usedItems[i].Alloc);
4591 ++outThreadResult->DeallocationCount;
4592 }
4593 for(size_t i = unusedItems.size(); i--; )
4594 {
4595 PoolDeallocationTimeRegisterObj timeRegisterObj(*outThreadResult);
4596 if(unusedItems[i].Buf)
4597 vmaDestroyBuffer(g_hAllocator, unusedItems[i].Buf, unusedItems[i].Alloc);
4598 else
4599 vmaDestroyImage(g_hAllocator, unusedItems[i].Image, unusedItems[i].Alloc);
4600 ++outThreadResult->DeallocationCount;
4601 }
4602 };
4603
4604 // Launch threads.
4605 uint32_t threadRandSeed = mainRand.Generate();
4606 std::vector<HANDLE> frameStartEvents{config.ThreadCount};
4607 std::vector<HANDLE> frameEndEvents{config.ThreadCount};
4608 std::vector<std::thread> bkgThreads;
4609 std::vector<PoolTestThreadResult> threadResults{config.ThreadCount};
4610 for(uint32_t threadIndex = 0; threadIndex < config.ThreadCount; ++threadIndex)
4611 {
4612 frameStartEvents[threadIndex] = CreateEvent(NULL, FALSE, FALSE, NULL);
4613 frameEndEvents[threadIndex] = CreateEvent(NULL, FALSE, FALSE, NULL);
4614 bkgThreads.emplace_back(std::bind(
4615 ThreadProc,
4616 &threadResults[threadIndex],
4617 threadRandSeed + threadIndex,
4618 frameStartEvents[threadIndex],
4619 frameEndEvents[threadIndex]));
4620 }
4621
4622 // Execute frames.
4623 TEST(config.ThreadCount <= MAXIMUM_WAIT_OBJECTS);
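// The main thread drives the "render loop": it advances the allocator's frame
// index, releases every worker for exactly one frame via its auto-reset start
// event, then blocks until all workers have signaled their end events.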
4624 for(uint32_t frameIndex = 0; frameIndex < config.FrameCount; ++frameIndex)
4625 {
4626 vmaSetCurrentFrameIndex(g_hAllocator, frameIndex);
4627 for(size_t threadIndex = 0; threadIndex < config.ThreadCount; ++threadIndex)
4628 SetEvent(frameStartEvents[threadIndex]);
4629 WaitForMultipleObjects(config.ThreadCount, &frameEndEvents[0], TRUE, INFINITE);
4630 }
4631
4632 // Wait for the threads to finish.
4633 for(size_t i = 0; i < bkgThreads.size(); ++i)
4634 {
4635 bkgThreads[i].join();
4636 CloseHandle(frameEndEvents[i]);
4637 CloseHandle(frameStartEvents[i]);
4638 }
4639 bkgThreads.clear();
4640
4641 // Finish time measurement - before destroying pool.
4642 outResult.TotalTime = std::chrono::high_resolution_clock::now() - timeBeg;
4643
4644 vmaDestroyPool(g_hAllocator, pool);
4645
4646 outResult.AllocationTimeMin = duration::max();
4647 outResult.AllocationTimeAvg = duration::zero();
4648 outResult.AllocationTimeMax = duration::min();
4649 outResult.DeallocationTimeMin = duration::max();
4650 outResult.DeallocationTimeAvg = duration::zero();
4651 outResult.DeallocationTimeMax = duration::min();
4652 outResult.LostAllocationCount = 0;
4653 outResult.LostAllocationTotalSize = 0;
4654 outResult.FailedAllocationCount = 0;
4655 outResult.FailedAllocationTotalSize = 0;
4656 size_t allocationCount = 0;
4657 size_t deallocationCount = 0;
4658 for(size_t threadIndex = 0; threadIndex < config.ThreadCount; ++threadIndex)
4659 {
4660 const PoolTestThreadResult& threadResult = threadResults[threadIndex];
4661 outResult.AllocationTimeMin = std::min(outResult.AllocationTimeMin, threadResult.AllocationTimeMin);
4662 outResult.AllocationTimeMax = std::max(outResult.AllocationTimeMax, threadResult.AllocationTimeMax);
4663 outResult.AllocationTimeAvg += threadResult.AllocationTimeSum;
4664 outResult.DeallocationTimeMin = std::min(outResult.DeallocationTimeMin, threadResult.DeallocationTimeMin);
4665 outResult.DeallocationTimeMax = std::max(outResult.DeallocationTimeMax, threadResult.DeallocationTimeMax);
4666 outResult.DeallocationTimeAvg += threadResult.DeallocationTimeSum;
4667 allocationCount += threadResult.AllocationCount;
4668 deallocationCount += threadResult.DeallocationCount;
4669 outResult.FailedAllocationCount += threadResult.FailedAllocationCount;
4670 outResult.FailedAllocationTotalSize += threadResult.FailedAllocationTotalSize;
4671 outResult.LostAllocationCount += threadResult.LostAllocationCount;
4672 outResult.LostAllocationTotalSize += threadResult.LostAllocationTotalSize;
4673 }
4674 if(allocationCount)
4675 outResult.AllocationTimeAvg /= allocationCount;
4676 if(deallocationCount)
4677 outResult.DeallocationTimeAvg /= deallocationCount;
4678 }
4679
4680 static inline bool MemoryRegionsOverlap(char* ptr1, size_t size1, char* ptr2, size_t size2)
4681 {
4682 if(ptr1 < ptr2)
4683 return ptr1 + size1 > ptr2;
4684 else if(ptr2 < ptr1)
4685 return ptr2 + size2 > ptr1;
4686 else
4687 return true;
4688 }
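// Example (illustrative): two regions overlap when they share at least one
// byte; merely touching end-to-start does not count:
//
//     char buf[16];
//     MemoryRegionsOverlap(buf, 8, buf + 4, 8); // true  - [0,8) vs [4,12)
//     MemoryRegionsOverlap(buf, 8, buf + 8, 8); // false - [0,8) vs [8,16)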
4689
4690 static void TestMemoryUsage()
4691 {
4692 wprintf(L"Testing memory usage:\n");
4693
4694 static const VmaMemoryUsage lastUsage = VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED;
4695 for(uint32_t usage = 0; usage <= lastUsage; ++usage)
4696 {
4697 switch(usage)
4698 {
4699 case VMA_MEMORY_USAGE_UNKNOWN: printf(" VMA_MEMORY_USAGE_UNKNOWN:\n"); break;
4700 case VMA_MEMORY_USAGE_GPU_ONLY: printf(" VMA_MEMORY_USAGE_GPU_ONLY:\n"); break;
4701 case VMA_MEMORY_USAGE_CPU_ONLY: printf(" VMA_MEMORY_USAGE_CPU_ONLY:\n"); break;
4702 case VMA_MEMORY_USAGE_CPU_TO_GPU: printf(" VMA_MEMORY_USAGE_CPU_TO_GPU:\n"); break;
4703 case VMA_MEMORY_USAGE_GPU_TO_CPU: printf(" VMA_MEMORY_USAGE_GPU_TO_CPU:\n"); break;
4704 case VMA_MEMORY_USAGE_CPU_COPY: printf(" VMA_MEMORY_USAGE_CPU_COPY:\n"); break;
4705 case VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED: printf(" VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED:\n"); break;
4706 default: assert(0);
4707 }
4708
4709 auto printResult = [](const char* testName, VkResult res, uint32_t memoryTypeBits, uint32_t memoryTypeIndex)
4710 {
4711 if(res == VK_SUCCESS)
4712 printf(" %s: memoryTypeBits=0x%X, memoryTypeIndex=%u\n", testName, memoryTypeBits, memoryTypeIndex);
4713 else
4714 printf(" %s: memoryTypeBits=0x%X, FAILED with res=%d\n", testName, memoryTypeBits, (int32_t)res);
4715 };
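// For every VMA_MEMORY_USAGE_* value, the numbered sub-tests below try the
// same set of representative resources (copy buffer, vertex buffer, transfer
// image, sampled image, color attachment) and print which memory type VMA
// picked, so the output can be compared against the GPU's memory heaps by eye.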
4716
4717 // 1: Buffer for copy
4718 {
4719 VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
4720 bufCreateInfo.size = 65536;
4721 bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
4722
4723 VkBuffer buf = VK_NULL_HANDLE;
4724 VkResult res = vkCreateBuffer(g_hDevice, &bufCreateInfo, g_Allocs, &buf);
4725 TEST(res == VK_SUCCESS && buf != VK_NULL_HANDLE);
4726
4727 VkMemoryRequirements memReq = {};
4728 vkGetBufferMemoryRequirements(g_hDevice, buf, &memReq);
4729
4730 VmaAllocationCreateInfo allocCreateInfo = {};
4731 allocCreateInfo.usage = (VmaMemoryUsage)usage;
4732 VmaAllocation alloc = VK_NULL_HANDLE;
4733 VmaAllocationInfo allocInfo = {};
4734 res = vmaAllocateMemoryForBuffer(g_hAllocator, buf, &allocCreateInfo, &alloc, &allocInfo);
4735 if(res == VK_SUCCESS)
4736 {
4737 TEST((memReq.memoryTypeBits & (1u << allocInfo.memoryType)) != 0);
4738 res = vkBindBufferMemory(g_hDevice, buf, allocInfo.deviceMemory, allocInfo.offset);
4739 TEST(res == VK_SUCCESS);
4740 }
4741 printResult("Buffer TRANSFER_DST + TRANSFER_SRC", res, memReq.memoryTypeBits, allocInfo.memoryType);
4742 vmaDestroyBuffer(g_hAllocator, buf, alloc);
4743 }
4744
4745 // 2: Vertex buffer
4746 {
4747 VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
4748 bufCreateInfo.size = 65536;
4749 bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
4750
4751 VkBuffer buf = VK_NULL_HANDLE;
4752 VkResult res = vkCreateBuffer(g_hDevice, &bufCreateInfo, g_Allocs, &buf);
4753 TEST(res == VK_SUCCESS && buf != VK_NULL_HANDLE);
4754
4755 VkMemoryRequirements memReq = {};
4756 vkGetBufferMemoryRequirements(g_hDevice, buf, &memReq);
4757
4758 VmaAllocationCreateInfo allocCreateInfo = {};
4759 allocCreateInfo.usage = (VmaMemoryUsage)usage;
4760 VmaAllocation alloc = VK_NULL_HANDLE;
4761 VmaAllocationInfo allocInfo = {};
4762 res = vmaAllocateMemoryForBuffer(g_hAllocator, buf, &allocCreateInfo, &alloc, &allocInfo);
4763 if(res == VK_SUCCESS)
4764 {
4765 TEST((memReq.memoryTypeBits & (1u << allocInfo.memoryType)) != 0);
4766 res = vkBindBufferMemory(g_hDevice, buf, allocInfo.deviceMemory, allocInfo.offset);
4767 TEST(res == VK_SUCCESS);
4768 }
4769 printResult("Buffer TRANSFER_DST + VERTEX_BUFFER", res, memReq.memoryTypeBits, allocInfo.memoryType);
4770 vmaDestroyBuffer(g_hAllocator, buf, alloc);
4771 }
4772
4773 // 3: Image for copy, OPTIMAL
4774 {
4775 VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
4776 imgCreateInfo.imageType = VK_IMAGE_TYPE_2D;
4777 imgCreateInfo.extent.width = 256;
4778 imgCreateInfo.extent.height = 256;
4779 imgCreateInfo.extent.depth = 1;
4780 imgCreateInfo.mipLevels = 1;
4781 imgCreateInfo.arrayLayers = 1;
4782 imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
4783 imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
4784 imgCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
4785 imgCreateInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
4786 imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
4787
4788 VkImage img = VK_NULL_HANDLE;
4789 VkResult res = vkCreateImage(g_hDevice, &imgCreateInfo, g_Allocs, &img);
4790 TEST(res == VK_SUCCESS && img != VK_NULL_HANDLE);
4791
4792 VkMemoryRequirements memReq = {};
4793 vkGetImageMemoryRequirements(g_hDevice, img, &memReq);
4794
4795 VmaAllocationCreateInfo allocCreateInfo = {};
4796 allocCreateInfo.usage = (VmaMemoryUsage)usage;
4797 VmaAllocation alloc = VK_NULL_HANDLE;
4798 VmaAllocationInfo allocInfo = {};
4799 res = vmaAllocateMemoryForImage(g_hAllocator, img, &allocCreateInfo, &alloc, &allocInfo);
4800 if(res == VK_SUCCESS)
4801 {
4802 TEST((memReq.memoryTypeBits & (1u << allocInfo.memoryType)) != 0);
4803 res = vkBindImageMemory(g_hDevice, img, allocInfo.deviceMemory, allocInfo.offset);
4804 TEST(res == VK_SUCCESS);
4805 }
4806 printResult("Image OPTIMAL TRANSFER_DST + TRANSFER_SRC", res, memReq.memoryTypeBits, allocInfo.memoryType);
4807
4808 vmaDestroyImage(g_hAllocator, img, alloc);
4809 }
4810
4811 // 4: Image SAMPLED, OPTIMAL
4812 {
4813 VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
4814 imgCreateInfo.imageType = VK_IMAGE_TYPE_2D;
4815 imgCreateInfo.extent.width = 256;
4816 imgCreateInfo.extent.height = 256;
4817 imgCreateInfo.extent.depth = 1;
4818 imgCreateInfo.mipLevels = 1;
4819 imgCreateInfo.arrayLayers = 1;
4820 imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
4821 imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
4822 imgCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
4823 imgCreateInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
4824 imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
4825
4826 VkImage img = VK_NULL_HANDLE;
4827 VkResult res = vkCreateImage(g_hDevice, &imgCreateInfo, g_Allocs, &img);
4828 TEST(res == VK_SUCCESS && img != VK_NULL_HANDLE);
4829
4830 VkMemoryRequirements memReq = {};
4831 vkGetImageMemoryRequirements(g_hDevice, img, &memReq);
4832
4833 VmaAllocationCreateInfo allocCreateInfo = {};
4834 allocCreateInfo.usage = (VmaMemoryUsage)usage;
4835 VmaAllocation alloc = VK_NULL_HANDLE;
4836 VmaAllocationInfo allocInfo = {};
4837 res = vmaAllocateMemoryForImage(g_hAllocator, img, &allocCreateInfo, &alloc, &allocInfo);
4838 if(res == VK_SUCCESS)
4839 {
4840 TEST((memReq.memoryTypeBits & (1u << allocInfo.memoryType)) != 0);
4841 res = vkBindImageMemory(g_hDevice, img, allocInfo.deviceMemory, allocInfo.offset);
4842 TEST(res == VK_SUCCESS);
4843 }
4844 printResult("Image OPTIMAL TRANSFER_DST + SAMPLED", res, memReq.memoryTypeBits, allocInfo.memoryType);
4845 vmaDestroyImage(g_hAllocator, img, alloc);
4846 }
4847
4848 // 5: Image COLOR_ATTACHMENT, OPTIMAL
4849 {
4850 VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
4851 imgCreateInfo.imageType = VK_IMAGE_TYPE_2D;
4852 imgCreateInfo.extent.width = 256;
4853 imgCreateInfo.extent.height = 256;
4854 imgCreateInfo.extent.depth = 1;
4855 imgCreateInfo.mipLevels = 1;
4856 imgCreateInfo.arrayLayers = 1;
4857 imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
4858 imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
4859 imgCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
4860 imgCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
4861 imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
4862
4863 VkImage img = VK_NULL_HANDLE;
4864 VkResult res = vkCreateImage(g_hDevice, &imgCreateInfo, g_Allocs, &img);
4865 TEST(res == VK_SUCCESS && img != VK_NULL_HANDLE);
4866
4867 VkMemoryRequirements memReq = {};
4868 vkGetImageMemoryRequirements(g_hDevice, img, &memReq);
4869
4870 VmaAllocationCreateInfo allocCreateInfo = {};
4871 allocCreateInfo.usage = (VmaMemoryUsage)usage;
4872 VmaAllocation alloc = VK_NULL_HANDLE;
4873 VmaAllocationInfo allocInfo = {};
4874 res = vmaAllocateMemoryForImage(g_hAllocator, img, &allocCreateInfo, &alloc, &allocInfo);
4875 if(res == VK_SUCCESS)
4876 {
4877 TEST((memReq.memoryTypeBits & (1u << allocInfo.memoryType)) != 0);
4878 res = vkBindImageMemory(g_hDevice, img, allocInfo.deviceMemory, allocInfo.offset);
4879 TEST(res == VK_SUCCESS);
4880 }
4881 printResult("Image OPTIMAL SAMPLED + COLOR_ATTACHMENT", res, memReq.memoryTypeBits, allocInfo.memoryType);
4882 vmaDestroyImage(g_hAllocator, img, alloc);
4883 }
4884 }
4885 }
4886
4887 static uint32_t FindDeviceCoherentMemoryTypeBits()
4888 {
4889 VkPhysicalDeviceMemoryProperties memProps;
4890 vkGetPhysicalDeviceMemoryProperties(g_hPhysicalDevice, &memProps);
4891
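// Build a bitmask with one bit set for each memory type that has the DEVICE_COHERENT_AMD property flag.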
4892 uint32_t memTypeBits = 0;
4893 for(uint32_t i = 0; i < memProps.memoryTypeCount; ++i)
4894 {
4895 if(memProps.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD)
4896 memTypeBits |= 1u << i;
4897 }
4898 return memTypeBits;
4899 }
4900
4901 static void TestDeviceCoherentMemory()
4902 {
4903 if(!VK_AMD_device_coherent_memory_enabled)
4904 return;
4905
4906 uint32_t deviceCoherentMemoryTypeBits = FindDeviceCoherentMemoryTypeBits();
4907     // The extension is enabled and the feature is enabled, yet the device still reports no such memory type.
4908     // In that case there is nothing to test here.
4909 if(deviceCoherentMemoryTypeBits == 0)
4910 return;
4911
4912 wprintf(L"Testing device coherent memory...\n");
4913
4914 // 1. Try to allocate buffer from a memory type that is DEVICE_COHERENT.
4915
4916 VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
4917 bufCreateInfo.size = 0x10000;
4918 bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
4919
4920 VmaAllocationCreateInfo allocCreateInfo = {};
4921 allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
4922 allocCreateInfo.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD;
4923
4924 AllocInfo alloc = {};
4925 VmaAllocationInfo allocInfo = {};
4926 VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &alloc.m_Buffer, &alloc.m_Allocation, &allocInfo);
4927
4928     // Make sure it succeeded and the allocation really ended up in such a memory type.
4929 TEST(res == VK_SUCCESS);
4930 TEST((1u << allocInfo.memoryType) & deviceCoherentMemoryTypeBits);
4931
4932 alloc.Destroy();
4933
4934 // 2. Try to create a pool in such memory type.
4935 {
4936 VmaPoolCreateInfo poolCreateInfo = {};
4937
4938 res = vmaFindMemoryTypeIndex(g_hAllocator, UINT32_MAX, &allocCreateInfo, &poolCreateInfo.memoryTypeIndex);
4939 TEST(res == VK_SUCCESS);
4940 TEST((1u << poolCreateInfo.memoryTypeIndex) & deviceCoherentMemoryTypeBits);
4941
4942 VmaPool pool = VK_NULL_HANDLE;
4943 res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
4944 TEST(res == VK_SUCCESS);
4945
4946 vmaDestroyPool(g_hAllocator, pool);
4947 }
4948
4949 // 3. Try the same with a local allocator created without VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT.
4950
4951 VmaAllocatorCreateInfo allocatorCreateInfo = {};
4952 SetAllocatorCreateInfo(allocatorCreateInfo);
4953 allocatorCreateInfo.flags &= ~VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT;
4954
4955 VmaAllocator localAllocator = VK_NULL_HANDLE;
4956 res = vmaCreateAllocator(&allocatorCreateInfo, &localAllocator);
4957 TEST(res == VK_SUCCESS && localAllocator);
4958
4959 res = vmaCreateBuffer(localAllocator, &bufCreateInfo, &allocCreateInfo, &alloc.m_Buffer, &alloc.m_Allocation, &allocInfo);
4960
4961 // Make sure it failed.
4962 TEST(res != VK_SUCCESS && !alloc.m_Buffer && !alloc.m_Allocation);
4963
4964 // 4. Try to find memory type.
4965 {
4966         uint32_t memTypeIndex = UINT32_MAX;
4967 res = vmaFindMemoryTypeIndex(localAllocator, UINT32_MAX, &allocCreateInfo, &memTypeIndex);
4968 TEST(res != VK_SUCCESS);
4969 }
4970
4971 vmaDestroyAllocator(localAllocator);
4972 }
4973
4974 static void TestBudget()
4975 {
4976 wprintf(L"Testing budget...\n");
4977
4978 static const VkDeviceSize BUF_SIZE = 10ull * 1024 * 1024;
4979 static const uint32_t BUF_COUNT = 4;
4980
4981     const VkPhysicalDeviceMemoryProperties* memProps = nullptr;
4982 vmaGetMemoryProperties(g_hAllocator, &memProps);
4983
4984 for(uint32_t testIndex = 0; testIndex < 2; ++testIndex)
4985 {
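// Advancing the frame index prompts VMA to refresh its cached budget statistics before we query them.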
4986 vmaSetCurrentFrameIndex(g_hAllocator, ++g_FrameIndex);
4987
4988 VmaBudget budgetBeg[VK_MAX_MEMORY_HEAPS] = {};
4989 vmaGetBudget(g_hAllocator, budgetBeg);
4990
4991 for(uint32_t i = 0; i < memProps->memoryHeapCount; ++i)
4992 {
4993 TEST(budgetBeg[i].budget > 0);
4994 TEST(budgetBeg[i].budget <= memProps->memoryHeaps[i].size);
4995 TEST(budgetBeg[i].allocationBytes <= budgetBeg[i].blockBytes);
4996 }
4997
4998 VkBufferCreateInfo bufInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
4999 bufInfo.size = BUF_SIZE;
5000 bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
5001
5002 VmaAllocationCreateInfo allocCreateInfo = {};
5003 allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
5004 if(testIndex == 0)
5005 {
5006 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
5007 }
5008
5009 // CREATE BUFFERS
5010 uint32_t heapIndex = 0;
5011 BufferInfo bufInfos[BUF_COUNT] = {};
5012 for(uint32_t bufIndex = 0; bufIndex < BUF_COUNT; ++bufIndex)
5013 {
5014 VmaAllocationInfo allocInfo;
5015 VkResult res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo,
5016 &bufInfos[bufIndex].Buffer, &bufInfos[bufIndex].Allocation, &allocInfo);
5017 TEST(res == VK_SUCCESS);
5018 if(bufIndex == 0)
5019 {
5020 heapIndex = MemoryTypeToHeap(allocInfo.memoryType);
5021 }
5022 else
5023 {
5024 // All buffers need to fall into the same heap.
5025 TEST(MemoryTypeToHeap(allocInfo.memoryType) == heapIndex);
5026 }
5027 }
5028
5029 VmaBudget budgetWithBufs[VK_MAX_MEMORY_HEAPS] = {};
5030 vmaGetBudget(g_hAllocator, budgetWithBufs);
5031
5032 // DESTROY BUFFERS
5033 for(size_t bufIndex = BUF_COUNT; bufIndex--; )
5034 {
5035 vmaDestroyBuffer(g_hAllocator, bufInfos[bufIndex].Buffer, bufInfos[bufIndex].Allocation);
5036 }
5037
5038 VmaBudget budgetEnd[VK_MAX_MEMORY_HEAPS] = {};
5039 vmaGetBudget(g_hAllocator, budgetEnd);
5040
5041 // CHECK
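// The heap we allocated from must return to its initial allocationBytes; all other heaps must be unaffected.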
5042 for(uint32_t i = 0; i < memProps->memoryHeapCount; ++i)
5043 {
5044 TEST(budgetEnd[i].allocationBytes <= budgetEnd[i].blockBytes);
5045 if(i == heapIndex)
5046 {
5047 TEST(budgetEnd[i].allocationBytes == budgetBeg[i].allocationBytes);
5048 TEST(budgetWithBufs[i].allocationBytes == budgetBeg[i].allocationBytes + BUF_SIZE * BUF_COUNT);
5049 TEST(budgetWithBufs[i].blockBytes >= budgetEnd[i].blockBytes);
5050 }
5051 else
5052 {
5053                 TEST(budgetEnd[i].allocationBytes == budgetBeg[i].allocationBytes &&
5054                     budgetEnd[i].allocationBytes == budgetWithBufs[i].allocationBytes);
5055                 TEST(budgetEnd[i].blockBytes == budgetBeg[i].blockBytes &&
5056                     budgetEnd[i].blockBytes == budgetWithBufs[i].blockBytes);
5057 }
5058 }
5059 }
5060 }
5061
5062 static void TestMapping()
5063 {
5064 wprintf(L"Testing mapping...\n");
5065
5066 VkResult res;
5067 uint32_t memTypeIndex = UINT32_MAX;
5068
5069 enum TEST
5070 {
5071 TEST_NORMAL,
5072 TEST_POOL,
5073 TEST_DEDICATED,
5074 TEST_COUNT
5075 };
5076 for(uint32_t testIndex = 0; testIndex < TEST_COUNT; ++testIndex)
5077 {
5078 VmaPool pool = nullptr;
5079 if(testIndex == TEST_POOL)
5080 {
5081 TEST(memTypeIndex != UINT32_MAX);
5082 VmaPoolCreateInfo poolInfo = {};
5083 poolInfo.memoryTypeIndex = memTypeIndex;
5084 res = vmaCreatePool(g_hAllocator, &poolInfo, &pool);
5085 TEST(res == VK_SUCCESS);
5086 }
5087
5088 VkBufferCreateInfo bufInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
5089 bufInfo.size = 0x10000;
5090 bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
5091
5092 VmaAllocationCreateInfo allocCreateInfo = {};
5093 allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
5094 allocCreateInfo.pool = pool;
5095 if(testIndex == TEST_DEDICATED)
5096 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
5097
5098 VmaAllocationInfo allocInfo;
5099
5100 // Mapped manually
5101
5102         // Create 2 buffers. A third, persistently mapped one is created later.
5103 BufferInfo bufferInfos[3];
5104 for(size_t i = 0; i < 2; ++i)
5105 {
5106 res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo,
5107 &bufferInfos[i].Buffer, &bufferInfos[i].Allocation, &allocInfo);
5108 TEST(res == VK_SUCCESS);
5109 TEST(allocInfo.pMappedData == nullptr);
5110 memTypeIndex = allocInfo.memoryType;
5111 }
5112
5113 // Map buffer 0.
5114 char* data00 = nullptr;
5115 res = vmaMapMemory(g_hAllocator, bufferInfos[0].Allocation, (void**)&data00);
5116 TEST(res == VK_SUCCESS && data00 != nullptr);
5117 data00[0xFFFF] = data00[0];
5118
5119 // Map buffer 0 second time.
5120 char* data01 = nullptr;
5121 res = vmaMapMemory(g_hAllocator, bufferInfos[0].Allocation, (void**)&data01);
5122 TEST(res == VK_SUCCESS && data01 == data00);
5123
5124 // Map buffer 1.
5125 char* data1 = nullptr;
5126 res = vmaMapMemory(g_hAllocator, bufferInfos[1].Allocation, (void**)&data1);
5127 TEST(res == VK_SUCCESS && data1 != nullptr);
5128 TEST(!MemoryRegionsOverlap(data00, (size_t)bufInfo.size, data1, (size_t)bufInfo.size));
5129 data1[0xFFFF] = data1[0];
5130
5131 // Unmap buffer 0 two times.
5132 vmaUnmapMemory(g_hAllocator, bufferInfos[0].Allocation);
5133 vmaUnmapMemory(g_hAllocator, bufferInfos[0].Allocation);
5134 vmaGetAllocationInfo(g_hAllocator, bufferInfos[0].Allocation, &allocInfo);
5135 TEST(allocInfo.pMappedData == nullptr);
5136
5137 // Unmap buffer 1.
5138 vmaUnmapMemory(g_hAllocator, bufferInfos[1].Allocation);
5139 vmaGetAllocationInfo(g_hAllocator, bufferInfos[1].Allocation, &allocInfo);
5140 TEST(allocInfo.pMappedData == nullptr);
5141
5142 // Create 3rd buffer - persistently mapped.
5143 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT;
5144 res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo,
5145 &bufferInfos[2].Buffer, &bufferInfos[2].Allocation, &allocInfo);
5146 TEST(res == VK_SUCCESS && allocInfo.pMappedData != nullptr);
5147
5148 // Map buffer 2.
5149 char* data2 = nullptr;
5150 res = vmaMapMemory(g_hAllocator, bufferInfos[2].Allocation, (void**)&data2);
5151 TEST(res == VK_SUCCESS && data2 == allocInfo.pMappedData);
5152 data2[0xFFFF] = data2[0];
5153
5154 // Unmap buffer 2.
5155 vmaUnmapMemory(g_hAllocator, bufferInfos[2].Allocation);
5156 vmaGetAllocationInfo(g_hAllocator, bufferInfos[2].Allocation, &allocInfo);
5157 TEST(allocInfo.pMappedData == data2);
5158
5159 // Destroy all buffers.
5160 for(size_t i = 3; i--; )
5161 vmaDestroyBuffer(g_hAllocator, bufferInfos[i].Buffer, bufferInfos[i].Allocation);
5162
5163 vmaDestroyPool(g_hAllocator, pool);
5164 }
5165 }
5166
5167 // Test VMA_ALLOCATION_CREATE_MAPPED_BIT together with required DEVICE_LOCAL. This combination used to trigger a bug.
5168 static void TestDeviceLocalMapped()
5169 {
5170 VkResult res;
5171
5172 for(uint32_t testIndex = 0; testIndex < 3; ++testIndex)
5173 {
5174 VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
5175 bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
5176 bufCreateInfo.size = 4096;
5177
5178 VmaPool pool = VK_NULL_HANDLE;
5179 VmaAllocationCreateInfo allocCreateInfo = {};
5180 allocCreateInfo.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
5181 allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
5182 if(testIndex == 2)
5183 {
5184 VmaPoolCreateInfo poolCreateInfo = {};
5185 res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &poolCreateInfo.memoryTypeIndex);
5186 TEST(res == VK_SUCCESS);
5187 res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
5188 TEST(res == VK_SUCCESS);
5189 allocCreateInfo.pool = pool;
5190 }
5191 else if(testIndex == 1)
5192 {
5193 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT;
5194 }
5195
5196 VkBuffer buf = VK_NULL_HANDLE;
5197 VmaAllocation alloc = VK_NULL_HANDLE;
5198 VmaAllocationInfo allocInfo = {};
5199 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
5200 TEST(res == VK_SUCCESS && alloc);
5201
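// VMA maps the allocation only if the chosen memory type is HOST_VISIBLE; on discrete GPUs
// DEVICE_LOCAL types often aren't, so the allocation may legitimately stay unmapped.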
5202 VkMemoryPropertyFlags memTypeFlags = 0;
5203 vmaGetMemoryTypeProperties(g_hAllocator, allocInfo.memoryType, &memTypeFlags);
5204 const bool shouldBeMapped = (memTypeFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0;
5205 TEST((allocInfo.pMappedData != nullptr) == shouldBeMapped);
5206
5207 vmaDestroyBuffer(g_hAllocator, buf, alloc);
5208 vmaDestroyPool(g_hAllocator, pool);
5209 }
5210 }
5211
5212 static void TestMappingMultithreaded()
5213 {
5214 wprintf(L"Testing mapping multithreaded...\n");
5215
5216 static const uint32_t threadCount = 16;
5217 static const uint32_t bufferCount = 1024;
5218 static const uint32_t threadBufferCount = bufferCount / threadCount;
5219
5220 VkResult res;
5221 volatile uint32_t memTypeIndex = UINT32_MAX;
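// Shared with the worker threads: the first successful allocation records its memory type,
// which the TEST_POOL iteration then uses to create its pool.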
5222
5223 enum TEST
5224 {
5225 TEST_NORMAL,
5226 TEST_POOL,
5227 TEST_DEDICATED,
5228 TEST_COUNT
5229 };
5230 for(uint32_t testIndex = 0; testIndex < TEST_COUNT; ++testIndex)
5231 {
5232 VmaPool pool = nullptr;
5233 if(testIndex == TEST_POOL)
5234 {
5235 TEST(memTypeIndex != UINT32_MAX);
5236 VmaPoolCreateInfo poolInfo = {};
5237 poolInfo.memoryTypeIndex = memTypeIndex;
5238 res = vmaCreatePool(g_hAllocator, &poolInfo, &pool);
5239 TEST(res == VK_SUCCESS);
5240 }
5241
5242 VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
5243 bufCreateInfo.size = 0x10000;
5244 bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
5245
5246 VmaAllocationCreateInfo allocCreateInfo = {};
5247 allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
5248 allocCreateInfo.pool = pool;
5249 if(testIndex == TEST_DEDICATED)
5250 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
5251
5252 std::thread threads[threadCount];
5253 for(uint32_t threadIndex = 0; threadIndex < threadCount; ++threadIndex)
5254 {
5255 threads[threadIndex] = std::thread([=, &memTypeIndex](){
5256 // ======== THREAD FUNCTION ========
5257
5258 RandomNumberGenerator rand{threadIndex};
5259
5260 enum class MODE
5261 {
5262 // Don't map this buffer at all.
5263 DONT_MAP,
5264 // Map and quickly unmap.
5265 MAP_FOR_MOMENT,
5266 // Map and unmap before destruction.
5267 MAP_FOR_LONGER,
5268                     // Map two times. Unmap once right away; the second unmap happens just before destruction.
5269 MAP_TWO_TIMES,
5270 // Create this buffer as persistently mapped.
5271 PERSISTENTLY_MAPPED,
5272 COUNT
5273 };
5274 std::vector<BufferInfo> bufInfos{threadBufferCount};
5275 std::vector<MODE> bufModes{threadBufferCount};
5276
5277 for(uint32_t bufferIndex = 0; bufferIndex < threadBufferCount; ++bufferIndex)
5278 {
5279 BufferInfo& bufInfo = bufInfos[bufferIndex];
5280 const MODE mode = (MODE)(rand.Generate() % (uint32_t)MODE::COUNT);
5281 bufModes[bufferIndex] = mode;
5282
5283 VmaAllocationCreateInfo localAllocCreateInfo = allocCreateInfo;
5284 if(mode == MODE::PERSISTENTLY_MAPPED)
5285 localAllocCreateInfo.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT;
5286
5287 VmaAllocationInfo allocInfo;
5288 VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &localAllocCreateInfo,
5289 &bufInfo.Buffer, &bufInfo.Allocation, &allocInfo);
5290 TEST(res == VK_SUCCESS);
5291
5292 if(memTypeIndex == UINT32_MAX)
5293 memTypeIndex = allocInfo.memoryType;
5294
5295 char* data = nullptr;
5296
5297 if(mode == MODE::PERSISTENTLY_MAPPED)
5298 {
5299 data = (char*)allocInfo.pMappedData;
5300 TEST(data != nullptr);
5301 }
5302 else if(mode == MODE::MAP_FOR_MOMENT || mode == MODE::MAP_FOR_LONGER ||
5303 mode == MODE::MAP_TWO_TIMES)
5304 {
5305 TEST(data == nullptr);
5306 res = vmaMapMemory(g_hAllocator, bufInfo.Allocation, (void**)&data);
5307 TEST(res == VK_SUCCESS && data != nullptr);
5308
5309 if(mode == MODE::MAP_TWO_TIMES)
5310 {
5311 char* data2 = nullptr;
5312 res = vmaMapMemory(g_hAllocator, bufInfo.Allocation, (void**)&data2);
5313 TEST(res == VK_SUCCESS && data2 == data);
5314 }
5315 }
5316 else if(mode == MODE::DONT_MAP)
5317 {
5318 TEST(allocInfo.pMappedData == nullptr);
5319 }
5320 else
5321 TEST(0);
5322
5323                     // Test that reading the first byte and writing the last byte of the mapped memory doesn't crash.
5324 if(data)
5325 data[0xFFFF] = data[0];
5326
5327 if(mode == MODE::MAP_FOR_MOMENT || mode == MODE::MAP_TWO_TIMES)
5328 {
5329 vmaUnmapMemory(g_hAllocator, bufInfo.Allocation);
5330
5331 VmaAllocationInfo allocInfo;
5332 vmaGetAllocationInfo(g_hAllocator, bufInfo.Allocation, &allocInfo);
5333 if(mode == MODE::MAP_FOR_MOMENT)
5334 TEST(allocInfo.pMappedData == nullptr);
5335 else
5336 TEST(allocInfo.pMappedData == data);
5337 }
5338
5339 switch(rand.Generate() % 3)
5340 {
5341 case 0: Sleep(0); break; // Yield.
5342 case 1: Sleep(10); break; // 10 ms
5343 // default: No sleep.
5344 }
5345
5346                     // Test that reading the first byte and writing the last byte of the mapped memory doesn't crash.
5347 if(data)
5348 data[0xFFFF] = data[0];
5349 }
5350
5351 for(size_t bufferIndex = threadBufferCount; bufferIndex--; )
5352 {
5353 if(bufModes[bufferIndex] == MODE::MAP_FOR_LONGER ||
5354 bufModes[bufferIndex] == MODE::MAP_TWO_TIMES)
5355 {
5356 vmaUnmapMemory(g_hAllocator, bufInfos[bufferIndex].Allocation);
5357
5358 VmaAllocationInfo allocInfo;
5359 vmaGetAllocationInfo(g_hAllocator, bufInfos[bufferIndex].Allocation, &allocInfo);
5360 TEST(allocInfo.pMappedData == nullptr);
5361 }
5362
5363 vmaDestroyBuffer(g_hAllocator, bufInfos[bufferIndex].Buffer, bufInfos[bufferIndex].Allocation);
5364 }
5365 });
5366 }
5367
5368 for(uint32_t threadIndex = 0; threadIndex < threadCount; ++threadIndex)
5369 threads[threadIndex].join();
5370
5371 vmaDestroyPool(g_hAllocator, pool);
5372 }
5373 }
5374
5375 static void WriteMainTestResultHeader(FILE* file)
5376 {
5377 fprintf(file,
5378 "Code,Time,"
5379 "Threads,Buffers and images,Sizes,Operations,Allocation strategy,Free order,"
5380 "Total Time (us),"
5381 "Allocation Time Min (us),"
5382 "Allocation Time Avg (us),"
5383 "Allocation Time Max (us),"
5384 "Deallocation Time Min (us),"
5385 "Deallocation Time Avg (us),"
5386 "Deallocation Time Max (us),"
5387 "Total Memory Allocated (B),"
5388 "Free Range Size Avg (B),"
5389 "Free Range Size Max (B)\n");
5390 }
5391
5392 static void WriteMainTestResult(
5393 FILE* file,
5394 const char* codeDescription,
5395 const char* testDescription,
5396 const Config& config, const Result& result)
5397 {
5398 float totalTimeSeconds = ToFloatSeconds(result.TotalTime);
5399 float allocationTimeMinSeconds = ToFloatSeconds(result.AllocationTimeMin);
5400 float allocationTimeAvgSeconds = ToFloatSeconds(result.AllocationTimeAvg);
5401 float allocationTimeMaxSeconds = ToFloatSeconds(result.AllocationTimeMax);
5402 float deallocationTimeMinSeconds = ToFloatSeconds(result.DeallocationTimeMin);
5403 float deallocationTimeAvgSeconds = ToFloatSeconds(result.DeallocationTimeAvg);
5404 float deallocationTimeMaxSeconds = ToFloatSeconds(result.DeallocationTimeMax);
5405
5406 std::string currTime;
5407 CurrentTimeToStr(currTime);
5408
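// All durations below are converted from seconds to microseconds to match the "(us)" columns in the header.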
5409 fprintf(file,
5410 "%s,%s,%s,"
5411 "%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%I64u,%I64u,%I64u\n",
5412 codeDescription,
5413 currTime.c_str(),
5414 testDescription,
5415 totalTimeSeconds * 1e6f,
5416 allocationTimeMinSeconds * 1e6f,
5417 allocationTimeAvgSeconds * 1e6f,
5418 allocationTimeMaxSeconds * 1e6f,
5419 deallocationTimeMinSeconds * 1e6f,
5420 deallocationTimeAvgSeconds * 1e6f,
5421 deallocationTimeMaxSeconds * 1e6f,
5422 result.TotalMemoryAllocated,
5423 result.FreeRangeSizeAvg,
5424 result.FreeRangeSizeMax);
5425 }
5426
5427 static void WritePoolTestResultHeader(FILE* file)
5428 {
5429 fprintf(file,
5430 "Code,Test,Time,"
5431 "Config,"
5432 "Total Time (us),"
5433 "Allocation Time Min (us),"
5434 "Allocation Time Avg (us),"
5435 "Allocation Time Max (us),"
5436 "Deallocation Time Min (us),"
5437 "Deallocation Time Avg (us),"
5438 "Deallocation Time Max (us),"
5439 "Lost Allocation Count,"
5440 "Lost Allocation Total Size (B),"
5441 "Failed Allocation Count,"
5442 "Failed Allocation Total Size (B)\n");
5443 }
5444
5445 static void WritePoolTestResult(
5446 FILE* file,
5447 const char* codeDescription,
5448 const char* testDescription,
5449 const PoolTestConfig& config,
5450 const PoolTestResult& result)
5451 {
5452 float totalTimeSeconds = ToFloatSeconds(result.TotalTime);
5453 float allocationTimeMinSeconds = ToFloatSeconds(result.AllocationTimeMin);
5454 float allocationTimeAvgSeconds = ToFloatSeconds(result.AllocationTimeAvg);
5455 float allocationTimeMaxSeconds = ToFloatSeconds(result.AllocationTimeMax);
5456 float deallocationTimeMinSeconds = ToFloatSeconds(result.DeallocationTimeMin);
5457 float deallocationTimeAvgSeconds = ToFloatSeconds(result.DeallocationTimeAvg);
5458 float deallocationTimeMaxSeconds = ToFloatSeconds(result.DeallocationTimeMax);
5459
5460 std::string currTime;
5461 CurrentTimeToStr(currTime);
5462
5463 fprintf(file,
5464 "%s,%s,%s,"
5465 "ThreadCount=%u PoolSize=%llu FrameCount=%u TotalItemCount=%u UsedItemCount=%u...%u ItemsToMakeUnusedPercent=%u,"
5466 "%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%I64u,%I64u,%I64u,%I64u\n",
5467 // General
5468 codeDescription,
5469 testDescription,
5470 currTime.c_str(),
5471 // Config
5472 config.ThreadCount,
5473 (unsigned long long)config.PoolSize,
5474 config.FrameCount,
5475 config.TotalItemCount,
5476 config.UsedItemCountMin,
5477 config.UsedItemCountMax,
5478 config.ItemsToMakeUnusedPercent,
5479 // Results
5480 totalTimeSeconds * 1e6f,
5481 allocationTimeMinSeconds * 1e6f,
5482 allocationTimeAvgSeconds * 1e6f,
5483 allocationTimeMaxSeconds * 1e6f,
5484 deallocationTimeMinSeconds * 1e6f,
5485 deallocationTimeAvgSeconds * 1e6f,
5486 deallocationTimeMaxSeconds * 1e6f,
5487 result.LostAllocationCount,
5488 result.LostAllocationTotalSize,
5489 result.FailedAllocationCount,
5490 result.FailedAllocationTotalSize);
5491 }
5492
5493 static void PerformCustomMainTest(FILE* file)
5494 {
5495 Config config{};
5496 config.RandSeed = 65735476;
5497 //config.MaxBytesToAllocate = 4ull * 1024 * 1024; // 4 MB
5498 config.MaxBytesToAllocate = 4ull * 1024 * 1024 * 1024; // 4 GB
5499 config.MemUsageProbability[0] = 1; // VMA_MEMORY_USAGE_GPU_ONLY
5500 config.FreeOrder = FREE_ORDER::FORWARD;
5501 config.ThreadCount = 16;
5502 config.ThreadsUsingCommonAllocationsProbabilityPercent = 50;
5503 config.AllocationStrategy = 0;
5504
5505 // Buffers
5506 //config.AllocationSizes.push_back({4, 16, 1024});
5507 config.AllocationSizes.push_back({4, 0x10000, 0xA00000}); // 64 KB ... 10 MB
5508
5509 // Images
5510 //config.AllocationSizes.push_back({4, 0, 0, 4, 32});
5511 //config.AllocationSizes.push_back({4, 0, 0, 256, 2048});
5512
5513 config.BeginBytesToAllocate = config.MaxBytesToAllocate * 5 / 100;
5514 config.AdditionalOperationCount = 1024;
5515
5516 Result result{};
5517 VkResult res = MainTest(result, config);
5518 TEST(res == VK_SUCCESS);
5519 WriteMainTestResult(file, "Foo", "CustomTest", config, result);
5520 }
5521
5522 static void PerformCustomPoolTest(FILE* file)
5523 {
5524 PoolTestConfig config;
5525 config.PoolSize = 100 * 1024 * 1024;
5526 config.RandSeed = 2345764;
5527 config.ThreadCount = 1;
5528 config.FrameCount = 200;
5529 config.ItemsToMakeUnusedPercent = 2;
5530
5531 AllocationSize allocSize = {};
5532 allocSize.BufferSizeMin = 1024;
5533 allocSize.BufferSizeMax = 1024 * 1024;
5534 allocSize.Probability = 1;
5535 config.AllocationSizes.push_back(allocSize);
5536
5537 allocSize.BufferSizeMin = 0;
5538 allocSize.BufferSizeMax = 0;
5539 allocSize.ImageSizeMin = 128;
5540 allocSize.ImageSizeMax = 1024;
5541 allocSize.Probability = 1;
5542 config.AllocationSizes.push_back(allocSize);
5543
5544 config.PoolSize = config.CalcAvgResourceSize() * 200;
5545 config.UsedItemCountMax = 160;
5546 config.TotalItemCount = config.UsedItemCountMax * 10;
5547 config.UsedItemCountMin = config.UsedItemCountMax * 80 / 100;
5548
5549 g_MemoryAliasingWarningEnabled = false;
5550 PoolTestResult result = {};
5551 TestPool_Benchmark(result, config);
5552 g_MemoryAliasingWarningEnabled = true;
5553
5554 WritePoolTestResult(file, "Code desc", "Test desc", config, result);
5555 }
5556
5557 static void PerformMainTests(FILE* file)
5558 {
5559 uint32_t repeatCount = 1;
5560 if(ConfigType >= CONFIG_TYPE_MAXIMUM) repeatCount = 3;
5561
5562 Config config{};
5563 config.RandSeed = 65735476;
5564 config.MemUsageProbability[0] = 1; // VMA_MEMORY_USAGE_GPU_ONLY
5565 config.FreeOrder = FREE_ORDER::FORWARD;
5566
5567 size_t threadCountCount = 1;
5568 switch(ConfigType)
5569 {
5570 case CONFIG_TYPE_MINIMUM: threadCountCount = 1; break;
5571 case CONFIG_TYPE_SMALL: threadCountCount = 2; break;
5572 case CONFIG_TYPE_AVERAGE: threadCountCount = 3; break;
5573 case CONFIG_TYPE_LARGE: threadCountCount = 5; break;
5574 case CONFIG_TYPE_MAXIMUM: threadCountCount = 7; break;
5575 default: assert(0);
5576 }
5577
5578 const size_t strategyCount = GetAllocationStrategyCount();
5579
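// The nested loops below sweep the whole test matrix: thread configuration x resource kind
// (buffers/images) x size class x size distribution x initial allocation percentage x allocation strategy.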
5580 for(size_t threadCountIndex = 0; threadCountIndex < threadCountCount; ++threadCountIndex)
5581 {
5582 std::string desc1;
5583
5584 switch(threadCountIndex)
5585 {
5586 case 0:
5587 desc1 += "1_thread";
5588 config.ThreadCount = 1;
5589 config.ThreadsUsingCommonAllocationsProbabilityPercent = 0;
5590 break;
5591 case 1:
5592 desc1 += "16_threads+0%_common";
5593 config.ThreadCount = 16;
5594 config.ThreadsUsingCommonAllocationsProbabilityPercent = 0;
5595 break;
5596 case 2:
5597 desc1 += "16_threads+50%_common";
5598 config.ThreadCount = 16;
5599 config.ThreadsUsingCommonAllocationsProbabilityPercent = 50;
5600 break;
5601 case 3:
5602 desc1 += "16_threads+100%_common";
5603 config.ThreadCount = 16;
5604 config.ThreadsUsingCommonAllocationsProbabilityPercent = 100;
5605 break;
5606 case 4:
5607 desc1 += "2_threads+0%_common";
5608 config.ThreadCount = 2;
5609 config.ThreadsUsingCommonAllocationsProbabilityPercent = 0;
5610 break;
5611 case 5:
5612 desc1 += "2_threads+50%_common";
5613 config.ThreadCount = 2;
5614 config.ThreadsUsingCommonAllocationsProbabilityPercent = 50;
5615 break;
5616 case 6:
5617 desc1 += "2_threads+100%_common";
5618 config.ThreadCount = 2;
5619 config.ThreadsUsingCommonAllocationsProbabilityPercent = 100;
5620 break;
5621 default:
5622 assert(0);
5623 }
5624
5625 // 0 = buffers, 1 = images, 2 = buffers and images
5626 size_t buffersVsImagesCount = 2;
5627 if(ConfigType >= CONFIG_TYPE_LARGE) ++buffersVsImagesCount;
5628 for(size_t buffersVsImagesIndex = 0; buffersVsImagesIndex < buffersVsImagesCount; ++buffersVsImagesIndex)
5629 {
5630 std::string desc2 = desc1;
5631 switch(buffersVsImagesIndex)
5632 {
5633 case 0: desc2 += ",Buffers"; break;
5634 case 1: desc2 += ",Images"; break;
5635 case 2: desc2 += ",Buffers+Images"; break;
5636 default: assert(0);
5637 }
5638
5639 // 0 = small, 1 = large, 2 = small and large
5640 size_t smallVsLargeCount = 2;
5641 if(ConfigType >= CONFIG_TYPE_LARGE) ++smallVsLargeCount;
5642 for(size_t smallVsLargeIndex = 0; smallVsLargeIndex < smallVsLargeCount; ++smallVsLargeIndex)
5643 {
5644 std::string desc3 = desc2;
5645 switch(smallVsLargeIndex)
5646 {
5647 case 0: desc3 += ",Small"; break;
5648 case 1: desc3 += ",Large"; break;
5649 case 2: desc3 += ",Small+Large"; break;
5650 default: assert(0);
5651 }
5652
5653 if(smallVsLargeIndex == 1 || smallVsLargeIndex == 2)
5654 config.MaxBytesToAllocate = 4ull * 1024 * 1024 * 1024; // 4 GB
5655 else
5656 config.MaxBytesToAllocate = 4ull * 1024 * 1024;
5657
5658 // 0 = varying sizes min...max, 1 = set of constant sizes
5659 size_t constantSizesCount = 1;
5660 if(ConfigType >= CONFIG_TYPE_SMALL) ++constantSizesCount;
5661 for(size_t constantSizesIndex = 0; constantSizesIndex < constantSizesCount; ++constantSizesIndex)
5662 {
5663 std::string desc4 = desc3;
5664 switch(constantSizesIndex)
5665 {
5666 case 0: desc4 += " Varying_sizes"; break;
5667 case 1: desc4 += " Constant_sizes"; break;
5668 default: assert(0);
5669 }
5670
5671 config.AllocationSizes.clear();
5672 // Buffers present
5673 if(buffersVsImagesIndex == 0 || buffersVsImagesIndex == 2)
5674 {
5675 // Small
5676 if(smallVsLargeIndex == 0 || smallVsLargeIndex == 2)
5677 {
5678 // Varying size
5679 if(constantSizesIndex == 0)
5680 config.AllocationSizes.push_back({4, 16, 1024});
5681 // Constant sizes
5682 else
5683 {
5684 config.AllocationSizes.push_back({1, 16, 16});
5685 config.AllocationSizes.push_back({1, 64, 64});
5686 config.AllocationSizes.push_back({1, 256, 256});
5687 config.AllocationSizes.push_back({1, 1024, 1024});
5688 }
5689 }
5690 // Large
5691 if(smallVsLargeIndex == 1 || smallVsLargeIndex == 2)
5692 {
5693 // Varying size
5694 if(constantSizesIndex == 0)
5695 config.AllocationSizes.push_back({4, 0x10000, 0xA00000}); // 64 KB ... 10 MB
5696 // Constant sizes
5697 else
5698 {
5699 config.AllocationSizes.push_back({1, 0x10000, 0x10000});
5700 config.AllocationSizes.push_back({1, 0x80000, 0x80000});
5701 config.AllocationSizes.push_back({1, 0x200000, 0x200000});
5702 config.AllocationSizes.push_back({1, 0xA00000, 0xA00000});
5703 }
5704 }
5705 }
5706 // Images present
5707 if(buffersVsImagesIndex == 1 || buffersVsImagesIndex == 2)
5708 {
5709 // Small
5710 if(smallVsLargeIndex == 0 || smallVsLargeIndex == 2)
5711 {
5712 // Varying size
5713 if(constantSizesIndex == 0)
5714 config.AllocationSizes.push_back({4, 0, 0, 4, 32});
5715 // Constant sizes
5716 else
5717 {
5718 config.AllocationSizes.push_back({1, 0, 0, 4, 4});
5719 config.AllocationSizes.push_back({1, 0, 0, 8, 8});
5720 config.AllocationSizes.push_back({1, 0, 0, 16, 16});
5721 config.AllocationSizes.push_back({1, 0, 0, 32, 32});
5722 }
5723 }
5724 // Large
5725 if(smallVsLargeIndex == 1 || smallVsLargeIndex == 2)
5726 {
5727 // Varying size
5728 if(constantSizesIndex == 0)
5729 config.AllocationSizes.push_back({4, 0, 0, 256, 2048});
5730 // Constant sizes
5731 else
5732 {
5733 config.AllocationSizes.push_back({1, 0, 0, 256, 256});
5734 config.AllocationSizes.push_back({1, 0, 0, 512, 512});
5735 config.AllocationSizes.push_back({1, 0, 0, 1024, 1024});
5736 config.AllocationSizes.push_back({1, 0, 0, 2048, 2048});
5737 }
5738 }
5739 }
5740
5741                     // 0 = allocate 100% up front with no additional operations; 1 = 50%, 2 = 5%, 3 = 95%, each followed by many additional operations.
5742 size_t beginBytesToAllocateCount = 1;
5743 if(ConfigType >= CONFIG_TYPE_SMALL) ++beginBytesToAllocateCount;
5744 if(ConfigType >= CONFIG_TYPE_AVERAGE) ++beginBytesToAllocateCount;
5745 if(ConfigType >= CONFIG_TYPE_LARGE) ++beginBytesToAllocateCount;
5746 for(size_t beginBytesToAllocateIndex = 0; beginBytesToAllocateIndex < beginBytesToAllocateCount; ++beginBytesToAllocateIndex)
5747 {
5748 std::string desc5 = desc4;
5749
5750 switch(beginBytesToAllocateIndex)
5751 {
5752 case 0:
5753 desc5 += ",Allocate_100%";
5754 config.BeginBytesToAllocate = config.MaxBytesToAllocate;
5755 config.AdditionalOperationCount = 0;
5756 break;
5757 case 1:
5758 desc5 += ",Allocate_50%+Operations";
5759 config.BeginBytesToAllocate = config.MaxBytesToAllocate * 50 / 100;
5760 config.AdditionalOperationCount = 1024;
5761 break;
5762 case 2:
5763 desc5 += ",Allocate_5%+Operations";
5764 config.BeginBytesToAllocate = config.MaxBytesToAllocate * 5 / 100;
5765 config.AdditionalOperationCount = 1024;
5766 break;
5767 case 3:
5768 desc5 += ",Allocate_95%+Operations";
5769 config.BeginBytesToAllocate = config.MaxBytesToAllocate * 95 / 100;
5770 config.AdditionalOperationCount = 1024;
5771 break;
5772 default:
5773 assert(0);
5774 }
5775
5776 for(size_t strategyIndex = 0; strategyIndex < strategyCount; ++strategyIndex)
5777 {
5778 std::string desc6 = desc5;
5779 switch(strategyIndex)
5780 {
5781 case 0:
5782 desc6 += ",BestFit";
5783 config.AllocationStrategy = VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT;
5784 break;
5785 case 1:
5786 desc6 += ",WorstFit";
5787 config.AllocationStrategy = VMA_ALLOCATION_CREATE_STRATEGY_WORST_FIT_BIT;
5788 break;
5789 case 2:
5790 desc6 += ",FirstFit";
5791 config.AllocationStrategy = VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT;
5792 break;
5793 default:
5794 assert(0);
5795 }
5796
5797 desc6 += ',';
5798 desc6 += FREE_ORDER_NAMES[(uint32_t)config.FreeOrder];
5799
5800 const char* testDescription = desc6.c_str();
5801
5802 for(size_t repeat = 0; repeat < repeatCount; ++repeat)
5803 {
5804 printf("%s #%u\n", testDescription, (uint32_t)repeat);
5805
5806 Result result{};
5807 VkResult res = MainTest(result, config);
5808 TEST(res == VK_SUCCESS);
5809 if(file)
5810 {
5811 WriteMainTestResult(file, CODE_DESCRIPTION, testDescription, config, result);
5812 }
5813 }
5814 }
5815 }
5816 }
5817 }
5818 }
5819 }
5820 }
5821
5822 static void PerformPoolTests(FILE* file)
5823 {
5824 const size_t AVG_RESOURCES_PER_POOL = 300;
5825
5826 uint32_t repeatCount = 1;
5827 if(ConfigType >= CONFIG_TYPE_MAXIMUM) repeatCount = 3;
5828
5829 PoolTestConfig config{};
5830 config.RandSeed = 2346343;
5831 config.FrameCount = 200;
5832 config.ItemsToMakeUnusedPercent = 2;
5833
5834 size_t threadCountCount = 1;
5835 switch(ConfigType)
5836 {
5837 case CONFIG_TYPE_MINIMUM: threadCountCount = 1; break;
5838 case CONFIG_TYPE_SMALL: threadCountCount = 2; break;
5839 case CONFIG_TYPE_AVERAGE: threadCountCount = 2; break;
5840 case CONFIG_TYPE_LARGE: threadCountCount = 3; break;
5841 case CONFIG_TYPE_MAXIMUM: threadCountCount = 3; break;
5842 default: assert(0);
5843 }
5844 for(size_t threadCountIndex = 0; threadCountIndex < threadCountCount; ++threadCountIndex)
5845 {
5846 std::string desc1;
5847
5848 switch(threadCountIndex)
5849 {
5850 case 0:
5851 desc1 += "1_thread";
5852 config.ThreadCount = 1;
5853 break;
5854 case 1:
5855 desc1 += "16_threads";
5856 config.ThreadCount = 16;
5857 break;
5858 case 2:
5859 desc1 += "2_threads";
5860 config.ThreadCount = 2;
5861 break;
5862 default:
5863 assert(0);
5864 }
5865
5866 // 0 = buffers, 1 = images, 2 = buffers and images
5867 size_t buffersVsImagesCount = 2;
5868 if(ConfigType >= CONFIG_TYPE_LARGE) ++buffersVsImagesCount;
5869 for(size_t buffersVsImagesIndex = 0; buffersVsImagesIndex < buffersVsImagesCount; ++buffersVsImagesIndex)
5870 {
5871 std::string desc2 = desc1;
5872 switch(buffersVsImagesIndex)
5873 {
5874 case 0: desc2 += " Buffers"; break;
5875 case 1: desc2 += " Images"; break;
5876 case 2: desc2 += " Buffers+Images"; break;
5877 default: assert(0);
5878 }
5879
5880 // 0 = small, 1 = large, 2 = small and large
5881 size_t smallVsLargeCount = 2;
5882 if(ConfigType >= CONFIG_TYPE_LARGE) ++smallVsLargeCount;
5883 for(size_t smallVsLargeIndex = 0; smallVsLargeIndex < smallVsLargeCount; ++smallVsLargeIndex)
5884 {
5885 std::string desc3 = desc2;
5886 switch(smallVsLargeIndex)
5887 {
5888 case 0: desc3 += " Small"; break;
5889 case 1: desc3 += " Large"; break;
5890 case 2: desc3 += " Small+Large"; break;
5891 default: assert(0);
5892 }
5893
5894 if(smallVsLargeIndex == 1 || smallVsLargeIndex == 2)
5895 config.PoolSize = 6ull * 1024 * 1024 * 1024; // 6 GB
5896 else
5897 config.PoolSize = 4ull * 1024 * 1024;
5898
5899 // 0 = varying sizes min...max, 1 = set of constant sizes
5900 size_t constantSizesCount = 1;
5901 if(ConfigType >= CONFIG_TYPE_SMALL) ++constantSizesCount;
5902 for(size_t constantSizesIndex = 0; constantSizesIndex < constantSizesCount; ++constantSizesIndex)
5903 {
5904 std::string desc4 = desc3;
5905 switch(constantSizesIndex)
5906 {
5907 case 0: desc4 += " Varying_sizes"; break;
5908 case 1: desc4 += " Constant_sizes"; break;
5909 default: assert(0);
5910 }
5911
5912 config.AllocationSizes.clear();
5913 // Buffers present
5914 if(buffersVsImagesIndex == 0 || buffersVsImagesIndex == 2)
5915 {
5916 // Small
5917 if(smallVsLargeIndex == 0 || smallVsLargeIndex == 2)
5918 {
5919 // Varying size
5920 if(constantSizesIndex == 0)
5921 config.AllocationSizes.push_back({4, 16, 1024});
5922 // Constant sizes
5923 else
5924 {
5925 config.AllocationSizes.push_back({1, 16, 16});
5926 config.AllocationSizes.push_back({1, 64, 64});
5927 config.AllocationSizes.push_back({1, 256, 256});
5928 config.AllocationSizes.push_back({1, 1024, 1024});
5929 }
5930 }
5931 // Large
5932 if(smallVsLargeIndex == 1 || smallVsLargeIndex == 2)
5933 {
5934 // Varying size
5935 if(constantSizesIndex == 0)
5936 config.AllocationSizes.push_back({4, 0x10000, 0xA00000}); // 64 KB ... 10 MB
5937 // Constant sizes
5938 else
5939 {
5940 config.AllocationSizes.push_back({1, 0x10000, 0x10000});
5941 config.AllocationSizes.push_back({1, 0x80000, 0x80000});
5942 config.AllocationSizes.push_back({1, 0x200000, 0x200000});
5943 config.AllocationSizes.push_back({1, 0xA00000, 0xA00000});
5944 }
5945 }
5946 }
5947 // Images present
5948 if(buffersVsImagesIndex == 1 || buffersVsImagesIndex == 2)
5949 {
5950 // Small
5951 if(smallVsLargeIndex == 0 || smallVsLargeIndex == 2)
5952 {
5953 // Varying size
5954 if(constantSizesIndex == 0)
5955 config.AllocationSizes.push_back({4, 0, 0, 4, 32});
5956 // Constant sizes
5957 else
5958 {
5959 config.AllocationSizes.push_back({1, 0, 0, 4, 4});
5960 config.AllocationSizes.push_back({1, 0, 0, 8, 8});
5961 config.AllocationSizes.push_back({1, 0, 0, 16, 16});
5962 config.AllocationSizes.push_back({1, 0, 0, 32, 32});
5963 }
5964 }
5965 // Large
5966 if(smallVsLargeIndex == 1 || smallVsLargeIndex == 2)
5967 {
5968 // Varying size
5969 if(constantSizesIndex == 0)
5970 config.AllocationSizes.push_back({4, 0, 0, 256, 2048});
5971 // Constant sizes
5972 else
5973 {
5974 config.AllocationSizes.push_back({1, 0, 0, 256, 256});
5975 config.AllocationSizes.push_back({1, 0, 0, 512, 512});
5976 config.AllocationSizes.push_back({1, 0, 0, 1024, 1024});
5977 config.AllocationSizes.push_back({1, 0, 0, 2048, 2048});
5978 }
5979 }
5980 }
5981
5982 const VkDeviceSize avgResourceSize = config.CalcAvgResourceSize();
5983 config.PoolSize = avgResourceSize * AVG_RESOURCES_PER_POOL;
5984
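// "Subscription" is UsedItemCountMax relative to the number of resources that fit in the pool;
// above 100% the pool is oversubscribed, so some allocations must become lost or fail.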
5985 // 0 = 66%, 1 = 133%, 2 = 100%, 3 = 33%, 4 = 166%
5986 size_t subscriptionModeCount;
5987 switch(ConfigType)
5988 {
5989 case CONFIG_TYPE_MINIMUM: subscriptionModeCount = 2; break;
5990 case CONFIG_TYPE_SMALL: subscriptionModeCount = 2; break;
5991 case CONFIG_TYPE_AVERAGE: subscriptionModeCount = 3; break;
5992 case CONFIG_TYPE_LARGE: subscriptionModeCount = 5; break;
5993 case CONFIG_TYPE_MAXIMUM: subscriptionModeCount = 5; break;
5994 default: assert(0);
5995 }
5996 for(size_t subscriptionModeIndex = 0; subscriptionModeIndex < subscriptionModeCount; ++subscriptionModeIndex)
5997 {
5998 std::string desc5 = desc4;
5999
6000 switch(subscriptionModeIndex)
6001 {
6002 case 0:
6003 desc5 += " Subscription_66%";
6004 config.UsedItemCountMax = AVG_RESOURCES_PER_POOL * 66 / 100;
6005 break;
6006 case 1:
6007 desc5 += " Subscription_133%";
6008 config.UsedItemCountMax = AVG_RESOURCES_PER_POOL * 133 / 100;
6009 break;
6010 case 2:
6011 desc5 += " Subscription_100%";
6012 config.UsedItemCountMax = AVG_RESOURCES_PER_POOL;
6013 break;
6014 case 3:
6015 desc5 += " Subscription_33%";
6016 config.UsedItemCountMax = AVG_RESOURCES_PER_POOL * 33 / 100;
6017 break;
6018 case 4:
6019 desc5 += " Subscription_166%";
6020 config.UsedItemCountMax = AVG_RESOURCES_PER_POOL * 166 / 100;
6021 break;
6022 default:
6023 assert(0);
6024 }
6025
6026 config.TotalItemCount = config.UsedItemCountMax * 5;
6027 config.UsedItemCountMin = config.UsedItemCountMax * 80 / 100;
6028
6029 const char* testDescription = desc5.c_str();
6030
6031 for(size_t repeat = 0; repeat < repeatCount; ++repeat)
6032 {
6033 printf("%s #%u\n", testDescription, (uint32_t)repeat);
6034
6035 PoolTestResult result{};
6036 g_MemoryAliasingWarningEnabled = false;
6037 TestPool_Benchmark(result, config);
6038 g_MemoryAliasingWarningEnabled = true;
6039 WritePoolTestResult(file, CODE_DESCRIPTION, testDescription, config, result);
6040 }
6041 }
6042 }
6043 }
6044 }
6045 }
6046 }
6047
6048 static void BasicTestBuddyAllocator()
6049 {
6050 wprintf(L"Basic test buddy allocator\n");
6051
6052 RandomNumberGenerator rand{76543};
6053
6054 VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
6055     sampleBufCreateInfo.size = 1024; // Size is irrelevant here; used only to query a compatible memory type.
6056 sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
6057
6058 VmaAllocationCreateInfo sampleAllocCreateInfo = {};
6059 sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
6060
6061 VmaPoolCreateInfo poolCreateInfo = {};
6062 VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
6063 TEST(res == VK_SUCCESS);
6064
6065 // Deliberately adding 1023 to test usable size smaller than memory block size.
6066 poolCreateInfo.blockSize = 1024 * 1024 + 1023;
6067 poolCreateInfo.flags = VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT;
6068 //poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1;
6069
6070 VmaPool pool = nullptr;
6071 res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
6072 TEST(res == VK_SUCCESS);
6073
6074 VkBufferCreateInfo bufCreateInfo = sampleBufCreateInfo;
6075
6076 VmaAllocationCreateInfo allocCreateInfo = {};
6077 allocCreateInfo.pool = pool;
6078
6079 std::vector<BufferInfo> bufInfo;
6080 BufferInfo newBufInfo;
6081 VmaAllocationInfo allocInfo;
6082
6083 bufCreateInfo.size = 1024 * 256;
6084 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
6085 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
6086 TEST(res == VK_SUCCESS);
6087 bufInfo.push_back(newBufInfo);
6088
6089 bufCreateInfo.size = 1024 * 512;
6090 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
6091 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
6092 TEST(res == VK_SUCCESS);
6093 bufInfo.push_back(newBufInfo);
6094
6095 bufCreateInfo.size = 1024 * 128;
6096 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
6097 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
6098 TEST(res == VK_SUCCESS);
6099 bufInfo.push_back(newBufInfo);
6100
6101 // Test very small allocation, smaller than minimum node size.
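// The buddy algorithm rounds every allocation up to a power-of-2 node, so this consumes a whole minimum-size node.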
6102 bufCreateInfo.size = 1;
6103 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
6104 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
6105 TEST(res == VK_SUCCESS);
6106 bufInfo.push_back(newBufInfo);
6107
6108 // Test some small allocation with alignment requirement.
6109 {
6110 VkMemoryRequirements memReq;
6111 memReq.alignment = 256;
6112 memReq.memoryTypeBits = UINT32_MAX;
6113 memReq.size = 32;
6114
6115 newBufInfo.Buffer = VK_NULL_HANDLE;
6116 res = vmaAllocateMemory(g_hAllocator, &memReq, &allocCreateInfo,
6117 &newBufInfo.Allocation, &allocInfo);
6118 TEST(res == VK_SUCCESS);
6119 TEST(allocInfo.offset % memReq.alignment == 0);
6120 bufInfo.push_back(newBufInfo);
6121 }
6122
6123 //SaveAllocatorStatsToFile(L"TEST.json");
6124
6125 VmaPoolStats stats = {};
6126 vmaGetPoolStats(g_hAllocator, pool, &stats);
6127 int DBG = 0; // Set breakpoint here to inspect `stats`.
6128
6129     // Allocate enough new buffers to make sure some of them fall into a second block.
6130 for(uint32_t i = 0; i < 32; ++i)
6131 {
6132 bufCreateInfo.size = 1024 * (rand.Generate() % 32 + 1);
6133 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
6134 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
6135 TEST(res == VK_SUCCESS);
6136 bufInfo.push_back(newBufInfo);
6137 }
6138
6139 SaveAllocatorStatsToFile(L"BuddyTest01.json");
6140
6141 // Destroy the buffers in random order.
6142 while(!bufInfo.empty())
6143 {
6144 const size_t indexToDestroy = rand.Generate() % bufInfo.size();
6145 const BufferInfo& currBufInfo = bufInfo[indexToDestroy];
6146 vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
6147 bufInfo.erase(bufInfo.begin() + indexToDestroy);
6148 }
6149
6150 vmaDestroyPool(g_hAllocator, pool);
6151 }
6152
6153 static void BasicTestAllocatePages()
6154 {
6155 wprintf(L"Basic test allocate pages\n");
6156
6157 RandomNumberGenerator rand{765461};
6158
6159 VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
6160     sampleBufCreateInfo.size = 1024; // Size is irrelevant here; used only to query a compatible memory type.
6161 sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
6162
6163 VmaAllocationCreateInfo sampleAllocCreateInfo = {};
6164 sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
6165
6166 VmaPoolCreateInfo poolCreateInfo = {};
6167 VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
6168 TEST(res == VK_SUCCESS);
6169
6170 // 1 block of 1 MB.
6171 poolCreateInfo.blockSize = 1024 * 1024;
6172 poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1;
6173
6174 // Create pool.
6175 VmaPool pool = nullptr;
6176 res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
6177 TEST(res == VK_SUCCESS);
6178
6179 // Make 100 allocations of 4 KB - they should fit into the pool.
6180 VkMemoryRequirements memReq;
6181 memReq.memoryTypeBits = UINT32_MAX;
6182 memReq.alignment = 4 * 1024;
6183 memReq.size = 4 * 1024;
6184
6185 VmaAllocationCreateInfo allocCreateInfo = {};
6186 allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
6187 allocCreateInfo.pool = pool;
6188
6189 constexpr uint32_t allocCount = 100;
6190
6191 std::vector<VmaAllocation> alloc{allocCount};
6192 std::vector<VmaAllocationInfo> allocInfo{allocCount};
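// One call creates all 100 allocations; with a single 1 MB block they should share one VkDeviceMemory, as verified below.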
6193 res = vmaAllocateMemoryPages(g_hAllocator, &memReq, &allocCreateInfo, allocCount, alloc.data(), allocInfo.data());
6194 TEST(res == VK_SUCCESS);
6195 for(uint32_t i = 0; i < allocCount; ++i)
6196 {
6197 TEST(alloc[i] != VK_NULL_HANDLE &&
6198 allocInfo[i].pMappedData != nullptr &&
6199 allocInfo[i].deviceMemory == allocInfo[0].deviceMemory &&
6200 allocInfo[i].memoryType == allocInfo[0].memoryType);
6201 }
6202
6203 // Free the allocations.
6204 vmaFreeMemoryPages(g_hAllocator, allocCount, alloc.data());
6205 std::fill(alloc.begin(), alloc.end(), nullptr);
6206 std::fill(allocInfo.begin(), allocInfo.end(), VmaAllocationInfo{});
6207
6208     // Try to make 100 allocations of 100 KB each. This call should fail because the single 1 MB block cannot hold them.
6209 // Also test optional allocationInfo = null.
6210 memReq.size = 100 * 1024;
6211 res = vmaAllocateMemoryPages(g_hAllocator, &memReq, &allocCreateInfo, allocCount, alloc.data(), nullptr);
6212 TEST(res != VK_SUCCESS);
6213     TEST(std::find_if(alloc.begin(), alloc.end(), [](VmaAllocation a){ return a != VK_NULL_HANDLE; }) == alloc.end());
6214
6215 // Make 100 allocations of 4 KB, but with required alignment of 128 KB. This should also fail.
6216 memReq.size = 4 * 1024;
6217 memReq.alignment = 128 * 1024;
6218 res = vmaAllocateMemoryPages(g_hAllocator, &memReq, &allocCreateInfo, allocCount, alloc.data(), allocInfo.data());
6219 TEST(res != VK_SUCCESS);
6220
6221 // Make 100 dedicated allocations of 4 KB.
6222 memReq.alignment = 4 * 1024;
6223 memReq.size = 4 * 1024;
6224
6225 VmaAllocationCreateInfo dedicatedAllocCreateInfo = {};
6226 dedicatedAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
6227 dedicatedAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT | VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
6228 res = vmaAllocateMemoryPages(g_hAllocator, &memReq, &dedicatedAllocCreateInfo, allocCount, alloc.data(), allocInfo.data());
6229 TEST(res == VK_SUCCESS);
6230 for(uint32_t i = 0; i < allocCount; ++i)
6231 {
6232 TEST(alloc[i] != VK_NULL_HANDLE &&
6233 allocInfo[i].pMappedData != nullptr &&
6234 allocInfo[i].memoryType == allocInfo[0].memoryType &&
6235 allocInfo[i].offset == 0);
6236 if(i > 0)
6237 {
6238 TEST(allocInfo[i].deviceMemory != allocInfo[0].deviceMemory);
6239 }
6240 }
6241
6242 // Free the allocations.
6243 vmaFreeMemoryPages(g_hAllocator, allocCount, alloc.data());
6244 std::fill(alloc.begin(), alloc.end(), nullptr);
6245 std::fill(allocInfo.begin(), allocInfo.end(), VmaAllocationInfo{});
6246
6247 vmaDestroyPool(g_hAllocator, pool);
6248 }
6249
6250 // Test the testing environment.
6251 static void TestGpuData()
6252 {
6253 RandomNumberGenerator rand = { 53434 };
6254
6255 std::vector<AllocInfo> allocInfo;
6256
6257 for(size_t i = 0; i < 100; ++i)
6258 {
6259 AllocInfo info = {};
6260
6261 info.m_BufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
6262 info.m_BufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT |
6263 VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
6264 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
6265 info.m_BufferInfo.size = 1024 * 1024 * (rand.Generate() % 9 + 1);
6266
6267 VmaAllocationCreateInfo allocCreateInfo = {};
6268 allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
6269
6270 VkResult res = vmaCreateBuffer(g_hAllocator, &info.m_BufferInfo, &allocCreateInfo, &info.m_Buffer, &info.m_Allocation, nullptr);
6271 TEST(res == VK_SUCCESS);
6272
6273 info.m_StartValue = rand.Generate();
6274
6275 allocInfo.push_back(std::move(info));
6276 }
6277
6278 UploadGpuData(allocInfo.data(), allocInfo.size());
6279
6280 ValidateGpuData(allocInfo.data(), allocInfo.size());
6281
6282 DestroyAllAllocations(allocInfo);
6283 }
6284
6285 void Test()
6286 {
6287 wprintf(L"TESTING:\n");
6288
6289 if(false)
6290 {
6291 ////////////////////////////////////////////////////////////////////////////////
6292 // Temporarily insert custom tests here:
6293 return;
6294 }
6295
6296 // # Simple tests
6297
6298 TestBasics();
6299 //TestGpuData(); // Not calling this because it's just testing the testing environment.
6300 #if VMA_DEBUG_MARGIN
6301 TestDebugMargin();
6302 #else
6303 TestPool_SameSize();
6304 TestPool_MinBlockCount();
6305 TestHeapSizeLimit();
6306 #endif
6307 #if VMA_DEBUG_INITIALIZE_ALLOCATIONS
6308 TestAllocationsInitialization();
6309 #endif
6310 TestMemoryUsage();
6311 TestDeviceCoherentMemory();
6312 TestBudget();
6313 TestMapping();
6314 TestDeviceLocalMapped();
6315 TestMappingMultithreaded();
6316 TestLinearAllocator();
6317 ManuallyTestLinearAllocator();
6318 TestLinearAllocatorMultiBlock();
6319
6320 BasicTestBuddyAllocator();
6321 BasicTestAllocatePages();
6322
6323 if(g_BufferDeviceAddressEnabled)
6324 TestBufferDeviceAddress();
6325
6326 {
6327 FILE* file;
6328 fopen_s(&file, "Algorithms.csv", "w");
6329 assert(file != NULL);
6330 BenchmarkAlgorithms(file);
6331 fclose(file);
6332 }
6333
6334 TestDefragmentationSimple();
6335 TestDefragmentationFull();
6336 TestDefragmentationWholePool();
6337 TestDefragmentationGpu();
6338 TestDefragmentationIncrementalBasic();
6339 TestDefragmentationIncrementalComplex();
6340
6341 // # Detailed tests
6342 FILE* file;
6343 fopen_s(&file, "Results.csv", "w");
6344 assert(file != NULL);
6345
6346 WriteMainTestResultHeader(file);
6347 PerformMainTests(file);
6348 //PerformCustomMainTest(file);
6349
6350 WritePoolTestResultHeader(file);
6351 PerformPoolTests(file);
6352 //PerformCustomPoolTest(file);
6353
6354 fclose(file);
6355
6356 wprintf(L"Done.\n");
6357 }
6358
6359 #endif // #ifdef _WIN32
6360