• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 //
2 // Copyright (c) 2017-2020 Advanced Micro Devices, Inc. All rights reserved.
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining a copy
5 // of this software and associated documentation files (the "Software"), to deal
6 // in the Software without restriction, including without limitation the rights
7 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 // copies of the Software, and to permit persons to whom the Software is
9 // furnished to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
17 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 // THE SOFTWARE.
21 //
22 
23 #include "Tests.h"
24 #include "VmaUsage.h"
25 #include "Common.h"
26 #include <atomic>
27 #include <thread>
28 #include <mutex>
29 #include <functional>
30 
31 #ifdef _WIN32
32 
33 static const char* CODE_DESCRIPTION = "Foo";
34 
35 extern VkCommandBuffer g_hTemporaryCommandBuffer;
36 extern const VkAllocationCallbacks* g_Allocs;
37 extern bool g_BufferDeviceAddressEnabled;
38 extern PFN_vkGetBufferDeviceAddressEXT g_vkGetBufferDeviceAddressEXT;
39 void BeginSingleTimeCommands();
40 void EndSingleTimeCommands();
41 
42 #ifndef VMA_DEBUG_MARGIN
43     #define VMA_DEBUG_MARGIN 0
44 #endif
45 
// Intensity preset controlling how heavy the tests are.
enum CONFIG_TYPE {
    CONFIG_TYPE_MINIMUM,
    CONFIG_TYPE_SMALL,
    CONFIG_TYPE_AVERAGE,
    CONFIG_TYPE_LARGE,
    CONFIG_TYPE_MAXIMUM,
    CONFIG_TYPE_COUNT
};

// Preset selected at compile time. Switch to CONFIG_TYPE_LARGE for heavier runs.
static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_SMALL;
//static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_LARGE;
57 
58 enum class FREE_ORDER { FORWARD, BACKWARD, RANDOM, COUNT };
59 
// Human-readable names for FREE_ORDER values, indexed by the enum
// (FREE_ORDER::COUNT intentionally has no entry).
static const char* FREE_ORDER_NAMES[] = {
    "FORWARD",
    "BACKWARD",
    "RANDOM",
};
65 
66 // Copy of internal VmaAlgorithmToStr.
AlgorithmToStr(uint32_t algorithm)67 static const char* AlgorithmToStr(uint32_t algorithm)
68 {
69     switch(algorithm)
70     {
71     case VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT:
72         return "Linear";
73     case VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT:
74         return "Buddy";
75     case 0:
76         return "Default";
77     default:
78         assert(0);
79         return "";
80     }
81 }
82 
83 struct AllocationSize
84 {
85     uint32_t Probability;
86     VkDeviceSize BufferSizeMin, BufferSizeMax;
87     uint32_t ImageSizeMin, ImageSizeMax;
88 };
89 
90 struct Config
91 {
92     uint32_t RandSeed;
93     VkDeviceSize BeginBytesToAllocate;
94     uint32_t AdditionalOperationCount;
95     VkDeviceSize MaxBytesToAllocate;
96     uint32_t MemUsageProbability[4]; // For VMA_MEMORY_USAGE_*
97     std::vector<AllocationSize> AllocationSizes;
98     uint32_t ThreadCount;
99     uint32_t ThreadsUsingCommonAllocationsProbabilityPercent;
100     FREE_ORDER FreeOrder;
101     VmaAllocationCreateFlags AllocationStrategy; // For VMA_ALLOCATION_CREATE_STRATEGY_*
102 };
103 
104 struct Result
105 {
106     duration TotalTime;
107     duration AllocationTimeMin, AllocationTimeAvg, AllocationTimeMax;
108     duration DeallocationTimeMin, DeallocationTimeAvg, DeallocationTimeMax;
109     VkDeviceSize TotalMemoryAllocated;
110     VkDeviceSize FreeRangeSizeAvg, FreeRangeSizeMax;
111 };
112 
// Forward declarations of defragmentation tests defined elsewhere in this file.
void TestDefragmentationSimple();
void TestDefragmentationFull();
115 
116 struct PoolTestConfig
117 {
118     uint32_t RandSeed;
119     uint32_t ThreadCount;
120     VkDeviceSize PoolSize;
121     uint32_t FrameCount;
122     uint32_t TotalItemCount;
123     // Range for number of items used in each frame.
124     uint32_t UsedItemCountMin, UsedItemCountMax;
125     // Percent of items to make unused, and possibly make some others used in each frame.
126     uint32_t ItemsToMakeUnusedPercent;
127     std::vector<AllocationSize> AllocationSizes;
128 
CalcAvgResourceSizePoolTestConfig129     VkDeviceSize CalcAvgResourceSize() const
130     {
131         uint32_t probabilitySum = 0;
132         VkDeviceSize sizeSum = 0;
133         for(size_t i = 0; i < AllocationSizes.size(); ++i)
134         {
135             const AllocationSize& allocSize = AllocationSizes[i];
136             if(allocSize.BufferSizeMax > 0)
137                 sizeSum += (allocSize.BufferSizeMin + allocSize.BufferSizeMax) / 2 * allocSize.Probability;
138             else
139             {
140                 const VkDeviceSize avgDimension = (allocSize.ImageSizeMin + allocSize.ImageSizeMax) / 2;
141                 sizeSum += avgDimension * avgDimension * 4 * allocSize.Probability;
142             }
143             probabilitySum += allocSize.Probability;
144         }
145         return sizeSum / probabilitySum;
146     }
147 
UsesBuffersPoolTestConfig148     bool UsesBuffers() const
149     {
150         for(size_t i = 0; i < AllocationSizes.size(); ++i)
151             if(AllocationSizes[i].BufferSizeMax > 0)
152                 return true;
153         return false;
154     }
155 
UsesImagesPoolTestConfig156     bool UsesImages() const
157     {
158         for(size_t i = 0; i < AllocationSizes.size(); ++i)
159             if(AllocationSizes[i].ImageSizeMax > 0)
160                 return true;
161         return false;
162     }
163 };
164 
165 struct PoolTestResult
166 {
167     duration TotalTime;
168     duration AllocationTimeMin, AllocationTimeAvg, AllocationTimeMax;
169     duration DeallocationTimeMin, DeallocationTimeAvg, DeallocationTimeMax;
170     size_t LostAllocationCount, LostAllocationTotalSize;
171     size_t FailedAllocationCount, FailedAllocationTotalSize;
172 };
173 
// Assumed per-pixel cost when budgeting how many bytes a test image "uses".
static const uint32_t IMAGE_BYTES_PER_PIXEL = 1;

// Global frame counter, written by tests that simulate per-frame usage.
uint32_t g_FrameIndex = 0;
177 
178 struct BufferInfo
179 {
180     VkBuffer Buffer = VK_NULL_HANDLE;
181     VmaAllocation Allocation = VK_NULL_HANDLE;
182 };
183 
MemoryTypeToHeap(uint32_t memoryTypeIndex)184 static uint32_t MemoryTypeToHeap(uint32_t memoryTypeIndex)
185 {
186     const VkPhysicalDeviceMemoryProperties* props;
187     vmaGetMemoryProperties(g_hAllocator, &props);
188     return props->memoryTypes[memoryTypeIndex].heapIndex;
189 }
190 
GetAllocationStrategyCount()191 static uint32_t GetAllocationStrategyCount()
192 {
193     uint32_t strategyCount = 0;
194     switch(ConfigType)
195     {
196     case CONFIG_TYPE_MINIMUM: strategyCount = 1; break;
197     case CONFIG_TYPE_SMALL:   strategyCount = 1; break;
198     case CONFIG_TYPE_AVERAGE: strategyCount = 2; break;
199     case CONFIG_TYPE_LARGE:   strategyCount = 2; break;
200     case CONFIG_TYPE_MAXIMUM: strategyCount = 3; break;
201     default: assert(0);
202     }
203     return strategyCount;
204 }
205 
GetAllocationStrategyName(VmaAllocationCreateFlags allocStrategy)206 static const char* GetAllocationStrategyName(VmaAllocationCreateFlags allocStrategy)
207 {
208     switch(allocStrategy)
209     {
210     case VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT: return "BEST_FIT"; break;
211     case VMA_ALLOCATION_CREATE_STRATEGY_WORST_FIT_BIT: return "WORST_FIT"; break;
212     case VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT: return "FIRST_FIT"; break;
213     case 0: return "Default"; break;
214     default: assert(0); return "";
215     }
216 }
217 
InitResult(Result & outResult)218 static void InitResult(Result& outResult)
219 {
220     outResult.TotalTime = duration::zero();
221     outResult.AllocationTimeMin = duration::max();
222     outResult.AllocationTimeAvg = duration::zero();
223     outResult.AllocationTimeMax = duration::min();
224     outResult.DeallocationTimeMin = duration::max();
225     outResult.DeallocationTimeAvg = duration::zero();
226     outResult.DeallocationTimeMax = duration::min();
227     outResult.TotalMemoryAllocated = 0;
228     outResult.FreeRangeSizeAvg = 0;
229     outResult.FreeRangeSizeMax = 0;
230 }
231 
232 class TimeRegisterObj
233 {
234 public:
TimeRegisterObj(duration & min,duration & sum,duration & max)235     TimeRegisterObj(duration& min, duration& sum, duration& max) :
236         m_Min(min),
237         m_Sum(sum),
238         m_Max(max),
239         m_TimeBeg(std::chrono::high_resolution_clock::now())
240     {
241     }
242 
~TimeRegisterObj()243     ~TimeRegisterObj()
244     {
245         duration d = std::chrono::high_resolution_clock::now() - m_TimeBeg;
246         m_Sum += d;
247         if(d < m_Min) m_Min = d;
248         if(d > m_Max) m_Max = d;
249     }
250 
251 private:
252     duration& m_Min;
253     duration& m_Sum;
254     duration& m_Max;
255     time_point m_TimeBeg;
256 };
257 
258 struct PoolTestThreadResult
259 {
260     duration AllocationTimeMin, AllocationTimeSum, AllocationTimeMax;
261     duration DeallocationTimeMin, DeallocationTimeSum, DeallocationTimeMax;
262     size_t AllocationCount, DeallocationCount;
263     size_t LostAllocationCount, LostAllocationTotalSize;
264     size_t FailedAllocationCount, FailedAllocationTotalSize;
265 };
266 
267 class AllocationTimeRegisterObj : public TimeRegisterObj
268 {
269 public:
AllocationTimeRegisterObj(Result & result)270     AllocationTimeRegisterObj(Result& result) :
271         TimeRegisterObj(result.AllocationTimeMin, result.AllocationTimeAvg, result.AllocationTimeMax)
272     {
273     }
274 };
275 
276 class DeallocationTimeRegisterObj : public TimeRegisterObj
277 {
278 public:
DeallocationTimeRegisterObj(Result & result)279     DeallocationTimeRegisterObj(Result& result) :
280         TimeRegisterObj(result.DeallocationTimeMin, result.DeallocationTimeAvg, result.DeallocationTimeMax)
281     {
282     }
283 };
284 
285 class PoolAllocationTimeRegisterObj : public TimeRegisterObj
286 {
287 public:
PoolAllocationTimeRegisterObj(PoolTestThreadResult & result)288     PoolAllocationTimeRegisterObj(PoolTestThreadResult& result) :
289         TimeRegisterObj(result.AllocationTimeMin, result.AllocationTimeSum, result.AllocationTimeMax)
290     {
291     }
292 };
293 
294 class PoolDeallocationTimeRegisterObj : public TimeRegisterObj
295 {
296 public:
PoolDeallocationTimeRegisterObj(PoolTestThreadResult & result)297     PoolDeallocationTimeRegisterObj(PoolTestThreadResult& result) :
298         TimeRegisterObj(result.DeallocationTimeMin, result.DeallocationTimeSum, result.DeallocationTimeMax)
299     {
300     }
301 };
302 
CurrentTimeToStr(std::string & out)303 static void CurrentTimeToStr(std::string& out)
304 {
305     time_t rawTime; time(&rawTime);
306     struct tm timeInfo; localtime_s(&timeInfo, &rawTime);
307     char timeStr[128];
308     strftime(timeStr, _countof(timeStr), "%c", &timeInfo);
309     out = timeStr;
310 }
311 
MainTest(Result & outResult,const Config & config)312 VkResult MainTest(Result& outResult, const Config& config)
313 {
314     assert(config.ThreadCount > 0);
315 
316     InitResult(outResult);
317 
318     RandomNumberGenerator mainRand{config.RandSeed};
319 
320     time_point timeBeg = std::chrono::high_resolution_clock::now();
321 
322     std::atomic<size_t> allocationCount = 0;
323     VkResult res = VK_SUCCESS;
324 
325     uint32_t memUsageProbabilitySum =
326         config.MemUsageProbability[0] + config.MemUsageProbability[1] +
327         config.MemUsageProbability[2] + config.MemUsageProbability[3];
328     assert(memUsageProbabilitySum > 0);
329 
330     uint32_t allocationSizeProbabilitySum = std::accumulate(
331         config.AllocationSizes.begin(),
332         config.AllocationSizes.end(),
333         0u,
334         [](uint32_t sum, const AllocationSize& allocSize) {
335             return sum + allocSize.Probability;
336         });
337 
338     struct Allocation
339     {
340         VkBuffer Buffer;
341         VkImage Image;
342         VmaAllocation Alloc;
343     };
344 
345     std::vector<Allocation> commonAllocations;
346     std::mutex commonAllocationsMutex;
347 
348     auto Allocate = [&](
349         VkDeviceSize bufferSize,
350         const VkExtent2D imageExtent,
351         RandomNumberGenerator& localRand,
352         VkDeviceSize& totalAllocatedBytes,
353         std::vector<Allocation>& allocations) -> VkResult
354     {
355         assert((bufferSize == 0) != (imageExtent.width == 0 && imageExtent.height == 0));
356 
357         uint32_t memUsageIndex = 0;
358         uint32_t memUsageRand = localRand.Generate() % memUsageProbabilitySum;
359         while(memUsageRand >= config.MemUsageProbability[memUsageIndex])
360             memUsageRand -= config.MemUsageProbability[memUsageIndex++];
361 
362         VmaAllocationCreateInfo memReq = {};
363         memReq.usage = (VmaMemoryUsage)(VMA_MEMORY_USAGE_GPU_ONLY + memUsageIndex);
364         memReq.flags |= config.AllocationStrategy;
365 
366         Allocation allocation = {};
367         VmaAllocationInfo allocationInfo;
368 
369         // Buffer
370         if(bufferSize > 0)
371         {
372             assert(imageExtent.width == 0);
373             VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
374             bufferInfo.size = bufferSize;
375             bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
376 
377             {
378                 AllocationTimeRegisterObj timeRegisterObj{outResult};
379                 res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &memReq, &allocation.Buffer, &allocation.Alloc, &allocationInfo);
380             }
381         }
382         // Image
383         else
384         {
385             VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
386             imageInfo.imageType = VK_IMAGE_TYPE_2D;
387             imageInfo.extent.width = imageExtent.width;
388             imageInfo.extent.height = imageExtent.height;
389             imageInfo.extent.depth = 1;
390             imageInfo.mipLevels = 1;
391             imageInfo.arrayLayers = 1;
392             imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
393             imageInfo.tiling = memReq.usage == VMA_MEMORY_USAGE_GPU_ONLY ?
394                 VK_IMAGE_TILING_OPTIMAL :
395                 VK_IMAGE_TILING_LINEAR;
396             imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
397             switch(memReq.usage)
398             {
399             case VMA_MEMORY_USAGE_GPU_ONLY:
400                 switch(localRand.Generate() % 3)
401                 {
402                 case 0:
403                     imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
404                     break;
405                 case 1:
406                     imageInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
407                     break;
408                 case 2:
409                     imageInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
410                     break;
411                 }
412                 break;
413             case VMA_MEMORY_USAGE_CPU_ONLY:
414             case VMA_MEMORY_USAGE_CPU_TO_GPU:
415                 imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
416                 break;
417             case VMA_MEMORY_USAGE_GPU_TO_CPU:
418                 imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT;
419                 break;
420             }
421             imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
422             imageInfo.flags = 0;
423 
424             {
425                 AllocationTimeRegisterObj timeRegisterObj{outResult};
426                 res = vmaCreateImage(g_hAllocator, &imageInfo, &memReq, &allocation.Image, &allocation.Alloc, &allocationInfo);
427             }
428         }
429 
430         if(res == VK_SUCCESS)
431         {
432             ++allocationCount;
433             totalAllocatedBytes += allocationInfo.size;
434             bool useCommonAllocations = localRand.Generate() % 100 < config.ThreadsUsingCommonAllocationsProbabilityPercent;
435             if(useCommonAllocations)
436             {
437                 std::unique_lock<std::mutex> lock(commonAllocationsMutex);
438                 commonAllocations.push_back(allocation);
439             }
440             else
441                 allocations.push_back(allocation);
442         }
443         else
444         {
445             TEST(0);
446         }
447         return res;
448     };
449 
450     auto GetNextAllocationSize = [&](
451         VkDeviceSize& outBufSize,
452         VkExtent2D& outImageSize,
453         RandomNumberGenerator& localRand)
454     {
455         outBufSize = 0;
456         outImageSize = {0, 0};
457 
458         uint32_t allocSizeIndex = 0;
459         uint32_t r = localRand.Generate() % allocationSizeProbabilitySum;
460         while(r >= config.AllocationSizes[allocSizeIndex].Probability)
461             r -= config.AllocationSizes[allocSizeIndex++].Probability;
462 
463         const AllocationSize& allocSize = config.AllocationSizes[allocSizeIndex];
464         if(allocSize.BufferSizeMax > 0)
465         {
466             assert(allocSize.ImageSizeMax == 0);
467             if(allocSize.BufferSizeMax == allocSize.BufferSizeMin)
468                 outBufSize = allocSize.BufferSizeMin;
469             else
470             {
471                 outBufSize = allocSize.BufferSizeMin + localRand.Generate() % (allocSize.BufferSizeMax - allocSize.BufferSizeMin);
472                 outBufSize = outBufSize / 16 * 16;
473             }
474         }
475         else
476         {
477             if(allocSize.ImageSizeMax == allocSize.ImageSizeMin)
478                 outImageSize.width = outImageSize.height = allocSize.ImageSizeMax;
479             else
480             {
481                 outImageSize.width  = allocSize.ImageSizeMin + localRand.Generate() % (allocSize.ImageSizeMax - allocSize.ImageSizeMin);
482                 outImageSize.height = allocSize.ImageSizeMin + localRand.Generate() % (allocSize.ImageSizeMax - allocSize.ImageSizeMin);
483             }
484         }
485     };
486 
487     std::atomic<uint32_t> numThreadsReachedMaxAllocations = 0;
488     HANDLE threadsFinishEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
489 
490     auto ThreadProc = [&](uint32_t randSeed) -> void
491     {
492         RandomNumberGenerator threadRand(randSeed);
493         VkDeviceSize threadTotalAllocatedBytes = 0;
494         std::vector<Allocation> threadAllocations;
495         VkDeviceSize threadBeginBytesToAllocate = config.BeginBytesToAllocate / config.ThreadCount;
496         VkDeviceSize threadMaxBytesToAllocate = config.MaxBytesToAllocate / config.ThreadCount;
497         uint32_t threadAdditionalOperationCount = config.AdditionalOperationCount / config.ThreadCount;
498 
499         // BEGIN ALLOCATIONS
500         for(;;)
501         {
502             VkDeviceSize bufferSize = 0;
503             VkExtent2D imageExtent = {};
504             GetNextAllocationSize(bufferSize, imageExtent, threadRand);
505             if(threadTotalAllocatedBytes + bufferSize + imageExtent.width * imageExtent.height * IMAGE_BYTES_PER_PIXEL <
506                 threadBeginBytesToAllocate)
507             {
508                 if(Allocate(bufferSize, imageExtent, threadRand, threadTotalAllocatedBytes, threadAllocations) != VK_SUCCESS)
509                     break;
510             }
511             else
512                 break;
513         }
514 
515         // ADDITIONAL ALLOCATIONS AND FREES
516         for(size_t i = 0; i < threadAdditionalOperationCount; ++i)
517         {
518             VkDeviceSize bufferSize = 0;
519             VkExtent2D imageExtent = {};
520             GetNextAllocationSize(bufferSize, imageExtent, threadRand);
521 
522             // true = allocate, false = free
523             bool allocate = threadRand.Generate() % 2 != 0;
524 
525             if(allocate)
526             {
527                 if(threadTotalAllocatedBytes +
528                     bufferSize +
529                     imageExtent.width * imageExtent.height * IMAGE_BYTES_PER_PIXEL <
530                     threadMaxBytesToAllocate)
531                 {
532                     if(Allocate(bufferSize, imageExtent, threadRand, threadTotalAllocatedBytes, threadAllocations) != VK_SUCCESS)
533                         break;
534                 }
535             }
536             else
537             {
538                 bool useCommonAllocations = threadRand.Generate() % 100 < config.ThreadsUsingCommonAllocationsProbabilityPercent;
539                 if(useCommonAllocations)
540                 {
541                     std::unique_lock<std::mutex> lock(commonAllocationsMutex);
542                     if(!commonAllocations.empty())
543                     {
544                         size_t indexToFree = threadRand.Generate() % commonAllocations.size();
545                         VmaAllocationInfo allocationInfo;
546                         vmaGetAllocationInfo(g_hAllocator, commonAllocations[indexToFree].Alloc, &allocationInfo);
547                         if(threadTotalAllocatedBytes >= allocationInfo.size)
548                         {
549                             DeallocationTimeRegisterObj timeRegisterObj{outResult};
550                             if(commonAllocations[indexToFree].Buffer != VK_NULL_HANDLE)
551                                 vmaDestroyBuffer(g_hAllocator, commonAllocations[indexToFree].Buffer, commonAllocations[indexToFree].Alloc);
552                             else
553                                 vmaDestroyImage(g_hAllocator, commonAllocations[indexToFree].Image, commonAllocations[indexToFree].Alloc);
554                             threadTotalAllocatedBytes -= allocationInfo.size;
555                             commonAllocations.erase(commonAllocations.begin() + indexToFree);
556                         }
557                     }
558                 }
559                 else
560                 {
561                     if(!threadAllocations.empty())
562                     {
563                         size_t indexToFree = threadRand.Generate() % threadAllocations.size();
564                         VmaAllocationInfo allocationInfo;
565                         vmaGetAllocationInfo(g_hAllocator, threadAllocations[indexToFree].Alloc, &allocationInfo);
566                         if(threadTotalAllocatedBytes >= allocationInfo.size)
567                         {
568                             DeallocationTimeRegisterObj timeRegisterObj{outResult};
569                             if(threadAllocations[indexToFree].Buffer != VK_NULL_HANDLE)
570                                 vmaDestroyBuffer(g_hAllocator, threadAllocations[indexToFree].Buffer, threadAllocations[indexToFree].Alloc);
571                             else
572                                 vmaDestroyImage(g_hAllocator, threadAllocations[indexToFree].Image, threadAllocations[indexToFree].Alloc);
573                             threadTotalAllocatedBytes -= allocationInfo.size;
574                             threadAllocations.erase(threadAllocations.begin() + indexToFree);
575                         }
576                     }
577                 }
578             }
579         }
580 
581         ++numThreadsReachedMaxAllocations;
582 
583         WaitForSingleObject(threadsFinishEvent, INFINITE);
584 
585         // DEALLOCATION
586         while(!threadAllocations.empty())
587         {
588             size_t indexToFree = 0;
589             switch(config.FreeOrder)
590             {
591             case FREE_ORDER::FORWARD:
592                 indexToFree = 0;
593                 break;
594             case FREE_ORDER::BACKWARD:
595                 indexToFree = threadAllocations.size() - 1;
596                 break;
597             case FREE_ORDER::RANDOM:
598                 indexToFree = mainRand.Generate() % threadAllocations.size();
599                 break;
600             }
601 
602             {
603                 DeallocationTimeRegisterObj timeRegisterObj{outResult};
604                 if(threadAllocations[indexToFree].Buffer != VK_NULL_HANDLE)
605                     vmaDestroyBuffer(g_hAllocator, threadAllocations[indexToFree].Buffer, threadAllocations[indexToFree].Alloc);
606                 else
607                     vmaDestroyImage(g_hAllocator, threadAllocations[indexToFree].Image, threadAllocations[indexToFree].Alloc);
608             }
609             threadAllocations.erase(threadAllocations.begin() + indexToFree);
610         }
611     };
612 
613     uint32_t threadRandSeed = mainRand.Generate();
614     std::vector<std::thread> bkgThreads;
615     for(size_t i = 0; i < config.ThreadCount; ++i)
616     {
617         bkgThreads.emplace_back(std::bind(ThreadProc, threadRandSeed + (uint32_t)i));
618     }
619 
620     // Wait for threads reached max allocations
621     while(numThreadsReachedMaxAllocations < config.ThreadCount)
622         Sleep(0);
623 
624     // CALCULATE MEMORY STATISTICS ON FINAL USAGE
625     VmaStats vmaStats = {};
626     vmaCalculateStats(g_hAllocator, &vmaStats);
627     outResult.TotalMemoryAllocated = vmaStats.total.usedBytes + vmaStats.total.unusedBytes;
628     outResult.FreeRangeSizeMax = vmaStats.total.unusedRangeSizeMax;
629     outResult.FreeRangeSizeAvg = vmaStats.total.unusedRangeSizeAvg;
630 
631     // Signal threads to deallocate
632     SetEvent(threadsFinishEvent);
633 
634     // Wait for threads finished
635     for(size_t i = 0; i < bkgThreads.size(); ++i)
636         bkgThreads[i].join();
637     bkgThreads.clear();
638 
639     CloseHandle(threadsFinishEvent);
640 
641     // Deallocate remaining common resources
642     while(!commonAllocations.empty())
643     {
644         size_t indexToFree = 0;
645         switch(config.FreeOrder)
646         {
647         case FREE_ORDER::FORWARD:
648             indexToFree = 0;
649             break;
650         case FREE_ORDER::BACKWARD:
651             indexToFree = commonAllocations.size() - 1;
652             break;
653         case FREE_ORDER::RANDOM:
654             indexToFree = mainRand.Generate() % commonAllocations.size();
655             break;
656         }
657 
658         {
659             DeallocationTimeRegisterObj timeRegisterObj{outResult};
660             if(commonAllocations[indexToFree].Buffer != VK_NULL_HANDLE)
661                 vmaDestroyBuffer(g_hAllocator, commonAllocations[indexToFree].Buffer, commonAllocations[indexToFree].Alloc);
662             else
663                 vmaDestroyImage(g_hAllocator, commonAllocations[indexToFree].Image, commonAllocations[indexToFree].Alloc);
664         }
665         commonAllocations.erase(commonAllocations.begin() + indexToFree);
666     }
667 
668     if(allocationCount)
669     {
670         outResult.AllocationTimeAvg /= allocationCount;
671         outResult.DeallocationTimeAvg /= allocationCount;
672     }
673 
674     outResult.TotalTime = std::chrono::high_resolution_clock::now() - timeBeg;
675 
676     return res;
677 }
678 
SaveAllocatorStatsToFile(const wchar_t * filePath)679 void SaveAllocatorStatsToFile(const wchar_t* filePath)
680 {
681     wprintf(L"Saving JSON dump to file \"%s\"\n", filePath);
682     char* stats;
683     vmaBuildStatsString(g_hAllocator, &stats, VK_TRUE);
684     SaveFile(filePath, stats, strlen(stats));
685     vmaFreeStatsString(g_hAllocator, stats);
686 }
687 
688 struct AllocInfo
689 {
690     VmaAllocation m_Allocation = VK_NULL_HANDLE;
691     VkBuffer m_Buffer = VK_NULL_HANDLE;
692     VkImage m_Image = VK_NULL_HANDLE;
693     VkImageLayout m_ImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
694     uint32_t m_StartValue = 0;
695     union
696     {
697         VkBufferCreateInfo m_BufferInfo;
698         VkImageCreateInfo m_ImageInfo;
699     };
700 
701     // After defragmentation.
702     VkBuffer m_NewBuffer = VK_NULL_HANDLE;
703     VkImage m_NewImage = VK_NULL_HANDLE;
704 
705     void CreateBuffer(
706         const VkBufferCreateInfo& bufCreateInfo,
707         const VmaAllocationCreateInfo& allocCreateInfo);
708     void CreateImage(
709         const VkImageCreateInfo& imageCreateInfo,
710         const VmaAllocationCreateInfo& allocCreateInfo,
711         VkImageLayout layout);
712     void Destroy();
713 };
714 
CreateBuffer(const VkBufferCreateInfo & bufCreateInfo,const VmaAllocationCreateInfo & allocCreateInfo)715 void AllocInfo::CreateBuffer(
716     const VkBufferCreateInfo& bufCreateInfo,
717     const VmaAllocationCreateInfo& allocCreateInfo)
718 {
719     m_BufferInfo = bufCreateInfo;
720     VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &m_Buffer, &m_Allocation, nullptr);
721     TEST(res == VK_SUCCESS);
722 }
CreateImage(const VkImageCreateInfo & imageCreateInfo,const VmaAllocationCreateInfo & allocCreateInfo,VkImageLayout layout)723 void AllocInfo::CreateImage(
724     const VkImageCreateInfo& imageCreateInfo,
725     const VmaAllocationCreateInfo& allocCreateInfo,
726     VkImageLayout layout)
727 {
728     m_ImageInfo = imageCreateInfo;
729     m_ImageLayout = layout;
730     VkResult res = vmaCreateImage(g_hAllocator, &imageCreateInfo, &allocCreateInfo, &m_Image, &m_Allocation, nullptr);
731     TEST(res == VK_SUCCESS);
732 }
733 
Destroy()734 void AllocInfo::Destroy()
735 {
736     if(m_Image)
737     {
738         assert(!m_Buffer);
739         vkDestroyImage(g_hDevice, m_Image, g_Allocs);
740         m_Image = VK_NULL_HANDLE;
741     }
742     if(m_Buffer)
743     {
744         assert(!m_Image);
745         vkDestroyBuffer(g_hDevice, m_Buffer, g_Allocs);
746         m_Buffer = VK_NULL_HANDLE;
747     }
748     if(m_Allocation)
749     {
750         vmaFreeMemory(g_hAllocator, m_Allocation);
751         m_Allocation = VK_NULL_HANDLE;
752     }
753 }
754 
755 class StagingBufferCollection
756 {
757 public:
StagingBufferCollection()758     StagingBufferCollection() { }
759     ~StagingBufferCollection();
760     // Returns false if maximum total size of buffers would be exceeded.
761     bool AcquireBuffer(VkDeviceSize size, VkBuffer& outBuffer, void*& outMappedPtr);
762     void ReleaseAllBuffers();
763 
764 private:
765     static const VkDeviceSize MAX_TOTAL_SIZE = 256ull * 1024 * 1024;
766     struct BufInfo
767     {
768         VmaAllocation Allocation = VK_NULL_HANDLE;
769         VkBuffer Buffer = VK_NULL_HANDLE;
770         VkDeviceSize Size = VK_WHOLE_SIZE;
771         void* MappedPtr = nullptr;
772         bool Used = false;
773     };
774     std::vector<BufInfo> m_Bufs;
775     // Including both used and unused.
776     VkDeviceSize m_TotalSize = 0;
777 };
778 
~StagingBufferCollection()779 StagingBufferCollection::~StagingBufferCollection()
780 {
781     for(size_t i = m_Bufs.size(); i--; )
782     {
783         vmaDestroyBuffer(g_hAllocator, m_Bufs[i].Buffer, m_Bufs[i].Allocation);
784     }
785 }
786 
AcquireBuffer(VkDeviceSize size,VkBuffer & outBuffer,void * & outMappedPtr)787 bool StagingBufferCollection::AcquireBuffer(VkDeviceSize size, VkBuffer& outBuffer, void*& outMappedPtr)
788 {
789     assert(size <= MAX_TOTAL_SIZE);
790 
791     // Try to find existing unused buffer with best size.
792     size_t bestIndex = SIZE_MAX;
793     for(size_t i = 0, count = m_Bufs.size(); i < count; ++i)
794     {
795         BufInfo& currBufInfo = m_Bufs[i];
796         if(!currBufInfo.Used && currBufInfo.Size >= size &&
797             (bestIndex == SIZE_MAX || currBufInfo.Size < m_Bufs[bestIndex].Size))
798         {
799             bestIndex = i;
800         }
801     }
802 
803     if(bestIndex != SIZE_MAX)
804     {
805         m_Bufs[bestIndex].Used = true;
806         outBuffer = m_Bufs[bestIndex].Buffer;
807         outMappedPtr = m_Bufs[bestIndex].MappedPtr;
808         return true;
809     }
810 
811     // Allocate new buffer with requested size.
812     if(m_TotalSize + size <= MAX_TOTAL_SIZE)
813     {
814         BufInfo bufInfo;
815         bufInfo.Size = size;
816         bufInfo.Used = true;
817 
818         VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
819         bufCreateInfo.size = size;
820         bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
821 
822         VmaAllocationCreateInfo allocCreateInfo = {};
823         allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
824         allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
825 
826         VmaAllocationInfo allocInfo;
827         VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &bufInfo.Buffer, &bufInfo.Allocation, &allocInfo);
828         bufInfo.MappedPtr = allocInfo.pMappedData;
829         TEST(res == VK_SUCCESS && bufInfo.MappedPtr);
830 
831         outBuffer = bufInfo.Buffer;
832         outMappedPtr = bufInfo.MappedPtr;
833 
834         m_Bufs.push_back(std::move(bufInfo));
835 
836         m_TotalSize += size;
837 
838         return true;
839     }
840 
841     // There are some unused but smaller buffers: Free them and try again.
842     bool hasUnused = false;
843     for(size_t i = 0, count = m_Bufs.size(); i < count; ++i)
844     {
845         if(!m_Bufs[i].Used)
846         {
847             hasUnused = true;
848             break;
849         }
850     }
851     if(hasUnused)
852     {
853         for(size_t i = m_Bufs.size(); i--; )
854         {
855             if(!m_Bufs[i].Used)
856             {
857                 m_TotalSize -= m_Bufs[i].Size;
858                 vmaDestroyBuffer(g_hAllocator, m_Bufs[i].Buffer, m_Bufs[i].Allocation);
859                 m_Bufs.erase(m_Bufs.begin() + i);
860             }
861         }
862 
863         return AcquireBuffer(size, outBuffer, outMappedPtr);
864    }
865 
866     return false;
867 }
868 
ReleaseAllBuffers()869 void StagingBufferCollection::ReleaseAllBuffers()
870 {
871     for(size_t i = 0, count = m_Bufs.size(); i < count; ++i)
872     {
873         m_Bufs[i].Used = false;
874     }
875 }
876 
// Fills every buffer/image in allocInfo[0..allocInfoCount) with its sequential
// uint32_t test pattern (starting at m_StartValue) by writing into CPU-visible
// staging buffers and recording GPU copy commands into the shared temporary
// command buffer. When the staging budget runs out, pending commands are
// submitted (EndSingleTimeCommands) and the staging buffers are recycled.
// Only single-mip RGBA8 images are supported.
static void UploadGpuData(const AllocInfo* allocInfo, size_t allocInfoCount)
{
    StagingBufferCollection stagingBufs;

    bool cmdBufferStarted = false;
    for(size_t allocInfoIndex = 0; allocInfoIndex < allocInfoCount; ++allocInfoIndex)
    {
        const AllocInfo& currAllocInfo = allocInfo[allocInfoIndex];
        if(currAllocInfo.m_Buffer)
        {
            const VkDeviceSize size = currAllocInfo.m_BufferInfo.size;

            VkBuffer stagingBuf = VK_NULL_HANDLE;
            void* stagingBufMappedPtr = nullptr;
            if(!stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr))
            {
                // Staging budget exhausted: flush pending copies, recycle all
                // staging buffers, then the acquire must succeed.
                TEST(cmdBufferStarted);
                EndSingleTimeCommands();
                stagingBufs.ReleaseAllBuffers();
                cmdBufferStarted = false;

                bool ok = stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr);
                TEST(ok);
            }

            // Fill staging buffer with the sequential pattern starting at m_StartValue.
            {
                assert(size % sizeof(uint32_t) == 0);
                uint32_t* stagingValPtr = (uint32_t*)stagingBufMappedPtr;
                uint32_t val = currAllocInfo.m_StartValue;
                for(size_t i = 0; i < size / sizeof(uint32_t); ++i)
                {
                    *stagingValPtr = val;
                    ++stagingValPtr;
                    ++val;
                }
            }

            // Issue copy command from staging buffer to destination buffer.
            if(!cmdBufferStarted)
            {
                cmdBufferStarted = true;
                BeginSingleTimeCommands();
            }

            VkBufferCopy copy = {};
            copy.srcOffset = 0;
            copy.dstOffset = 0;
            copy.size = size;
            vkCmdCopyBuffer(g_hTemporaryCommandBuffer, stagingBuf, currAllocInfo.m_Buffer, 1, &copy);
        }
        else
        {
            TEST(currAllocInfo.m_ImageInfo.format == VK_FORMAT_R8G8B8A8_UNORM && "Only RGBA8 images are currently supported.");
            TEST(currAllocInfo.m_ImageInfo.mipLevels == 1 && "Only single mip images are currently supported.");

            // 4 bytes per texel (RGBA8), tightly packed rows.
            const VkDeviceSize size = (VkDeviceSize)currAllocInfo.m_ImageInfo.extent.width * currAllocInfo.m_ImageInfo.extent.height * sizeof(uint32_t);

            VkBuffer stagingBuf = VK_NULL_HANDLE;
            void* stagingBufMappedPtr = nullptr;
            if(!stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr))
            {
                // Same flush-and-recycle path as the buffer case above.
                TEST(cmdBufferStarted);
                EndSingleTimeCommands();
                stagingBufs.ReleaseAllBuffers();
                cmdBufferStarted = false;

                bool ok = stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr);
                TEST(ok);
            }

            // Fill staging buffer.
            {
                assert(size % sizeof(uint32_t) == 0);
                uint32_t *stagingValPtr = (uint32_t *)stagingBufMappedPtr;
                uint32_t val = currAllocInfo.m_StartValue;
                for(size_t i = 0; i < size / sizeof(uint32_t); ++i)
                {
                    *stagingValPtr = val;
                    ++stagingValPtr;
                    ++val;
                }
            }

            // Issue copy command from staging buffer to destination buffer.
            if(!cmdBufferStarted)
            {
                cmdBufferStarted = true;
                BeginSingleTimeCommands();
            }


            // Transition the whole image UNDEFINED -> TRANSFER_DST_OPTIMAL.
            VkImageSubresourceRange subresourceRange = {
                VK_IMAGE_ASPECT_COLOR_BIT,
                0, VK_REMAINING_MIP_LEVELS,
                0, VK_REMAINING_ARRAY_LAYERS
            };

            VkImageMemoryBarrier barrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER };
            barrier.srcAccessMask = 0;
            barrier.dstAccessMask = 0;
            barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
            barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
            barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
            barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
            barrier.image = currAllocInfo.m_Image;
            barrier.subresourceRange = subresourceRange;

            // NOTE(review): BOTTOM_OF_PIPE -> TOP_OF_PIPE with empty access masks
            // does not order this transition against the following copy; the
            // canonical form would be dstStage = TRANSFER with dstAccessMask =
            // TRANSFER_WRITE. Confirm whether this is intentional test shorthand.
            vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0,
                0, nullptr,
                0, nullptr,
                1, &barrier);

            // Copy image data
            VkBufferImageCopy copy = {};
            copy.bufferOffset = 0;
            copy.bufferRowLength = 0;
            copy.bufferImageHeight = 0;
            copy.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
            copy.imageSubresource.layerCount = 1;
            copy.imageExtent = currAllocInfo.m_ImageInfo.extent;

            vkCmdCopyBufferToImage(g_hTemporaryCommandBuffer, stagingBuf, currAllocInfo.m_Image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy);

            // Transition TRANSFER_DST_OPTIMAL -> the layout requested by the test.
            barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
            barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
            barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
            barrier.newLayout = currAllocInfo.m_ImageLayout;

            vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0,
                0, nullptr,
                0, nullptr,
                1, &barrier);
        }
    }

    if(cmdBufferStarted)
    {
        EndSingleTimeCommands();
        stagingBufs.ReleaseAllBuffers();
    }
}
1021 
ValidateGpuData(const AllocInfo * allocInfo,size_t allocInfoCount)1022 static void ValidateGpuData(const AllocInfo* allocInfo, size_t allocInfoCount)
1023 {
1024     StagingBufferCollection stagingBufs;
1025 
1026     bool cmdBufferStarted = false;
1027     size_t validateAllocIndexOffset = 0;
1028     std::vector<void*> validateStagingBuffers;
1029     for(size_t allocInfoIndex = 0; allocInfoIndex < allocInfoCount; ++allocInfoIndex)
1030     {
1031         const AllocInfo& currAllocInfo = allocInfo[allocInfoIndex];
1032         if(currAllocInfo.m_Buffer)
1033         {
1034             const VkDeviceSize size = currAllocInfo.m_BufferInfo.size;
1035 
1036             VkBuffer stagingBuf = VK_NULL_HANDLE;
1037             void* stagingBufMappedPtr = nullptr;
1038             if(!stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr))
1039             {
1040                 TEST(cmdBufferStarted);
1041                 EndSingleTimeCommands();
1042                 cmdBufferStarted = false;
1043 
1044                 for(size_t validateIndex = 0;
1045                     validateIndex < validateStagingBuffers.size();
1046                     ++validateIndex)
1047                 {
1048                     const size_t validateAllocIndex = validateIndex + validateAllocIndexOffset;
1049                     const VkDeviceSize validateSize = allocInfo[validateAllocIndex].m_BufferInfo.size;
1050                     TEST(validateSize % sizeof(uint32_t) == 0);
1051                     const uint32_t* stagingValPtr = (const uint32_t*)validateStagingBuffers[validateIndex];
1052                     uint32_t val = allocInfo[validateAllocIndex].m_StartValue;
1053                     bool valid = true;
1054                     for(size_t i = 0; i < validateSize / sizeof(uint32_t); ++i)
1055                     {
1056                         if(*stagingValPtr != val)
1057                         {
1058                             valid = false;
1059                             break;
1060                         }
1061                         ++stagingValPtr;
1062                         ++val;
1063                     }
1064                     TEST(valid);
1065                 }
1066 
1067                 stagingBufs.ReleaseAllBuffers();
1068 
1069                 validateAllocIndexOffset = allocInfoIndex;
1070                 validateStagingBuffers.clear();
1071 
1072                 bool ok = stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr);
1073                 TEST(ok);
1074             }
1075 
1076             // Issue copy command from staging buffer to destination buffer.
1077             if(!cmdBufferStarted)
1078             {
1079                 cmdBufferStarted = true;
1080                 BeginSingleTimeCommands();
1081             }
1082 
1083             VkBufferCopy copy = {};
1084             copy.srcOffset = 0;
1085             copy.dstOffset = 0;
1086             copy.size = size;
1087             vkCmdCopyBuffer(g_hTemporaryCommandBuffer, currAllocInfo.m_Buffer, stagingBuf, 1, &copy);
1088 
1089             // Sava mapped pointer for later validation.
1090             validateStagingBuffers.push_back(stagingBufMappedPtr);
1091         }
1092         else
1093         {
1094             TEST(0 && "Images not currently supported.");
1095         }
1096     }
1097 
1098     if(cmdBufferStarted)
1099     {
1100         EndSingleTimeCommands();
1101 
1102         for(size_t validateIndex = 0;
1103             validateIndex < validateStagingBuffers.size();
1104             ++validateIndex)
1105         {
1106             const size_t validateAllocIndex = validateIndex + validateAllocIndexOffset;
1107             const VkDeviceSize validateSize = allocInfo[validateAllocIndex].m_BufferInfo.size;
1108             TEST(validateSize % sizeof(uint32_t) == 0);
1109             const uint32_t* stagingValPtr = (const uint32_t*)validateStagingBuffers[validateIndex];
1110             uint32_t val = allocInfo[validateAllocIndex].m_StartValue;
1111             bool valid = true;
1112             for(size_t i = 0; i < validateSize / sizeof(uint32_t); ++i)
1113             {
1114                 if(*stagingValPtr != val)
1115                 {
1116                     valid = false;
1117                     break;
1118                 }
1119                 ++stagingValPtr;
1120                 ++val;
1121             }
1122             TEST(valid);
1123         }
1124 
1125         stagingBufs.ReleaseAllBuffers();
1126     }
1127 }
1128 
GetMemReq(VmaAllocationCreateInfo & outMemReq)1129 static void GetMemReq(VmaAllocationCreateInfo& outMemReq)
1130 {
1131     outMemReq = {};
1132     outMemReq.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
1133     //outMemReq.flags = VMA_ALLOCATION_CREATE_PERSISTENT_MAP_BIT;
1134 }
1135 
CreateBuffer(VmaPool pool,const VkBufferCreateInfo & bufCreateInfo,bool persistentlyMapped,AllocInfo & outAllocInfo)1136 static void CreateBuffer(
1137     VmaPool pool,
1138     const VkBufferCreateInfo& bufCreateInfo,
1139     bool persistentlyMapped,
1140     AllocInfo& outAllocInfo)
1141 {
1142     outAllocInfo = {};
1143     outAllocInfo.m_BufferInfo = bufCreateInfo;
1144 
1145     VmaAllocationCreateInfo allocCreateInfo = {};
1146     allocCreateInfo.pool = pool;
1147     if(persistentlyMapped)
1148         allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
1149 
1150     VmaAllocationInfo vmaAllocInfo = {};
1151     ERR_GUARD_VULKAN( vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &outAllocInfo.m_Buffer, &outAllocInfo.m_Allocation, &vmaAllocInfo) );
1152 
1153     // Setup StartValue and fill.
1154     {
1155         outAllocInfo.m_StartValue = (uint32_t)rand();
1156         uint32_t* data = (uint32_t*)vmaAllocInfo.pMappedData;
1157         TEST((data != nullptr) == persistentlyMapped);
1158         if(!persistentlyMapped)
1159         {
1160             ERR_GUARD_VULKAN( vmaMapMemory(g_hAllocator, outAllocInfo.m_Allocation, (void**)&data) );
1161         }
1162 
1163         uint32_t value = outAllocInfo.m_StartValue;
1164         TEST(bufCreateInfo.size % 4 == 0);
1165         for(size_t i = 0; i < bufCreateInfo.size / sizeof(uint32_t); ++i)
1166             data[i] = value++;
1167 
1168         if(!persistentlyMapped)
1169             vmaUnmapMemory(g_hAllocator, outAllocInfo.m_Allocation);
1170     }
1171 }
1172 
CreateAllocation(AllocInfo & outAllocation)1173 static void CreateAllocation(AllocInfo& outAllocation)
1174 {
1175     outAllocation.m_Allocation = nullptr;
1176     outAllocation.m_Buffer = nullptr;
1177     outAllocation.m_Image = nullptr;
1178     outAllocation.m_StartValue = (uint32_t)rand();
1179 
1180     VmaAllocationCreateInfo vmaMemReq;
1181     GetMemReq(vmaMemReq);
1182 
1183     VmaAllocationInfo allocInfo;
1184 
1185     const bool isBuffer = true;//(rand() & 0x1) != 0;
1186     const bool isLarge = (rand() % 16) == 0;
1187     if(isBuffer)
1188     {
1189         const uint32_t bufferSize = isLarge ?
1190             (rand() % 10 + 1) * (1024 * 1024) : // 1 MB ... 10 MB
1191             (rand() % 1024 + 1) * 1024; // 1 KB ... 1 MB
1192 
1193         VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
1194         bufferInfo.size = bufferSize;
1195         bufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
1196 
1197         VkResult res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &vmaMemReq, &outAllocation.m_Buffer, &outAllocation.m_Allocation, &allocInfo);
1198         outAllocation.m_BufferInfo = bufferInfo;
1199         TEST(res == VK_SUCCESS);
1200     }
1201     else
1202     {
1203         const uint32_t imageSizeX = isLarge ?
1204             1024 + rand() % (4096 - 1024) : // 1024 ... 4096
1205             rand() % 1024 + 1; // 1 ... 1024
1206         const uint32_t imageSizeY = isLarge ?
1207             1024 + rand() % (4096 - 1024) : // 1024 ... 4096
1208             rand() % 1024 + 1; // 1 ... 1024
1209 
1210         VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
1211         imageInfo.imageType = VK_IMAGE_TYPE_2D;
1212         imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
1213         imageInfo.extent.width = imageSizeX;
1214         imageInfo.extent.height = imageSizeY;
1215         imageInfo.extent.depth = 1;
1216         imageInfo.mipLevels = 1;
1217         imageInfo.arrayLayers = 1;
1218         imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
1219         imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
1220         imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
1221         imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
1222 
1223         VkResult res = vmaCreateImage(g_hAllocator, &imageInfo, &vmaMemReq, &outAllocation.m_Image, &outAllocation.m_Allocation, &allocInfo);
1224         outAllocation.m_ImageInfo = imageInfo;
1225         TEST(res == VK_SUCCESS);
1226     }
1227 
1228     uint32_t* data = (uint32_t*)allocInfo.pMappedData;
1229     if(allocInfo.pMappedData == nullptr)
1230     {
1231         VkResult res = vmaMapMemory(g_hAllocator, outAllocation.m_Allocation, (void**)&data);
1232         TEST(res == VK_SUCCESS);
1233     }
1234 
1235     uint32_t value = outAllocation.m_StartValue;
1236     TEST(allocInfo.size % 4 == 0);
1237     for(size_t i = 0; i < allocInfo.size / sizeof(uint32_t); ++i)
1238         data[i] = value++;
1239 
1240     if(allocInfo.pMappedData == nullptr)
1241         vmaUnmapMemory(g_hAllocator, outAllocation.m_Allocation);
1242 }
1243 
DestroyAllocation(const AllocInfo & allocation)1244 static void DestroyAllocation(const AllocInfo& allocation)
1245 {
1246     if(allocation.m_Buffer)
1247         vmaDestroyBuffer(g_hAllocator, allocation.m_Buffer, allocation.m_Allocation);
1248     else
1249         vmaDestroyImage(g_hAllocator, allocation.m_Image, allocation.m_Allocation);
1250 }
1251 
DestroyAllAllocations(std::vector<AllocInfo> & allocations)1252 static void DestroyAllAllocations(std::vector<AllocInfo>& allocations)
1253 {
1254     for(size_t i = allocations.size(); i--; )
1255         DestroyAllocation(allocations[i]);
1256     allocations.clear();
1257 }
1258 
ValidateAllocationData(const AllocInfo & allocation)1259 static void ValidateAllocationData(const AllocInfo& allocation)
1260 {
1261     VmaAllocationInfo allocInfo;
1262     vmaGetAllocationInfo(g_hAllocator, allocation.m_Allocation, &allocInfo);
1263 
1264     uint32_t* data = (uint32_t*)allocInfo.pMappedData;
1265     if(allocInfo.pMappedData == nullptr)
1266     {
1267         VkResult res = vmaMapMemory(g_hAllocator, allocation.m_Allocation, (void**)&data);
1268         TEST(res == VK_SUCCESS);
1269     }
1270 
1271     uint32_t value = allocation.m_StartValue;
1272     bool ok = true;
1273     size_t i;
1274     TEST(allocInfo.size % 4 == 0);
1275     for(i = 0; i < allocInfo.size / sizeof(uint32_t); ++i)
1276     {
1277         if(data[i] != value++)
1278         {
1279             ok = false;
1280             break;
1281         }
1282     }
1283     TEST(ok);
1284 
1285     if(allocInfo.pMappedData == nullptr)
1286         vmaUnmapMemory(g_hAllocator, allocation.m_Allocation);
1287 }
1288 
RecreateAllocationResource(AllocInfo & allocation)1289 static void RecreateAllocationResource(AllocInfo& allocation)
1290 {
1291     VmaAllocationInfo allocInfo;
1292     vmaGetAllocationInfo(g_hAllocator, allocation.m_Allocation, &allocInfo);
1293 
1294     if(allocation.m_Buffer)
1295     {
1296         vkDestroyBuffer(g_hDevice, allocation.m_Buffer, g_Allocs);
1297 
1298         VkResult res = vkCreateBuffer(g_hDevice, &allocation.m_BufferInfo, g_Allocs, &allocation.m_Buffer);
1299         TEST(res == VK_SUCCESS);
1300 
1301         // Just to silence validation layer warnings.
1302         VkMemoryRequirements vkMemReq;
1303         vkGetBufferMemoryRequirements(g_hDevice, allocation.m_Buffer, &vkMemReq);
1304         TEST(vkMemReq.size >= allocation.m_BufferInfo.size);
1305 
1306         res = vmaBindBufferMemory(g_hAllocator, allocation.m_Allocation, allocation.m_Buffer);
1307         TEST(res == VK_SUCCESS);
1308     }
1309     else
1310     {
1311         vkDestroyImage(g_hDevice, allocation.m_Image, g_Allocs);
1312 
1313         VkResult res = vkCreateImage(g_hDevice, &allocation.m_ImageInfo, g_Allocs, &allocation.m_Image);
1314         TEST(res == VK_SUCCESS);
1315 
1316         // Just to silence validation layer warnings.
1317         VkMemoryRequirements vkMemReq;
1318         vkGetImageMemoryRequirements(g_hDevice, allocation.m_Image, &vkMemReq);
1319 
1320         res = vmaBindImageMemory(g_hAllocator, allocation.m_Allocation, allocation.m_Image);
1321         TEST(res == VK_SUCCESS);
1322     }
1323 }
1324 
Defragment(AllocInfo * allocs,size_t allocCount,const VmaDefragmentationInfo * defragmentationInfo=nullptr,VmaDefragmentationStats * defragmentationStats=nullptr)1325 static void Defragment(AllocInfo* allocs, size_t allocCount,
1326     const VmaDefragmentationInfo* defragmentationInfo = nullptr,
1327     VmaDefragmentationStats* defragmentationStats = nullptr)
1328 {
1329     std::vector<VmaAllocation> vmaAllocs(allocCount);
1330     for(size_t i = 0; i < allocCount; ++i)
1331         vmaAllocs[i] = allocs[i].m_Allocation;
1332 
1333     std::vector<VkBool32> allocChanged(allocCount);
1334 
1335     ERR_GUARD_VULKAN( vmaDefragment(g_hAllocator, vmaAllocs.data(), allocCount, allocChanged.data(),
1336         defragmentationInfo, defragmentationStats) );
1337 
1338     for(size_t i = 0; i < allocCount; ++i)
1339     {
1340         if(allocChanged[i])
1341         {
1342             RecreateAllocationResource(allocs[i]);
1343         }
1344     }
1345 }
1346 
ValidateAllocationsData(const AllocInfo * allocs,size_t allocCount)1347 static void ValidateAllocationsData(const AllocInfo* allocs, size_t allocCount)
1348 {
1349     std::for_each(allocs, allocs + allocCount, [](const AllocInfo& allocInfo) {
1350         ValidateAllocationData(allocInfo);
1351     });
1352 }
1353 
TestDefragmentationSimple()1354 void TestDefragmentationSimple()
1355 {
1356     wprintf(L"Test defragmentation simple\n");
1357 
1358     RandomNumberGenerator rand(667);
1359 
1360     const VkDeviceSize BUF_SIZE = 0x10000;
1361     const VkDeviceSize BLOCK_SIZE = BUF_SIZE * 8;
1362 
1363     const VkDeviceSize MIN_BUF_SIZE = 32;
1364     const VkDeviceSize MAX_BUF_SIZE = BUF_SIZE * 4;
1365     auto RandomBufSize = [&]() -> VkDeviceSize {
1366         return align_up<VkDeviceSize>(rand.Generate() % (MAX_BUF_SIZE - MIN_BUF_SIZE + 1) + MIN_BUF_SIZE, 32);
1367     };
1368 
1369     VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
1370     bufCreateInfo.size = BUF_SIZE;
1371     bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
1372 
1373     VmaAllocationCreateInfo exampleAllocCreateInfo = {};
1374     exampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
1375 
1376     uint32_t memTypeIndex = UINT32_MAX;
1377     vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &bufCreateInfo, &exampleAllocCreateInfo, &memTypeIndex);
1378 
1379     VmaPoolCreateInfo poolCreateInfo = {};
1380     poolCreateInfo.blockSize = BLOCK_SIZE;
1381     poolCreateInfo.memoryTypeIndex = memTypeIndex;
1382 
1383     VmaPool pool;
1384     ERR_GUARD_VULKAN( vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool) );
1385 
1386     // Defragmentation of empty pool.
1387     {
1388         VmaDefragmentationInfo2 defragInfo = {};
1389         defragInfo.maxCpuBytesToMove = VK_WHOLE_SIZE;
1390         defragInfo.maxCpuAllocationsToMove = UINT32_MAX;
1391         defragInfo.poolCount = 1;
1392         defragInfo.pPools = &pool;
1393 
1394         VmaDefragmentationStats defragStats = {};
1395         VmaDefragmentationContext defragCtx = nullptr;
1396         VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &defragStats, &defragCtx);
1397         TEST(res >= VK_SUCCESS);
1398         vmaDefragmentationEnd(g_hAllocator, defragCtx);
1399         TEST(defragStats.allocationsMoved == 0 && defragStats.bytesFreed == 0 &&
1400             defragStats.bytesMoved == 0 && defragStats.deviceMemoryBlocksFreed == 0);
1401     }
1402 
1403     std::vector<AllocInfo> allocations;
1404 
1405     // persistentlyMappedOption = 0 - not persistently mapped.
1406     // persistentlyMappedOption = 1 - persistently mapped.
1407     for(uint32_t persistentlyMappedOption = 0; persistentlyMappedOption < 2; ++persistentlyMappedOption)
1408     {
1409         wprintf(L"  Persistently mapped option = %u\n", persistentlyMappedOption);
1410         const bool persistentlyMapped = persistentlyMappedOption != 0;
1411 
1412         // # Test 1
1413         // Buffers of fixed size.
1414         // Fill 2 blocks. Remove odd buffers. Defragment everything.
1415         // Expected result: at least 1 block freed.
1416         {
1417             for(size_t i = 0; i < BLOCK_SIZE / BUF_SIZE * 2; ++i)
1418             {
1419                 AllocInfo allocInfo;
1420                 CreateBuffer(pool, bufCreateInfo, persistentlyMapped, allocInfo);
1421                 allocations.push_back(allocInfo);
1422             }
1423 
1424             for(size_t i = 1; i < allocations.size(); ++i)
1425             {
1426                 DestroyAllocation(allocations[i]);
1427                 allocations.erase(allocations.begin() + i);
1428             }
1429 
1430             VmaDefragmentationStats defragStats;
1431             Defragment(allocations.data(), allocations.size(), nullptr, &defragStats);
1432             TEST(defragStats.allocationsMoved > 0 && defragStats.bytesMoved > 0);
1433             TEST(defragStats.deviceMemoryBlocksFreed >= 1);
1434 
1435             ValidateAllocationsData(allocations.data(), allocations.size());
1436 
1437             DestroyAllAllocations(allocations);
1438         }
1439 
1440         // # Test 2
1441         // Buffers of fixed size.
1442         // Fill 2 blocks. Remove odd buffers. Defragment one buffer at time.
1443         // Expected result: Each of 4 interations makes some progress.
1444         {
1445             for(size_t i = 0; i < BLOCK_SIZE / BUF_SIZE * 2; ++i)
1446             {
1447                 AllocInfo allocInfo;
1448                 CreateBuffer(pool, bufCreateInfo, persistentlyMapped, allocInfo);
1449                 allocations.push_back(allocInfo);
1450             }
1451 
1452             for(size_t i = 1; i < allocations.size(); ++i)
1453             {
1454                 DestroyAllocation(allocations[i]);
1455                 allocations.erase(allocations.begin() + i);
1456             }
1457 
1458             VmaDefragmentationInfo defragInfo = {};
1459             defragInfo.maxAllocationsToMove = 1;
1460             defragInfo.maxBytesToMove = BUF_SIZE;
1461 
1462             for(size_t i = 0; i < BLOCK_SIZE / BUF_SIZE / 2; ++i)
1463             {
1464                 VmaDefragmentationStats defragStats;
1465                 Defragment(allocations.data(), allocations.size(), &defragInfo, &defragStats);
1466                 TEST(defragStats.allocationsMoved > 0 && defragStats.bytesMoved > 0);
1467             }
1468 
1469             ValidateAllocationsData(allocations.data(), allocations.size());
1470 
1471             DestroyAllAllocations(allocations);
1472         }
1473 
1474         // # Test 3
1475         // Buffers of variable size.
1476         // Create a number of buffers. Remove some percent of them.
1477         // Defragment while having some percent of them unmovable.
1478         // Expected result: Just simple validation.
1479         {
1480             for(size_t i = 0; i < 100; ++i)
1481             {
1482                 VkBufferCreateInfo localBufCreateInfo = bufCreateInfo;
1483                 localBufCreateInfo.size = RandomBufSize();
1484 
1485                 AllocInfo allocInfo;
1486                 CreateBuffer(pool, bufCreateInfo, persistentlyMapped, allocInfo);
1487                 allocations.push_back(allocInfo);
1488             }
1489 
1490             const uint32_t percentToDelete = 60;
1491             const size_t numberToDelete = allocations.size() * percentToDelete / 100;
1492             for(size_t i = 0; i < numberToDelete; ++i)
1493             {
1494                 size_t indexToDelete = rand.Generate() % (uint32_t)allocations.size();
1495                 DestroyAllocation(allocations[indexToDelete]);
1496                 allocations.erase(allocations.begin() + indexToDelete);
1497             }
1498 
1499             // Non-movable allocations will be at the beginning of allocations array.
1500             const uint32_t percentNonMovable = 20;
1501             const size_t numberNonMovable = allocations.size() * percentNonMovable / 100;
1502             for(size_t i = 0; i < numberNonMovable; ++i)
1503             {
1504                 size_t indexNonMovable = i + rand.Generate() % (uint32_t)(allocations.size() - i);
1505                 if(indexNonMovable != i)
1506                     std::swap(allocations[i], allocations[indexNonMovable]);
1507             }
1508 
1509             VmaDefragmentationStats defragStats;
1510             Defragment(
1511                 allocations.data() + numberNonMovable,
1512                 allocations.size() - numberNonMovable,
1513                 nullptr, &defragStats);
1514 
1515             ValidateAllocationsData(allocations.data(), allocations.size());
1516 
1517             DestroyAllAllocations(allocations);
1518         }
1519     }
1520 
1521     /*
1522     Allocation that must be move to an overlapping place using memmove().
1523     Create 2 buffers, second slightly bigger than the first. Delete first. Then defragment.
1524     */
1525     if(VMA_DEBUG_MARGIN == 0) // FAST algorithm works only when DEBUG_MARGIN disabled.
1526     {
1527         AllocInfo allocInfo[2];
1528 
1529         bufCreateInfo.size = BUF_SIZE;
1530         CreateBuffer(pool, bufCreateInfo, false, allocInfo[0]);
1531         const VkDeviceSize biggerBufSize = BUF_SIZE + BUF_SIZE / 256;
1532         bufCreateInfo.size = biggerBufSize;
1533         CreateBuffer(pool, bufCreateInfo, false, allocInfo[1]);
1534 
1535         DestroyAllocation(allocInfo[0]);
1536 
1537         VmaDefragmentationStats defragStats;
1538         Defragment(&allocInfo[1], 1, nullptr, &defragStats);
1539         // If this fails, it means we couldn't do memmove with overlapping regions.
1540         TEST(defragStats.allocationsMoved == 1 && defragStats.bytesMoved > 0);
1541 
1542         ValidateAllocationsData(&allocInfo[1], 1);
1543         DestroyAllocation(allocInfo[1]);
1544     }
1545 
1546     vmaDestroyPool(g_hAllocator, pool);
1547 }
1548 
TestDefragmentationWholePool()1549 void TestDefragmentationWholePool()
1550 {
1551     wprintf(L"Test defragmentation whole pool\n");
1552 
1553     RandomNumberGenerator rand(668);
1554 
1555     const VkDeviceSize BUF_SIZE = 0x10000;
1556     const VkDeviceSize BLOCK_SIZE = BUF_SIZE * 8;
1557 
1558     VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
1559     bufCreateInfo.size = BUF_SIZE;
1560     bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
1561 
1562     VmaAllocationCreateInfo exampleAllocCreateInfo = {};
1563     exampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
1564 
1565     uint32_t memTypeIndex = UINT32_MAX;
1566     vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &bufCreateInfo, &exampleAllocCreateInfo, &memTypeIndex);
1567 
1568     VmaPoolCreateInfo poolCreateInfo = {};
1569     poolCreateInfo.blockSize = BLOCK_SIZE;
1570     poolCreateInfo.memoryTypeIndex = memTypeIndex;
1571 
1572     VmaDefragmentationStats defragStats[2];
1573     for(size_t caseIndex = 0; caseIndex < 2; ++caseIndex)
1574     {
1575         VmaPool pool;
1576         ERR_GUARD_VULKAN( vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool) );
1577 
1578         std::vector<AllocInfo> allocations;
1579 
1580         // Buffers of fixed size.
1581         // Fill 2 blocks. Remove odd buffers. Defragment all of them.
1582         for(size_t i = 0; i < BLOCK_SIZE / BUF_SIZE * 2; ++i)
1583         {
1584             AllocInfo allocInfo;
1585             CreateBuffer(pool, bufCreateInfo, false, allocInfo);
1586             allocations.push_back(allocInfo);
1587         }
1588 
1589         for(size_t i = 1; i < allocations.size(); ++i)
1590         {
1591             DestroyAllocation(allocations[i]);
1592             allocations.erase(allocations.begin() + i);
1593         }
1594 
1595         VmaDefragmentationInfo2 defragInfo = {};
1596         defragInfo.maxCpuAllocationsToMove = UINT32_MAX;
1597         defragInfo.maxCpuBytesToMove = VK_WHOLE_SIZE;
1598         std::vector<VmaAllocation> allocationsToDefrag;
1599         if(caseIndex == 0)
1600         {
1601             defragInfo.poolCount = 1;
1602             defragInfo.pPools = &pool;
1603         }
1604         else
1605         {
1606             const size_t allocCount = allocations.size();
1607             allocationsToDefrag.resize(allocCount);
1608             std::transform(
1609                 allocations.begin(), allocations.end(),
1610                 allocationsToDefrag.begin(),
1611                 [](const AllocInfo& allocInfo) { return allocInfo.m_Allocation; });
1612             defragInfo.allocationCount = (uint32_t)allocCount;
1613             defragInfo.pAllocations = allocationsToDefrag.data();
1614         }
1615 
1616         VmaDefragmentationContext defragCtx = VK_NULL_HANDLE;
1617         VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &defragStats[caseIndex], &defragCtx);
1618         TEST(res >= VK_SUCCESS);
1619         vmaDefragmentationEnd(g_hAllocator, defragCtx);
1620 
1621         TEST(defragStats[caseIndex].allocationsMoved > 0 && defragStats[caseIndex].bytesMoved > 0);
1622 
1623         ValidateAllocationsData(allocations.data(), allocations.size());
1624 
1625         DestroyAllAllocations(allocations);
1626 
1627         vmaDestroyPool(g_hAllocator, pool);
1628     }
1629 
1630     TEST(defragStats[0].bytesMoved == defragStats[1].bytesMoved);
1631     TEST(defragStats[0].allocationsMoved == defragStats[1].allocationsMoved);
1632     TEST(defragStats[0].bytesFreed == defragStats[1].bytesFreed);
1633     TEST(defragStats[0].deviceMemoryBlocksFreed == defragStats[1].deviceMemoryBlocksFreed);
1634 }
1635 
TestDefragmentationFull()1636 void TestDefragmentationFull()
1637 {
1638     std::vector<AllocInfo> allocations;
1639 
1640     // Create initial allocations.
1641     for(size_t i = 0; i < 400; ++i)
1642     {
1643         AllocInfo allocation;
1644         CreateAllocation(allocation);
1645         allocations.push_back(allocation);
1646     }
1647 
1648     // Delete random allocations
1649     const size_t allocationsToDeletePercent = 80;
1650     size_t allocationsToDelete = allocations.size() * allocationsToDeletePercent / 100;
1651     for(size_t i = 0; i < allocationsToDelete; ++i)
1652     {
1653         size_t index = (size_t)rand() % allocations.size();
1654         DestroyAllocation(allocations[index]);
1655         allocations.erase(allocations.begin() + index);
1656     }
1657 
1658     for(size_t i = 0; i < allocations.size(); ++i)
1659         ValidateAllocationData(allocations[i]);
1660 
1661     //SaveAllocatorStatsToFile(L"Before.csv");
1662 
1663     {
1664         std::vector<VmaAllocation> vmaAllocations(allocations.size());
1665         for(size_t i = 0; i < allocations.size(); ++i)
1666             vmaAllocations[i] = allocations[i].m_Allocation;
1667 
1668         const size_t nonMovablePercent = 0;
1669         size_t nonMovableCount = vmaAllocations.size() * nonMovablePercent / 100;
1670         for(size_t i = 0; i < nonMovableCount; ++i)
1671         {
1672             size_t index = (size_t)rand() % vmaAllocations.size();
1673             vmaAllocations.erase(vmaAllocations.begin() + index);
1674         }
1675 
1676         const uint32_t defragCount = 1;
1677         for(uint32_t defragIndex = 0; defragIndex < defragCount; ++defragIndex)
1678         {
1679             std::vector<VkBool32> allocationsChanged(vmaAllocations.size());
1680 
1681             VmaDefragmentationInfo defragmentationInfo;
1682             defragmentationInfo.maxAllocationsToMove = UINT_MAX;
1683             defragmentationInfo.maxBytesToMove = SIZE_MAX;
1684 
1685             wprintf(L"Defragmentation #%u\n", defragIndex);
1686 
1687             time_point begTime = std::chrono::high_resolution_clock::now();
1688 
1689             VmaDefragmentationStats stats;
1690             VkResult res = vmaDefragment(g_hAllocator, vmaAllocations.data(), vmaAllocations.size(), allocationsChanged.data(), &defragmentationInfo, &stats);
1691             TEST(res >= 0);
1692 
1693             float defragmentDuration = ToFloatSeconds(std::chrono::high_resolution_clock::now() - begTime);
1694 
1695             wprintf(L"Moved allocations %u, bytes %llu\n", stats.allocationsMoved, stats.bytesMoved);
1696             wprintf(L"Freed blocks %u, bytes %llu\n", stats.deviceMemoryBlocksFreed, stats.bytesFreed);
1697             wprintf(L"Time: %.2f s\n", defragmentDuration);
1698 
1699             for(size_t i = 0; i < vmaAllocations.size(); ++i)
1700             {
1701                 if(allocationsChanged[i])
1702                 {
1703                     RecreateAllocationResource(allocations[i]);
1704                 }
1705             }
1706 
1707             for(size_t i = 0; i < allocations.size(); ++i)
1708                 ValidateAllocationData(allocations[i]);
1709 
1710             //wchar_t fileName[MAX_PATH];
1711             //swprintf(fileName, MAX_PATH, L"After_%02u.csv", defragIndex);
1712             //SaveAllocatorStatsToFile(fileName);
1713         }
1714     }
1715 
1716     // Destroy all remaining allocations.
1717     DestroyAllAllocations(allocations);
1718 }
1719 
TestDefragmentationGpu()1720 static void TestDefragmentationGpu()
1721 {
1722     wprintf(L"Test defragmentation GPU\n");
1723     g_MemoryAliasingWarningEnabled = false;
1724 
1725     std::vector<AllocInfo> allocations;
1726 
1727     // Create that many allocations to surely fill 3 new blocks of 256 MB.
1728     const VkDeviceSize bufSizeMin = 5ull * 1024 * 1024;
1729     const VkDeviceSize bufSizeMax = 10ull * 1024 * 1024;
1730     const VkDeviceSize totalSize = 3ull * 256 * 1024 * 1024;
1731     const size_t bufCount = (size_t)(totalSize / bufSizeMin);
1732     const size_t percentToLeave = 30;
1733     const size_t percentNonMovable = 3;
1734     RandomNumberGenerator rand = { 234522 };
1735 
1736     VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
1737 
1738     VmaAllocationCreateInfo allocCreateInfo = {};
1739     allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
1740     allocCreateInfo.flags = 0;
1741 
1742     // Create all intended buffers.
1743     for(size_t i = 0; i < bufCount; ++i)
1744     {
1745         bufCreateInfo.size = align_up(rand.Generate() % (bufSizeMax - bufSizeMin) + bufSizeMin, 32ull);
1746 
1747         if(rand.Generate() % 100 < percentNonMovable)
1748         {
1749             bufCreateInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
1750                 VK_BUFFER_USAGE_TRANSFER_DST_BIT |
1751                 VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
1752             allocCreateInfo.pUserData = (void*)(uintptr_t)2;
1753         }
1754         else
1755         {
1756             // Different usage just to see different color in output from VmaDumpVis.
1757             bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
1758                 VK_BUFFER_USAGE_TRANSFER_DST_BIT |
1759                 VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
1760             // And in JSON dump.
1761             allocCreateInfo.pUserData = (void*)(uintptr_t)1;
1762         }
1763 
1764         AllocInfo alloc;
1765         alloc.CreateBuffer(bufCreateInfo, allocCreateInfo);
1766         alloc.m_StartValue = rand.Generate();
1767         allocations.push_back(alloc);
1768     }
1769 
1770     // Destroy some percentage of them.
1771     {
1772         const size_t buffersToDestroy = round_div<size_t>(bufCount * (100 - percentToLeave), 100);
1773         for(size_t i = 0; i < buffersToDestroy; ++i)
1774         {
1775             const size_t index = rand.Generate() % allocations.size();
1776             allocations[index].Destroy();
1777             allocations.erase(allocations.begin() + index);
1778         }
1779     }
1780 
1781     // Fill them with meaningful data.
1782     UploadGpuData(allocations.data(), allocations.size());
1783 
1784     wchar_t fileName[MAX_PATH];
1785     swprintf_s(fileName, L"GPU_defragmentation_A_before.json");
1786     SaveAllocatorStatsToFile(fileName);
1787 
1788     // Defragment using GPU only.
1789     {
1790         const size_t allocCount = allocations.size();
1791 
1792         std::vector<VmaAllocation> allocationPtrs;
1793         std::vector<VkBool32> allocationChanged;
1794         std::vector<size_t> allocationOriginalIndex;
1795 
1796         for(size_t i = 0; i < allocCount; ++i)
1797         {
1798             VmaAllocationInfo allocInfo = {};
1799             vmaGetAllocationInfo(g_hAllocator, allocations[i].m_Allocation, &allocInfo);
1800             if((uintptr_t)allocInfo.pUserData == 1) // Movable
1801             {
1802                 allocationPtrs.push_back(allocations[i].m_Allocation);
1803                 allocationChanged.push_back(VK_FALSE);
1804                 allocationOriginalIndex.push_back(i);
1805             }
1806         }
1807 
1808         const size_t movableAllocCount = allocationPtrs.size();
1809 
1810         BeginSingleTimeCommands();
1811 
1812         VmaDefragmentationInfo2 defragInfo = {};
1813         defragInfo.flags = 0;
1814         defragInfo.allocationCount = (uint32_t)movableAllocCount;
1815         defragInfo.pAllocations = allocationPtrs.data();
1816         defragInfo.pAllocationsChanged = allocationChanged.data();
1817         defragInfo.maxGpuBytesToMove = VK_WHOLE_SIZE;
1818         defragInfo.maxGpuAllocationsToMove = UINT32_MAX;
1819         defragInfo.commandBuffer = g_hTemporaryCommandBuffer;
1820 
1821         VmaDefragmentationStats stats = {};
1822         VmaDefragmentationContext ctx = VK_NULL_HANDLE;
1823         VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &stats, &ctx);
1824         TEST(res >= VK_SUCCESS);
1825 
1826         EndSingleTimeCommands();
1827 
1828         vmaDefragmentationEnd(g_hAllocator, ctx);
1829 
1830         for(size_t i = 0; i < movableAllocCount; ++i)
1831         {
1832             if(allocationChanged[i])
1833             {
1834                 const size_t origAllocIndex = allocationOriginalIndex[i];
1835                 RecreateAllocationResource(allocations[origAllocIndex]);
1836             }
1837         }
1838 
1839         // If corruption detection is enabled, GPU defragmentation may not work on
1840         // memory types that have this detection active, e.g. on Intel.
1841         #if !defined(VMA_DEBUG_DETECT_CORRUPTION) || VMA_DEBUG_DETECT_CORRUPTION == 0
1842             TEST(stats.allocationsMoved > 0 && stats.bytesMoved > 0);
1843             TEST(stats.deviceMemoryBlocksFreed > 0 && stats.bytesFreed > 0);
1844         #endif
1845     }
1846 
1847     ValidateGpuData(allocations.data(), allocations.size());
1848 
1849     swprintf_s(fileName, L"GPU_defragmentation_B_after.json");
1850     SaveAllocatorStatsToFile(fileName);
1851 
1852     // Destroy all remaining buffers.
1853     for(size_t i = allocations.size(); i--; )
1854     {
1855         allocations[i].Destroy();
1856     }
1857 
1858     g_MemoryAliasingWarningEnabled = true;
1859 }
1860 
ProcessDefragmentationStepInfo(VmaDefragmentationPassInfo & stepInfo)1861 static void ProcessDefragmentationStepInfo(VmaDefragmentationPassInfo &stepInfo)
1862 {
1863     std::vector<VkImageMemoryBarrier> beginImageBarriers;
1864     std::vector<VkImageMemoryBarrier> finalizeImageBarriers;
1865 
1866     VkPipelineStageFlags beginSrcStageMask = 0;
1867     VkPipelineStageFlags beginDstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
1868 
1869     VkPipelineStageFlags finalizeSrcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
1870     VkPipelineStageFlags finalizeDstStageMask = 0;
1871 
1872     bool wantsMemoryBarrier = false;
1873 
1874     VkMemoryBarrier beginMemoryBarrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER };
1875     VkMemoryBarrier finalizeMemoryBarrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER };
1876 
1877     for(uint32_t i = 0; i < stepInfo.moveCount; ++i)
1878     {
1879         VmaAllocationInfo info;
1880         vmaGetAllocationInfo(g_hAllocator, stepInfo.pMoves[i].allocation, &info);
1881 
1882         AllocInfo *allocInfo = (AllocInfo *)info.pUserData;
1883 
1884         if(allocInfo->m_Image)
1885         {
1886             VkImage newImage;
1887 
1888             const VkResult result = vkCreateImage(g_hDevice, &allocInfo->m_ImageInfo, g_Allocs, &newImage);
1889             TEST(result >= VK_SUCCESS);
1890 
1891             vkBindImageMemory(g_hDevice, newImage, stepInfo.pMoves[i].memory, stepInfo.pMoves[i].offset);
1892             allocInfo->m_NewImage = newImage;
1893 
1894             // Keep track of our pipeline stages that we need to wait/signal on
1895             beginSrcStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
1896             finalizeDstStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
1897 
1898             // We need one pipeline barrier and two image layout transitions here
1899             // First we'll have to turn our newly created image into VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
1900             // And the second one is turning the old image into VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
1901 
1902             VkImageSubresourceRange subresourceRange = {
1903                 VK_IMAGE_ASPECT_COLOR_BIT,
1904                 0, VK_REMAINING_MIP_LEVELS,
1905                 0, VK_REMAINING_ARRAY_LAYERS
1906             };
1907 
1908             VkImageMemoryBarrier barrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER };
1909             barrier.srcAccessMask = 0;
1910             barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
1911             barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
1912             barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
1913             barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1914             barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1915             barrier.image = newImage;
1916             barrier.subresourceRange = subresourceRange;
1917 
1918             beginImageBarriers.push_back(barrier);
1919 
1920             // Second barrier to convert the existing image. This one actually needs a real barrier
1921             barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
1922             barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
1923             barrier.oldLayout = allocInfo->m_ImageLayout;
1924             barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
1925             barrier.image = allocInfo->m_Image;
1926 
1927             beginImageBarriers.push_back(barrier);
1928 
1929             // And lastly we need a barrier that turns our new image into the layout of the old one
1930             barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
1931             barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
1932             barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
1933             barrier.newLayout = allocInfo->m_ImageLayout;
1934             barrier.image = newImage;
1935 
1936             finalizeImageBarriers.push_back(barrier);
1937         }
1938         else if(allocInfo->m_Buffer)
1939         {
1940             VkBuffer newBuffer;
1941 
1942             const VkResult result = vkCreateBuffer(g_hDevice, &allocInfo->m_BufferInfo, g_Allocs, &newBuffer);
1943             TEST(result >= VK_SUCCESS);
1944 
1945             vkBindBufferMemory(g_hDevice, newBuffer, stepInfo.pMoves[i].memory, stepInfo.pMoves[i].offset);
1946             allocInfo->m_NewBuffer = newBuffer;
1947 
1948             // Keep track of our pipeline stages that we need to wait/signal on
1949             beginSrcStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
1950             finalizeDstStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
1951 
1952             beginMemoryBarrier.srcAccessMask |= VK_ACCESS_MEMORY_WRITE_BIT;
1953             beginMemoryBarrier.dstAccessMask |= VK_ACCESS_TRANSFER_READ_BIT;
1954 
1955             finalizeMemoryBarrier.srcAccessMask |= VK_ACCESS_TRANSFER_WRITE_BIT;
1956             finalizeMemoryBarrier.dstAccessMask |= VK_ACCESS_MEMORY_READ_BIT;
1957 
1958             wantsMemoryBarrier = true;
1959         }
1960     }
1961 
1962     if(!beginImageBarriers.empty() || wantsMemoryBarrier)
1963     {
1964         const uint32_t memoryBarrierCount = wantsMemoryBarrier ? 1 : 0;
1965 
1966         vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, beginSrcStageMask, beginDstStageMask, 0,
1967             memoryBarrierCount, &beginMemoryBarrier,
1968             0, nullptr,
1969             (uint32_t)beginImageBarriers.size(), beginImageBarriers.data());
1970     }
1971 
1972     for(uint32_t i = 0; i < stepInfo.moveCount; ++ i)
1973     {
1974         VmaAllocationInfo info;
1975         vmaGetAllocationInfo(g_hAllocator, stepInfo.pMoves[i].allocation, &info);
1976 
1977         AllocInfo *allocInfo = (AllocInfo *)info.pUserData;
1978 
1979         if(allocInfo->m_Image)
1980         {
1981             std::vector<VkImageCopy> imageCopies;
1982 
1983             // Copy all mips of the source image into the target image
1984             VkOffset3D offset = { 0, 0, 0 };
1985             VkExtent3D extent = allocInfo->m_ImageInfo.extent;
1986 
1987             VkImageSubresourceLayers subresourceLayers = {
1988                 VK_IMAGE_ASPECT_COLOR_BIT,
1989                 0,
1990                 0, 1
1991             };
1992 
1993             for(uint32_t mip = 0; mip < allocInfo->m_ImageInfo.mipLevels; ++ mip)
1994             {
1995                 subresourceLayers.mipLevel = mip;
1996 
1997                 VkImageCopy imageCopy{
1998                     subresourceLayers,
1999                     offset,
2000                     subresourceLayers,
2001                     offset,
2002                     extent
2003                 };
2004 
2005                 imageCopies.push_back(imageCopy);
2006 
2007                 extent.width = std::max(uint32_t(1), extent.width >> 1);
2008                 extent.height = std::max(uint32_t(1), extent.height >> 1);
2009                 extent.depth = std::max(uint32_t(1), extent.depth >> 1);
2010             }
2011 
2012             vkCmdCopyImage(
2013                 g_hTemporaryCommandBuffer,
2014                 allocInfo->m_Image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
2015                 allocInfo->m_NewImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
2016                 (uint32_t)imageCopies.size(), imageCopies.data());
2017         }
2018         else if(allocInfo->m_Buffer)
2019         {
2020             VkBufferCopy region = {
2021                 0,
2022                 0,
2023                 allocInfo->m_BufferInfo.size };
2024 
2025             vkCmdCopyBuffer(g_hTemporaryCommandBuffer,
2026                 allocInfo->m_Buffer, allocInfo->m_NewBuffer,
2027                 1, &region);
2028         }
2029     }
2030 
2031     if(!finalizeImageBarriers.empty() || wantsMemoryBarrier)
2032     {
2033         const uint32_t memoryBarrierCount = wantsMemoryBarrier ? 1 : 0;
2034 
2035         vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, finalizeSrcStageMask, finalizeDstStageMask, 0,
2036             memoryBarrierCount, &finalizeMemoryBarrier,
2037             0, nullptr,
2038             (uint32_t)finalizeImageBarriers.size(), finalizeImageBarriers.data());
2039     }
2040 }
2041 
2042 
TestDefragmentationIncrementalBasic()2043 static void TestDefragmentationIncrementalBasic()
2044 {
2045     wprintf(L"Test defragmentation incremental basic\n");
2046     g_MemoryAliasingWarningEnabled = false;
2047 
2048     std::vector<AllocInfo> allocations;
2049 
2050     // Create that many allocations to surely fill 3 new blocks of 256 MB.
2051     const std::array<uint32_t, 3> imageSizes = { 256, 512, 1024 };
2052     const VkDeviceSize bufSizeMin = 5ull * 1024 * 1024;
2053     const VkDeviceSize bufSizeMax = 10ull * 1024 * 1024;
2054     const VkDeviceSize totalSize = 3ull * 256 * 1024 * 1024;
2055     const size_t imageCount = totalSize / ((size_t)imageSizes[0] * imageSizes[0] * 4) / 2;
2056     const size_t bufCount = (size_t)(totalSize / bufSizeMin) / 2;
2057     const size_t percentToLeave = 30;
2058     RandomNumberGenerator rand = { 234522 };
2059 
2060     VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
2061     imageInfo.imageType = VK_IMAGE_TYPE_2D;
2062     imageInfo.extent.depth = 1;
2063     imageInfo.mipLevels = 1;
2064     imageInfo.arrayLayers = 1;
2065     imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
2066     imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
2067     imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
2068     imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
2069     imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
2070 
2071     VmaAllocationCreateInfo allocCreateInfo = {};
2072     allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
2073     allocCreateInfo.flags = 0;
2074 
2075     // Create all intended images.
2076     for(size_t i = 0; i < imageCount; ++i)
2077     {
2078         const uint32_t size = imageSizes[rand.Generate() % 3];
2079 
2080         imageInfo.extent.width = size;
2081         imageInfo.extent.height = size;
2082 
2083         AllocInfo alloc;
2084         alloc.CreateImage(imageInfo, allocCreateInfo, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
2085         alloc.m_StartValue = 0;
2086 
2087         allocations.push_back(alloc);
2088     }
2089 
2090     // And all buffers
2091     VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2092 
2093     for(size_t i = 0; i < bufCount; ++i)
2094     {
2095         bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
2096         bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
2097 
2098         AllocInfo alloc;
2099         alloc.CreateBuffer(bufCreateInfo, allocCreateInfo);
2100         alloc.m_StartValue = 0;
2101 
2102         allocations.push_back(alloc);
2103     }
2104 
2105     // Destroy some percentage of them.
2106     {
2107         const size_t allocationsToDestroy = round_div<size_t>((imageCount + bufCount) * (100 - percentToLeave), 100);
2108         for(size_t i = 0; i < allocationsToDestroy; ++i)
2109         {
2110             const size_t index = rand.Generate() % allocations.size();
2111             allocations[index].Destroy();
2112             allocations.erase(allocations.begin() + index);
2113         }
2114     }
2115 
2116     {
2117         // Set our user data pointers. A real application should probably be more clever here
2118         const size_t allocationCount = allocations.size();
2119         for(size_t i = 0; i < allocationCount; ++i)
2120         {
2121             AllocInfo &alloc = allocations[i];
2122             vmaSetAllocationUserData(g_hAllocator, alloc.m_Allocation, &alloc);
2123         }
2124     }
2125 
2126     // Fill them with meaningful data.
2127     UploadGpuData(allocations.data(), allocations.size());
2128 
2129     wchar_t fileName[MAX_PATH];
2130     swprintf_s(fileName, L"GPU_defragmentation_incremental_basic_A_before.json");
2131     SaveAllocatorStatsToFile(fileName);
2132 
2133     // Defragment using GPU only.
2134     {
2135         const size_t allocCount = allocations.size();
2136 
2137         std::vector<VmaAllocation> allocationPtrs;
2138 
2139         for(size_t i = 0; i < allocCount; ++i)
2140         {
2141             allocationPtrs.push_back(allocations[i].m_Allocation);
2142         }
2143 
2144         const size_t movableAllocCount = allocationPtrs.size();
2145 
2146         VmaDefragmentationInfo2 defragInfo = {};
2147         defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_INCREMENTAL;
2148         defragInfo.allocationCount = (uint32_t)movableAllocCount;
2149         defragInfo.pAllocations = allocationPtrs.data();
2150         defragInfo.maxGpuBytesToMove = VK_WHOLE_SIZE;
2151         defragInfo.maxGpuAllocationsToMove = UINT32_MAX;
2152 
2153         VmaDefragmentationStats stats = {};
2154         VmaDefragmentationContext ctx = VK_NULL_HANDLE;
2155         VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &stats, &ctx);
2156         TEST(res >= VK_SUCCESS);
2157 
2158         res = VK_NOT_READY;
2159 
2160         std::vector<VmaDefragmentationPassMoveInfo> moveInfo;
2161         moveInfo.resize(movableAllocCount);
2162 
2163         while(res == VK_NOT_READY)
2164         {
2165             VmaDefragmentationPassInfo stepInfo = {};
2166             stepInfo.pMoves = moveInfo.data();
2167             stepInfo.moveCount = (uint32_t)moveInfo.size();
2168 
2169             res = vmaBeginDefragmentationPass(g_hAllocator, ctx, &stepInfo);
2170             TEST(res >= VK_SUCCESS);
2171 
2172             BeginSingleTimeCommands();
2173             std::vector<void*> newHandles;
2174             ProcessDefragmentationStepInfo(stepInfo);
2175             EndSingleTimeCommands();
2176 
2177             res = vmaEndDefragmentationPass(g_hAllocator, ctx);
2178 
2179             // Destroy old buffers/images and replace them with new handles.
2180             for(size_t i = 0; i < stepInfo.moveCount; ++i)
2181             {
2182                 VmaAllocation const alloc = stepInfo.pMoves[i].allocation;
2183                 VmaAllocationInfo vmaAllocInfo;
2184                 vmaGetAllocationInfo(g_hAllocator, alloc, &vmaAllocInfo);
2185                 AllocInfo* allocInfo = (AllocInfo*)vmaAllocInfo.pUserData;
2186                 if(allocInfo->m_Buffer)
2187                 {
2188                     assert(allocInfo->m_NewBuffer && !allocInfo->m_Image && !allocInfo->m_NewImage);
2189                     vkDestroyBuffer(g_hDevice, allocInfo->m_Buffer, g_Allocs);
2190                     allocInfo->m_Buffer = allocInfo->m_NewBuffer;
2191                     allocInfo->m_NewBuffer = VK_NULL_HANDLE;
2192                 }
2193                 else if(allocInfo->m_Image)
2194                 {
2195                     assert(allocInfo->m_NewImage && !allocInfo->m_Buffer && !allocInfo->m_NewBuffer);
2196                     vkDestroyImage(g_hDevice, allocInfo->m_Image, g_Allocs);
2197                     allocInfo->m_Image = allocInfo->m_NewImage;
2198                     allocInfo->m_NewImage = VK_NULL_HANDLE;
2199                 }
2200                 else
2201                     assert(0);
2202             }
2203         }
2204 
2205         TEST(res >= VK_SUCCESS);
2206         vmaDefragmentationEnd(g_hAllocator, ctx);
2207 
2208         // If corruption detection is enabled, GPU defragmentation may not work on
2209         // memory types that have this detection active, e.g. on Intel.
2210 #if !defined(VMA_DEBUG_DETECT_CORRUPTION) || VMA_DEBUG_DETECT_CORRUPTION == 0
2211         TEST(stats.allocationsMoved > 0 && stats.bytesMoved > 0);
2212         TEST(stats.deviceMemoryBlocksFreed > 0 && stats.bytesFreed > 0);
2213 #endif
2214     }
2215 
2216     //ValidateGpuData(allocations.data(), allocations.size());
2217 
2218     swprintf_s(fileName, L"GPU_defragmentation_incremental_basic_B_after.json");
2219     SaveAllocatorStatsToFile(fileName);
2220 
2221     // Destroy all remaining buffers and images.
2222     for(size_t i = allocations.size(); i--; )
2223     {
2224         allocations[i].Destroy();
2225     }
2226 
2227     g_MemoryAliasingWarningEnabled = true;
2228 }
2229 
TestDefragmentationIncrementalComplex()2230 void TestDefragmentationIncrementalComplex()
2231 {
2232     wprintf(L"Test defragmentation incremental complex\n");
2233     g_MemoryAliasingWarningEnabled = false;
2234 
2235     std::vector<AllocInfo> allocations;
2236 
2237     // Create that many allocations to surely fill 3 new blocks of 256 MB.
2238     const std::array<uint32_t, 3> imageSizes = { 256, 512, 1024 };
2239     const VkDeviceSize bufSizeMin = 5ull * 1024 * 1024;
2240     const VkDeviceSize bufSizeMax = 10ull * 1024 * 1024;
2241     const VkDeviceSize totalSize = 3ull * 256 * 1024 * 1024;
2242     const size_t imageCount = (size_t)(totalSize / (imageSizes[0] * imageSizes[0] * 4)) / 2;
2243     const size_t bufCount = (size_t)(totalSize / bufSizeMin) / 2;
2244     const size_t percentToLeave = 30;
2245     RandomNumberGenerator rand = { 234522 };
2246 
2247     VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
2248     imageInfo.imageType = VK_IMAGE_TYPE_2D;
2249     imageInfo.extent.depth = 1;
2250     imageInfo.mipLevels = 1;
2251     imageInfo.arrayLayers = 1;
2252     imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
2253     imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
2254     imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
2255     imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
2256     imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
2257 
2258     VmaAllocationCreateInfo allocCreateInfo = {};
2259     allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
2260     allocCreateInfo.flags = 0;
2261 
2262     // Create all intended images.
2263     for(size_t i = 0; i < imageCount; ++i)
2264     {
2265         const uint32_t size = imageSizes[rand.Generate() % 3];
2266 
2267         imageInfo.extent.width = size;
2268         imageInfo.extent.height = size;
2269 
2270         AllocInfo alloc;
2271         alloc.CreateImage(imageInfo, allocCreateInfo, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
2272         alloc.m_StartValue = 0;
2273 
2274         allocations.push_back(alloc);
2275     }
2276 
2277     // And all buffers
2278     VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2279 
2280     for(size_t i = 0; i < bufCount; ++i)
2281     {
2282         bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
2283         bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
2284 
2285         AllocInfo alloc;
2286         alloc.CreateBuffer(bufCreateInfo, allocCreateInfo);
2287         alloc.m_StartValue = 0;
2288 
2289         allocations.push_back(alloc);
2290     }
2291 
2292     // Destroy some percentage of them.
2293     {
2294         const size_t allocationsToDestroy = round_div<size_t>((imageCount + bufCount) * (100 - percentToLeave), 100);
2295         for(size_t i = 0; i < allocationsToDestroy; ++i)
2296         {
2297             const size_t index = rand.Generate() % allocations.size();
2298             allocations[index].Destroy();
2299             allocations.erase(allocations.begin() + index);
2300         }
2301     }
2302 
2303     {
2304         // Set our user data pointers. A real application should probably be more clever here
2305         const size_t allocationCount = allocations.size();
2306         for(size_t i = 0; i < allocationCount; ++i)
2307         {
2308             AllocInfo &alloc = allocations[i];
2309             vmaSetAllocationUserData(g_hAllocator, alloc.m_Allocation, &alloc);
2310         }
2311     }
2312 
2313     // Fill them with meaningful data.
2314     UploadGpuData(allocations.data(), allocations.size());
2315 
2316     wchar_t fileName[MAX_PATH];
2317     swprintf_s(fileName, L"GPU_defragmentation_incremental_complex_A_before.json");
2318     SaveAllocatorStatsToFile(fileName);
2319 
2320     std::vector<AllocInfo> additionalAllocations;
2321 
2322 #define MakeAdditionalAllocation() \
2323     do { \
2324         { \
2325             bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16); \
2326             bufCreateInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT; \
2327             \
2328             AllocInfo alloc; \
2329             alloc.CreateBuffer(bufCreateInfo, allocCreateInfo); \
2330             \
2331             additionalAllocations.push_back(alloc); \
2332         } \
2333     } while(0)
2334 
2335     // Defragment using GPU only.
2336     {
2337         const size_t allocCount = allocations.size();
2338 
2339         std::vector<VmaAllocation> allocationPtrs;
2340 
2341         for(size_t i = 0; i < allocCount; ++i)
2342         {
2343             VmaAllocationInfo allocInfo = {};
2344             vmaGetAllocationInfo(g_hAllocator, allocations[i].m_Allocation, &allocInfo);
2345 
2346             allocationPtrs.push_back(allocations[i].m_Allocation);
2347         }
2348 
2349         const size_t movableAllocCount = allocationPtrs.size();
2350 
2351         VmaDefragmentationInfo2 defragInfo = {};
2352         defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_INCREMENTAL;
2353         defragInfo.allocationCount = (uint32_t)movableAllocCount;
2354         defragInfo.pAllocations = allocationPtrs.data();
2355         defragInfo.maxGpuBytesToMove = VK_WHOLE_SIZE;
2356         defragInfo.maxGpuAllocationsToMove = UINT32_MAX;
2357 
2358         VmaDefragmentationStats stats = {};
2359         VmaDefragmentationContext ctx = VK_NULL_HANDLE;
2360         VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &stats, &ctx);
2361         TEST(res >= VK_SUCCESS);
2362 
2363         res = VK_NOT_READY;
2364 
2365         std::vector<VmaDefragmentationPassMoveInfo> moveInfo;
2366         moveInfo.resize(movableAllocCount);
2367 
2368         MakeAdditionalAllocation();
2369 
2370         while(res == VK_NOT_READY)
2371         {
2372             VmaDefragmentationPassInfo stepInfo = {};
2373             stepInfo.pMoves = moveInfo.data();
2374             stepInfo.moveCount = (uint32_t)moveInfo.size();
2375 
2376             res = vmaBeginDefragmentationPass(g_hAllocator, ctx, &stepInfo);
2377             TEST(res >= VK_SUCCESS);
2378 
2379             MakeAdditionalAllocation();
2380 
2381             BeginSingleTimeCommands();
2382             ProcessDefragmentationStepInfo(stepInfo);
2383             EndSingleTimeCommands();
2384 
2385             res = vmaEndDefragmentationPass(g_hAllocator, ctx);
2386 
2387             // Destroy old buffers/images and replace them with new handles.
2388             for(size_t i = 0; i < stepInfo.moveCount; ++i)
2389             {
2390                 VmaAllocation const alloc = stepInfo.pMoves[i].allocation;
2391                 VmaAllocationInfo vmaAllocInfo;
2392                 vmaGetAllocationInfo(g_hAllocator, alloc, &vmaAllocInfo);
2393                 AllocInfo* allocInfo = (AllocInfo*)vmaAllocInfo.pUserData;
2394                 if(allocInfo->m_Buffer)
2395                 {
2396                     assert(allocInfo->m_NewBuffer && !allocInfo->m_Image && !allocInfo->m_NewImage);
2397                     vkDestroyBuffer(g_hDevice, allocInfo->m_Buffer, g_Allocs);
2398                     allocInfo->m_Buffer = allocInfo->m_NewBuffer;
2399                     allocInfo->m_NewBuffer = VK_NULL_HANDLE;
2400                 }
2401                 else if(allocInfo->m_Image)
2402                 {
2403                     assert(allocInfo->m_NewImage && !allocInfo->m_Buffer && !allocInfo->m_NewBuffer);
2404                     vkDestroyImage(g_hDevice, allocInfo->m_Image, g_Allocs);
2405                     allocInfo->m_Image = allocInfo->m_NewImage;
2406                     allocInfo->m_NewImage = VK_NULL_HANDLE;
2407                 }
2408                 else
2409                     assert(0);
2410             }
2411 
2412             MakeAdditionalAllocation();
2413         }
2414 
2415         TEST(res >= VK_SUCCESS);
2416         vmaDefragmentationEnd(g_hAllocator, ctx);
2417 
2418         // If corruption detection is enabled, GPU defragmentation may not work on
2419         // memory types that have this detection active, e.g. on Intel.
2420 #if !defined(VMA_DEBUG_DETECT_CORRUPTION) || VMA_DEBUG_DETECT_CORRUPTION == 0
2421         TEST(stats.allocationsMoved > 0 && stats.bytesMoved > 0);
2422         TEST(stats.deviceMemoryBlocksFreed > 0 && stats.bytesFreed > 0);
2423 #endif
2424     }
2425 
2426     //ValidateGpuData(allocations.data(), allocations.size());
2427 
2428     swprintf_s(fileName, L"GPU_defragmentation_incremental_complex_B_after.json");
2429     SaveAllocatorStatsToFile(fileName);
2430 
2431     // Destroy all remaining buffers.
2432     for(size_t i = allocations.size(); i--; )
2433     {
2434         allocations[i].Destroy();
2435     }
2436 
2437     for(size_t i = additionalAllocations.size(); i--; )
2438     {
2439         additionalAllocations[i].Destroy();
2440     }
2441 
2442     g_MemoryAliasingWarningEnabled = true;
2443 }
2444 
2445 
TestUserData()2446 static void TestUserData()
2447 {
2448     VkResult res;
2449 
2450     VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2451     bufCreateInfo.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
2452     bufCreateInfo.size = 0x10000;
2453 
2454     for(uint32_t testIndex = 0; testIndex < 2; ++testIndex)
2455     {
2456         // Opaque pointer
2457         {
2458 
2459             void* numberAsPointer = (void*)(size_t)0xC2501FF3u;
2460             void* pointerToSomething = &res;
2461 
2462             VmaAllocationCreateInfo allocCreateInfo = {};
2463             allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
2464             allocCreateInfo.pUserData = numberAsPointer;
2465             if(testIndex == 1)
2466                 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
2467 
2468             VkBuffer buf; VmaAllocation alloc; VmaAllocationInfo allocInfo;
2469             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2470             TEST(res == VK_SUCCESS);
2471             TEST(allocInfo.pUserData = numberAsPointer);
2472 
2473             vmaGetAllocationInfo(g_hAllocator, alloc, &allocInfo);
2474             TEST(allocInfo.pUserData == numberAsPointer);
2475 
2476             vmaSetAllocationUserData(g_hAllocator, alloc, pointerToSomething);
2477             vmaGetAllocationInfo(g_hAllocator, alloc, &allocInfo);
2478             TEST(allocInfo.pUserData == pointerToSomething);
2479 
2480             vmaDestroyBuffer(g_hAllocator, buf, alloc);
2481         }
2482 
2483         // String
2484         {
2485             const char* name1 = "Buffer name \\\"\'<>&% \nSecond line .,;=";
2486             const char* name2 = "2";
2487             const size_t name1Len = strlen(name1);
2488 
2489             char* name1Buf = new char[name1Len + 1];
2490             strcpy_s(name1Buf, name1Len + 1, name1);
2491 
2492             VmaAllocationCreateInfo allocCreateInfo = {};
2493             allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
2494             allocCreateInfo.flags = VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT;
2495             allocCreateInfo.pUserData = name1Buf;
2496             if(testIndex == 1)
2497                 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
2498 
2499             VkBuffer buf; VmaAllocation alloc; VmaAllocationInfo allocInfo;
2500             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2501             TEST(res == VK_SUCCESS);
2502             TEST(allocInfo.pUserData != nullptr && allocInfo.pUserData != name1Buf);
2503             TEST(strcmp(name1, (const char*)allocInfo.pUserData) == 0);
2504 
2505             delete[] name1Buf;
2506 
2507             vmaGetAllocationInfo(g_hAllocator, alloc, &allocInfo);
2508             TEST(strcmp(name1, (const char*)allocInfo.pUserData) == 0);
2509 
2510             vmaSetAllocationUserData(g_hAllocator, alloc, (void*)name2);
2511             vmaGetAllocationInfo(g_hAllocator, alloc, &allocInfo);
2512             TEST(strcmp(name2, (const char*)allocInfo.pUserData) == 0);
2513 
2514             vmaSetAllocationUserData(g_hAllocator, alloc, nullptr);
2515             vmaGetAllocationInfo(g_hAllocator, alloc, &allocInfo);
2516             TEST(allocInfo.pUserData == nullptr);
2517 
2518             vmaDestroyBuffer(g_hAllocator, buf, alloc);
2519         }
2520     }
2521 }
2522 
TestInvalidAllocations()2523 static void TestInvalidAllocations()
2524 {
2525     VkResult res;
2526 
2527     VmaAllocationCreateInfo allocCreateInfo = {};
2528     allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
2529 
2530     // Try to allocate 0 bytes.
2531     {
2532         VkMemoryRequirements memReq = {};
2533         memReq.size = 0; // !!!
2534         memReq.alignment = 4;
2535         memReq.memoryTypeBits = UINT32_MAX;
2536         VmaAllocation alloc = VK_NULL_HANDLE;
2537         res = vmaAllocateMemory(g_hAllocator, &memReq, &allocCreateInfo, &alloc, nullptr);
2538         TEST(res == VK_ERROR_VALIDATION_FAILED_EXT && alloc == VK_NULL_HANDLE);
2539     }
2540 
2541     // Try to create buffer with size = 0.
2542     {
2543         VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2544         bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
2545         bufCreateInfo.size = 0; // !!!
2546         VkBuffer buf = VK_NULL_HANDLE;
2547         VmaAllocation alloc = VK_NULL_HANDLE;
2548         res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr);
2549         TEST(res == VK_ERROR_VALIDATION_FAILED_EXT && buf == VK_NULL_HANDLE && alloc == VK_NULL_HANDLE);
2550     }
2551 
2552     // Try to create image with one dimension = 0.
2553     {
2554         VkImageCreateInfo imageCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2555         imageCreateInfo.imageType = VK_IMAGE_TYPE_2D;
2556         imageCreateInfo.format = VK_FORMAT_B8G8R8A8_UNORM;
2557         imageCreateInfo.extent.width = 128;
2558         imageCreateInfo.extent.height = 0; // !!!
2559         imageCreateInfo.extent.depth = 1;
2560         imageCreateInfo.mipLevels = 1;
2561         imageCreateInfo.arrayLayers = 1;
2562         imageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
2563         imageCreateInfo.tiling = VK_IMAGE_TILING_LINEAR;
2564         imageCreateInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
2565         imageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
2566         VkImage image = VK_NULL_HANDLE;
2567         VmaAllocation alloc = VK_NULL_HANDLE;
2568         res = vmaCreateImage(g_hAllocator, &imageCreateInfo, &allocCreateInfo, &image, &alloc, nullptr);
2569         TEST(res == VK_ERROR_VALIDATION_FAILED_EXT && image == VK_NULL_HANDLE && alloc == VK_NULL_HANDLE);
2570     }
2571 }
2572 
TestMemoryRequirements()2573 static void TestMemoryRequirements()
2574 {
2575     VkResult res;
2576     VkBuffer buf;
2577     VmaAllocation alloc;
2578     VmaAllocationInfo allocInfo;
2579 
2580     const VkPhysicalDeviceMemoryProperties* memProps;
2581     vmaGetMemoryProperties(g_hAllocator, &memProps);
2582 
2583     VkBufferCreateInfo bufInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2584     bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
2585     bufInfo.size = 128;
2586 
2587     VmaAllocationCreateInfo allocCreateInfo = {};
2588 
2589     // No requirements.
2590     res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2591     TEST(res == VK_SUCCESS);
2592     vmaDestroyBuffer(g_hAllocator, buf, alloc);
2593 
2594     // Usage.
2595     allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
2596     allocCreateInfo.requiredFlags = 0;
2597     allocCreateInfo.preferredFlags = 0;
2598     allocCreateInfo.memoryTypeBits = UINT32_MAX;
2599 
2600     res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2601     TEST(res == VK_SUCCESS);
2602     TEST(memProps->memoryTypes[allocInfo.memoryType].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
2603     vmaDestroyBuffer(g_hAllocator, buf, alloc);
2604 
2605     // Required flags, preferred flags.
2606     allocCreateInfo.usage = VMA_MEMORY_USAGE_UNKNOWN;
2607     allocCreateInfo.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
2608     allocCreateInfo.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
2609     allocCreateInfo.memoryTypeBits = 0;
2610 
2611     res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2612     TEST(res == VK_SUCCESS);
2613     TEST(memProps->memoryTypes[allocInfo.memoryType].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
2614     TEST(memProps->memoryTypes[allocInfo.memoryType].propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
2615     vmaDestroyBuffer(g_hAllocator, buf, alloc);
2616 
2617     // memoryTypeBits.
2618     const uint32_t memType = allocInfo.memoryType;
2619     allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
2620     allocCreateInfo.requiredFlags = 0;
2621     allocCreateInfo.preferredFlags = 0;
2622     allocCreateInfo.memoryTypeBits = 1u << memType;
2623 
2624     res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2625     TEST(res == VK_SUCCESS);
2626     TEST(allocInfo.memoryType == memType);
2627     vmaDestroyBuffer(g_hAllocator, buf, alloc);
2628 
2629 }
2630 
TestGetAllocatorInfo()2631 static void TestGetAllocatorInfo()
2632 {
2633     wprintf(L"Test vnaGetAllocatorInfo\n");
2634 
2635     VmaAllocatorInfo allocInfo = {};
2636     vmaGetAllocatorInfo(g_hAllocator, &allocInfo);
2637     TEST(allocInfo.instance == g_hVulkanInstance);
2638     TEST(allocInfo.physicalDevice == g_hPhysicalDevice);
2639     TEST(allocInfo.device == g_hDevice);
2640 }
2641 
TestBasics()2642 static void TestBasics()
2643 {
2644     VkResult res;
2645 
2646     TestGetAllocatorInfo();
2647 
2648     TestMemoryRequirements();
2649 
2650     // Lost allocation
2651     {
2652         VmaAllocation alloc = VK_NULL_HANDLE;
2653         vmaCreateLostAllocation(g_hAllocator, &alloc);
2654         TEST(alloc != VK_NULL_HANDLE);
2655 
2656         VmaAllocationInfo allocInfo;
2657         vmaGetAllocationInfo(g_hAllocator, alloc, &allocInfo);
2658         TEST(allocInfo.deviceMemory == VK_NULL_HANDLE);
2659         TEST(allocInfo.size == 0);
2660 
2661         vmaFreeMemory(g_hAllocator, alloc);
2662     }
2663 
2664     // Allocation that is MAPPED and not necessarily HOST_VISIBLE.
2665     {
2666         VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2667         bufCreateInfo.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
2668         bufCreateInfo.size = 128;
2669 
2670         VmaAllocationCreateInfo allocCreateInfo = {};
2671         allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
2672         allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
2673 
2674         VkBuffer buf; VmaAllocation alloc; VmaAllocationInfo allocInfo;
2675         res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2676         TEST(res == VK_SUCCESS);
2677 
2678         vmaDestroyBuffer(g_hAllocator, buf, alloc);
2679 
2680         // Same with OWN_MEMORY.
2681         allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
2682 
2683         res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2684         TEST(res == VK_SUCCESS);
2685 
2686         vmaDestroyBuffer(g_hAllocator, buf, alloc);
2687     }
2688 
2689     TestUserData();
2690 
2691     TestInvalidAllocations();
2692 }
2693 
TestPool_MinBlockCount()2694 static void TestPool_MinBlockCount()
2695 {
2696 #if defined(VMA_DEBUG_MARGIN) && VMA_DEBUG_MARGIN > 0
2697     return;
2698 #endif
2699 
2700     wprintf(L"Test Pool MinBlockCount\n");
2701     VkResult res;
2702 
2703     static const VkDeviceSize ALLOC_SIZE = 512ull * 1024;
2704     static const VkDeviceSize BLOCK_SIZE = ALLOC_SIZE * 2; // Each block can fit 2 allocations.
2705 
2706     VmaAllocationCreateInfo allocCreateInfo = {};
2707     allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_COPY;
2708 
2709     VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2710     bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
2711     bufCreateInfo.size = ALLOC_SIZE;
2712 
2713     VmaPoolCreateInfo poolCreateInfo = {};
2714     poolCreateInfo.blockSize = BLOCK_SIZE;
2715     poolCreateInfo.minBlockCount = 2; // At least 2 blocks always present.
2716     res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &poolCreateInfo.memoryTypeIndex);
2717     TEST(res == VK_SUCCESS);
2718 
2719     VmaPool pool = VK_NULL_HANDLE;
2720     res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
2721     TEST(res == VK_SUCCESS && pool != VK_NULL_HANDLE);
2722 
2723     // Check that there are 2 blocks preallocated as requested.
2724     VmaPoolStats begPoolStats = {};
2725     vmaGetPoolStats(g_hAllocator, pool, &begPoolStats);
2726     TEST(begPoolStats.blockCount == 2 && begPoolStats.allocationCount == 0 && begPoolStats.size == BLOCK_SIZE * 2);
2727 
2728     // Allocate 5 buffers to create 3 blocks.
2729     static const uint32_t BUF_COUNT = 5;
2730     allocCreateInfo.pool = pool;
2731     std::vector<AllocInfo> allocs(BUF_COUNT);
2732     for(uint32_t i = 0; i < BUF_COUNT; ++i)
2733     {
2734         res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &allocs[i].m_Buffer, &allocs[i].m_Allocation, nullptr);
2735         TEST(res == VK_SUCCESS && allocs[i].m_Buffer != VK_NULL_HANDLE && allocs[i].m_Allocation != VK_NULL_HANDLE);
2736     }
2737 
2738     // Check that there are really 3 blocks.
2739     VmaPoolStats poolStats2 = {};
2740     vmaGetPoolStats(g_hAllocator, pool, &poolStats2);
2741     TEST(poolStats2.blockCount == 3 && poolStats2.allocationCount == BUF_COUNT && poolStats2.size == BLOCK_SIZE * 3);
2742 
2743     // Free two first allocations to make one block empty.
2744     allocs[0].Destroy();
2745     allocs[1].Destroy();
2746 
2747     // Check that there are still 3 blocks due to hysteresis.
2748     VmaPoolStats poolStats3 = {};
2749     vmaGetPoolStats(g_hAllocator, pool, &poolStats3);
2750     TEST(poolStats3.blockCount == 3 && poolStats3.allocationCount == BUF_COUNT - 2 && poolStats2.size == BLOCK_SIZE * 3);
2751 
2752     // Free the last allocation to make second block empty.
2753     allocs[BUF_COUNT - 1].Destroy();
2754 
2755     // Check that there are now 2 blocks only.
2756     VmaPoolStats poolStats4 = {};
2757     vmaGetPoolStats(g_hAllocator, pool, &poolStats4);
2758     TEST(poolStats4.blockCount == 2 && poolStats4.allocationCount == BUF_COUNT - 3 && poolStats4.size == BLOCK_SIZE * 2);
2759 
2760     // Cleanup.
2761     for(size_t i = allocs.size(); i--; )
2762     {
2763         allocs[i].Destroy();
2764     }
2765     vmaDestroyPool(g_hAllocator, pool);
2766 }
2767 
TestHeapSizeLimit()2768 void TestHeapSizeLimit()
2769 {
2770     const VkDeviceSize HEAP_SIZE_LIMIT = 100ull * 1024 * 1024; // 100 MB
2771     const VkDeviceSize BLOCK_SIZE      =  10ull * 1024 * 1024; // 10 MB
2772 
2773     VkDeviceSize heapSizeLimit[VK_MAX_MEMORY_HEAPS];
2774     for(uint32_t i = 0; i < VK_MAX_MEMORY_HEAPS; ++i)
2775     {
2776         heapSizeLimit[i] = HEAP_SIZE_LIMIT;
2777     }
2778 
2779     VmaAllocatorCreateInfo allocatorCreateInfo = {};
2780     allocatorCreateInfo.physicalDevice = g_hPhysicalDevice;
2781     allocatorCreateInfo.device = g_hDevice;
2782     allocatorCreateInfo.instance = g_hVulkanInstance;
2783     allocatorCreateInfo.pHeapSizeLimit = heapSizeLimit;
2784 
2785     VmaAllocator hAllocator;
2786     VkResult res = vmaCreateAllocator(&allocatorCreateInfo, &hAllocator);
2787     TEST(res == VK_SUCCESS);
2788 
2789     struct Item
2790     {
2791         VkBuffer hBuf;
2792         VmaAllocation hAlloc;
2793     };
2794     std::vector<Item> items;
2795 
2796     VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2797     bufCreateInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
2798 
2799     // 1. Allocate two blocks of dedicated memory, half the size of BLOCK_SIZE.
2800     VmaAllocationInfo dedicatedAllocInfo;
2801     {
2802         VmaAllocationCreateInfo allocCreateInfo = {};
2803         allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
2804         allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
2805 
2806         bufCreateInfo.size = BLOCK_SIZE / 2;
2807 
2808         for(size_t i = 0; i < 2; ++i)
2809         {
2810             Item item;
2811             res = vmaCreateBuffer(hAllocator, &bufCreateInfo, &allocCreateInfo, &item.hBuf, &item.hAlloc, &dedicatedAllocInfo);
2812             TEST(res == VK_SUCCESS);
2813             items.push_back(item);
2814         }
2815     }
2816 
2817     // Create pool to make sure allocations must be out of this memory type.
2818     VmaPoolCreateInfo poolCreateInfo = {};
2819     poolCreateInfo.memoryTypeIndex = dedicatedAllocInfo.memoryType;
2820     poolCreateInfo.blockSize = BLOCK_SIZE;
2821 
2822     VmaPool hPool;
2823     res = vmaCreatePool(hAllocator, &poolCreateInfo, &hPool);
2824     TEST(res == VK_SUCCESS);
2825 
2826     // 2. Allocate normal buffers from all the remaining memory.
2827     {
2828         VmaAllocationCreateInfo allocCreateInfo = {};
2829         allocCreateInfo.pool = hPool;
2830 
2831         bufCreateInfo.size = BLOCK_SIZE / 2;
2832 
2833         const size_t bufCount = ((HEAP_SIZE_LIMIT / BLOCK_SIZE) - 1) * 2;
2834         for(size_t i = 0; i < bufCount; ++i)
2835         {
2836             Item item;
2837             res = vmaCreateBuffer(hAllocator, &bufCreateInfo, &allocCreateInfo, &item.hBuf, &item.hAlloc, nullptr);
2838             TEST(res == VK_SUCCESS);
2839             items.push_back(item);
2840         }
2841     }
2842 
2843     // 3. Allocation of one more (even small) buffer should fail.
2844     {
2845         VmaAllocationCreateInfo allocCreateInfo = {};
2846         allocCreateInfo.pool = hPool;
2847 
2848         bufCreateInfo.size = 128;
2849 
2850         VkBuffer hBuf;
2851         VmaAllocation hAlloc;
2852         res = vmaCreateBuffer(hAllocator, &bufCreateInfo, &allocCreateInfo, &hBuf, &hAlloc, nullptr);
2853         TEST(res == VK_ERROR_OUT_OF_DEVICE_MEMORY);
2854     }
2855 
2856     // Destroy everything.
2857     for(size_t i = items.size(); i--; )
2858     {
2859         vmaDestroyBuffer(hAllocator, items[i].hBuf, items[i].hAlloc);
2860     }
2861 
2862     vmaDestroyPool(hAllocator, hPool);
2863 
2864     vmaDestroyAllocator(hAllocator);
2865 }
2866 
2867 #if VMA_DEBUG_MARGIN
TestDebugMargin()2868 static void TestDebugMargin()
2869 {
2870     if(VMA_DEBUG_MARGIN == 0)
2871     {
2872         return;
2873     }
2874 
2875     VkBufferCreateInfo bufInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2876     bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
2877 
2878     VmaAllocationCreateInfo allocCreateInfo = {};
2879     allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
2880 
2881     // Create few buffers of different size.
2882     const size_t BUF_COUNT = 10;
2883     BufferInfo buffers[BUF_COUNT];
2884     VmaAllocationInfo allocInfo[BUF_COUNT];
2885     for(size_t i = 0; i < 10; ++i)
2886     {
2887         bufInfo.size = (VkDeviceSize)(i + 1) * 64;
2888         // Last one will be mapped.
2889         allocCreateInfo.flags = (i == BUF_COUNT - 1) ? VMA_ALLOCATION_CREATE_MAPPED_BIT : 0;
2890 
2891         VkResult res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo, &buffers[i].Buffer, &buffers[i].Allocation, &allocInfo[i]);
2892         TEST(res == VK_SUCCESS);
2893         // Margin is preserved also at the beginning of a block.
2894         TEST(allocInfo[i].offset >= VMA_DEBUG_MARGIN);
2895 
2896         if(i == BUF_COUNT - 1)
2897         {
2898             // Fill with data.
2899             TEST(allocInfo[i].pMappedData != nullptr);
2900             // Uncomment this "+ 1" to overwrite past end of allocation and check corruption detection.
2901             memset(allocInfo[i].pMappedData, 0xFF, bufInfo.size /* + 1 */);
2902         }
2903     }
2904 
2905     // Check if their offsets preserve margin between them.
2906     std::sort(allocInfo, allocInfo + BUF_COUNT, [](const VmaAllocationInfo& lhs, const VmaAllocationInfo& rhs) -> bool
2907     {
2908         if(lhs.deviceMemory != rhs.deviceMemory)
2909         {
2910             return lhs.deviceMemory < rhs.deviceMemory;
2911         }
2912         return lhs.offset < rhs.offset;
2913     });
2914     for(size_t i = 1; i < BUF_COUNT; ++i)
2915     {
2916         if(allocInfo[i].deviceMemory == allocInfo[i - 1].deviceMemory)
2917         {
2918             TEST(allocInfo[i].offset >= allocInfo[i - 1].offset + VMA_DEBUG_MARGIN);
2919         }
2920     }
2921 
2922     VkResult res = vmaCheckCorruption(g_hAllocator, UINT32_MAX);
2923     TEST(res == VK_SUCCESS);
2924 
2925     // Destroy all buffers.
2926     for(size_t i = BUF_COUNT; i--; )
2927     {
2928         vmaDestroyBuffer(g_hAllocator, buffers[i].Buffer, buffers[i].Allocation);
2929     }
2930 }
2931 #endif
2932 
TestLinearAllocator()2933 static void TestLinearAllocator()
2934 {
2935     wprintf(L"Test linear allocator\n");
2936 
2937     RandomNumberGenerator rand{645332};
2938 
2939     VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2940     sampleBufCreateInfo.size = 1024; // Whatever.
2941     sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
2942 
2943     VmaAllocationCreateInfo sampleAllocCreateInfo = {};
2944     sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
2945 
2946     VmaPoolCreateInfo poolCreateInfo = {};
2947     VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
2948     TEST(res == VK_SUCCESS);
2949 
2950     poolCreateInfo.blockSize = 1024 * 300;
2951     poolCreateInfo.flags = VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT;
2952     poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1;
2953 
2954     VmaPool pool = nullptr;
2955     res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
2956     TEST(res == VK_SUCCESS);
2957 
2958     VkBufferCreateInfo bufCreateInfo = sampleBufCreateInfo;
2959 
2960     VmaAllocationCreateInfo allocCreateInfo = {};
2961     allocCreateInfo.pool = pool;
2962 
2963     constexpr size_t maxBufCount = 100;
2964     std::vector<BufferInfo> bufInfo;
2965 
2966     constexpr VkDeviceSize bufSizeMin = 16;
2967     constexpr VkDeviceSize bufSizeMax = 1024;
2968     VmaAllocationInfo allocInfo;
2969     VkDeviceSize prevOffset = 0;
2970 
2971     // Test one-time free.
2972     for(size_t i = 0; i < 2; ++i)
2973     {
2974         // Allocate number of buffers of varying size that surely fit into this block.
2975         VkDeviceSize bufSumSize = 0;
2976         for(size_t i = 0; i < maxBufCount; ++i)
2977         {
2978 			bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
2979             BufferInfo newBufInfo;
2980             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
2981                 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
2982             TEST(res == VK_SUCCESS);
2983             TEST(i == 0 || allocInfo.offset > prevOffset);
2984             bufInfo.push_back(newBufInfo);
2985             prevOffset = allocInfo.offset;
2986             bufSumSize += bufCreateInfo.size;
2987         }
2988 
2989         // Validate pool stats.
2990         VmaPoolStats stats;
2991         vmaGetPoolStats(g_hAllocator, pool, &stats);
2992         TEST(stats.size == poolCreateInfo.blockSize);
2993         TEST(stats.unusedSize = poolCreateInfo.blockSize - bufSumSize);
2994         TEST(stats.allocationCount == bufInfo.size());
2995 
2996         // Destroy the buffers in random order.
2997         while(!bufInfo.empty())
2998         {
2999             const size_t indexToDestroy = rand.Generate() % bufInfo.size();
3000             const BufferInfo& currBufInfo = bufInfo[indexToDestroy];
3001             vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3002             bufInfo.erase(bufInfo.begin() + indexToDestroy);
3003         }
3004     }
3005 
3006     // Test stack.
3007     {
3008         // Allocate number of buffers of varying size that surely fit into this block.
3009         for(size_t i = 0; i < maxBufCount; ++i)
3010         {
3011             bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3012             BufferInfo newBufInfo;
3013             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3014                 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3015             TEST(res == VK_SUCCESS);
3016             TEST(i == 0 || allocInfo.offset > prevOffset);
3017             bufInfo.push_back(newBufInfo);
3018             prevOffset = allocInfo.offset;
3019         }
3020 
3021         // Destroy few buffers from top of the stack.
3022         for(size_t i = 0; i < maxBufCount / 5; ++i)
3023         {
3024             const BufferInfo& currBufInfo = bufInfo.back();
3025             vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3026             bufInfo.pop_back();
3027         }
3028 
3029         // Create some more
3030         for(size_t i = 0; i < maxBufCount / 5; ++i)
3031         {
3032             bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3033             BufferInfo newBufInfo;
3034             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3035                 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3036             TEST(res == VK_SUCCESS);
3037             TEST(i == 0 || allocInfo.offset > prevOffset);
3038             bufInfo.push_back(newBufInfo);
3039             prevOffset = allocInfo.offset;
3040         }
3041 
3042         // Destroy the buffers in reverse order.
3043         while(!bufInfo.empty())
3044         {
3045             const BufferInfo& currBufInfo = bufInfo.back();
3046             vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3047             bufInfo.pop_back();
3048         }
3049     }
3050 
3051     // Test ring buffer.
3052     {
3053         // Allocate number of buffers that surely fit into this block.
3054         bufCreateInfo.size = bufSizeMax;
3055         for(size_t i = 0; i < maxBufCount; ++i)
3056         {
3057             BufferInfo newBufInfo;
3058             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3059                 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3060             TEST(res == VK_SUCCESS);
3061             TEST(i == 0 || allocInfo.offset > prevOffset);
3062             bufInfo.push_back(newBufInfo);
3063             prevOffset = allocInfo.offset;
3064         }
3065 
3066         // Free and allocate new buffers so many times that we make sure we wrap-around at least once.
3067         const size_t buffersPerIter = maxBufCount / 10 - 1;
3068         const size_t iterCount = poolCreateInfo.blockSize / bufCreateInfo.size / buffersPerIter * 2;
3069         for(size_t iter = 0; iter < iterCount; ++iter)
3070         {
3071             for(size_t bufPerIter = 0; bufPerIter < buffersPerIter; ++bufPerIter)
3072             {
3073                 const BufferInfo& currBufInfo = bufInfo.front();
3074                 vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3075                 bufInfo.erase(bufInfo.begin());
3076             }
3077             for(size_t bufPerIter = 0; bufPerIter < buffersPerIter; ++bufPerIter)
3078             {
3079                 BufferInfo newBufInfo;
3080                 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3081                     &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3082                 TEST(res == VK_SUCCESS);
3083                 bufInfo.push_back(newBufInfo);
3084             }
3085         }
3086 
3087         // Allocate buffers until we reach out-of-memory.
3088         uint32_t debugIndex = 0;
3089         while(res == VK_SUCCESS)
3090         {
3091             BufferInfo newBufInfo;
3092             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3093                 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3094             if(res == VK_SUCCESS)
3095             {
3096                 bufInfo.push_back(newBufInfo);
3097             }
3098             else
3099             {
3100                 TEST(res == VK_ERROR_OUT_OF_DEVICE_MEMORY);
3101             }
3102             ++debugIndex;
3103         }
3104 
3105         // Destroy the buffers in random order.
3106         while(!bufInfo.empty())
3107         {
3108             const size_t indexToDestroy = rand.Generate() % bufInfo.size();
3109             const BufferInfo& currBufInfo = bufInfo[indexToDestroy];
3110             vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3111             bufInfo.erase(bufInfo.begin() + indexToDestroy);
3112         }
3113     }
3114 
3115     // Test double stack.
3116     {
3117         // Allocate number of buffers of varying size that surely fit into this block, alternate from bottom/top.
3118         VkDeviceSize prevOffsetLower = 0;
3119         VkDeviceSize prevOffsetUpper = poolCreateInfo.blockSize;
3120         for(size_t i = 0; i < maxBufCount; ++i)
3121         {
3122             const bool upperAddress = (i % 2) != 0;
3123             if(upperAddress)
3124                 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3125             else
3126                 allocCreateInfo.flags &= ~VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3127             bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3128             BufferInfo newBufInfo;
3129             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3130                 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3131             TEST(res == VK_SUCCESS);
3132             if(upperAddress)
3133             {
3134                 TEST(allocInfo.offset < prevOffsetUpper);
3135                 prevOffsetUpper = allocInfo.offset;
3136             }
3137             else
3138             {
3139                 TEST(allocInfo.offset >= prevOffsetLower);
3140                 prevOffsetLower = allocInfo.offset;
3141             }
3142             TEST(prevOffsetLower < prevOffsetUpper);
3143             bufInfo.push_back(newBufInfo);
3144         }
3145 
3146         // Destroy few buffers from top of the stack.
3147         for(size_t i = 0; i < maxBufCount / 5; ++i)
3148         {
3149             const BufferInfo& currBufInfo = bufInfo.back();
3150             vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3151             bufInfo.pop_back();
3152         }
3153 
3154         // Create some more
3155         for(size_t i = 0; i < maxBufCount / 5; ++i)
3156         {
3157             const bool upperAddress = (i % 2) != 0;
3158             if(upperAddress)
3159                 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3160             else
3161                 allocCreateInfo.flags &= ~VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3162             bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3163             BufferInfo newBufInfo;
3164             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3165                 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3166             TEST(res == VK_SUCCESS);
3167             bufInfo.push_back(newBufInfo);
3168         }
3169 
3170         // Destroy the buffers in reverse order.
3171         while(!bufInfo.empty())
3172         {
3173             const BufferInfo& currBufInfo = bufInfo.back();
3174             vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3175             bufInfo.pop_back();
3176         }
3177 
3178         // Create buffers on both sides until we reach out of memory.
3179         prevOffsetLower = 0;
3180         prevOffsetUpper = poolCreateInfo.blockSize;
3181         res = VK_SUCCESS;
3182         for(size_t i = 0; res == VK_SUCCESS; ++i)
3183         {
3184             const bool upperAddress = (i % 2) != 0;
3185             if(upperAddress)
3186                 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3187             else
3188                 allocCreateInfo.flags &= ~VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3189             bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3190             BufferInfo newBufInfo;
3191             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3192                 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3193             if(res == VK_SUCCESS)
3194             {
3195                 if(upperAddress)
3196                 {
3197                     TEST(allocInfo.offset < prevOffsetUpper);
3198                     prevOffsetUpper = allocInfo.offset;
3199                 }
3200                 else
3201                 {
3202                     TEST(allocInfo.offset >= prevOffsetLower);
3203                     prevOffsetLower = allocInfo.offset;
3204                 }
3205                 TEST(prevOffsetLower < prevOffsetUpper);
3206                 bufInfo.push_back(newBufInfo);
3207             }
3208         }
3209 
3210         // Destroy the buffers in random order.
3211         while(!bufInfo.empty())
3212         {
3213             const size_t indexToDestroy = rand.Generate() % bufInfo.size();
3214             const BufferInfo& currBufInfo = bufInfo[indexToDestroy];
3215             vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3216             bufInfo.erase(bufInfo.begin() + indexToDestroy);
3217         }
3218 
3219         // Create buffers on upper side only, constant size, until we reach out of memory.
3220         prevOffsetUpper = poolCreateInfo.blockSize;
3221         res = VK_SUCCESS;
3222         allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3223         bufCreateInfo.size = bufSizeMax;
3224         for(size_t i = 0; res == VK_SUCCESS; ++i)
3225         {
3226             BufferInfo newBufInfo;
3227             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3228                 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3229             if(res == VK_SUCCESS)
3230             {
3231                 TEST(allocInfo.offset < prevOffsetUpper);
3232                 prevOffsetUpper = allocInfo.offset;
3233                 bufInfo.push_back(newBufInfo);
3234             }
3235         }
3236 
3237         // Destroy the buffers in reverse order.
3238         while(!bufInfo.empty())
3239         {
3240             const BufferInfo& currBufInfo = bufInfo.back();
3241             vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3242             bufInfo.pop_back();
3243         }
3244     }
3245 
3246     // Test ring buffer with lost allocations.
3247     {
3248         // Allocate number of buffers until pool is full.
3249         // Notice CAN_BECOME_LOST flag and call to vmaSetCurrentFrameIndex.
3250         allocCreateInfo.flags = VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT;
3251         res = VK_SUCCESS;
3252         for(size_t i = 0; res == VK_SUCCESS; ++i)
3253         {
3254             vmaSetCurrentFrameIndex(g_hAllocator, ++g_FrameIndex);
3255 
3256             bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3257 
3258             BufferInfo newBufInfo;
3259             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3260                 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3261             if(res == VK_SUCCESS)
3262                 bufInfo.push_back(newBufInfo);
3263         }
3264 
3265         // Free first half of it.
3266         {
3267             const size_t buffersToDelete = bufInfo.size() / 2;
3268             for(size_t i = 0; i < buffersToDelete; ++i)
3269             {
3270                 vmaDestroyBuffer(g_hAllocator, bufInfo[i].Buffer, bufInfo[i].Allocation);
3271             }
3272             bufInfo.erase(bufInfo.begin(), bufInfo.begin() + buffersToDelete);
3273         }
3274 
3275         // Allocate number of buffers until pool is full again.
        // This way we make sure the ring buffer wraps around, so the front is in the middle.
3277         res = VK_SUCCESS;
3278         for(size_t i = 0; res == VK_SUCCESS; ++i)
3279         {
3280             vmaSetCurrentFrameIndex(g_hAllocator, ++g_FrameIndex);
3281 
3282             bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3283 
3284             BufferInfo newBufInfo;
3285             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3286                 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3287             if(res == VK_SUCCESS)
3288                 bufInfo.push_back(newBufInfo);
3289         }
3290 
3291         VkDeviceSize firstNewOffset;
3292         {
3293             vmaSetCurrentFrameIndex(g_hAllocator, ++g_FrameIndex);
3294 
3295             // Allocate a large buffer with CAN_MAKE_OTHER_LOST.
3296             allocCreateInfo.flags = VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT;
3297             bufCreateInfo.size = bufSizeMax;
3298 
3299             BufferInfo newBufInfo;
3300             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3301                 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3302             TEST(res == VK_SUCCESS);
3303             bufInfo.push_back(newBufInfo);
3304             firstNewOffset = allocInfo.offset;
3305 
3306             // Make sure at least one buffer from the beginning became lost.
3307             vmaGetAllocationInfo(g_hAllocator, bufInfo[0].Allocation, &allocInfo);
3308             TEST(allocInfo.deviceMemory == VK_NULL_HANDLE);
3309         }
3310 
3311 #if 0 // TODO Fix and uncomment. Failing on Intel.
3312         // Allocate more buffers that CAN_MAKE_OTHER_LOST until we wrap-around with this.
3313         size_t newCount = 1;
3314         for(;;)
3315         {
3316             vmaSetCurrentFrameIndex(g_hAllocator, ++g_FrameIndex);
3317 
3318             bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3319 
3320             BufferInfo newBufInfo;
3321             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3322                 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3323 
3324             TEST(res == VK_SUCCESS);
3325             bufInfo.push_back(newBufInfo);
3326             ++newCount;
3327             if(allocInfo.offset < firstNewOffset)
3328                 break;
3329         }
3330 #endif
3331 
3332         // Delete buffers that are lost.
3333         for(size_t i = bufInfo.size(); i--; )
3334         {
3335             vmaGetAllocationInfo(g_hAllocator, bufInfo[i].Allocation, &allocInfo);
3336             if(allocInfo.deviceMemory == VK_NULL_HANDLE)
3337             {
3338                 vmaDestroyBuffer(g_hAllocator, bufInfo[i].Buffer, bufInfo[i].Allocation);
3339                 bufInfo.erase(bufInfo.begin() + i);
3340             }
3341         }
3342 
3343         // Test vmaMakePoolAllocationsLost
3344         {
3345             vmaSetCurrentFrameIndex(g_hAllocator, ++g_FrameIndex);
3346 
3347             size_t lostAllocCount = 0;
3348             vmaMakePoolAllocationsLost(g_hAllocator, pool, &lostAllocCount);
3349             TEST(lostAllocCount > 0);
3350 
3351             size_t realLostAllocCount = 0;
3352             for(size_t i = 0; i < bufInfo.size(); ++i)
3353             {
3354                 vmaGetAllocationInfo(g_hAllocator, bufInfo[i].Allocation, &allocInfo);
3355                 if(allocInfo.deviceMemory == VK_NULL_HANDLE)
3356                     ++realLostAllocCount;
3357             }
3358             TEST(realLostAllocCount == lostAllocCount);
3359         }
3360 
3361         // Destroy all the buffers in forward order.
3362         for(size_t i = 0; i < bufInfo.size(); ++i)
3363             vmaDestroyBuffer(g_hAllocator, bufInfo[i].Buffer, bufInfo[i].Allocation);
3364         bufInfo.clear();
3365     }
3366 
3367     vmaDestroyPool(g_hAllocator, pool);
3368 }
3369 
// Tests a pool with VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT that is allowed to
// grow to more than one VkDeviceMemory block (no blockSize/maxBlockCount set).
// Verifies that a second block appears when the first fills up, and that empty
// blocks are released again after their buffers are destroyed.
static void TestLinearAllocatorMultiBlock()
{
    wprintf(L"Test linear allocator multi block\n");

    RandomNumberGenerator rand{345673};

    // Sample buffer used only to query a suitable memory type for the pool.
    VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
    sampleBufCreateInfo.size = 1024 * 1024;
    sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;

    VmaAllocationCreateInfo sampleAllocCreateInfo = {};
    sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;

    VmaPoolCreateInfo poolCreateInfo = {};
    poolCreateInfo.flags = VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT;
    VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
    TEST(res == VK_SUCCESS);

    VmaPool pool = nullptr;
    res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
    TEST(res == VK_SUCCESS);

    VkBufferCreateInfo bufCreateInfo = sampleBufCreateInfo;

    VmaAllocationCreateInfo allocCreateInfo = {};
    allocCreateInfo.pool = pool;

    std::vector<BufferInfo> bufInfo;
    VmaAllocationInfo allocInfo;

    // Test one-time free.
    {
        // Allocate buffers until we move to a second block.
        // Crossing into a new block is detected by a change of allocInfo.deviceMemory.
        VkDeviceMemory lastMem = VK_NULL_HANDLE;
        for(uint32_t i = 0; ; ++i)
        {
            BufferInfo newBufInfo;
            res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
                &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
            TEST(res == VK_SUCCESS);
            bufInfo.push_back(newBufInfo);
            if(lastMem && allocInfo.deviceMemory != lastMem)
            {
                break;
            }
            lastMem = allocInfo.deviceMemory;
        }

        TEST(bufInfo.size() > 2);

        // Make sure that pool has now two blocks.
        VmaPoolStats poolStats = {};
        vmaGetPoolStats(g_hAllocator, pool, &poolStats);
        TEST(poolStats.blockCount == 2);

        // Destroy all the buffers in random order.
        while(!bufInfo.empty())
        {
            const size_t indexToDestroy = rand.Generate() % bufInfo.size();
            const BufferInfo& currBufInfo = bufInfo[indexToDestroy];
            vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
            bufInfo.erase(bufInfo.begin() + indexToDestroy);
        }

        // Make sure that pool has now at most one block.
        // Blocks that became empty are expected to have been freed.
        vmaGetPoolStats(g_hAllocator, pool, &poolStats);
        TEST(poolStats.blockCount <= 1);
    }

    // Test stack.
    {
        // Allocate buffers until we move to a second block.
        VkDeviceMemory lastMem = VK_NULL_HANDLE;
        for(uint32_t i = 0; ; ++i)
        {
            BufferInfo newBufInfo;
            res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
                &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
            TEST(res == VK_SUCCESS);
            bufInfo.push_back(newBufInfo);
            if(lastMem && allocInfo.deviceMemory != lastMem)
            {
                break;
            }
            lastMem = allocInfo.deviceMemory;
        }

        TEST(bufInfo.size() > 2);

        // Add few more buffers.
        for(uint32_t i = 0; i < 5; ++i)
        {
            BufferInfo newBufInfo;
            res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
                &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
            TEST(res == VK_SUCCESS);
            bufInfo.push_back(newBufInfo);
        }

        // Make sure that pool has now two blocks.
        VmaPoolStats poolStats = {};
        vmaGetPoolStats(g_hAllocator, pool, &poolStats);
        TEST(poolStats.blockCount == 2);

        // Delete half of buffers, LIFO.
        for(size_t i = 0, countToDelete = bufInfo.size() / 2; i < countToDelete; ++i)
        {
            const BufferInfo& currBufInfo = bufInfo.back();
            vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
            bufInfo.pop_back();
        }

        // Add one more buffer.
        BufferInfo newBufInfo;
        res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
            &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
        TEST(res == VK_SUCCESS);
        bufInfo.push_back(newBufInfo);

        // Make sure that pool has now one block.
        // The LIFO deletions above emptied the second block; after this
        // allocation only one block should remain.
        vmaGetPoolStats(g_hAllocator, pool, &poolStats);
        TEST(poolStats.blockCount == 1);

        // Delete all the remaining buffers, LIFO.
        while(!bufInfo.empty())
        {
            const BufferInfo& currBufInfo = bufInfo.back();
            vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
            bufInfo.pop_back();
        }
    }

    vmaDestroyPool(g_hAllocator, pool);
}
3504 
ManuallyTestLinearAllocator()3505 static void ManuallyTestLinearAllocator()
3506 {
3507     VmaStats origStats;
3508     vmaCalculateStats(g_hAllocator, &origStats);
3509 
3510     wprintf(L"Manually test linear allocator\n");
3511 
3512     RandomNumberGenerator rand{645332};
3513 
3514     VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
3515     sampleBufCreateInfo.size = 1024; // Whatever.
3516     sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
3517 
3518     VmaAllocationCreateInfo sampleAllocCreateInfo = {};
3519     sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
3520 
3521     VmaPoolCreateInfo poolCreateInfo = {};
3522     VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
3523     TEST(res == VK_SUCCESS);
3524 
3525     poolCreateInfo.blockSize = 10 * 1024;
3526     poolCreateInfo.flags = VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT;
3527     poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1;
3528 
3529     VmaPool pool = nullptr;
3530     res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
3531     TEST(res == VK_SUCCESS);
3532 
3533     VkBufferCreateInfo bufCreateInfo = sampleBufCreateInfo;
3534 
3535     VmaAllocationCreateInfo allocCreateInfo = {};
3536     allocCreateInfo.pool = pool;
3537 
3538     std::vector<BufferInfo> bufInfo;
3539     VmaAllocationInfo allocInfo;
3540     BufferInfo newBufInfo;
3541 
3542     // Test double stack.
3543     {
3544         /*
3545         Lower: Buffer 32 B, Buffer 1024 B, Buffer 32 B
3546         Upper: Buffer 16 B, Buffer 1024 B, Buffer 128 B
3547 
3548         Totally:
3549         1 block allocated
3550         10240 Vulkan bytes
3551         6 new allocations
3552         2256 bytes in allocations
3553         */
3554 
3555         bufCreateInfo.size = 32;
3556         res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3557             &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3558         TEST(res == VK_SUCCESS);
3559         bufInfo.push_back(newBufInfo);
3560 
3561         bufCreateInfo.size = 1024;
3562         res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3563             &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3564         TEST(res == VK_SUCCESS);
3565         bufInfo.push_back(newBufInfo);
3566 
3567         bufCreateInfo.size = 32;
3568         res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3569             &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3570         TEST(res == VK_SUCCESS);
3571         bufInfo.push_back(newBufInfo);
3572 
3573         allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3574 
3575         bufCreateInfo.size = 128;
3576         res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3577             &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3578         TEST(res == VK_SUCCESS);
3579         bufInfo.push_back(newBufInfo);
3580 
3581         bufCreateInfo.size = 1024;
3582         res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3583             &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3584         TEST(res == VK_SUCCESS);
3585         bufInfo.push_back(newBufInfo);
3586 
3587         bufCreateInfo.size = 16;
3588         res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3589             &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3590         TEST(res == VK_SUCCESS);
3591         bufInfo.push_back(newBufInfo);
3592 
3593         VmaStats currStats;
3594         vmaCalculateStats(g_hAllocator, &currStats);
3595         VmaPoolStats poolStats;
3596         vmaGetPoolStats(g_hAllocator, pool, &poolStats);
3597 
3598         char* statsStr = nullptr;
3599         vmaBuildStatsString(g_hAllocator, &statsStr, VK_TRUE);
3600 
3601         // PUT BREAKPOINT HERE TO CHECK.
3602         // Inspect: currStats versus origStats, poolStats, statsStr.
3603         int I = 0;
3604 
3605         vmaFreeStatsString(g_hAllocator, statsStr);
3606 
3607         // Destroy the buffers in reverse order.
3608         while(!bufInfo.empty())
3609         {
3610             const BufferInfo& currBufInfo = bufInfo.back();
3611             vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3612             bufInfo.pop_back();
3613         }
3614     }
3615 
3616     vmaDestroyPool(g_hAllocator, pool);
3617 }
3618 
BenchmarkAlgorithmsCase(FILE * file,uint32_t algorithm,bool empty,VmaAllocationCreateFlags allocStrategy,FREE_ORDER freeOrder)3619 static void BenchmarkAlgorithmsCase(FILE* file,
3620     uint32_t algorithm,
3621     bool empty,
3622     VmaAllocationCreateFlags allocStrategy,
3623     FREE_ORDER freeOrder)
3624 {
3625     RandomNumberGenerator rand{16223};
3626 
3627     const VkDeviceSize bufSizeMin = 32;
3628     const VkDeviceSize bufSizeMax = 1024;
3629     const size_t maxBufCapacity = 10000;
3630     const uint32_t iterationCount = 10;
3631 
3632     VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
3633     sampleBufCreateInfo.size = bufSizeMax;
3634     sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
3635 
3636     VmaAllocationCreateInfo sampleAllocCreateInfo = {};
3637     sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
3638 
3639     VmaPoolCreateInfo poolCreateInfo = {};
3640     VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
3641     TEST(res == VK_SUCCESS);
3642 
3643     poolCreateInfo.blockSize = bufSizeMax * maxBufCapacity;
3644     poolCreateInfo.flags |= algorithm;
3645     poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1;
3646 
3647     VmaPool pool = nullptr;
3648     res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
3649     TEST(res == VK_SUCCESS);
3650 
3651     // Buffer created just to get memory requirements. Never bound to any memory.
3652     VkBuffer dummyBuffer = VK_NULL_HANDLE;
3653     res = vkCreateBuffer(g_hDevice, &sampleBufCreateInfo, g_Allocs, &dummyBuffer);
3654     TEST(res == VK_SUCCESS && dummyBuffer);
3655 
3656     VkMemoryRequirements memReq = {};
3657     vkGetBufferMemoryRequirements(g_hDevice, dummyBuffer, &memReq);
3658 
3659     vkDestroyBuffer(g_hDevice, dummyBuffer, g_Allocs);
3660 
3661     VmaAllocationCreateInfo allocCreateInfo = {};
3662     allocCreateInfo.pool = pool;
3663     allocCreateInfo.flags = allocStrategy;
3664 
3665     VmaAllocation alloc;
3666     std::vector<VmaAllocation> baseAllocations;
3667 
3668     if(!empty)
3669     {
3670         // Make allocations up to 1/3 of pool size.
3671         VkDeviceSize totalSize = 0;
3672         while(totalSize < poolCreateInfo.blockSize / 3)
3673         {
3674             // This test intentionally allows sizes that are aligned to 4 or 16 bytes.
3675             // This is theoretically allowed and already uncovered one bug.
3676             memReq.size = bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin);
3677             res = vmaAllocateMemory(g_hAllocator, &memReq, &allocCreateInfo, &alloc, nullptr);
3678             TEST(res == VK_SUCCESS);
3679             baseAllocations.push_back(alloc);
3680             totalSize += memReq.size;
3681         }
3682 
3683         // Delete half of them, choose randomly.
3684         size_t allocsToDelete = baseAllocations.size() / 2;
3685         for(size_t i = 0; i < allocsToDelete; ++i)
3686         {
3687             const size_t index = (size_t)rand.Generate() % baseAllocations.size();
3688             vmaFreeMemory(g_hAllocator, baseAllocations[index]);
3689             baseAllocations.erase(baseAllocations.begin() + index);
3690         }
3691     }
3692 
3693     // BENCHMARK
3694     const size_t allocCount = maxBufCapacity / 3;
3695     std::vector<VmaAllocation> testAllocations;
3696     testAllocations.reserve(allocCount);
3697     duration allocTotalDuration = duration::zero();
3698     duration freeTotalDuration = duration::zero();
3699     for(uint32_t iterationIndex = 0; iterationIndex < iterationCount; ++iterationIndex)
3700     {
3701         // Allocations
3702         time_point allocTimeBeg = std::chrono::high_resolution_clock::now();
3703         for(size_t i = 0; i < allocCount; ++i)
3704         {
3705             memReq.size = bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin);
3706             res = vmaAllocateMemory(g_hAllocator, &memReq, &allocCreateInfo, &alloc, nullptr);
3707             TEST(res == VK_SUCCESS);
3708             testAllocations.push_back(alloc);
3709         }
3710         allocTotalDuration += std::chrono::high_resolution_clock::now() - allocTimeBeg;
3711 
3712         // Deallocations
3713         switch(freeOrder)
3714         {
3715         case FREE_ORDER::FORWARD:
3716             // Leave testAllocations unchanged.
3717             break;
3718         case FREE_ORDER::BACKWARD:
3719             std::reverse(testAllocations.begin(), testAllocations.end());
3720             break;
3721         case FREE_ORDER::RANDOM:
3722             std::shuffle(testAllocations.begin(), testAllocations.end(), MyUniformRandomNumberGenerator(rand));
3723             break;
3724         default: assert(0);
3725         }
3726 
3727         time_point freeTimeBeg = std::chrono::high_resolution_clock::now();
3728         for(size_t i = 0; i < allocCount; ++i)
3729             vmaFreeMemory(g_hAllocator, testAllocations[i]);
3730         freeTotalDuration += std::chrono::high_resolution_clock::now() - freeTimeBeg;
3731 
3732         testAllocations.clear();
3733     }
3734 
3735     // Delete baseAllocations
3736     while(!baseAllocations.empty())
3737     {
3738         vmaFreeMemory(g_hAllocator, baseAllocations.back());
3739         baseAllocations.pop_back();
3740     }
3741 
3742     vmaDestroyPool(g_hAllocator, pool);
3743 
3744     const float allocTotalSeconds = ToFloatSeconds(allocTotalDuration);
3745     const float freeTotalSeconds  = ToFloatSeconds(freeTotalDuration);
3746 
3747     printf("    Algorithm=%s %s Allocation=%s FreeOrder=%s: allocations %g s, free %g s\n",
3748         AlgorithmToStr(algorithm),
3749         empty ? "Empty" : "Not empty",
3750         GetAllocationStrategyName(allocStrategy),
3751         FREE_ORDER_NAMES[(size_t)freeOrder],
3752         allocTotalSeconds,
3753         freeTotalSeconds);
3754 
3755     if(file)
3756     {
3757         std::string currTime;
3758         CurrentTimeToStr(currTime);
3759 
3760         fprintf(file, "%s,%s,%s,%u,%s,%s,%g,%g\n",
3761             CODE_DESCRIPTION, currTime.c_str(),
3762             AlgorithmToStr(algorithm),
3763             empty ? 1 : 0,
3764             GetAllocationStrategyName(allocStrategy),
3765             FREE_ORDER_NAMES[(uint32_t)freeOrder],
3766             allocTotalSeconds,
3767             freeTotalSeconds);
3768     }
3769 }
3770 
TestBufferDeviceAddress()3771 static void TestBufferDeviceAddress()
3772 {
3773     wprintf(L"Test buffer device address\n");
3774 
3775     assert(g_BufferDeviceAddressEnabled);
3776 
3777     VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
3778     bufCreateInfo.size = 0x10000;
3779     bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
3780         VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; // !!!
3781 
3782     VmaAllocationCreateInfo allocCreateInfo = {};
3783     allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
3784 
3785     for(uint32_t testIndex = 0; testIndex < 2; ++testIndex)
3786     {
3787         // 1st is placed, 2nd is dedicated.
3788         if(testIndex == 1)
3789             allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
3790 
3791         BufferInfo bufInfo = {};
3792         VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3793             &bufInfo.Buffer, &bufInfo.Allocation, nullptr);
3794         TEST(res == VK_SUCCESS);
3795 
3796         VkBufferDeviceAddressInfoEXT bufferDeviceAddressInfo = { VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_EXT };
3797         bufferDeviceAddressInfo.buffer = bufInfo.Buffer;
3798         //assert(g_vkGetBufferDeviceAddressEXT != nullptr);
3799         if(g_vkGetBufferDeviceAddressEXT != nullptr)
3800         {
3801             VkDeviceAddress addr = g_vkGetBufferDeviceAddressEXT(g_hDevice, &bufferDeviceAddressInfo);
3802             TEST(addr != 0);
3803         }
3804 
3805         vmaDestroyBuffer(g_hAllocator, bufInfo.Buffer, bufInfo.Allocation);
3806     }
3807 }
3808 
BenchmarkAlgorithms(FILE * file)3809 static void BenchmarkAlgorithms(FILE* file)
3810 {
3811     wprintf(L"Benchmark algorithms\n");
3812 
3813     if(file)
3814     {
3815         fprintf(file,
3816             "Code,Time,"
3817             "Algorithm,Empty,Allocation strategy,Free order,"
3818             "Allocation time (s),Deallocation time (s)\n");
3819     }
3820 
3821     uint32_t freeOrderCount = 1;
3822     if(ConfigType >= CONFIG_TYPE::CONFIG_TYPE_LARGE)
3823         freeOrderCount = 3;
3824     else if(ConfigType >= CONFIG_TYPE::CONFIG_TYPE_SMALL)
3825         freeOrderCount = 2;
3826 
3827     const uint32_t emptyCount = ConfigType >= CONFIG_TYPE::CONFIG_TYPE_SMALL ? 2 : 1;
3828     const uint32_t allocStrategyCount = GetAllocationStrategyCount();
3829 
3830     for(uint32_t freeOrderIndex = 0; freeOrderIndex < freeOrderCount; ++freeOrderIndex)
3831     {
3832         FREE_ORDER freeOrder = FREE_ORDER::COUNT;
3833         switch(freeOrderIndex)
3834         {
3835         case 0: freeOrder = FREE_ORDER::BACKWARD; break;
3836         case 1: freeOrder = FREE_ORDER::FORWARD; break;
3837         case 2: freeOrder = FREE_ORDER::RANDOM; break;
3838         default: assert(0);
3839         }
3840 
3841         for(uint32_t emptyIndex = 0; emptyIndex < emptyCount; ++emptyIndex)
3842         {
3843             for(uint32_t algorithmIndex = 0; algorithmIndex < 3; ++algorithmIndex)
3844             {
3845                 uint32_t algorithm = 0;
3846                 switch(algorithmIndex)
3847                 {
3848                 case 0:
3849                     break;
3850                 case 1:
3851                     algorithm = VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT;
3852                     break;
3853                 case 2:
3854                     algorithm = VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT;
3855                     break;
3856                 default:
3857                     assert(0);
3858                 }
3859 
3860                 uint32_t currAllocStrategyCount = algorithm != 0 ? 1 : allocStrategyCount;
3861                 for(uint32_t allocStrategyIndex = 0; allocStrategyIndex < currAllocStrategyCount; ++allocStrategyIndex)
3862                 {
3863                     VmaAllocatorCreateFlags strategy = 0;
3864                     if(currAllocStrategyCount > 1)
3865                     {
3866                         switch(allocStrategyIndex)
3867                         {
3868                         case 0: strategy = VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT; break;
3869                         case 1: strategy = VMA_ALLOCATION_CREATE_STRATEGY_WORST_FIT_BIT; break;
3870                         case 2: strategy = VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT; break;
3871                         default: assert(0);
3872                         }
3873                     }
3874 
3875                     BenchmarkAlgorithmsCase(
3876                         file,
3877                         algorithm,
3878                         (emptyIndex == 0), // empty
3879                         strategy,
3880                         freeOrder); // freeOrder
3881                 }
3882             }
3883         }
3884     }
3885 }
3886 
TestPool_SameSize()3887 static void TestPool_SameSize()
3888 {
3889     const VkDeviceSize BUF_SIZE = 1024 * 1024;
3890     const size_t BUF_COUNT = 100;
3891     VkResult res;
3892 
3893     RandomNumberGenerator rand{123};
3894 
3895     VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
3896     bufferInfo.size = BUF_SIZE;
3897     bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
3898 
3899     uint32_t memoryTypeBits = UINT32_MAX;
3900     {
3901         VkBuffer dummyBuffer;
3902         res = vkCreateBuffer(g_hDevice, &bufferInfo, g_Allocs, &dummyBuffer);
3903         TEST(res == VK_SUCCESS);
3904 
3905         VkMemoryRequirements memReq;
3906         vkGetBufferMemoryRequirements(g_hDevice, dummyBuffer, &memReq);
3907         memoryTypeBits = memReq.memoryTypeBits;
3908 
3909         vkDestroyBuffer(g_hDevice, dummyBuffer, g_Allocs);
3910     }
3911 
3912     VmaAllocationCreateInfo poolAllocInfo = {};
3913     poolAllocInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
3914     uint32_t memTypeIndex;
3915     res = vmaFindMemoryTypeIndex(
3916         g_hAllocator,
3917         memoryTypeBits,
3918         &poolAllocInfo,
3919         &memTypeIndex);
3920 
3921     VmaPoolCreateInfo poolCreateInfo = {};
3922     poolCreateInfo.memoryTypeIndex = memTypeIndex;
3923     poolCreateInfo.blockSize = BUF_SIZE * BUF_COUNT / 4;
3924     poolCreateInfo.minBlockCount = 1;
3925     poolCreateInfo.maxBlockCount = 4;
3926     poolCreateInfo.frameInUseCount = 0;
3927 
3928     VmaPool pool;
3929     res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
3930     TEST(res == VK_SUCCESS);
3931 
3932     // Test pool name
3933     {
3934         static const char* const POOL_NAME = "Pool name";
3935         vmaSetPoolName(g_hAllocator, pool, POOL_NAME);
3936 
3937         const char* fetchedPoolName = nullptr;
3938         vmaGetPoolName(g_hAllocator, pool, &fetchedPoolName);
3939         TEST(strcmp(fetchedPoolName, POOL_NAME) == 0);
3940 
3941         vmaSetPoolName(g_hAllocator, pool, nullptr);
3942     }
3943 
3944     vmaSetCurrentFrameIndex(g_hAllocator, 1);
3945 
3946     VmaAllocationCreateInfo allocInfo = {};
3947     allocInfo.pool = pool;
3948     allocInfo.flags = VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT |
3949         VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT;
3950 
3951     struct BufItem
3952     {
3953         VkBuffer Buf;
3954         VmaAllocation Alloc;
3955     };
3956     std::vector<BufItem> items;
3957 
3958     // Fill entire pool.
3959     for(size_t i = 0; i < BUF_COUNT; ++i)
3960     {
3961         BufItem item;
3962         res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocInfo, &item.Buf, &item.Alloc, nullptr);
3963         TEST(res == VK_SUCCESS);
3964         items.push_back(item);
3965     }
3966 
3967     // Make sure that another allocation would fail.
3968     {
3969         BufItem item;
3970         res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocInfo, &item.Buf, &item.Alloc, nullptr);
3971         TEST(res == VK_ERROR_OUT_OF_DEVICE_MEMORY);
3972     }
3973 
3974     // Validate that no buffer is lost. Also check that they are not mapped.
3975     for(size_t i = 0; i < items.size(); ++i)
3976     {
3977         VmaAllocationInfo allocInfo;
3978         vmaGetAllocationInfo(g_hAllocator, items[i].Alloc, &allocInfo);
3979         TEST(allocInfo.deviceMemory != VK_NULL_HANDLE);
3980         TEST(allocInfo.pMappedData == nullptr);
3981     }
3982 
3983     // Free some percent of random items.
3984     {
3985         const size_t PERCENT_TO_FREE = 10;
3986         size_t itemsToFree = items.size() * PERCENT_TO_FREE / 100;
3987         for(size_t i = 0; i < itemsToFree; ++i)
3988         {
3989             size_t index = (size_t)rand.Generate() % items.size();
3990             vmaDestroyBuffer(g_hAllocator, items[index].Buf, items[index].Alloc);
3991             items.erase(items.begin() + index);
3992         }
3993     }
3994 
3995     // Randomly allocate and free items.
3996     {
3997         const size_t OPERATION_COUNT = BUF_COUNT;
3998         for(size_t i = 0; i < OPERATION_COUNT; ++i)
3999         {
4000             bool allocate = rand.Generate() % 2 != 0;
4001             if(allocate)
4002             {
4003                 if(items.size() < BUF_COUNT)
4004                 {
4005                     BufItem item;
4006                     res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocInfo, &item.Buf, &item.Alloc, nullptr);
4007                     TEST(res == VK_SUCCESS);
4008                     items.push_back(item);
4009                }
4010             }
4011             else // Free
4012             {
4013                 if(!items.empty())
4014                 {
4015                     size_t index = (size_t)rand.Generate() % items.size();
4016                     vmaDestroyBuffer(g_hAllocator, items[index].Buf, items[index].Alloc);
4017                     items.erase(items.begin() + index);
4018                 }
4019             }
4020         }
4021     }
4022 
4023     // Allocate up to maximum.
4024     while(items.size() < BUF_COUNT)
4025     {
4026         BufItem item;
4027         res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocInfo, &item.Buf, &item.Alloc, nullptr);
4028         TEST(res == VK_SUCCESS);
4029         items.push_back(item);
4030     }
4031 
4032     // Validate that no buffer is lost.
4033     for(size_t i = 0; i < items.size(); ++i)
4034     {
4035         VmaAllocationInfo allocInfo;
4036         vmaGetAllocationInfo(g_hAllocator, items[i].Alloc, &allocInfo);
4037         TEST(allocInfo.deviceMemory != VK_NULL_HANDLE);
4038     }
4039 
4040     // Next frame.
4041     vmaSetCurrentFrameIndex(g_hAllocator, 2);
4042 
4043     // Allocate another BUF_COUNT buffers.
4044     for(size_t i = 0; i < BUF_COUNT; ++i)
4045     {
4046         BufItem item;
4047         res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocInfo, &item.Buf, &item.Alloc, nullptr);
4048         TEST(res == VK_SUCCESS);
4049         items.push_back(item);
4050     }
4051 
4052     // Make sure the first BUF_COUNT is lost. Delete them.
4053     for(size_t i = 0; i < BUF_COUNT; ++i)
4054     {
4055         VmaAllocationInfo allocInfo;
4056         vmaGetAllocationInfo(g_hAllocator, items[i].Alloc, &allocInfo);
4057         TEST(allocInfo.deviceMemory == VK_NULL_HANDLE);
4058         vmaDestroyBuffer(g_hAllocator, items[i].Buf, items[i].Alloc);
4059     }
4060     items.erase(items.begin(), items.begin() + BUF_COUNT);
4061 
4062     // Validate that no buffer is lost.
4063     for(size_t i = 0; i < items.size(); ++i)
4064     {
4065         VmaAllocationInfo allocInfo;
4066         vmaGetAllocationInfo(g_hAllocator, items[i].Alloc, &allocInfo);
4067         TEST(allocInfo.deviceMemory != VK_NULL_HANDLE);
4068     }
4069 
4070     // Free one item.
4071     vmaDestroyBuffer(g_hAllocator, items.back().Buf, items.back().Alloc);
4072     items.pop_back();
4073 
4074     // Validate statistics.
4075     {
4076         VmaPoolStats poolStats = {};
4077         vmaGetPoolStats(g_hAllocator, pool, &poolStats);
4078         TEST(poolStats.allocationCount == items.size());
4079         TEST(poolStats.size = BUF_COUNT * BUF_SIZE);
4080         TEST(poolStats.unusedRangeCount == 1);
4081         TEST(poolStats.unusedRangeSizeMax == BUF_SIZE);
4082         TEST(poolStats.unusedSize == BUF_SIZE);
4083     }
4084 
4085     // Free all remaining items.
4086     for(size_t i = items.size(); i--; )
4087         vmaDestroyBuffer(g_hAllocator, items[i].Buf, items[i].Alloc);
4088     items.clear();
4089 
4090     // Allocate maximum items again.
4091     for(size_t i = 0; i < BUF_COUNT; ++i)
4092     {
4093         BufItem item;
4094         res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocInfo, &item.Buf, &item.Alloc, nullptr);
4095         TEST(res == VK_SUCCESS);
4096         items.push_back(item);
4097     }
4098 
4099     // Delete every other item.
4100     for(size_t i = 0; i < BUF_COUNT / 2; ++i)
4101     {
4102         vmaDestroyBuffer(g_hAllocator, items[i].Buf, items[i].Alloc);
4103         items.erase(items.begin() + i);
4104     }
4105 
4106     // Defragment!
4107     {
4108         std::vector<VmaAllocation> allocationsToDefragment(items.size());
4109         for(size_t i = 0; i < items.size(); ++i)
4110             allocationsToDefragment[i] = items[i].Alloc;
4111 
4112         VmaDefragmentationStats defragmentationStats;
4113         res = vmaDefragment(g_hAllocator, allocationsToDefragment.data(), items.size(), nullptr, nullptr, &defragmentationStats);
4114         TEST(res == VK_SUCCESS);
4115         TEST(defragmentationStats.deviceMemoryBlocksFreed == 2);
4116     }
4117 
4118     // Free all remaining items.
4119     for(size_t i = items.size(); i--; )
4120         vmaDestroyBuffer(g_hAllocator, items[i].Buf, items[i].Alloc);
4121     items.clear();
4122 
4123     ////////////////////////////////////////////////////////////////////////////////
4124     // Test for vmaMakePoolAllocationsLost
4125 
4126     // Allocate 4 buffers on frame 10.
4127     vmaSetCurrentFrameIndex(g_hAllocator, 10);
4128     for(size_t i = 0; i < 4; ++i)
4129     {
4130         BufItem item;
4131         res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocInfo, &item.Buf, &item.Alloc, nullptr);
4132         TEST(res == VK_SUCCESS);
4133         items.push_back(item);
4134     }
4135 
4136     // Touch first 2 of them on frame 11.
4137     vmaSetCurrentFrameIndex(g_hAllocator, 11);
4138     for(size_t i = 0; i < 2; ++i)
4139     {
4140         VmaAllocationInfo allocInfo;
4141         vmaGetAllocationInfo(g_hAllocator, items[i].Alloc, &allocInfo);
4142     }
4143 
4144     // vmaMakePoolAllocationsLost. Only remaining 2 should be lost.
4145     size_t lostCount = 0xDEADC0DE;
4146     vmaMakePoolAllocationsLost(g_hAllocator, pool, &lostCount);
4147     TEST(lostCount == 2);
4148 
4149     // Make another call. Now 0 should be lost.
4150     vmaMakePoolAllocationsLost(g_hAllocator, pool, &lostCount);
4151     TEST(lostCount == 0);
4152 
4153     // Make another call, with null count. Should not crash.
4154     vmaMakePoolAllocationsLost(g_hAllocator, pool, nullptr);
4155 
4156     // END: Free all remaining items.
4157     for(size_t i = items.size(); i--; )
4158         vmaDestroyBuffer(g_hAllocator, items[i].Buf, items[i].Alloc);
4159 
4160     items.clear();
4161 
4162     ////////////////////////////////////////////////////////////////////////////////
4163     // Test for allocation too large for pool
4164 
4165     {
4166         VmaAllocationCreateInfo allocCreateInfo = {};
4167         allocCreateInfo.pool = pool;
4168 
4169         VkMemoryRequirements memReq;
4170         memReq.memoryTypeBits = UINT32_MAX;
4171         memReq.alignment = 1;
4172         memReq.size = poolCreateInfo.blockSize + 4;
4173 
4174         VmaAllocation alloc = nullptr;
4175         res = vmaAllocateMemory(g_hAllocator, &memReq, &allocCreateInfo, &alloc, nullptr);
4176         TEST(res == VK_ERROR_OUT_OF_DEVICE_MEMORY && alloc == nullptr);
4177     }
4178 
4179     vmaDestroyPool(g_hAllocator, pool);
4180 }
4181 
// Returns true if every byte in [pMemory, pMemory + size) equals `pattern`.
// An empty range (size == 0) trivially validates.
static bool ValidatePattern(const void* pMemory, size_t size, uint8_t pattern)
{
    const uint8_t* pCurr = static_cast<const uint8_t*>(pMemory);
    const uint8_t* const pEnd = pCurr + size;
    while(pCurr != pEnd)
    {
        if(*pCurr++ != pattern)
            return false;
    }
    return true;
}
4194 
// Verifies that allocations are filled with a debug bit pattern on creation
// (0xDC) and on destruction (0xEF).
// NOTE(review): assumes the allocator is built with
// VMA_DEBUG_INITIALIZE_ALLOCATIONS enabled - confirm build configuration,
// otherwise the pattern checks below cannot hold.
// Deliberately reads memory of a destroyed allocation at the end; this is
// only (more or less) safe because a persistently mapped buffer keeps the
// single pool block alive and mapped for the whole test.
static void TestAllocationsInitialization()
{
    VkResult res;

    const size_t BUF_SIZE = 1024;

    // Create pool.

    VkBufferCreateInfo bufInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
    bufInfo.size = BUF_SIZE;
    bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;

    // CPU_ONLY so the memory is host-visible and mappable.
    VmaAllocationCreateInfo dummyBufAllocCreateInfo = {};
    dummyBufAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;

    // Exactly one block, so all test buffers share the same mapped memory.
    VmaPoolCreateInfo poolCreateInfo = {};
    poolCreateInfo.blockSize = BUF_SIZE * 10;
    poolCreateInfo.minBlockCount = 1; // To keep memory alive while pool exists.
    poolCreateInfo.maxBlockCount = 1;
    res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &bufInfo, &dummyBufAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
    TEST(res == VK_SUCCESS);

    VmaAllocationCreateInfo bufAllocCreateInfo = {};
    res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &bufAllocCreateInfo.pool);
    TEST(res == VK_SUCCESS);

    // Create one persistently mapped buffer to keep memory of this block mapped,
    // so that pointer to mapped data will remain (more or less...) valid even
    // after destruction of other allocations.

    bufAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
    VkBuffer firstBuf;
    VmaAllocation firstAlloc;
    res = vmaCreateBuffer(g_hAllocator, &bufInfo, &bufAllocCreateInfo, &firstBuf, &firstAlloc, nullptr);
    TEST(res == VK_SUCCESS);

    // Test buffers.

    // Iteration 0: persistently mapped buffer; iteration 1: manually mapped.
    for(uint32_t i = 0; i < 2; ++i)
    {
        const bool persistentlyMapped = i == 0;
        bufAllocCreateInfo.flags = persistentlyMapped ? VMA_ALLOCATION_CREATE_MAPPED_BIT : 0;
        VkBuffer buf;
        VmaAllocation alloc;
        VmaAllocationInfo allocInfo;
        res = vmaCreateBuffer(g_hAllocator, &bufInfo, &bufAllocCreateInfo, &buf, &alloc, &allocInfo);
        TEST(res == VK_SUCCESS);

        // Obtain a pointer to the allocation's bytes, either by explicit map
        // or from the persistent mapping reported in allocInfo.
        void* pMappedData;
        if(!persistentlyMapped)
        {
            res = vmaMapMemory(g_hAllocator, alloc, &pMappedData);
            TEST(res == VK_SUCCESS);
        }
        else
        {
            pMappedData = allocInfo.pMappedData;
        }

        // Validate initialized content
        bool valid = ValidatePattern(pMappedData, BUF_SIZE, 0xDC);
        TEST(valid);

        if(!persistentlyMapped)
        {
            vmaUnmapMemory(g_hAllocator, alloc);
        }

        vmaDestroyBuffer(g_hAllocator, buf, alloc);

        // Validate freed content
        // pMappedData now points into freed sub-range of the still-mapped
        // block; the destroy-fill pattern should be visible there.
        valid = ValidatePattern(pMappedData, BUF_SIZE, 0xEF);
        TEST(valid);
    }

    vmaDestroyBuffer(g_hAllocator, firstBuf, firstAlloc);
    vmaDestroyPool(g_hAllocator, bufAllocCreateInfo.pool);
}
4273 
TestPool_Benchmark(PoolTestResult & outResult,const PoolTestConfig & config)4274 static void TestPool_Benchmark(
4275     PoolTestResult& outResult,
4276     const PoolTestConfig& config)
4277 {
4278     TEST(config.ThreadCount > 0);
4279 
4280     RandomNumberGenerator mainRand{config.RandSeed};
4281 
4282     uint32_t allocationSizeProbabilitySum = std::accumulate(
4283         config.AllocationSizes.begin(),
4284         config.AllocationSizes.end(),
4285         0u,
4286         [](uint32_t sum, const AllocationSize& allocSize) {
4287             return sum + allocSize.Probability;
4288         });
4289 
4290     VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
4291     bufferInfo.size = 256; // Whatever.
4292     bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
4293 
4294     VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
4295     imageInfo.imageType = VK_IMAGE_TYPE_2D;
4296     imageInfo.extent.width = 256; // Whatever.
4297     imageInfo.extent.height = 256; // Whatever.
4298     imageInfo.extent.depth = 1;
4299     imageInfo.mipLevels = 1;
4300     imageInfo.arrayLayers = 1;
4301     imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
4302     imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL; // LINEAR if CPU memory.
4303     imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
4304     imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; // TRANSFER_SRC if CPU memory.
4305     imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
4306 
4307     uint32_t bufferMemoryTypeBits = UINT32_MAX;
4308     {
4309         VkBuffer dummyBuffer;
4310         VkResult res = vkCreateBuffer(g_hDevice, &bufferInfo, g_Allocs, &dummyBuffer);
4311         TEST(res == VK_SUCCESS);
4312 
4313         VkMemoryRequirements memReq;
4314         vkGetBufferMemoryRequirements(g_hDevice, dummyBuffer, &memReq);
4315         bufferMemoryTypeBits = memReq.memoryTypeBits;
4316 
4317         vkDestroyBuffer(g_hDevice, dummyBuffer, g_Allocs);
4318     }
4319 
4320     uint32_t imageMemoryTypeBits = UINT32_MAX;
4321     {
4322         VkImage dummyImage;
4323         VkResult res = vkCreateImage(g_hDevice, &imageInfo, g_Allocs, &dummyImage);
4324         TEST(res == VK_SUCCESS);
4325 
4326         VkMemoryRequirements memReq;
4327         vkGetImageMemoryRequirements(g_hDevice, dummyImage, &memReq);
4328         imageMemoryTypeBits = memReq.memoryTypeBits;
4329 
4330         vkDestroyImage(g_hDevice, dummyImage, g_Allocs);
4331     }
4332 
4333     uint32_t memoryTypeBits = 0;
4334     if(config.UsesBuffers() && config.UsesImages())
4335     {
4336         memoryTypeBits = bufferMemoryTypeBits & imageMemoryTypeBits;
4337         if(memoryTypeBits == 0)
4338         {
4339             PrintWarning(L"Cannot test buffers + images in the same memory pool on this GPU.");
4340             return;
4341         }
4342     }
4343     else if(config.UsesBuffers())
4344         memoryTypeBits = bufferMemoryTypeBits;
4345     else if(config.UsesImages())
4346         memoryTypeBits = imageMemoryTypeBits;
4347     else
4348         TEST(0);
4349 
4350     VmaPoolCreateInfo poolCreateInfo = {};
4351     poolCreateInfo.memoryTypeIndex = 0;
4352     poolCreateInfo.minBlockCount = 1;
4353     poolCreateInfo.maxBlockCount = 1;
4354     poolCreateInfo.blockSize = config.PoolSize;
4355     poolCreateInfo.frameInUseCount = 1;
4356 
4357     VmaAllocationCreateInfo dummyAllocCreateInfo = {};
4358     dummyAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
4359     vmaFindMemoryTypeIndex(g_hAllocator, memoryTypeBits, &dummyAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
4360 
4361     VmaPool pool;
4362     VkResult res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
4363     TEST(res == VK_SUCCESS);
4364 
4365     // Start time measurement - after creating pool and initializing data structures.
4366     time_point timeBeg = std::chrono::high_resolution_clock::now();
4367 
4368     ////////////////////////////////////////////////////////////////////////////////
4369     // ThreadProc
4370     auto ThreadProc = [&](
4371         PoolTestThreadResult* outThreadResult,
4372         uint32_t randSeed,
4373         HANDLE frameStartEvent,
4374         HANDLE frameEndEvent) -> void
4375     {
4376         RandomNumberGenerator threadRand{randSeed};
4377 
4378         outThreadResult->AllocationTimeMin = duration::max();
4379         outThreadResult->AllocationTimeSum = duration::zero();
4380         outThreadResult->AllocationTimeMax = duration::min();
4381         outThreadResult->DeallocationTimeMin = duration::max();
4382         outThreadResult->DeallocationTimeSum = duration::zero();
4383         outThreadResult->DeallocationTimeMax = duration::min();
4384         outThreadResult->AllocationCount = 0;
4385         outThreadResult->DeallocationCount = 0;
4386         outThreadResult->LostAllocationCount = 0;
4387         outThreadResult->LostAllocationTotalSize = 0;
4388         outThreadResult->FailedAllocationCount = 0;
4389         outThreadResult->FailedAllocationTotalSize = 0;
4390 
4391         struct Item
4392         {
4393             VkDeviceSize BufferSize;
4394             VkExtent2D ImageSize;
4395             VkBuffer Buf;
4396             VkImage Image;
4397             VmaAllocation Alloc;
4398 
4399             VkDeviceSize CalcSizeBytes() const
4400             {
4401                 return BufferSize +
4402                     ImageSize.width * ImageSize.height * 4;
4403             }
4404         };
4405         std::vector<Item> unusedItems, usedItems;
4406 
4407         const size_t threadTotalItemCount = config.TotalItemCount / config.ThreadCount;
4408 
4409         // Create all items - all unused, not yet allocated.
4410         for(size_t i = 0; i < threadTotalItemCount; ++i)
4411         {
4412             Item item = {};
4413 
4414             uint32_t allocSizeIndex = 0;
4415             uint32_t r = threadRand.Generate() % allocationSizeProbabilitySum;
4416             while(r >= config.AllocationSizes[allocSizeIndex].Probability)
4417                 r -= config.AllocationSizes[allocSizeIndex++].Probability;
4418 
4419             const AllocationSize& allocSize = config.AllocationSizes[allocSizeIndex];
4420             if(allocSize.BufferSizeMax > 0)
4421             {
4422                 TEST(allocSize.BufferSizeMin > 0);
4423                 TEST(allocSize.ImageSizeMin == 0 && allocSize.ImageSizeMax == 0);
4424                 if(allocSize.BufferSizeMax == allocSize.BufferSizeMin)
4425                     item.BufferSize = allocSize.BufferSizeMin;
4426                 else
4427                 {
4428                     item.BufferSize = allocSize.BufferSizeMin + threadRand.Generate() % (allocSize.BufferSizeMax - allocSize.BufferSizeMin);
4429                     item.BufferSize = item.BufferSize / 16 * 16;
4430                 }
4431             }
4432             else
4433             {
4434                 TEST(allocSize.ImageSizeMin > 0 && allocSize.ImageSizeMax > 0);
4435                 if(allocSize.ImageSizeMax == allocSize.ImageSizeMin)
4436                     item.ImageSize.width = item.ImageSize.height = allocSize.ImageSizeMax;
4437                 else
4438                 {
4439                     item.ImageSize.width  = allocSize.ImageSizeMin + threadRand.Generate() % (allocSize.ImageSizeMax - allocSize.ImageSizeMin);
4440                     item.ImageSize.height = allocSize.ImageSizeMin + threadRand.Generate() % (allocSize.ImageSizeMax - allocSize.ImageSizeMin);
4441                 }
4442             }
4443 
4444             unusedItems.push_back(item);
4445         }
4446 
4447         auto Allocate = [&](Item& item) -> VkResult
4448         {
4449             VmaAllocationCreateInfo allocCreateInfo = {};
4450             allocCreateInfo.pool = pool;
4451             allocCreateInfo.flags = VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT |
4452                 VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT;
4453 
4454             if(item.BufferSize)
4455             {
4456                 bufferInfo.size = item.BufferSize;
4457                 PoolAllocationTimeRegisterObj timeRegisterObj(*outThreadResult);
4458                 return vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocCreateInfo, &item.Buf, &item.Alloc, nullptr);
4459             }
4460             else
4461             {
4462                 TEST(item.ImageSize.width && item.ImageSize.height);
4463 
4464                 imageInfo.extent.width = item.ImageSize.width;
4465                 imageInfo.extent.height = item.ImageSize.height;
4466                 PoolAllocationTimeRegisterObj timeRegisterObj(*outThreadResult);
4467                 return vmaCreateImage(g_hAllocator, &imageInfo, &allocCreateInfo, &item.Image, &item.Alloc, nullptr);
4468             }
4469         };
4470 
4471         ////////////////////////////////////////////////////////////////////////////////
4472         // Frames
4473         for(uint32_t frameIndex = 0; frameIndex < config.FrameCount; ++frameIndex)
4474         {
4475             WaitForSingleObject(frameStartEvent, INFINITE);
4476 
4477             // Always make some percent of used bufs unused, to choose different used ones.
4478             const size_t bufsToMakeUnused = usedItems.size() * config.ItemsToMakeUnusedPercent / 100;
4479             for(size_t i = 0; i < bufsToMakeUnused; ++i)
4480             {
4481                 size_t index = threadRand.Generate() % usedItems.size();
4482                 unusedItems.push_back(usedItems[index]);
4483                 usedItems.erase(usedItems.begin() + index);
4484             }
4485 
4486             // Determine which bufs we want to use in this frame.
4487             const size_t usedBufCount = (threadRand.Generate() % (config.UsedItemCountMax - config.UsedItemCountMin) + config.UsedItemCountMin)
4488                 / config.ThreadCount;
4489             TEST(usedBufCount < usedItems.size() + unusedItems.size());
4490             // Move some used to unused.
4491             while(usedBufCount < usedItems.size())
4492             {
4493                 size_t index = threadRand.Generate() % usedItems.size();
4494                 unusedItems.push_back(usedItems[index]);
4495                 usedItems.erase(usedItems.begin() + index);
4496             }
4497             // Move some unused to used.
4498             while(usedBufCount > usedItems.size())
4499             {
4500                 size_t index = threadRand.Generate() % unusedItems.size();
4501                 usedItems.push_back(unusedItems[index]);
4502                 unusedItems.erase(unusedItems.begin() + index);
4503             }
4504 
4505             uint32_t touchExistingCount = 0;
4506             uint32_t touchLostCount = 0;
4507             uint32_t createSucceededCount = 0;
4508             uint32_t createFailedCount = 0;
4509 
4510             // Touch all used bufs. If not created or lost, allocate.
4511             for(size_t i = 0; i < usedItems.size(); ++i)
4512             {
4513                 Item& item = usedItems[i];
4514                 // Not yet created.
4515                 if(item.Alloc == VK_NULL_HANDLE)
4516                 {
4517                     res = Allocate(item);
4518                     ++outThreadResult->AllocationCount;
4519                     if(res != VK_SUCCESS)
4520                     {
4521                         item.Alloc = VK_NULL_HANDLE;
4522                         item.Buf = VK_NULL_HANDLE;
4523                         ++outThreadResult->FailedAllocationCount;
4524                         outThreadResult->FailedAllocationTotalSize += item.CalcSizeBytes();
4525                         ++createFailedCount;
4526                     }
4527                     else
4528                         ++createSucceededCount;
4529                 }
4530                 else
4531                 {
4532                     // Touch.
4533                     VmaAllocationInfo allocInfo;
4534                     vmaGetAllocationInfo(g_hAllocator, item.Alloc, &allocInfo);
4535                     // Lost.
4536                     if(allocInfo.deviceMemory == VK_NULL_HANDLE)
4537                     {
4538                         ++touchLostCount;
4539 
4540                         // Destroy.
4541                         {
4542                             PoolDeallocationTimeRegisterObj timeRegisterObj(*outThreadResult);
4543                             if(item.Buf)
4544                                 vmaDestroyBuffer(g_hAllocator, item.Buf, item.Alloc);
4545                             else
4546                                 vmaDestroyImage(g_hAllocator, item.Image, item.Alloc);
4547                             ++outThreadResult->DeallocationCount;
4548                         }
4549                         item.Alloc = VK_NULL_HANDLE;
4550                         item.Buf = VK_NULL_HANDLE;
4551 
4552                         ++outThreadResult->LostAllocationCount;
4553                         outThreadResult->LostAllocationTotalSize += item.CalcSizeBytes();
4554 
4555                         // Recreate.
4556                         res = Allocate(item);
4557                         ++outThreadResult->AllocationCount;
4558                         // Creation failed.
4559                         if(res != VK_SUCCESS)
4560                         {
4561                             ++outThreadResult->FailedAllocationCount;
4562                             outThreadResult->FailedAllocationTotalSize += item.CalcSizeBytes();
4563                             ++createFailedCount;
4564                         }
4565                         else
4566                             ++createSucceededCount;
4567                     }
4568                     else
4569                         ++touchExistingCount;
4570                 }
4571             }
4572 
4573             /*
4574             printf("Thread %u frame %u: Touch existing %u lost %u, create succeeded %u failed %u\n",
4575                 randSeed, frameIndex,
4576                 touchExistingCount, touchLostCount,
4577                 createSucceededCount, createFailedCount);
4578             */
4579 
4580             SetEvent(frameEndEvent);
4581         }
4582 
4583         // Free all remaining items.
4584         for(size_t i = usedItems.size(); i--; )
4585         {
4586             PoolDeallocationTimeRegisterObj timeRegisterObj(*outThreadResult);
4587             if(usedItems[i].Buf)
4588                 vmaDestroyBuffer(g_hAllocator, usedItems[i].Buf, usedItems[i].Alloc);
4589             else
4590                 vmaDestroyImage(g_hAllocator, usedItems[i].Image, usedItems[i].Alloc);
4591             ++outThreadResult->DeallocationCount;
4592         }
4593         for(size_t i = unusedItems.size(); i--; )
4594         {
4595             PoolDeallocationTimeRegisterObj timeRegisterOb(*outThreadResult);
4596             if(unusedItems[i].Buf)
4597                 vmaDestroyBuffer(g_hAllocator, unusedItems[i].Buf, unusedItems[i].Alloc);
4598             else
4599                 vmaDestroyImage(g_hAllocator, unusedItems[i].Image, unusedItems[i].Alloc);
4600             ++outThreadResult->DeallocationCount;
4601         }
4602     };
4603 
4604     // Launch threads.
4605     uint32_t threadRandSeed = mainRand.Generate();
4606     std::vector<HANDLE> frameStartEvents{config.ThreadCount};
4607     std::vector<HANDLE> frameEndEvents{config.ThreadCount};
4608     std::vector<std::thread> bkgThreads;
4609     std::vector<PoolTestThreadResult> threadResults{config.ThreadCount};
4610     for(uint32_t threadIndex = 0; threadIndex < config.ThreadCount; ++threadIndex)
4611     {
4612         frameStartEvents[threadIndex] = CreateEvent(NULL, FALSE, FALSE, NULL);
4613         frameEndEvents[threadIndex] = CreateEvent(NULL, FALSE, FALSE, NULL);
4614         bkgThreads.emplace_back(std::bind(
4615             ThreadProc,
4616             &threadResults[threadIndex],
4617             threadRandSeed + threadIndex,
4618             frameStartEvents[threadIndex],
4619             frameEndEvents[threadIndex]));
4620     }
4621 
4622     // Execute frames.
4623     TEST(config.ThreadCount <= MAXIMUM_WAIT_OBJECTS);
4624     for(uint32_t frameIndex = 0; frameIndex < config.FrameCount; ++frameIndex)
4625     {
4626         vmaSetCurrentFrameIndex(g_hAllocator, frameIndex);
4627         for(size_t threadIndex = 0; threadIndex < config.ThreadCount; ++threadIndex)
4628             SetEvent(frameStartEvents[threadIndex]);
4629         WaitForMultipleObjects(config.ThreadCount, &frameEndEvents[0], TRUE, INFINITE);
4630     }
4631 
4632     // Wait for threads finished
4633     for(size_t i = 0; i < bkgThreads.size(); ++i)
4634     {
4635         bkgThreads[i].join();
4636         CloseHandle(frameEndEvents[i]);
4637         CloseHandle(frameStartEvents[i]);
4638     }
4639     bkgThreads.clear();
4640 
4641     // Finish time measurement - before destroying pool.
4642     outResult.TotalTime = std::chrono::high_resolution_clock::now() - timeBeg;
4643 
4644     vmaDestroyPool(g_hAllocator, pool);
4645 
4646     outResult.AllocationTimeMin = duration::max();
4647     outResult.AllocationTimeAvg = duration::zero();
4648     outResult.AllocationTimeMax = duration::min();
4649     outResult.DeallocationTimeMin = duration::max();
4650     outResult.DeallocationTimeAvg = duration::zero();
4651     outResult.DeallocationTimeMax = duration::min();
4652     outResult.LostAllocationCount = 0;
4653     outResult.LostAllocationTotalSize = 0;
4654     outResult.FailedAllocationCount = 0;
4655     outResult.FailedAllocationTotalSize = 0;
4656     size_t allocationCount = 0;
4657     size_t deallocationCount = 0;
4658     for(size_t threadIndex = 0; threadIndex < config.ThreadCount; ++threadIndex)
4659     {
4660         const PoolTestThreadResult& threadResult = threadResults[threadIndex];
4661         outResult.AllocationTimeMin = std::min(outResult.AllocationTimeMin, threadResult.AllocationTimeMin);
4662         outResult.AllocationTimeMax = std::max(outResult.AllocationTimeMax, threadResult.AllocationTimeMax);
4663         outResult.AllocationTimeAvg += threadResult.AllocationTimeSum;
4664         outResult.DeallocationTimeMin = std::min(outResult.DeallocationTimeMin, threadResult.DeallocationTimeMin);
4665         outResult.DeallocationTimeMax = std::max(outResult.DeallocationTimeMax, threadResult.DeallocationTimeMax);
4666         outResult.DeallocationTimeAvg += threadResult.DeallocationTimeSum;
4667         allocationCount += threadResult.AllocationCount;
4668         deallocationCount += threadResult.DeallocationCount;
4669         outResult.FailedAllocationCount += threadResult.FailedAllocationCount;
4670         outResult.FailedAllocationTotalSize += threadResult.FailedAllocationTotalSize;
4671         outResult.LostAllocationCount += threadResult.LostAllocationCount;
4672         outResult.LostAllocationTotalSize += threadResult.LostAllocationTotalSize;
4673     }
4674     if(allocationCount)
4675         outResult.AllocationTimeAvg /= allocationCount;
4676     if(deallocationCount)
4677         outResult.DeallocationTimeAvg /= deallocationCount;
4678 }
4679 
// Returns true if the two byte ranges [ptr1, ptr1+size1) and [ptr2, ptr2+size2)
// share at least one address. Ranges starting at the same address are always
// considered overlapping.
static inline bool MemoryRegionsOverlap(char* ptr1, size_t size1, char* ptr2, size_t size2)
{
    // Identical start addresses trivially overlap.
    if(ptr1 == ptr2)
        return true;
    // Order the regions so `lo` begins first, then check whether it reaches `hi`.
    char* const lo = ptr1 < ptr2 ? ptr1 : ptr2;
    const size_t loSize = ptr1 < ptr2 ? size1 : size2;
    char* const hi = ptr1 < ptr2 ? ptr2 : ptr1;
    return lo + loSize > hi;
}
4689 
TestMemoryUsage()4690 static void TestMemoryUsage()
4691 {
4692     wprintf(L"Testing memory usage:\n");
4693 
4694     static const VmaMemoryUsage lastUsage = VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED;
4695     for(uint32_t usage = 0; usage <= lastUsage; ++usage)
4696     {
4697         switch(usage)
4698         {
4699         case VMA_MEMORY_USAGE_UNKNOWN: printf("  VMA_MEMORY_USAGE_UNKNOWN:\n"); break;
4700         case VMA_MEMORY_USAGE_GPU_ONLY: printf("  VMA_MEMORY_USAGE_GPU_ONLY:\n"); break;
4701         case VMA_MEMORY_USAGE_CPU_ONLY: printf("  VMA_MEMORY_USAGE_CPU_ONLY:\n"); break;
4702         case VMA_MEMORY_USAGE_CPU_TO_GPU: printf("  VMA_MEMORY_USAGE_CPU_TO_GPU:\n"); break;
4703         case VMA_MEMORY_USAGE_GPU_TO_CPU: printf("  VMA_MEMORY_USAGE_GPU_TO_CPU:\n"); break;
4704         case VMA_MEMORY_USAGE_CPU_COPY: printf("  VMA_MEMORY_USAGE_CPU_COPY:\n"); break;
4705         case VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED: printf("  VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED:\n"); break;
4706         default: assert(0);
4707         }
4708 
4709         auto printResult = [](const char* testName, VkResult res, uint32_t memoryTypeBits, uint32_t memoryTypeIndex)
4710         {
4711             if(res == VK_SUCCESS)
4712                 printf("    %s: memoryTypeBits=0x%X, memoryTypeIndex=%u\n", testName, memoryTypeBits, memoryTypeIndex);
4713             else
4714                 printf("    %s: memoryTypeBits=0x%X, FAILED with res=%d\n", testName, memoryTypeBits, (int32_t)res);
4715         };
4716 
4717         // 1: Buffer for copy
4718         {
4719             VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
4720             bufCreateInfo.size = 65536;
4721             bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
4722 
4723             VkBuffer buf = VK_NULL_HANDLE;
4724             VkResult res = vkCreateBuffer(g_hDevice, &bufCreateInfo, g_Allocs, &buf);
4725             TEST(res == VK_SUCCESS && buf != VK_NULL_HANDLE);
4726 
4727             VkMemoryRequirements memReq = {};
4728             vkGetBufferMemoryRequirements(g_hDevice, buf, &memReq);
4729 
4730             VmaAllocationCreateInfo allocCreateInfo = {};
4731             allocCreateInfo.usage = (VmaMemoryUsage)usage;
4732             VmaAllocation alloc = VK_NULL_HANDLE;
4733             VmaAllocationInfo allocInfo = {};
4734             res = vmaAllocateMemoryForBuffer(g_hAllocator, buf, &allocCreateInfo, &alloc, &allocInfo);
4735             if(res == VK_SUCCESS)
4736             {
4737                 TEST((memReq.memoryTypeBits & (1u << allocInfo.memoryType)) != 0);
4738                 res = vkBindBufferMemory(g_hDevice, buf, allocInfo.deviceMemory, allocInfo.offset);
4739                 TEST(res == VK_SUCCESS);
4740             }
4741             printResult("Buffer TRANSFER_DST + TRANSFER_SRC", res, memReq.memoryTypeBits, allocInfo.memoryType);
4742             vmaDestroyBuffer(g_hAllocator, buf, alloc);
4743         }
4744 
4745         // 2: Vertex buffer
4746         {
4747             VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
4748             bufCreateInfo.size = 65536;
4749             bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
4750 
4751             VkBuffer buf = VK_NULL_HANDLE;
4752             VkResult res = vkCreateBuffer(g_hDevice, &bufCreateInfo, g_Allocs, &buf);
4753             TEST(res == VK_SUCCESS && buf != VK_NULL_HANDLE);
4754 
4755             VkMemoryRequirements memReq = {};
4756             vkGetBufferMemoryRequirements(g_hDevice, buf, &memReq);
4757 
4758             VmaAllocationCreateInfo allocCreateInfo = {};
4759             allocCreateInfo.usage = (VmaMemoryUsage)usage;
4760             VmaAllocation alloc = VK_NULL_HANDLE;
4761             VmaAllocationInfo allocInfo = {};
4762             res = vmaAllocateMemoryForBuffer(g_hAllocator, buf, &allocCreateInfo, &alloc, &allocInfo);
4763             if(res == VK_SUCCESS)
4764             {
4765                 TEST((memReq.memoryTypeBits & (1u << allocInfo.memoryType)) != 0);
4766                 res = vkBindBufferMemory(g_hDevice, buf, allocInfo.deviceMemory, allocInfo.offset);
4767                 TEST(res == VK_SUCCESS);
4768             }
4769             printResult("Buffer TRANSFER_DST + VERTEX_BUFFER", res, memReq.memoryTypeBits, allocInfo.memoryType);
4770             vmaDestroyBuffer(g_hAllocator, buf, alloc);
4771         }
4772 
4773         // 3: Image for copy, OPTIMAL
4774         {
4775             VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
4776             imgCreateInfo.imageType = VK_IMAGE_TYPE_2D;
4777             imgCreateInfo.extent.width = 256;
4778             imgCreateInfo.extent.height = 256;
4779             imgCreateInfo.extent.depth = 1;
4780             imgCreateInfo.mipLevels = 1;
4781             imgCreateInfo.arrayLayers = 1;
4782             imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
4783             imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
4784             imgCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
4785             imgCreateInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
4786             imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
4787 
4788             VkImage img = VK_NULL_HANDLE;
4789             VkResult res = vkCreateImage(g_hDevice, &imgCreateInfo, g_Allocs, &img);
4790             TEST(res == VK_SUCCESS && img != VK_NULL_HANDLE);
4791 
4792             VkMemoryRequirements memReq = {};
4793             vkGetImageMemoryRequirements(g_hDevice, img, &memReq);
4794 
4795             VmaAllocationCreateInfo allocCreateInfo = {};
4796             allocCreateInfo.usage = (VmaMemoryUsage)usage;
4797             VmaAllocation alloc = VK_NULL_HANDLE;
4798             VmaAllocationInfo allocInfo = {};
4799             res = vmaAllocateMemoryForImage(g_hAllocator, img, &allocCreateInfo, &alloc, &allocInfo);
4800             if(res == VK_SUCCESS)
4801             {
4802                 TEST((memReq.memoryTypeBits & (1u << allocInfo.memoryType)) != 0);
4803                 res = vkBindImageMemory(g_hDevice, img, allocInfo.deviceMemory, allocInfo.offset);
4804                 TEST(res == VK_SUCCESS);
4805             }
4806             printResult("Image OPTIMAL TRANSFER_DST + TRANSFER_SRC", res, memReq.memoryTypeBits, allocInfo.memoryType);
4807 
4808             vmaDestroyImage(g_hAllocator, img, alloc);
4809         }
4810 
4811         // 4: Image SAMPLED, OPTIMAL
4812         {
4813             VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
4814             imgCreateInfo.imageType = VK_IMAGE_TYPE_2D;
4815             imgCreateInfo.extent.width = 256;
4816             imgCreateInfo.extent.height = 256;
4817             imgCreateInfo.extent.depth = 1;
4818             imgCreateInfo.mipLevels = 1;
4819             imgCreateInfo.arrayLayers = 1;
4820             imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
4821             imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
4822             imgCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
4823             imgCreateInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
4824             imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
4825 
4826             VkImage img = VK_NULL_HANDLE;
4827             VkResult res = vkCreateImage(g_hDevice, &imgCreateInfo, g_Allocs, &img);
4828             TEST(res == VK_SUCCESS && img != VK_NULL_HANDLE);
4829 
4830             VkMemoryRequirements memReq = {};
4831             vkGetImageMemoryRequirements(g_hDevice, img, &memReq);
4832 
4833             VmaAllocationCreateInfo allocCreateInfo = {};
4834             allocCreateInfo.usage = (VmaMemoryUsage)usage;
4835             VmaAllocation alloc = VK_NULL_HANDLE;
4836             VmaAllocationInfo allocInfo = {};
4837             res = vmaAllocateMemoryForImage(g_hAllocator, img, &allocCreateInfo, &alloc, &allocInfo);
4838             if(res == VK_SUCCESS)
4839             {
4840                 TEST((memReq.memoryTypeBits & (1u << allocInfo.memoryType)) != 0);
4841                 res = vkBindImageMemory(g_hDevice, img, allocInfo.deviceMemory, allocInfo.offset);
4842                 TEST(res == VK_SUCCESS);
4843             }
4844             printResult("Image OPTIMAL TRANSFER_DST + SAMPLED", res, memReq.memoryTypeBits, allocInfo.memoryType);
4845             vmaDestroyImage(g_hAllocator, img, alloc);
4846         }
4847 
4848         // 5: Image COLOR_ATTACHMENT, OPTIMAL
4849         {
4850             VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
4851             imgCreateInfo.imageType = VK_IMAGE_TYPE_2D;
4852             imgCreateInfo.extent.width = 256;
4853             imgCreateInfo.extent.height = 256;
4854             imgCreateInfo.extent.depth = 1;
4855             imgCreateInfo.mipLevels = 1;
4856             imgCreateInfo.arrayLayers = 1;
4857             imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
4858             imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
4859             imgCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
4860             imgCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
4861             imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
4862 
4863             VkImage img = VK_NULL_HANDLE;
4864             VkResult res = vkCreateImage(g_hDevice, &imgCreateInfo, g_Allocs, &img);
4865             TEST(res == VK_SUCCESS && img != VK_NULL_HANDLE);
4866 
4867             VkMemoryRequirements memReq = {};
4868             vkGetImageMemoryRequirements(g_hDevice, img, &memReq);
4869 
4870             VmaAllocationCreateInfo allocCreateInfo = {};
4871             allocCreateInfo.usage = (VmaMemoryUsage)usage;
4872             VmaAllocation alloc = VK_NULL_HANDLE;
4873             VmaAllocationInfo allocInfo = {};
4874             res = vmaAllocateMemoryForImage(g_hAllocator, img, &allocCreateInfo, &alloc, &allocInfo);
4875             if(res == VK_SUCCESS)
4876             {
4877                 TEST((memReq.memoryTypeBits & (1u << allocInfo.memoryType)) != 0);
4878                 res = vkBindImageMemory(g_hDevice, img, allocInfo.deviceMemory, allocInfo.offset);
4879                 TEST(res == VK_SUCCESS);
4880             }
4881             printResult("Image OPTIMAL SAMPLED + COLOR_ATTACHMENT", res, memReq.memoryTypeBits, allocInfo.memoryType);
4882             vmaDestroyImage(g_hAllocator, img, alloc);
4883         }
4884     }
4885 }
4886 
FindDeviceCoherentMemoryTypeBits()4887 static uint32_t FindDeviceCoherentMemoryTypeBits()
4888 {
4889     VkPhysicalDeviceMemoryProperties memProps;
4890     vkGetPhysicalDeviceMemoryProperties(g_hPhysicalDevice, &memProps);
4891 
4892     uint32_t memTypeBits = 0;
4893     for(uint32_t i = 0; i < memProps.memoryTypeCount; ++i)
4894     {
4895         if(memProps.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD)
4896             memTypeBits |= 1u << i;
4897     }
4898     return memTypeBits;
4899 }
4900 
TestDeviceCoherentMemory()4901 static void TestDeviceCoherentMemory()
4902 {
4903     if(!VK_AMD_device_coherent_memory_enabled)
4904         return;
4905 
4906     uint32_t deviceCoherentMemoryTypeBits = FindDeviceCoherentMemoryTypeBits();
4907     // Extension is enabled, feature is enabled, and the device still doesn't support any such memory type?
4908     // OK then, so it's just fake!
4909     if(deviceCoherentMemoryTypeBits == 0)
4910         return;
4911 
4912     wprintf(L"Testing device coherent memory...\n");
4913 
4914     // 1. Try to allocate buffer from a memory type that is DEVICE_COHERENT.
4915 
4916     VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
4917     bufCreateInfo.size = 0x10000;
4918     bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
4919 
4920     VmaAllocationCreateInfo allocCreateInfo = {};
4921     allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
4922     allocCreateInfo.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD;
4923 
4924     AllocInfo alloc = {};
4925     VmaAllocationInfo allocInfo = {};
4926     VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &alloc.m_Buffer, &alloc.m_Allocation, &allocInfo);
4927 
4928     // Make sure it succeeded and was really created in such memory type.
4929     TEST(res == VK_SUCCESS);
4930     TEST((1u << allocInfo.memoryType) & deviceCoherentMemoryTypeBits);
4931 
4932     alloc.Destroy();
4933 
4934     // 2. Try to create a pool in such memory type.
4935     {
4936         VmaPoolCreateInfo poolCreateInfo = {};
4937 
4938         res = vmaFindMemoryTypeIndex(g_hAllocator, UINT32_MAX, &allocCreateInfo, &poolCreateInfo.memoryTypeIndex);
4939         TEST(res == VK_SUCCESS);
4940         TEST((1u << poolCreateInfo.memoryTypeIndex) & deviceCoherentMemoryTypeBits);
4941 
4942         VmaPool pool = VK_NULL_HANDLE;
4943         res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
4944         TEST(res == VK_SUCCESS);
4945 
4946         vmaDestroyPool(g_hAllocator, pool);
4947     }
4948 
4949     // 3. Try the same with a local allocator created without VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT.
4950 
4951     VmaAllocatorCreateInfo allocatorCreateInfo = {};
4952     SetAllocatorCreateInfo(allocatorCreateInfo);
4953     allocatorCreateInfo.flags &= ~VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT;
4954 
4955     VmaAllocator localAllocator = VK_NULL_HANDLE;
4956     res = vmaCreateAllocator(&allocatorCreateInfo, &localAllocator);
4957     TEST(res == VK_SUCCESS && localAllocator);
4958 
4959     res = vmaCreateBuffer(localAllocator, &bufCreateInfo, &allocCreateInfo, &alloc.m_Buffer, &alloc.m_Allocation, &allocInfo);
4960 
4961     // Make sure it failed.
4962     TEST(res != VK_SUCCESS && !alloc.m_Buffer && !alloc.m_Allocation);
4963 
4964     // 4. Try to find memory type.
4965     {
4966         uint32_t memTypeIndex = UINT_MAX;
4967         res = vmaFindMemoryTypeIndex(localAllocator, UINT32_MAX, &allocCreateInfo, &memTypeIndex);
4968         TEST(res != VK_SUCCESS);
4969     }
4970 
4971     vmaDestroyAllocator(localAllocator);
4972 }
4973 
TestBudget()4974 static void TestBudget()
4975 {
4976     wprintf(L"Testing budget...\n");
4977 
4978     static const VkDeviceSize BUF_SIZE = 10ull * 1024 * 1024;
4979     static const uint32_t BUF_COUNT = 4;
4980 
4981     const VkPhysicalDeviceMemoryProperties* memProps = {};
4982     vmaGetMemoryProperties(g_hAllocator, &memProps);
4983 
4984     for(uint32_t testIndex = 0; testIndex < 2; ++testIndex)
4985     {
4986         vmaSetCurrentFrameIndex(g_hAllocator, ++g_FrameIndex);
4987 
4988         VmaBudget budgetBeg[VK_MAX_MEMORY_HEAPS] = {};
4989         vmaGetBudget(g_hAllocator, budgetBeg);
4990 
4991         for(uint32_t i = 0; i < memProps->memoryHeapCount; ++i)
4992         {
4993             TEST(budgetBeg[i].budget > 0);
4994             TEST(budgetBeg[i].budget <= memProps->memoryHeaps[i].size);
4995             TEST(budgetBeg[i].allocationBytes <= budgetBeg[i].blockBytes);
4996         }
4997 
4998         VkBufferCreateInfo bufInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
4999         bufInfo.size = BUF_SIZE;
5000         bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
5001 
5002         VmaAllocationCreateInfo allocCreateInfo = {};
5003         allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
5004         if(testIndex == 0)
5005         {
5006             allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
5007         }
5008 
5009         // CREATE BUFFERS
5010         uint32_t heapIndex = 0;
5011         BufferInfo bufInfos[BUF_COUNT] = {};
5012         for(uint32_t bufIndex = 0; bufIndex < BUF_COUNT; ++bufIndex)
5013         {
5014             VmaAllocationInfo allocInfo;
5015             VkResult res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo,
5016                 &bufInfos[bufIndex].Buffer, &bufInfos[bufIndex].Allocation, &allocInfo);
5017             TEST(res == VK_SUCCESS);
5018             if(bufIndex == 0)
5019             {
5020                 heapIndex = MemoryTypeToHeap(allocInfo.memoryType);
5021             }
5022             else
5023             {
5024                 // All buffers need to fall into the same heap.
5025                 TEST(MemoryTypeToHeap(allocInfo.memoryType) == heapIndex);
5026             }
5027         }
5028 
5029         VmaBudget budgetWithBufs[VK_MAX_MEMORY_HEAPS] = {};
5030         vmaGetBudget(g_hAllocator, budgetWithBufs);
5031 
5032         // DESTROY BUFFERS
5033         for(size_t bufIndex = BUF_COUNT; bufIndex--; )
5034         {
5035             vmaDestroyBuffer(g_hAllocator, bufInfos[bufIndex].Buffer, bufInfos[bufIndex].Allocation);
5036         }
5037 
5038         VmaBudget budgetEnd[VK_MAX_MEMORY_HEAPS] = {};
5039         vmaGetBudget(g_hAllocator, budgetEnd);
5040 
5041         // CHECK
5042         for(uint32_t i = 0; i < memProps->memoryHeapCount; ++i)
5043         {
5044             TEST(budgetEnd[i].allocationBytes <= budgetEnd[i].blockBytes);
5045             if(i == heapIndex)
5046             {
5047                 TEST(budgetEnd[i].allocationBytes == budgetBeg[i].allocationBytes);
5048                 TEST(budgetWithBufs[i].allocationBytes == budgetBeg[i].allocationBytes + BUF_SIZE * BUF_COUNT);
5049                 TEST(budgetWithBufs[i].blockBytes >= budgetEnd[i].blockBytes);
5050             }
5051             else
5052             {
5053                 TEST(budgetEnd[i].allocationBytes == budgetEnd[i].allocationBytes &&
5054                     budgetEnd[i].allocationBytes == budgetWithBufs[i].allocationBytes);
5055                 TEST(budgetEnd[i].blockBytes == budgetEnd[i].blockBytes &&
5056                     budgetEnd[i].blockBytes == budgetWithBufs[i].blockBytes);
5057             }
5058         }
5059     }
5060 }
5061 
// Tests vmaMapMemory/vmaUnmapMemory reference counting and persistently
// mapped allocations (VMA_ALLOCATION_CREATE_MAPPED_BIT), in three
// configurations: default allocations, allocations from a custom pool,
// and dedicated allocations.
static void TestMapping()
{
    wprintf(L"Testing mapping...\n");

    VkResult res;
    // Memory type of the first allocation; reused later to create the custom pool.
    uint32_t memTypeIndex = UINT32_MAX;

    enum TEST
    {
        TEST_NORMAL,
        TEST_POOL,
        TEST_DEDICATED,
        TEST_COUNT
    };
    for(uint32_t testIndex = 0; testIndex < TEST_COUNT; ++testIndex)
    {
        VmaPool pool = nullptr;
        if(testIndex == TEST_POOL)
        {
            // TEST_NORMAL ran first and filled memTypeIndex.
            TEST(memTypeIndex != UINT32_MAX);
            VmaPoolCreateInfo poolInfo = {};
            poolInfo.memoryTypeIndex = memTypeIndex;
            res = vmaCreatePool(g_hAllocator, &poolInfo, &pool);
            TEST(res == VK_SUCCESS);
        }

        VkBufferCreateInfo bufInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
        bufInfo.size = 0x10000;
        bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;

        VmaAllocationCreateInfo allocCreateInfo = {};
        allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
        allocCreateInfo.pool = pool; // Null except for TEST_POOL.
        if(testIndex == TEST_DEDICATED)
            allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;

        VmaAllocationInfo allocInfo;

        // Mapped manually

        // Create 2 buffers. Slot [2] is filled later with the persistently mapped one.
        BufferInfo bufferInfos[3];
        for(size_t i = 0; i < 2; ++i)
        {
            res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo,
                &bufferInfos[i].Buffer, &bufferInfos[i].Allocation, &allocInfo);
            TEST(res == VK_SUCCESS);
            TEST(allocInfo.pMappedData == nullptr); // Not created persistently mapped.
            memTypeIndex = allocInfo.memoryType;
        }

        // Map buffer 0.
        char* data00 = nullptr;
        res = vmaMapMemory(g_hAllocator, bufferInfos[0].Allocation, (void**)&data00);
        TEST(res == VK_SUCCESS && data00 != nullptr);
        data00[0xFFFF] = data00[0]; // Touch first and last byte of the mapping.

        // Map buffer 0 second time. A nested map must return the same pointer.
        char* data01 = nullptr;
        res = vmaMapMemory(g_hAllocator, bufferInfos[0].Allocation, (void**)&data01);
        TEST(res == VK_SUCCESS && data01 == data00);

        // Map buffer 1. Distinct allocations must not share mapped memory.
        char* data1 = nullptr;
        res = vmaMapMemory(g_hAllocator, bufferInfos[1].Allocation, (void**)&data1);
        TEST(res == VK_SUCCESS && data1 != nullptr);
        TEST(!MemoryRegionsOverlap(data00, (size_t)bufInfo.size, data1, (size_t)bufInfo.size));
        data1[0xFFFF] = data1[0];

        // Unmap buffer 0 two times - once per vmaMapMemory call above. After the
        // second unmap the map reference count drops to zero.
        vmaUnmapMemory(g_hAllocator, bufferInfos[0].Allocation);
        vmaUnmapMemory(g_hAllocator, bufferInfos[0].Allocation);
        vmaGetAllocationInfo(g_hAllocator, bufferInfos[0].Allocation, &allocInfo);
        TEST(allocInfo.pMappedData == nullptr);

        // Unmap buffer 1.
        vmaUnmapMemory(g_hAllocator, bufferInfos[1].Allocation);
        vmaGetAllocationInfo(g_hAllocator, bufferInfos[1].Allocation, &allocInfo);
        TEST(allocInfo.pMappedData == nullptr);

        // Create 3rd buffer - persistently mapped.
        allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT;
        res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo,
            &bufferInfos[2].Buffer, &bufferInfos[2].Allocation, &allocInfo);
        TEST(res == VK_SUCCESS && allocInfo.pMappedData != nullptr);

        // Map buffer 2. Explicit map on a persistently mapped allocation must
        // return the same pointer as pMappedData.
        char* data2 = nullptr;
        res = vmaMapMemory(g_hAllocator, bufferInfos[2].Allocation, (void**)&data2);
        TEST(res == VK_SUCCESS && data2 == allocInfo.pMappedData);
        data2[0xFFFF] = data2[0];

        // Unmap buffer 2. The persistent mapping must remain in place.
        vmaUnmapMemory(g_hAllocator, bufferInfos[2].Allocation);
        vmaGetAllocationInfo(g_hAllocator, bufferInfos[2].Allocation, &allocInfo);
        TEST(allocInfo.pMappedData == data2);

        // Destroy all buffers.
        for(size_t i = 3; i--; )
            vmaDestroyBuffer(g_hAllocator, bufferInfos[i].Buffer, bufferInfos[i].Allocation);

        vmaDestroyPool(g_hAllocator, pool);
    }
}
5166 
5167 // Test CREATE_MAPPED with required DEVICE_LOCAL. There was a bug with it.
TestDeviceLocalMapped()5168 static void TestDeviceLocalMapped()
5169 {
5170     VkResult res;
5171 
5172     for(uint32_t testIndex = 0; testIndex < 3; ++testIndex)
5173     {
5174         VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
5175         bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
5176         bufCreateInfo.size = 4096;
5177 
5178         VmaPool pool = VK_NULL_HANDLE;
5179         VmaAllocationCreateInfo allocCreateInfo = {};
5180         allocCreateInfo.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
5181         allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
5182         if(testIndex == 2)
5183         {
5184             VmaPoolCreateInfo poolCreateInfo = {};
5185             res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &poolCreateInfo.memoryTypeIndex);
5186             TEST(res == VK_SUCCESS);
5187             res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
5188             TEST(res == VK_SUCCESS);
5189             allocCreateInfo.pool = pool;
5190         }
5191         else if(testIndex == 1)
5192         {
5193             allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT;
5194         }
5195 
5196         VkBuffer buf = VK_NULL_HANDLE;
5197         VmaAllocation alloc = VK_NULL_HANDLE;
5198         VmaAllocationInfo allocInfo = {};
5199         res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
5200         TEST(res == VK_SUCCESS && alloc);
5201 
5202         VkMemoryPropertyFlags memTypeFlags = 0;
5203         vmaGetMemoryTypeProperties(g_hAllocator, allocInfo.memoryType, &memTypeFlags);
5204         const bool shouldBeMapped = (memTypeFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0;
5205         TEST((allocInfo.pMappedData != nullptr) == shouldBeMapped);
5206 
5207         vmaDestroyBuffer(g_hAllocator, buf, alloc);
5208         vmaDestroyPool(g_hAllocator, pool);
5209     }
5210 }
5211 
// Stress-tests vmaMapMemory / vmaUnmapMemory and persistently mapped
// allocations from many threads at once, in three variants: default
// allocations, allocations from a custom VmaPool, and dedicated allocations.
// Each thread creates its share of buffers, maps them according to a randomly
// chosen MODE, touches the first and last mapped byte, and verifies the
// pMappedData bookkeeping reported by vmaGetAllocationInfo.
static void TestMappingMultithreaded()
{
    wprintf(L"Testing mapping multithreaded...\n");

    static const uint32_t threadCount = 16;
    static const uint32_t bufferCount = 1024;
    // Buffers are split evenly; each thread owns threadBufferCount of them.
    static const uint32_t threadBufferCount = bufferCount / threadCount;

    VkResult res;
    // Memory type of the first allocation made by any thread; published
    // cross-thread (volatile) so the TEST_POOL iteration can create a pool
    // in the same memory type. Written in the worker lambdas below.
    volatile uint32_t memTypeIndex = UINT32_MAX;

    enum TEST
    {
        TEST_NORMAL,
        TEST_POOL,
        TEST_DEDICATED,
        TEST_COUNT
    };
    for(uint32_t testIndex = 0; testIndex < TEST_COUNT; ++testIndex)
    {
        VmaPool pool = nullptr;
        if(testIndex == TEST_POOL)
        {
            // Requires memTypeIndex published by the previous (TEST_NORMAL) pass.
            TEST(memTypeIndex != UINT32_MAX);
            VmaPoolCreateInfo poolInfo = {};
            poolInfo.memoryTypeIndex = memTypeIndex;
            res = vmaCreatePool(g_hAllocator, &poolInfo, &pool);
            TEST(res == VK_SUCCESS);
        }

        // 64 KB transfer-src buffer; size matches the data[0xFFFF] touch below.
        VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
        bufCreateInfo.size = 0x10000;
        bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;

        // CPU_ONLY so the memory is host-visible and mappable.
        VmaAllocationCreateInfo allocCreateInfo = {};
        allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
        allocCreateInfo.pool = pool;
        if(testIndex == TEST_DEDICATED)
            allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;

        std::thread threads[threadCount];
        for(uint32_t threadIndex = 0; threadIndex < threadCount; ++threadIndex)
        {
            // Capture by copy except memTypeIndex, which threads write/read.
            threads[threadIndex] = std::thread([=, &memTypeIndex](){
                // ======== THREAD FUNCTION ========

                // Per-thread deterministic RNG seeded with the thread index.
                RandomNumberGenerator rand{threadIndex};

                enum class MODE
                {
                    // Don't map this buffer at all.
                    DONT_MAP,
                    // Map and quickly unmap.
                    MAP_FOR_MOMENT,
                    // Map and unmap before destruction.
                    MAP_FOR_LONGER,
                    // Map two times. Quickly unmap, second unmap before destruction.
                    MAP_TWO_TIMES,
                    // Create this buffer as persistently mapped.
                    PERSISTENTLY_MAPPED,
                    COUNT
                };
                std::vector<BufferInfo> bufInfos{threadBufferCount};
                // Mode per buffer, remembered for the teardown loop below.
                std::vector<MODE> bufModes{threadBufferCount};

                for(uint32_t bufferIndex = 0; bufferIndex < threadBufferCount; ++bufferIndex)
                {
                    BufferInfo& bufInfo = bufInfos[bufferIndex];
                    const MODE mode = (MODE)(rand.Generate() % (uint32_t)MODE::COUNT);
                    bufModes[bufferIndex] = mode;

                    VmaAllocationCreateInfo localAllocCreateInfo = allocCreateInfo;
                    if(mode == MODE::PERSISTENTLY_MAPPED)
                        localAllocCreateInfo.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT;

                    VmaAllocationInfo allocInfo;
                    VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &localAllocCreateInfo,
                        &bufInfo.Buffer, &bufInfo.Allocation, &allocInfo);
                    TEST(res == VK_SUCCESS);

                    // First thread to get here publishes the memory type for TEST_POOL.
                    if(memTypeIndex == UINT32_MAX)
                        memTypeIndex = allocInfo.memoryType;

                    char* data = nullptr;

                    if(mode == MODE::PERSISTENTLY_MAPPED)
                    {
                        // Mapped at creation time; pointer comes from allocInfo.
                        data = (char*)allocInfo.pMappedData;
                        TEST(data != nullptr);
                    }
                    else if(mode == MODE::MAP_FOR_MOMENT || mode == MODE::MAP_FOR_LONGER ||
                        mode == MODE::MAP_TWO_TIMES)
                    {
                        TEST(data == nullptr);
                        res = vmaMapMemory(g_hAllocator, bufInfo.Allocation, (void**)&data);
                        TEST(res == VK_SUCCESS && data != nullptr);

                        if(mode == MODE::MAP_TWO_TIMES)
                        {
                            // Second map of the same allocation must return the same pointer.
                            char* data2 = nullptr;
                            res = vmaMapMemory(g_hAllocator, bufInfo.Allocation, (void**)&data2);
                            TEST(res == VK_SUCCESS && data2 == data);
                        }
                    }
                    else if(mode == MODE::DONT_MAP)
                    {
                        TEST(allocInfo.pMappedData == nullptr);
                    }
                    else
                        TEST(0);

                    // Test if reading and writing from the beginning and end of mapped memory doesn't crash.
                    if(data)
                        data[0xFFFF] = data[0];

                    if(mode == MODE::MAP_FOR_MOMENT || mode == MODE::MAP_TWO_TIMES)
                    {
                        // Drop one map count, then check the remaining map state.
                        vmaUnmapMemory(g_hAllocator, bufInfo.Allocation);

                        VmaAllocationInfo allocInfo;
                        vmaGetAllocationInfo(g_hAllocator, bufInfo.Allocation, &allocInfo);
                        if(mode == MODE::MAP_FOR_MOMENT)
                            TEST(allocInfo.pMappedData == nullptr);
                        else
                            TEST(allocInfo.pMappedData == data);
                    }

                    // Randomly vary thread interleaving to shake out races.
                    switch(rand.Generate() % 3)
                    {
                    case 0: Sleep(0); break; // Yield.
                    case 1: Sleep(10); break; // 10 ms
                    // default: No sleep.
                    }

                    // Test if reading and writing from the beginning and end of mapped memory doesn't crash.
                    if(data)
                        data[0xFFFF] = data[0];
                }

                // Teardown in reverse order: finish outstanding unmaps, then destroy.
                for(size_t bufferIndex = threadBufferCount; bufferIndex--; )
                {
                    if(bufModes[bufferIndex] == MODE::MAP_FOR_LONGER ||
                        bufModes[bufferIndex] == MODE::MAP_TWO_TIMES)
                    {
                        vmaUnmapMemory(g_hAllocator, bufInfos[bufferIndex].Allocation);

                        VmaAllocationInfo allocInfo;
                        vmaGetAllocationInfo(g_hAllocator, bufInfos[bufferIndex].Allocation, &allocInfo);
                        TEST(allocInfo.pMappedData == nullptr);
                    }

                    vmaDestroyBuffer(g_hAllocator, bufInfos[bufferIndex].Buffer, bufInfos[bufferIndex].Allocation);
                }
            });
        }

        for(uint32_t threadIndex = 0; threadIndex < threadCount; ++threadIndex)
            threads[threadIndex].join();

        // Safe even when pool is null (TEST_NORMAL / TEST_DEDICATED passes).
        vmaDestroyPool(g_hAllocator, pool);
    }
}
5374 
// Writes the CSV column-header row for main-test results to `file`.
// Column order must stay in sync with WriteMainTestResult.
static void WriteMainTestResultHeader(FILE* file)
{
    static const char* const HEADER =
        "Code,Time,"
        "Threads,Buffers and images,Sizes,Operations,Allocation strategy,Free order,"
        "Total Time (us),"
        "Allocation Time Min (us),"
        "Allocation Time Avg (us),"
        "Allocation Time Max (us),"
        "Deallocation Time Min (us),"
        "Deallocation Time Avg (us),"
        "Deallocation Time Max (us),"
        "Total Memory Allocated (B),"
        "Free Range Size Avg (B),"
        "Free Range Size Max (B)\n";
    // The header contains no conversion specifiers, so a plain string write
    // emits exactly the same bytes as the former fprintf.
    fputs(HEADER, file);
}
5391 
WriteMainTestResult(FILE * file,const char * codeDescription,const char * testDescription,const Config & config,const Result & result)5392 static void WriteMainTestResult(
5393     FILE* file,
5394     const char* codeDescription,
5395     const char* testDescription,
5396     const Config& config, const Result& result)
5397 {
5398     float totalTimeSeconds = ToFloatSeconds(result.TotalTime);
5399     float allocationTimeMinSeconds = ToFloatSeconds(result.AllocationTimeMin);
5400     float allocationTimeAvgSeconds = ToFloatSeconds(result.AllocationTimeAvg);
5401     float allocationTimeMaxSeconds = ToFloatSeconds(result.AllocationTimeMax);
5402     float deallocationTimeMinSeconds = ToFloatSeconds(result.DeallocationTimeMin);
5403     float deallocationTimeAvgSeconds = ToFloatSeconds(result.DeallocationTimeAvg);
5404     float deallocationTimeMaxSeconds = ToFloatSeconds(result.DeallocationTimeMax);
5405 
5406     std::string currTime;
5407     CurrentTimeToStr(currTime);
5408 
5409     fprintf(file,
5410         "%s,%s,%s,"
5411         "%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%I64u,%I64u,%I64u\n",
5412         codeDescription,
5413         currTime.c_str(),
5414         testDescription,
5415         totalTimeSeconds * 1e6f,
5416         allocationTimeMinSeconds * 1e6f,
5417         allocationTimeAvgSeconds * 1e6f,
5418         allocationTimeMaxSeconds * 1e6f,
5419         deallocationTimeMinSeconds * 1e6f,
5420         deallocationTimeAvgSeconds * 1e6f,
5421         deallocationTimeMaxSeconds * 1e6f,
5422         result.TotalMemoryAllocated,
5423         result.FreeRangeSizeAvg,
5424         result.FreeRangeSizeMax);
5425 }
5426 
// Writes the CSV column-header row for pool-test results to `file`.
// Column order must stay in sync with WritePoolTestResult.
static void WritePoolTestResultHeader(FILE* file)
{
    static const char* const HEADER =
        "Code,Test,Time,"
        "Config,"
        "Total Time (us),"
        "Allocation Time Min (us),"
        "Allocation Time Avg (us),"
        "Allocation Time Max (us),"
        "Deallocation Time Min (us),"
        "Deallocation Time Avg (us),"
        "Deallocation Time Max (us),"
        "Lost Allocation Count,"
        "Lost Allocation Total Size (B),"
        "Failed Allocation Count,"
        "Failed Allocation Total Size (B)\n";
    // No conversion specifiers in the header, so a plain string write emits
    // exactly the same bytes as the former fprintf.
    fputs(HEADER, file);
}
5444 
WritePoolTestResult(FILE * file,const char * codeDescription,const char * testDescription,const PoolTestConfig & config,const PoolTestResult & result)5445 static void WritePoolTestResult(
5446     FILE* file,
5447     const char* codeDescription,
5448     const char* testDescription,
5449     const PoolTestConfig& config,
5450     const PoolTestResult& result)
5451 {
5452     float totalTimeSeconds = ToFloatSeconds(result.TotalTime);
5453     float allocationTimeMinSeconds = ToFloatSeconds(result.AllocationTimeMin);
5454     float allocationTimeAvgSeconds = ToFloatSeconds(result.AllocationTimeAvg);
5455     float allocationTimeMaxSeconds = ToFloatSeconds(result.AllocationTimeMax);
5456     float deallocationTimeMinSeconds = ToFloatSeconds(result.DeallocationTimeMin);
5457     float deallocationTimeAvgSeconds = ToFloatSeconds(result.DeallocationTimeAvg);
5458     float deallocationTimeMaxSeconds = ToFloatSeconds(result.DeallocationTimeMax);
5459 
5460     std::string currTime;
5461     CurrentTimeToStr(currTime);
5462 
5463     fprintf(file,
5464         "%s,%s,%s,"
5465         "ThreadCount=%u PoolSize=%llu FrameCount=%u TotalItemCount=%u UsedItemCount=%u...%u ItemsToMakeUnusedPercent=%u,"
5466         "%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%I64u,%I64u,%I64u,%I64u\n",
5467         // General
5468         codeDescription,
5469         testDescription,
5470         currTime.c_str(),
5471         // Config
5472         config.ThreadCount,
5473         (unsigned long long)config.PoolSize,
5474         config.FrameCount,
5475         config.TotalItemCount,
5476         config.UsedItemCountMin,
5477         config.UsedItemCountMax,
5478         config.ItemsToMakeUnusedPercent,
5479         // Results
5480         totalTimeSeconds * 1e6f,
5481         allocationTimeMinSeconds * 1e6f,
5482         allocationTimeAvgSeconds * 1e6f,
5483         allocationTimeMaxSeconds * 1e6f,
5484         deallocationTimeMinSeconds * 1e6f,
5485         deallocationTimeAvgSeconds * 1e6f,
5486         deallocationTimeMaxSeconds * 1e6f,
5487         result.LostAllocationCount,
5488         result.LostAllocationTotalSize,
5489         result.FailedAllocationCount,
5490         result.FailedAllocationTotalSize);
5491 }
5492 
PerformCustomMainTest(FILE * file)5493 static void PerformCustomMainTest(FILE* file)
5494 {
5495     Config config{};
5496     config.RandSeed = 65735476;
5497     //config.MaxBytesToAllocate = 4ull * 1024 * 1024; // 4 MB
5498     config.MaxBytesToAllocate = 4ull * 1024 * 1024 * 1024; // 4 GB
5499     config.MemUsageProbability[0] = 1; // VMA_MEMORY_USAGE_GPU_ONLY
5500     config.FreeOrder = FREE_ORDER::FORWARD;
5501     config.ThreadCount = 16;
5502     config.ThreadsUsingCommonAllocationsProbabilityPercent = 50;
5503     config.AllocationStrategy = 0;
5504 
5505     // Buffers
5506     //config.AllocationSizes.push_back({4, 16, 1024});
5507     config.AllocationSizes.push_back({4, 0x10000, 0xA00000}); // 64 KB ... 10 MB
5508 
5509     // Images
5510     //config.AllocationSizes.push_back({4, 0, 0, 4, 32});
5511     //config.AllocationSizes.push_back({4, 0, 0, 256, 2048});
5512 
5513     config.BeginBytesToAllocate = config.MaxBytesToAllocate * 5 / 100;
5514     config.AdditionalOperationCount = 1024;
5515 
5516     Result result{};
5517     VkResult res = MainTest(result, config);
5518     TEST(res == VK_SUCCESS);
5519     WriteMainTestResult(file, "Foo", "CustomTest", config, result);
5520 }
5521 
PerformCustomPoolTest(FILE * file)5522 static void PerformCustomPoolTest(FILE* file)
5523 {
5524     PoolTestConfig config;
5525     config.PoolSize = 100 * 1024 * 1024;
5526     config.RandSeed = 2345764;
5527     config.ThreadCount = 1;
5528     config.FrameCount = 200;
5529     config.ItemsToMakeUnusedPercent = 2;
5530 
5531     AllocationSize allocSize = {};
5532     allocSize.BufferSizeMin = 1024;
5533     allocSize.BufferSizeMax = 1024 * 1024;
5534     allocSize.Probability = 1;
5535     config.AllocationSizes.push_back(allocSize);
5536 
5537     allocSize.BufferSizeMin = 0;
5538     allocSize.BufferSizeMax = 0;
5539     allocSize.ImageSizeMin = 128;
5540     allocSize.ImageSizeMax = 1024;
5541     allocSize.Probability = 1;
5542     config.AllocationSizes.push_back(allocSize);
5543 
5544     config.PoolSize = config.CalcAvgResourceSize() * 200;
5545     config.UsedItemCountMax = 160;
5546     config.TotalItemCount = config.UsedItemCountMax * 10;
5547     config.UsedItemCountMin = config.UsedItemCountMax * 80 / 100;
5548 
5549     g_MemoryAliasingWarningEnabled = false;
5550     PoolTestResult result = {};
5551     TestPool_Benchmark(result, config);
5552     g_MemoryAliasingWarningEnabled = true;
5553 
5554     WritePoolTestResult(file, "Code desc", "Test desc", config, result);
5555 }
5556 
// Sweeps the main allocation benchmark over a grid of configurations:
// thread count x (buffers/images) x (small/large sizes) x (varying/constant
// sizes) x initial-allocation percentage x allocation strategy x free order.
// The number of grid points per axis grows with the global ConfigType.
// Each run's result row is appended to `file` (may be null to skip writing).
static void PerformMainTests(FILE* file)
{
    // Repeat each configuration several times only in the largest config.
    uint32_t repeatCount = 1;
    if(ConfigType >= CONFIG_TYPE_MAXIMUM) repeatCount = 3;

    Config config{};
    config.RandSeed = 65735476;
    config.MemUsageProbability[0] = 1; // VMA_MEMORY_USAGE_GPU_ONLY
    config.FreeOrder = FREE_ORDER::FORWARD;

    // Axis 1: how many thread-count variants to test (see switch below).
    size_t threadCountCount = 1;
    switch(ConfigType)
    {
    case CONFIG_TYPE_MINIMUM: threadCountCount = 1; break;
    case CONFIG_TYPE_SMALL:   threadCountCount = 2; break;
    case CONFIG_TYPE_AVERAGE: threadCountCount = 3; break;
    case CONFIG_TYPE_LARGE:   threadCountCount = 5; break;
    case CONFIG_TYPE_MAXIMUM: threadCountCount = 7; break;
    default: assert(0);
    }

    const size_t strategyCount = GetAllocationStrategyCount();

    for(size_t threadCountIndex = 0; threadCountIndex < threadCountCount; ++threadCountIndex)
    {
        // desc1..desc6 accumulate a human-readable test description,
        // one fragment per nesting level.
        std::string desc1;

        switch(threadCountIndex)
        {
        case 0:
            desc1 += "1_thread";
            config.ThreadCount = 1;
            config.ThreadsUsingCommonAllocationsProbabilityPercent = 0;
            break;
        case 1:
            desc1 += "16_threads+0%_common";
            config.ThreadCount = 16;
            config.ThreadsUsingCommonAllocationsProbabilityPercent = 0;
            break;
        case 2:
            desc1 += "16_threads+50%_common";
            config.ThreadCount = 16;
            config.ThreadsUsingCommonAllocationsProbabilityPercent = 50;
            break;
        case 3:
            desc1 += "16_threads+100%_common";
            config.ThreadCount = 16;
            config.ThreadsUsingCommonAllocationsProbabilityPercent = 100;
            break;
        case 4:
            desc1 += "2_threads+0%_common";
            config.ThreadCount = 2;
            config.ThreadsUsingCommonAllocationsProbabilityPercent = 0;
            break;
        case 5:
            desc1 += "2_threads+50%_common";
            config.ThreadCount = 2;
            config.ThreadsUsingCommonAllocationsProbabilityPercent = 50;
            break;
        case 6:
            desc1 += "2_threads+100%_common";
            config.ThreadCount = 2;
            config.ThreadsUsingCommonAllocationsProbabilityPercent = 100;
            break;
        default:
            assert(0);
        }

        // Axis 2: 0 = buffers, 1 = images, 2 = buffers and images
        size_t buffersVsImagesCount = 2;
        if(ConfigType >= CONFIG_TYPE_LARGE) ++buffersVsImagesCount;
        for(size_t buffersVsImagesIndex = 0; buffersVsImagesIndex < buffersVsImagesCount; ++buffersVsImagesIndex)
        {
            std::string desc2 = desc1;
            switch(buffersVsImagesIndex)
            {
            case 0: desc2 += ",Buffers"; break;
            case 1: desc2 += ",Images"; break;
            case 2: desc2 += ",Buffers+Images"; break;
            default: assert(0);
            }

            // Axis 3: 0 = small, 1 = large, 2 = small and large
            size_t smallVsLargeCount = 2;
            if(ConfigType >= CONFIG_TYPE_LARGE) ++smallVsLargeCount;
            for(size_t smallVsLargeIndex = 0; smallVsLargeIndex < smallVsLargeCount; ++smallVsLargeIndex)
            {
                std::string desc3 = desc2;
                switch(smallVsLargeIndex)
                {
                case 0: desc3 += ",Small"; break;
                case 1: desc3 += ",Large"; break;
                case 2: desc3 += ",Small+Large"; break;
                default: assert(0);
                }

                // Larger budget whenever large resources participate.
                if(smallVsLargeIndex == 1 || smallVsLargeIndex == 2)
                    config.MaxBytesToAllocate = 4ull * 1024 * 1024 * 1024; // 4 GB
                else
                    config.MaxBytesToAllocate = 4ull * 1024 * 1024;

                // Axis 4: 0 = varying sizes min...max, 1 = set of constant sizes
                size_t constantSizesCount = 1;
                if(ConfigType >= CONFIG_TYPE_SMALL) ++constantSizesCount;
                for(size_t constantSizesIndex = 0; constantSizesIndex < constantSizesCount; ++constantSizesIndex)
                {
                    std::string desc4 = desc3;
                    switch(constantSizesIndex)
                    {
                    case 0: desc4 += " Varying_sizes"; break;
                    case 1: desc4 += " Constant_sizes"; break;
                    default: assert(0);
                    }

                    // Rebuild the size table for this combination of axes 2-4.
                    config.AllocationSizes.clear();
                    // Buffers present
                    if(buffersVsImagesIndex == 0 || buffersVsImagesIndex == 2)
                    {
                        // Small
                        if(smallVsLargeIndex == 0 || smallVsLargeIndex == 2)
                        {
                            // Varying size
                            if(constantSizesIndex == 0)
                                config.AllocationSizes.push_back({4, 16, 1024});
                            // Constant sizes
                            else
                            {
                                config.AllocationSizes.push_back({1, 16, 16});
                                config.AllocationSizes.push_back({1, 64, 64});
                                config.AllocationSizes.push_back({1, 256, 256});
                                config.AllocationSizes.push_back({1, 1024, 1024});
                            }
                        }
                        // Large
                        if(smallVsLargeIndex == 1 || smallVsLargeIndex == 2)
                        {
                            // Varying size
                            if(constantSizesIndex == 0)
                                config.AllocationSizes.push_back({4, 0x10000, 0xA00000}); // 64 KB ... 10 MB
                            // Constant sizes
                            else
                            {
                                config.AllocationSizes.push_back({1, 0x10000, 0x10000});
                                config.AllocationSizes.push_back({1, 0x80000, 0x80000});
                                config.AllocationSizes.push_back({1, 0x200000, 0x200000});
                                config.AllocationSizes.push_back({1, 0xA00000, 0xA00000});
                            }
                        }
                    }
                    // Images present
                    if(buffersVsImagesIndex == 1 || buffersVsImagesIndex == 2)
                    {
                        // Small
                        if(smallVsLargeIndex == 0 || smallVsLargeIndex == 2)
                        {
                            // Varying size
                            if(constantSizesIndex == 0)
                                config.AllocationSizes.push_back({4, 0, 0, 4, 32});
                            // Constant sizes
                            else
                            {
                                config.AllocationSizes.push_back({1, 0, 0,  4,  4});
                                config.AllocationSizes.push_back({1, 0, 0,  8,  8});
                                config.AllocationSizes.push_back({1, 0, 0, 16, 16});
                                config.AllocationSizes.push_back({1, 0, 0, 32, 32});
                            }
                        }
                        // Large
                        if(smallVsLargeIndex == 1 || smallVsLargeIndex == 2)
                        {
                            // Varying size
                            if(constantSizesIndex == 0)
                                config.AllocationSizes.push_back({4, 0, 0, 256, 2048});
                            // Constant sizes
                            else
                            {
                                config.AllocationSizes.push_back({1, 0, 0,  256,  256});
                                config.AllocationSizes.push_back({1, 0, 0,  512,  512});
                                config.AllocationSizes.push_back({1, 0, 0, 1024, 1024});
                                config.AllocationSizes.push_back({1, 0, 0, 2048, 2048});
                            }
                        }
                    }

                    // Axis 5: 0 = 100%, additional_operations = 0, 1 = 50%, 2 = 5%, 3 = 95% additional_operations = a lot
                    size_t beginBytesToAllocateCount = 1;
                    if(ConfigType >= CONFIG_TYPE_SMALL) ++beginBytesToAllocateCount;
                    if(ConfigType >= CONFIG_TYPE_AVERAGE) ++beginBytesToAllocateCount;
                    if(ConfigType >= CONFIG_TYPE_LARGE) ++beginBytesToAllocateCount;
                    for(size_t beginBytesToAllocateIndex = 0; beginBytesToAllocateIndex < beginBytesToAllocateCount; ++beginBytesToAllocateIndex)
                    {
                        std::string desc5 = desc4;

                        switch(beginBytesToAllocateIndex)
                        {
                        case 0:
                            desc5 += ",Allocate_100%";
                            config.BeginBytesToAllocate = config.MaxBytesToAllocate;
                            config.AdditionalOperationCount = 0;
                            break;
                        case 1:
                            desc5 += ",Allocate_50%+Operations";
                            config.BeginBytesToAllocate = config.MaxBytesToAllocate * 50 / 100;
                            config.AdditionalOperationCount = 1024;
                            break;
                        case 2:
                            desc5 += ",Allocate_5%+Operations";
                            config.BeginBytesToAllocate = config.MaxBytesToAllocate *  5 / 100;
                            config.AdditionalOperationCount = 1024;
                            break;
                        case 3:
                            desc5 += ",Allocate_95%+Operations";
                            config.BeginBytesToAllocate = config.MaxBytesToAllocate * 95 / 100;
                            config.AdditionalOperationCount = 1024;
                            break;
                        default:
                            assert(0);
                        }

                        // Axis 6: allocation strategy (count queried from the library above).
                        for(size_t strategyIndex = 0; strategyIndex < strategyCount; ++strategyIndex)
                        {
                            std::string desc6 = desc5;
                            switch(strategyIndex)
                            {
                            case 0:
                                desc6 += ",BestFit";
                                config.AllocationStrategy = VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT;
                                break;
                            case 1:
                                desc6 += ",WorstFit";
                                config.AllocationStrategy = VMA_ALLOCATION_CREATE_STRATEGY_WORST_FIT_BIT;
                                break;
                            case 2:
                                desc6 += ",FirstFit";
                                config.AllocationStrategy = VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT;
                                break;
                            default:
                                assert(0);
                            }

                            desc6 += ',';
                            desc6 += FREE_ORDER_NAMES[(uint32_t)config.FreeOrder];

                            const char* testDescription = desc6.c_str();

                            // Run the configured benchmark (repeatCount times) and record results.
                            for(size_t repeat = 0; repeat < repeatCount; ++repeat)
                            {
                                printf("%s #%u\n", testDescription, (uint32_t)repeat);

                                Result result{};
                                VkResult res = MainTest(result, config);
                                TEST(res == VK_SUCCESS);
                                if(file)
                                {
                                    WriteMainTestResult(file, CODE_DESCRIPTION, testDescription, config, result);
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}
5821 
PerformPoolTests(FILE * file)5822 static void PerformPoolTests(FILE* file)
5823 {
5824     const size_t AVG_RESOURCES_PER_POOL = 300;
5825 
5826     uint32_t repeatCount = 1;
5827     if(ConfigType >= CONFIG_TYPE_MAXIMUM) repeatCount = 3;
5828 
5829     PoolTestConfig config{};
5830     config.RandSeed = 2346343;
5831     config.FrameCount = 200;
5832     config.ItemsToMakeUnusedPercent = 2;
5833 
5834     size_t threadCountCount = 1;
5835     switch(ConfigType)
5836     {
5837     case CONFIG_TYPE_MINIMUM: threadCountCount = 1; break;
5838     case CONFIG_TYPE_SMALL:   threadCountCount = 2; break;
5839     case CONFIG_TYPE_AVERAGE: threadCountCount = 2; break;
5840     case CONFIG_TYPE_LARGE:   threadCountCount = 3; break;
5841     case CONFIG_TYPE_MAXIMUM: threadCountCount = 3; break;
5842     default: assert(0);
5843     }
5844     for(size_t threadCountIndex = 0; threadCountIndex < threadCountCount; ++threadCountIndex)
5845     {
5846         std::string desc1;
5847 
5848         switch(threadCountIndex)
5849         {
5850         case 0:
5851             desc1 += "1_thread";
5852             config.ThreadCount = 1;
5853             break;
5854         case 1:
5855             desc1 += "16_threads";
5856             config.ThreadCount = 16;
5857             break;
5858         case 2:
5859             desc1 += "2_threads";
5860             config.ThreadCount = 2;
5861             break;
5862         default:
5863             assert(0);
5864         }
5865 
5866         // 0 = buffers, 1 = images, 2 = buffers and images
5867         size_t buffersVsImagesCount = 2;
5868         if(ConfigType >= CONFIG_TYPE_LARGE) ++buffersVsImagesCount;
5869         for(size_t buffersVsImagesIndex = 0; buffersVsImagesIndex < buffersVsImagesCount; ++buffersVsImagesIndex)
5870         {
5871             std::string desc2 = desc1;
5872             switch(buffersVsImagesIndex)
5873             {
5874             case 0: desc2 += " Buffers"; break;
5875             case 1: desc2 += " Images"; break;
5876             case 2: desc2 += " Buffers+Images"; break;
5877             default: assert(0);
5878             }
5879 
5880             // 0 = small, 1 = large, 2 = small and large
5881             size_t smallVsLargeCount = 2;
5882             if(ConfigType >= CONFIG_TYPE_LARGE) ++smallVsLargeCount;
5883             for(size_t smallVsLargeIndex = 0; smallVsLargeIndex < smallVsLargeCount; ++smallVsLargeIndex)
5884             {
5885                 std::string desc3 = desc2;
5886                 switch(smallVsLargeIndex)
5887                 {
5888                 case 0: desc3 += " Small"; break;
5889                 case 1: desc3 += " Large"; break;
5890                 case 2: desc3 += " Small+Large"; break;
5891                 default: assert(0);
5892                 }
5893 
5894                 if(smallVsLargeIndex == 1 || smallVsLargeIndex == 2)
5895                     config.PoolSize = 6ull * 1024 * 1024 * 1024; // 6 GB
5896                 else
5897                     config.PoolSize = 4ull * 1024 * 1024;
5898 
5899                 // 0 = varying sizes min...max, 1 = set of constant sizes
5900                 size_t constantSizesCount = 1;
5901                 if(ConfigType >= CONFIG_TYPE_SMALL) ++constantSizesCount;
5902                 for(size_t constantSizesIndex = 0; constantSizesIndex < constantSizesCount; ++constantSizesIndex)
5903                 {
5904                     std::string desc4 = desc3;
5905                     switch(constantSizesIndex)
5906                     {
5907                     case 0: desc4 += " Varying_sizes"; break;
5908                     case 1: desc4 += " Constant_sizes"; break;
5909                     default: assert(0);
5910                     }
5911 
5912                     config.AllocationSizes.clear();
5913                     // Buffers present
5914                     if(buffersVsImagesIndex == 0 || buffersVsImagesIndex == 2)
5915                     {
5916                         // Small
5917                         if(smallVsLargeIndex == 0 || smallVsLargeIndex == 2)
5918                         {
5919                             // Varying size
5920                             if(constantSizesIndex == 0)
5921                                 config.AllocationSizes.push_back({4, 16, 1024});
5922                             // Constant sizes
5923                             else
5924                             {
5925                                 config.AllocationSizes.push_back({1, 16, 16});
5926                                 config.AllocationSizes.push_back({1, 64, 64});
5927                                 config.AllocationSizes.push_back({1, 256, 256});
5928                                 config.AllocationSizes.push_back({1, 1024, 1024});
5929                             }
5930                         }
5931                         // Large
5932                         if(smallVsLargeIndex == 1 || smallVsLargeIndex == 2)
5933                         {
5934                             // Varying size
5935                             if(constantSizesIndex == 0)
5936                                 config.AllocationSizes.push_back({4, 0x10000, 0xA00000}); // 64 KB ... 10 MB
5937                             // Constant sizes
5938                             else
5939                             {
5940                                 config.AllocationSizes.push_back({1, 0x10000, 0x10000});
5941                                 config.AllocationSizes.push_back({1, 0x80000, 0x80000});
5942                                 config.AllocationSizes.push_back({1, 0x200000, 0x200000});
5943                                 config.AllocationSizes.push_back({1, 0xA00000, 0xA00000});
5944                             }
5945                         }
5946                     }
5947                     // Images present
5948                     if(buffersVsImagesIndex == 1 || buffersVsImagesIndex == 2)
5949                     {
5950                         // Small
5951                         if(smallVsLargeIndex == 0 || smallVsLargeIndex == 2)
5952                         {
5953                             // Varying size
5954                             if(constantSizesIndex == 0)
5955                                 config.AllocationSizes.push_back({4, 0, 0, 4, 32});
5956                             // Constant sizes
5957                             else
5958                             {
5959                                 config.AllocationSizes.push_back({1, 0, 0,  4,  4});
5960                                 config.AllocationSizes.push_back({1, 0, 0,  8,  8});
5961                                 config.AllocationSizes.push_back({1, 0, 0, 16, 16});
5962                                 config.AllocationSizes.push_back({1, 0, 0, 32, 32});
5963                             }
5964                         }
5965                         // Large
5966                         if(smallVsLargeIndex == 1 || smallVsLargeIndex == 2)
5967                         {
5968                             // Varying size
5969                             if(constantSizesIndex == 0)
5970                                 config.AllocationSizes.push_back({4, 0, 0, 256, 2048});
5971                             // Constant sizes
5972                             else
5973                             {
5974                                 config.AllocationSizes.push_back({1, 0, 0,  256,  256});
5975                                 config.AllocationSizes.push_back({1, 0, 0,  512,  512});
5976                                 config.AllocationSizes.push_back({1, 0, 0, 1024, 1024});
5977                                 config.AllocationSizes.push_back({1, 0, 0, 2048, 2048});
5978                             }
5979                         }
5980                     }
5981 
5982                     const VkDeviceSize avgResourceSize = config.CalcAvgResourceSize();
5983                     config.PoolSize = avgResourceSize * AVG_RESOURCES_PER_POOL;
5984 
5985                     // 0 = 66%, 1 = 133%, 2 = 100%, 3 = 33%, 4 = 166%
5986                     size_t subscriptionModeCount;
5987                     switch(ConfigType)
5988                     {
5989                     case CONFIG_TYPE_MINIMUM: subscriptionModeCount = 2; break;
5990                     case CONFIG_TYPE_SMALL:   subscriptionModeCount = 2; break;
5991                     case CONFIG_TYPE_AVERAGE: subscriptionModeCount = 3; break;
5992                     case CONFIG_TYPE_LARGE:   subscriptionModeCount = 5; break;
5993                     case CONFIG_TYPE_MAXIMUM: subscriptionModeCount = 5; break;
5994                     default: assert(0);
5995                     }
5996                     for(size_t subscriptionModeIndex = 0; subscriptionModeIndex < subscriptionModeCount; ++subscriptionModeIndex)
5997                     {
5998                         std::string desc5 = desc4;
5999 
6000                         switch(subscriptionModeIndex)
6001                         {
6002                         case 0:
6003                             desc5 += " Subscription_66%";
6004                             config.UsedItemCountMax = AVG_RESOURCES_PER_POOL * 66 / 100;
6005                             break;
6006                         case 1:
6007                             desc5 += " Subscription_133%";
6008                             config.UsedItemCountMax = AVG_RESOURCES_PER_POOL * 133 / 100;
6009                             break;
6010                         case 2:
6011                             desc5 += " Subscription_100%";
6012                             config.UsedItemCountMax = AVG_RESOURCES_PER_POOL;
6013                             break;
6014                         case 3:
6015                             desc5 += " Subscription_33%";
6016                             config.UsedItemCountMax = AVG_RESOURCES_PER_POOL * 33 / 100;
6017                             break;
6018                         case 4:
6019                             desc5 += " Subscription_166%";
6020                             config.UsedItemCountMax = AVG_RESOURCES_PER_POOL * 166 / 100;
6021                             break;
6022                         default:
6023                             assert(0);
6024                         }
6025 
6026                         config.TotalItemCount = config.UsedItemCountMax * 5;
6027                         config.UsedItemCountMin = config.UsedItemCountMax * 80 / 100;
6028 
6029                         const char* testDescription = desc5.c_str();
6030 
6031                         for(size_t repeat = 0; repeat < repeatCount; ++repeat)
6032                         {
6033                             printf("%s #%u\n", testDescription, (uint32_t)repeat);
6034 
6035                             PoolTestResult result{};
6036                             g_MemoryAliasingWarningEnabled = false;
6037                             TestPool_Benchmark(result, config);
6038                             g_MemoryAliasingWarningEnabled = true;
6039                             WritePoolTestResult(file, CODE_DESCRIPTION, testDescription, config, result);
6040                         }
6041                     }
6042                 }
6043             }
6044         }
6045     }
6046 }
6047 
BasicTestBuddyAllocator()6048 static void BasicTestBuddyAllocator()
6049 {
6050     wprintf(L"Basic test buddy allocator\n");
6051 
6052     RandomNumberGenerator rand{76543};
6053 
6054     VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
6055     sampleBufCreateInfo.size = 1024; // Whatever.
6056     sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
6057 
6058     VmaAllocationCreateInfo sampleAllocCreateInfo = {};
6059     sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
6060 
6061     VmaPoolCreateInfo poolCreateInfo = {};
6062     VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
6063     TEST(res == VK_SUCCESS);
6064 
6065     // Deliberately adding 1023 to test usable size smaller than memory block size.
6066     poolCreateInfo.blockSize = 1024 * 1024 + 1023;
6067     poolCreateInfo.flags = VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT;
6068     //poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1;
6069 
6070     VmaPool pool = nullptr;
6071     res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
6072     TEST(res == VK_SUCCESS);
6073 
6074     VkBufferCreateInfo bufCreateInfo = sampleBufCreateInfo;
6075 
6076     VmaAllocationCreateInfo allocCreateInfo = {};
6077     allocCreateInfo.pool = pool;
6078 
6079     std::vector<BufferInfo> bufInfo;
6080     BufferInfo newBufInfo;
6081     VmaAllocationInfo allocInfo;
6082 
6083     bufCreateInfo.size = 1024 * 256;
6084     res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
6085         &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
6086     TEST(res == VK_SUCCESS);
6087     bufInfo.push_back(newBufInfo);
6088 
6089     bufCreateInfo.size = 1024 * 512;
6090     res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
6091         &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
6092     TEST(res == VK_SUCCESS);
6093     bufInfo.push_back(newBufInfo);
6094 
6095     bufCreateInfo.size = 1024 * 128;
6096     res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
6097         &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
6098     TEST(res == VK_SUCCESS);
6099     bufInfo.push_back(newBufInfo);
6100 
6101     // Test very small allocation, smaller than minimum node size.
6102     bufCreateInfo.size = 1;
6103     res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
6104         &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
6105     TEST(res == VK_SUCCESS);
6106     bufInfo.push_back(newBufInfo);
6107 
6108     // Test some small allocation with alignment requirement.
6109     {
6110         VkMemoryRequirements memReq;
6111         memReq.alignment = 256;
6112         memReq.memoryTypeBits = UINT32_MAX;
6113         memReq.size = 32;
6114 
6115         newBufInfo.Buffer = VK_NULL_HANDLE;
6116         res = vmaAllocateMemory(g_hAllocator, &memReq, &allocCreateInfo,
6117             &newBufInfo.Allocation, &allocInfo);
6118         TEST(res == VK_SUCCESS);
6119         TEST(allocInfo.offset % memReq.alignment == 0);
6120         bufInfo.push_back(newBufInfo);
6121     }
6122 
6123     //SaveAllocatorStatsToFile(L"TEST.json");
6124 
6125     VmaPoolStats stats = {};
6126     vmaGetPoolStats(g_hAllocator, pool, &stats);
6127     int DBG = 0; // Set breakpoint here to inspect `stats`.
6128 
6129     // Allocate enough new buffers to surely fall into second block.
6130     for(uint32_t i = 0; i < 32; ++i)
6131     {
6132         bufCreateInfo.size = 1024 * (rand.Generate() % 32 + 1);
6133         res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
6134             &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
6135         TEST(res == VK_SUCCESS);
6136         bufInfo.push_back(newBufInfo);
6137     }
6138 
6139     SaveAllocatorStatsToFile(L"BuddyTest01.json");
6140 
6141     // Destroy the buffers in random order.
6142     while(!bufInfo.empty())
6143     {
6144         const size_t indexToDestroy = rand.Generate() % bufInfo.size();
6145         const BufferInfo& currBufInfo = bufInfo[indexToDestroy];
6146         vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
6147         bufInfo.erase(bufInfo.begin() + indexToDestroy);
6148     }
6149 
6150     vmaDestroyPool(g_hAllocator, pool);
6151 }
6152 
BasicTestAllocatePages()6153 static void BasicTestAllocatePages()
6154 {
6155     wprintf(L"Basic test allocate pages\n");
6156 
6157     RandomNumberGenerator rand{765461};
6158 
6159     VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
6160     sampleBufCreateInfo.size = 1024; // Whatever.
6161     sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
6162 
6163     VmaAllocationCreateInfo sampleAllocCreateInfo = {};
6164     sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
6165 
6166     VmaPoolCreateInfo poolCreateInfo = {};
6167     VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
6168     TEST(res == VK_SUCCESS);
6169 
6170     // 1 block of 1 MB.
6171     poolCreateInfo.blockSize = 1024 * 1024;
6172     poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1;
6173 
6174     // Create pool.
6175     VmaPool pool = nullptr;
6176     res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
6177     TEST(res == VK_SUCCESS);
6178 
6179     // Make 100 allocations of 4 KB - they should fit into the pool.
6180     VkMemoryRequirements memReq;
6181     memReq.memoryTypeBits = UINT32_MAX;
6182     memReq.alignment = 4 * 1024;
6183     memReq.size = 4 * 1024;
6184 
6185     VmaAllocationCreateInfo allocCreateInfo = {};
6186     allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
6187     allocCreateInfo.pool = pool;
6188 
6189     constexpr uint32_t allocCount = 100;
6190 
6191     std::vector<VmaAllocation> alloc{allocCount};
6192     std::vector<VmaAllocationInfo> allocInfo{allocCount};
6193     res = vmaAllocateMemoryPages(g_hAllocator, &memReq, &allocCreateInfo, allocCount, alloc.data(), allocInfo.data());
6194     TEST(res == VK_SUCCESS);
6195     for(uint32_t i = 0; i < allocCount; ++i)
6196     {
6197         TEST(alloc[i] != VK_NULL_HANDLE &&
6198             allocInfo[i].pMappedData != nullptr &&
6199             allocInfo[i].deviceMemory == allocInfo[0].deviceMemory &&
6200             allocInfo[i].memoryType == allocInfo[0].memoryType);
6201     }
6202 
6203     // Free the allocations.
6204     vmaFreeMemoryPages(g_hAllocator, allocCount, alloc.data());
6205     std::fill(alloc.begin(), alloc.end(), nullptr);
6206     std::fill(allocInfo.begin(), allocInfo.end(), VmaAllocationInfo{});
6207 
6208     // Try to make 100 allocations of 100 KB. This call should fail due to not enough memory.
6209     // Also test optional allocationInfo = null.
6210     memReq.size = 100 * 1024;
6211     res = vmaAllocateMemoryPages(g_hAllocator, &memReq, &allocCreateInfo, allocCount, alloc.data(), nullptr);
6212     TEST(res != VK_SUCCESS);
6213     TEST(std::find_if(alloc.begin(), alloc.end(), [](VmaAllocation alloc){ return alloc != VK_NULL_HANDLE; }) == alloc.end());
6214 
6215     // Make 100 allocations of 4 KB, but with required alignment of 128 KB. This should also fail.
6216     memReq.size = 4 * 1024;
6217     memReq.alignment = 128 * 1024;
6218     res = vmaAllocateMemoryPages(g_hAllocator, &memReq, &allocCreateInfo, allocCount, alloc.data(), allocInfo.data());
6219     TEST(res != VK_SUCCESS);
6220 
6221     // Make 100 dedicated allocations of 4 KB.
6222     memReq.alignment = 4 * 1024;
6223     memReq.size = 4 * 1024;
6224 
6225     VmaAllocationCreateInfo dedicatedAllocCreateInfo = {};
6226     dedicatedAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
6227     dedicatedAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT | VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
6228     res = vmaAllocateMemoryPages(g_hAllocator, &memReq, &dedicatedAllocCreateInfo, allocCount, alloc.data(), allocInfo.data());
6229     TEST(res == VK_SUCCESS);
6230     for(uint32_t i = 0; i < allocCount; ++i)
6231     {
6232         TEST(alloc[i] != VK_NULL_HANDLE &&
6233             allocInfo[i].pMappedData != nullptr &&
6234             allocInfo[i].memoryType == allocInfo[0].memoryType &&
6235             allocInfo[i].offset == 0);
6236         if(i > 0)
6237         {
6238             TEST(allocInfo[i].deviceMemory != allocInfo[0].deviceMemory);
6239         }
6240     }
6241 
6242     // Free the allocations.
6243     vmaFreeMemoryPages(g_hAllocator, allocCount, alloc.data());
6244     std::fill(alloc.begin(), alloc.end(), nullptr);
6245     std::fill(allocInfo.begin(), allocInfo.end(), VmaAllocationInfo{});
6246 
6247     vmaDestroyPool(g_hAllocator, pool);
6248 }
6249 
6250 // Test the testing environment.
TestGpuData()6251 static void TestGpuData()
6252 {
6253     RandomNumberGenerator rand = { 53434 };
6254 
6255     std::vector<AllocInfo> allocInfo;
6256 
6257     for(size_t i = 0; i < 100; ++i)
6258     {
6259         AllocInfo info = {};
6260 
6261         info.m_BufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
6262         info.m_BufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT |
6263             VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
6264             VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
6265         info.m_BufferInfo.size = 1024 * 1024 * (rand.Generate() % 9 + 1);
6266 
6267         VmaAllocationCreateInfo allocCreateInfo = {};
6268         allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
6269 
6270         VkResult res = vmaCreateBuffer(g_hAllocator, &info.m_BufferInfo, &allocCreateInfo, &info.m_Buffer, &info.m_Allocation, nullptr);
6271         TEST(res == VK_SUCCESS);
6272 
6273         info.m_StartValue = rand.Generate();
6274 
6275         allocInfo.push_back(std::move(info));
6276     }
6277 
6278     UploadGpuData(allocInfo.data(), allocInfo.size());
6279 
6280     ValidateGpuData(allocInfo.data(), allocInfo.size());
6281 
6282     DestroyAllAllocations(allocInfo);
6283 }
6284 
Test()6285 void Test()
6286 {
6287     wprintf(L"TESTING:\n");
6288 
6289     if(false)
6290     {
6291         ////////////////////////////////////////////////////////////////////////////////
6292         // Temporarily insert custom tests here:
6293         return;
6294     }
6295 
6296     // # Simple tests
6297 
6298     TestBasics();
6299     //TestGpuData(); // Not calling this because it's just testing the testing environment.
6300 #if VMA_DEBUG_MARGIN
6301     TestDebugMargin();
6302 #else
6303     TestPool_SameSize();
6304     TestPool_MinBlockCount();
6305     TestHeapSizeLimit();
6306 #endif
6307 #if VMA_DEBUG_INITIALIZE_ALLOCATIONS
6308     TestAllocationsInitialization();
6309 #endif
6310     TestMemoryUsage();
6311     TestDeviceCoherentMemory();
6312     TestBudget();
6313     TestMapping();
6314     TestDeviceLocalMapped();
6315     TestMappingMultithreaded();
6316     TestLinearAllocator();
6317     ManuallyTestLinearAllocator();
6318     TestLinearAllocatorMultiBlock();
6319 
6320     BasicTestBuddyAllocator();
6321     BasicTestAllocatePages();
6322 
6323     if(g_BufferDeviceAddressEnabled)
6324         TestBufferDeviceAddress();
6325 
6326     {
6327         FILE* file;
6328         fopen_s(&file, "Algorithms.csv", "w");
6329         assert(file != NULL);
6330         BenchmarkAlgorithms(file);
6331         fclose(file);
6332     }
6333 
6334     TestDefragmentationSimple();
6335     TestDefragmentationFull();
6336     TestDefragmentationWholePool();
6337     TestDefragmentationGpu();
6338     TestDefragmentationIncrementalBasic();
6339     TestDefragmentationIncrementalComplex();
6340 
6341     // # Detailed tests
6342     FILE* file;
6343     fopen_s(&file, "Results.csv", "w");
6344     assert(file != NULL);
6345 
6346     WriteMainTestResultHeader(file);
6347     PerformMainTests(file);
6348     //PerformCustomMainTest(file);
6349 
6350     WritePoolTestResultHeader(file);
6351     PerformPoolTests(file);
6352     //PerformCustomPoolTest(file);
6353 
6354     fclose(file);
6355 
6356     wprintf(L"Done.\n");
6357 }
6358 
6359 #endif // #ifdef _WIN32
6360