1 //
2 // Copyright (c) 2017-2021 Advanced Micro Devices, Inc. All rights reserved.
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining a copy
5 // of this software and associated documentation files (the "Software"), to deal
6 // in the Software without restriction, including without limitation the rights
7 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 // copies of the Software, and to permit persons to whom the Software is
9 // furnished to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
17 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 // THE SOFTWARE.
21 //
22 
23 #include "Tests.h"
24 #include "VmaUsage.h"
25 #include "Common.h"
26 #include <atomic>
27 #include <thread>
28 #include <mutex>
29 #include <functional>
30 
31 #ifdef _WIN32
32 
33 static const char* CODE_DESCRIPTION = "Foo";
34 static constexpr VkDeviceSize MEGABYTE = 1024 * 1024;
35 
36 extern VkCommandBuffer g_hTemporaryCommandBuffer;
37 extern const VkAllocationCallbacks* g_Allocs;
38 extern bool VK_KHR_buffer_device_address_enabled;
39 extern bool VK_EXT_memory_priority_enabled;
40 extern PFN_vkGetBufferDeviceAddressKHR g_vkGetBufferDeviceAddressKHR;
41 void BeginSingleTimeCommands();
42 void EndSingleTimeCommands();
43 void SetDebugUtilsObjectName(VkObjectType type, uint64_t handle, const char* name);
44 
45 #ifndef VMA_DEBUG_MARGIN
46     #define VMA_DEBUG_MARGIN 0
47 #endif
48 
49 enum CONFIG_TYPE {
50     CONFIG_TYPE_MINIMUM,
51     CONFIG_TYPE_SMALL,
52     CONFIG_TYPE_AVERAGE,
53     CONFIG_TYPE_LARGE,
54     CONFIG_TYPE_MAXIMUM,
55     CONFIG_TYPE_COUNT
56 };
57 
58 static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_SMALL;
59 //static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_LARGE;
60 
61 enum class FREE_ORDER { FORWARD, BACKWARD, RANDOM, COUNT };
62 
63 static const char* FREE_ORDER_NAMES[] = {
64     "FORWARD",
65     "BACKWARD",
66     "RANDOM",
67 };
68 
69 // Copy of internal VmaAlgorithmToStr.
70 static const char* AlgorithmToStr(uint32_t algorithm)
71 {
72     switch(algorithm)
73     {
74     case VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT:
75         return "Linear";
76     case VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT:
77         return "Buddy";
78     case 0:
79         return "Default";
80     default:
81         assert(0);
82         return "";
83     }
84 }
85 
86 struct AllocationSize
87 {
88     uint32_t Probability;
89     VkDeviceSize BufferSizeMin, BufferSizeMax;
90     uint32_t ImageSizeMin, ImageSizeMax;
91 };
92 
93 struct Config
94 {
95     uint32_t RandSeed;
96     VkDeviceSize BeginBytesToAllocate;
97     uint32_t AdditionalOperationCount;
98     VkDeviceSize MaxBytesToAllocate;
99     uint32_t MemUsageProbability[4]; // For VMA_MEMORY_USAGE_*
100     std::vector<AllocationSize> AllocationSizes;
101     uint32_t ThreadCount;
102     uint32_t ThreadsUsingCommonAllocationsProbabilityPercent;
103     FREE_ORDER FreeOrder;
104     VmaAllocationCreateFlags AllocationStrategy; // For VMA_ALLOCATION_CREATE_STRATEGY_*
105 };
106 
107 struct Result
108 {
109     duration TotalTime;
110     duration AllocationTimeMin, AllocationTimeAvg, AllocationTimeMax;
111     duration DeallocationTimeMin, DeallocationTimeAvg, DeallocationTimeMax;
112     VkDeviceSize TotalMemoryAllocated;
113     VkDeviceSize FreeRangeSizeAvg, FreeRangeSizeMax;
114 };
115 
116 void TestDefragmentationSimple();
117 void TestDefragmentationFull();
118 
119 struct PoolTestConfig
120 {
121     uint32_t RandSeed;
122     uint32_t ThreadCount;
123     VkDeviceSize PoolSize;
124     uint32_t FrameCount;
125     uint32_t TotalItemCount;
126     // Range for number of items used in each frame.
127     uint32_t UsedItemCountMin, UsedItemCountMax;
128     // Percent of items to make unused, and possibly make some others used in each frame.
129     uint32_t ItemsToMakeUnusedPercent;
130     std::vector<AllocationSize> AllocationSizes;
131 
132     VkDeviceSize CalcAvgResourceSize() const
133     {
134         uint32_t probabilitySum = 0;
135         VkDeviceSize sizeSum = 0;
136         for(size_t i = 0; i < AllocationSizes.size(); ++i)
137         {
138             const AllocationSize& allocSize = AllocationSizes[i];
139             if(allocSize.BufferSizeMax > 0)
140                 sizeSum += (allocSize.BufferSizeMin + allocSize.BufferSizeMax) / 2 * allocSize.Probability;
141             else
142             {
143                 const VkDeviceSize avgDimension = (allocSize.ImageSizeMin + allocSize.ImageSizeMax) / 2;
144                 sizeSum += avgDimension * avgDimension * 4 * allocSize.Probability;
145             }
146             probabilitySum += allocSize.Probability;
147         }
148         return sizeSum / probabilitySum;
149     }
150 
151     bool UsesBuffers() const
152     {
153         for(size_t i = 0; i < AllocationSizes.size(); ++i)
154             if(AllocationSizes[i].BufferSizeMax > 0)
155                 return true;
156         return false;
157     }
158 
159     bool UsesImages() const
160     {
161         for(size_t i = 0; i < AllocationSizes.size(); ++i)
162             if(AllocationSizes[i].ImageSizeMax > 0)
163                 return true;
164         return false;
165     }
166 };
167 
168 struct PoolTestResult
169 {
170     duration TotalTime;
171     duration AllocationTimeMin, AllocationTimeAvg, AllocationTimeMax;
172     duration DeallocationTimeMin, DeallocationTimeAvg, DeallocationTimeMax;
173     size_t LostAllocationCount, LostAllocationTotalSize;
174     size_t FailedAllocationCount, FailedAllocationTotalSize;
175 };
176 
177 static const uint32_t IMAGE_BYTES_PER_PIXEL = 1;
178 
179 uint32_t g_FrameIndex = 0;
180 
181 struct BufferInfo
182 {
183     VkBuffer Buffer = VK_NULL_HANDLE;
184     VmaAllocation Allocation = VK_NULL_HANDLE;
185 };
186 
187 static uint32_t MemoryTypeToHeap(uint32_t memoryTypeIndex)
188 {
189     const VkPhysicalDeviceMemoryProperties* props;
190     vmaGetMemoryProperties(g_hAllocator, &props);
191     return props->memoryTypes[memoryTypeIndex].heapIndex;
192 }
193 
194 static uint32_t GetAllocationStrategyCount()
195 {
196     uint32_t strategyCount = 0;
197     switch(ConfigType)
198     {
199     case CONFIG_TYPE_MINIMUM: strategyCount = 1; break;
200     case CONFIG_TYPE_SMALL:   strategyCount = 1; break;
201     case CONFIG_TYPE_AVERAGE: strategyCount = 2; break;
202     case CONFIG_TYPE_LARGE:   strategyCount = 2; break;
203     case CONFIG_TYPE_MAXIMUM: strategyCount = 3; break;
204     default: assert(0);
205     }
206     return strategyCount;
207 }
208 
209 static const char* GetAllocationStrategyName(VmaAllocationCreateFlags allocStrategy)
210 {
211     switch(allocStrategy)
212     {
213     case VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT: return "BEST_FIT"; break;
214     case VMA_ALLOCATION_CREATE_STRATEGY_WORST_FIT_BIT: return "WORST_FIT"; break;
215     case VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT: return "FIRST_FIT"; break;
216     case 0: return "Default"; break;
217     default: assert(0); return "";
218     }
219 }
220 
221 static void InitResult(Result& outResult)
222 {
223     outResult.TotalTime = duration::zero();
224     outResult.AllocationTimeMin = duration::max();
225     outResult.AllocationTimeAvg = duration::zero();
226     outResult.AllocationTimeMax = duration::min();
227     outResult.DeallocationTimeMin = duration::max();
228     outResult.DeallocationTimeAvg = duration::zero();
229     outResult.DeallocationTimeMax = duration::min();
230     outResult.TotalMemoryAllocated = 0;
231     outResult.FreeRangeSizeAvg = 0;
232     outResult.FreeRangeSizeMax = 0;
233 }
234 
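// RAII helper that measures the time between its construction and destruction
// and accumulates the result into the given min/sum/max duration references.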
235 class TimeRegisterObj
236 {
237 public:
238     TimeRegisterObj(duration& min, duration& sum, duration& max) :
239         m_Min(min),
240         m_Sum(sum),
241         m_Max(max),
242         m_TimeBeg(std::chrono::high_resolution_clock::now())
243     {
244     }
245 
246     ~TimeRegisterObj()
247     {
248         duration d = std::chrono::high_resolution_clock::now() - m_TimeBeg;
249         m_Sum += d;
250         if(d < m_Min) m_Min = d;
251         if(d > m_Max) m_Max = d;
252     }
253 
254 private:
255     duration& m_Min;
256     duration& m_Sum;
257     duration& m_Max;
258     time_point m_TimeBeg;
259 };
260 
261 struct PoolTestThreadResult
262 {
263     duration AllocationTimeMin, AllocationTimeSum, AllocationTimeMax;
264     duration DeallocationTimeMin, DeallocationTimeSum, DeallocationTimeMax;
265     size_t AllocationCount, DeallocationCount;
266     size_t LostAllocationCount, LostAllocationTotalSize;
267     size_t FailedAllocationCount, FailedAllocationTotalSize;
268 };
269 
270 class AllocationTimeRegisterObj : public TimeRegisterObj
271 {
272 public:
273     AllocationTimeRegisterObj(Result& result) :
274         TimeRegisterObj(result.AllocationTimeMin, result.AllocationTimeAvg, result.AllocationTimeMax)
275     {
276     }
277 };
278 
279 class DeallocationTimeRegisterObj : public TimeRegisterObj
280 {
281 public:
282     DeallocationTimeRegisterObj(Result& result) :
283         TimeRegisterObj(result.DeallocationTimeMin, result.DeallocationTimeAvg, result.DeallocationTimeMax)
284     {
285     }
286 };
287 
288 class PoolAllocationTimeRegisterObj : public TimeRegisterObj
289 {
290 public:
291     PoolAllocationTimeRegisterObj(PoolTestThreadResult& result) :
292         TimeRegisterObj(result.AllocationTimeMin, result.AllocationTimeSum, result.AllocationTimeMax)
293     {
294     }
295 };
296 
297 class PoolDeallocationTimeRegisterObj : public TimeRegisterObj
298 {
299 public:
300     PoolDeallocationTimeRegisterObj(PoolTestThreadResult& result) :
301         TimeRegisterObj(result.DeallocationTimeMin, result.DeallocationTimeSum, result.DeallocationTimeMax)
302     {
303     }
304 };
305 
306 static void CurrentTimeToStr(std::string& out)
307 {
308     time_t rawTime; time(&rawTime);
309     struct tm timeInfo; localtime_s(&timeInfo, &rawTime);
310     char timeStr[128];
311     strftime(timeStr, _countof(timeStr), "%c", &timeInfo);
312     out = timeStr;
313 }
314 
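// Multi-threaded stress test: each thread randomly creates buffers and images of
// varying sizes and memory usage, optionally shares them through a common list,
// then frees everything in the configured order. Timing and memory statistics are
// accumulated into outResult.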
315 VkResult MainTest(Result& outResult, const Config& config)
316 {
317     assert(config.ThreadCount > 0);
318 
319     InitResult(outResult);
320 
321     RandomNumberGenerator mainRand{config.RandSeed};
322 
323     time_point timeBeg = std::chrono::high_resolution_clock::now();
324 
325     std::atomic<size_t> allocationCount = 0;
326     VkResult res = VK_SUCCESS;
327 
328     uint32_t memUsageProbabilitySum =
329         config.MemUsageProbability[0] + config.MemUsageProbability[1] +
330         config.MemUsageProbability[2] + config.MemUsageProbability[3];
331     assert(memUsageProbabilitySum > 0);
332 
333     uint32_t allocationSizeProbabilitySum = std::accumulate(
334         config.AllocationSizes.begin(),
335         config.AllocationSizes.end(),
336         0u,
337         [](uint32_t sum, const AllocationSize& allocSize) {
338             return sum + allocSize.Probability;
339         });
340 
341     struct Allocation
342     {
343         VkBuffer Buffer;
344         VkImage Image;
345         VmaAllocation Alloc;
346     };
347 
348     std::vector<Allocation> commonAllocations;
349     std::mutex commonAllocationsMutex;
350 
351     auto Allocate = [&](
352         VkDeviceSize bufferSize,
353         const VkExtent2D imageExtent,
354         RandomNumberGenerator& localRand,
355         VkDeviceSize& totalAllocatedBytes,
356         std::vector<Allocation>& allocations) -> VkResult
357     {
358         assert((bufferSize == 0) != (imageExtent.width == 0 && imageExtent.height == 0));
359 
360         uint32_t memUsageIndex = 0;
361         uint32_t memUsageRand = localRand.Generate() % memUsageProbabilitySum;
362         while(memUsageRand >= config.MemUsageProbability[memUsageIndex])
363             memUsageRand -= config.MemUsageProbability[memUsageIndex++];
364 
365         VmaAllocationCreateInfo memReq = {};
366         memReq.usage = (VmaMemoryUsage)(VMA_MEMORY_USAGE_GPU_ONLY + memUsageIndex);
367         memReq.flags |= config.AllocationStrategy;
368 
369         Allocation allocation = {};
370         VmaAllocationInfo allocationInfo;
371 
372         // Buffer
373         if(bufferSize > 0)
374         {
375             assert(imageExtent.width == 0);
376             VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
377             bufferInfo.size = bufferSize;
378             bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
379 
380             {
381                 AllocationTimeRegisterObj timeRegisterObj{outResult};
382                 res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &memReq, &allocation.Buffer, &allocation.Alloc, &allocationInfo);
383             }
384         }
385         // Image
386         else
387         {
388             VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
389             imageInfo.imageType = VK_IMAGE_TYPE_2D;
390             imageInfo.extent.width = imageExtent.width;
391             imageInfo.extent.height = imageExtent.height;
392             imageInfo.extent.depth = 1;
393             imageInfo.mipLevels = 1;
394             imageInfo.arrayLayers = 1;
395             imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
396             imageInfo.tiling = memReq.usage == VMA_MEMORY_USAGE_GPU_ONLY ?
397                 VK_IMAGE_TILING_OPTIMAL :
398                 VK_IMAGE_TILING_LINEAR;
399             imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
400             switch(memReq.usage)
401             {
402             case VMA_MEMORY_USAGE_GPU_ONLY:
403                 switch(localRand.Generate() % 3)
404                 {
405                 case 0:
406                     imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
407                     break;
408                 case 1:
409                     imageInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
410                     break;
411                 case 2:
412                     imageInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
413                     break;
414                 }
415                 break;
416             case VMA_MEMORY_USAGE_CPU_ONLY:
417             case VMA_MEMORY_USAGE_CPU_TO_GPU:
418                 imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
419                 break;
420             case VMA_MEMORY_USAGE_GPU_TO_CPU:
421                 imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT;
422                 break;
423             }
424             imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
425             imageInfo.flags = 0;
426 
427             {
428                 AllocationTimeRegisterObj timeRegisterObj{outResult};
429                 res = vmaCreateImage(g_hAllocator, &imageInfo, &memReq, &allocation.Image, &allocation.Alloc, &allocationInfo);
430             }
431         }
432 
433         if(res == VK_SUCCESS)
434         {
435             ++allocationCount;
436             totalAllocatedBytes += allocationInfo.size;
437             bool useCommonAllocations = localRand.Generate() % 100 < config.ThreadsUsingCommonAllocationsProbabilityPercent;
438             if(useCommonAllocations)
439             {
440                 std::unique_lock<std::mutex> lock(commonAllocationsMutex);
441                 commonAllocations.push_back(allocation);
442             }
443             else
444                 allocations.push_back(allocation);
445         }
446         else
447         {
448             TEST(0);
449         }
450         return res;
451     };
452 
453     auto GetNextAllocationSize = [&](
454         VkDeviceSize& outBufSize,
455         VkExtent2D& outImageSize,
456         RandomNumberGenerator& localRand)
457     {
458         outBufSize = 0;
459         outImageSize = {0, 0};
460 
461         uint32_t allocSizeIndex = 0;
462         uint32_t r = localRand.Generate() % allocationSizeProbabilitySum;
463         while(r >= config.AllocationSizes[allocSizeIndex].Probability)
464             r -= config.AllocationSizes[allocSizeIndex++].Probability;
465 
466         const AllocationSize& allocSize = config.AllocationSizes[allocSizeIndex];
467         if(allocSize.BufferSizeMax > 0)
468         {
469             assert(allocSize.ImageSizeMax == 0);
470             if(allocSize.BufferSizeMax == allocSize.BufferSizeMin)
471                 outBufSize = allocSize.BufferSizeMin;
472             else
473             {
474                 outBufSize = allocSize.BufferSizeMin + localRand.Generate() % (allocSize.BufferSizeMax - allocSize.BufferSizeMin);
475                 outBufSize = outBufSize / 16 * 16;
476             }
477         }
478         else
479         {
480             if(allocSize.ImageSizeMax == allocSize.ImageSizeMin)
481                 outImageSize.width = outImageSize.height = allocSize.ImageSizeMax;
482             else
483             {
484                 outImageSize.width  = allocSize.ImageSizeMin + localRand.Generate() % (allocSize.ImageSizeMax - allocSize.ImageSizeMin);
485                 outImageSize.height = allocSize.ImageSizeMin + localRand.Generate() % (allocSize.ImageSizeMax - allocSize.ImageSizeMin);
486             }
487         }
488     };
489 
490     std::atomic<uint32_t> numThreadsReachedMaxAllocations = 0;
491     HANDLE threadsFinishEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
492 
493     auto ThreadProc = [&](uint32_t randSeed) -> void
494     {
495         RandomNumberGenerator threadRand(randSeed);
496         VkDeviceSize threadTotalAllocatedBytes = 0;
497         std::vector<Allocation> threadAllocations;
498         VkDeviceSize threadBeginBytesToAllocate = config.BeginBytesToAllocate / config.ThreadCount;
499         VkDeviceSize threadMaxBytesToAllocate = config.MaxBytesToAllocate / config.ThreadCount;
500         uint32_t threadAdditionalOperationCount = config.AdditionalOperationCount / config.ThreadCount;
501 
502         // BEGIN ALLOCATIONS
503         for(;;)
504         {
505             VkDeviceSize bufferSize = 0;
506             VkExtent2D imageExtent = {};
507             GetNextAllocationSize(bufferSize, imageExtent, threadRand);
508             if(threadTotalAllocatedBytes + bufferSize + imageExtent.width * imageExtent.height * IMAGE_BYTES_PER_PIXEL <
509                 threadBeginBytesToAllocate)
510             {
511                 if(Allocate(bufferSize, imageExtent, threadRand, threadTotalAllocatedBytes, threadAllocations) != VK_SUCCESS)
512                     break;
513             }
514             else
515                 break;
516         }
517 
518         // ADDITIONAL ALLOCATIONS AND FREES
519         for(size_t i = 0; i < threadAdditionalOperationCount; ++i)
520         {
521             VkDeviceSize bufferSize = 0;
522             VkExtent2D imageExtent = {};
523             GetNextAllocationSize(bufferSize, imageExtent, threadRand);
524 
525             // true = allocate, false = free
526             bool allocate = threadRand.Generate() % 2 != 0;
527 
528             if(allocate)
529             {
530                 if(threadTotalAllocatedBytes +
531                     bufferSize +
532                     imageExtent.width * imageExtent.height * IMAGE_BYTES_PER_PIXEL <
533                     threadMaxBytesToAllocate)
534                 {
535                     if(Allocate(bufferSize, imageExtent, threadRand, threadTotalAllocatedBytes, threadAllocations) != VK_SUCCESS)
536                         break;
537                 }
538             }
539             else
540             {
541                 bool useCommonAllocations = threadRand.Generate() % 100 < config.ThreadsUsingCommonAllocationsProbabilityPercent;
542                 if(useCommonAllocations)
543                 {
544                     std::unique_lock<std::mutex> lock(commonAllocationsMutex);
545                     if(!commonAllocations.empty())
546                     {
547                         size_t indexToFree = threadRand.Generate() % commonAllocations.size();
548                         VmaAllocationInfo allocationInfo;
549                         vmaGetAllocationInfo(g_hAllocator, commonAllocations[indexToFree].Alloc, &allocationInfo);
550                         if(threadTotalAllocatedBytes >= allocationInfo.size)
551                         {
552                             DeallocationTimeRegisterObj timeRegisterObj{outResult};
553                             if(commonAllocations[indexToFree].Buffer != VK_NULL_HANDLE)
554                                 vmaDestroyBuffer(g_hAllocator, commonAllocations[indexToFree].Buffer, commonAllocations[indexToFree].Alloc);
555                             else
556                                 vmaDestroyImage(g_hAllocator, commonAllocations[indexToFree].Image, commonAllocations[indexToFree].Alloc);
557                             threadTotalAllocatedBytes -= allocationInfo.size;
558                             commonAllocations.erase(commonAllocations.begin() + indexToFree);
559                         }
560                     }
561                 }
562                 else
563                 {
564                     if(!threadAllocations.empty())
565                     {
566                         size_t indexToFree = threadRand.Generate() % threadAllocations.size();
567                         VmaAllocationInfo allocationInfo;
568                         vmaGetAllocationInfo(g_hAllocator, threadAllocations[indexToFree].Alloc, &allocationInfo);
569                         if(threadTotalAllocatedBytes >= allocationInfo.size)
570                         {
571                             DeallocationTimeRegisterObj timeRegisterObj{outResult};
572                             if(threadAllocations[indexToFree].Buffer != VK_NULL_HANDLE)
573                                 vmaDestroyBuffer(g_hAllocator, threadAllocations[indexToFree].Buffer, threadAllocations[indexToFree].Alloc);
574                             else
575                                 vmaDestroyImage(g_hAllocator, threadAllocations[indexToFree].Image, threadAllocations[indexToFree].Alloc);
576                             threadTotalAllocatedBytes -= allocationInfo.size;
577                             threadAllocations.erase(threadAllocations.begin() + indexToFree);
578                         }
579                     }
580                 }
581             }
582         }
583 
584         ++numThreadsReachedMaxAllocations;
585 
586         WaitForSingleObject(threadsFinishEvent, INFINITE);
587 
588         // DEALLOCATION
589         while(!threadAllocations.empty())
590         {
591             size_t indexToFree = 0;
592             switch(config.FreeOrder)
593             {
594             case FREE_ORDER::FORWARD:
595                 indexToFree = 0;
596                 break;
597             case FREE_ORDER::BACKWARD:
598                 indexToFree = threadAllocations.size() - 1;
599                 break;
600             case FREE_ORDER::RANDOM:
601                 indexToFree = mainRand.Generate() % threadAllocations.size();
602                 break;
603             }
604 
605             {
606                 DeallocationTimeRegisterObj timeRegisterObj{outResult};
607                 if(threadAllocations[indexToFree].Buffer != VK_NULL_HANDLE)
608                     vmaDestroyBuffer(g_hAllocator, threadAllocations[indexToFree].Buffer, threadAllocations[indexToFree].Alloc);
609                 else
610                     vmaDestroyImage(g_hAllocator, threadAllocations[indexToFree].Image, threadAllocations[indexToFree].Alloc);
611             }
612             threadAllocations.erase(threadAllocations.begin() + indexToFree);
613         }
614     };
615 
616     uint32_t threadRandSeed = mainRand.Generate();
617     std::vector<std::thread> bkgThreads;
618     for(size_t i = 0; i < config.ThreadCount; ++i)
619     {
620         bkgThreads.emplace_back(std::bind(ThreadProc, threadRandSeed + (uint32_t)i));
621     }
622 
623     // Wait until all threads have finished their allocation work
624     while(numThreadsReachedMaxAllocations < config.ThreadCount)
625         Sleep(0);
626 
627     // CALCULATE MEMORY STATISTICS ON FINAL USAGE
628     VmaStats vmaStats = {};
629     vmaCalculateStats(g_hAllocator, &vmaStats);
630     outResult.TotalMemoryAllocated = vmaStats.total.usedBytes + vmaStats.total.unusedBytes;
631     outResult.FreeRangeSizeMax = vmaStats.total.unusedRangeSizeMax;
632     outResult.FreeRangeSizeAvg = vmaStats.total.unusedRangeSizeAvg;
633 
634     // Signal threads to deallocate
635     SetEvent(threadsFinishEvent);
636 
637     // Wait for all threads to finish
638     for(size_t i = 0; i < bkgThreads.size(); ++i)
639         bkgThreads[i].join();
640     bkgThreads.clear();
641 
642     CloseHandle(threadsFinishEvent);
643 
644     // Deallocate remaining common resources
645     while(!commonAllocations.empty())
646     {
647         size_t indexToFree = 0;
648         switch(config.FreeOrder)
649         {
650         case FREE_ORDER::FORWARD:
651             indexToFree = 0;
652             break;
653         case FREE_ORDER::BACKWARD:
654             indexToFree = commonAllocations.size() - 1;
655             break;
656         case FREE_ORDER::RANDOM:
657             indexToFree = mainRand.Generate() % commonAllocations.size();
658             break;
659         }
660 
661         {
662             DeallocationTimeRegisterObj timeRegisterObj{outResult};
663             if(commonAllocations[indexToFree].Buffer != VK_NULL_HANDLE)
664                 vmaDestroyBuffer(g_hAllocator, commonAllocations[indexToFree].Buffer, commonAllocations[indexToFree].Alloc);
665             else
666                 vmaDestroyImage(g_hAllocator, commonAllocations[indexToFree].Image, commonAllocations[indexToFree].Alloc);
667         }
668         commonAllocations.erase(commonAllocations.begin() + indexToFree);
669     }
670 
671     if(allocationCount)
672     {
673         outResult.AllocationTimeAvg /= allocationCount;
674         outResult.DeallocationTimeAvg /= allocationCount;
675     }
676 
677     outResult.TotalTime = std::chrono::high_resolution_clock::now() - timeBeg;
678 
679     return res;
680 }
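// A minimal usage sketch for MainTest (hypothetical values, not part of the test suite):
//
//     Config config = {};
//     config.RandSeed = 123;
//     config.ThreadCount = 4;
//     config.BeginBytesToAllocate = 64 * MEGABYTE;
//     config.MaxBytesToAllocate = 256 * MEGABYTE;
//     config.AdditionalOperationCount = 1000;
//     config.MemUsageProbability[0] = 1; // always VMA_MEMORY_USAGE_GPU_ONLY
//     config.AllocationSizes.push_back({1, 16, 1024, 0, 0}); // buffers of 16..1024 bytes
//     config.FreeOrder = FREE_ORDER::RANDOM;
//     Result result;
//     VkResult res = MainTest(result, config);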
681 
682 void SaveAllocatorStatsToFile(const wchar_t* filePath)
683 {
684     wprintf(L"Saving JSON dump to file \"%s\"\n", filePath);
685     char* stats;
686     vmaBuildStatsString(g_hAllocator, &stats, VK_TRUE);
687     SaveFile(filePath, stats, strlen(stats));
688     vmaFreeStatsString(g_hAllocator, stats);
689 }
690 
691 struct AllocInfo
692 {
693     VmaAllocation m_Allocation = VK_NULL_HANDLE;
694     VkBuffer m_Buffer = VK_NULL_HANDLE;
695     VkImage m_Image = VK_NULL_HANDLE;
696     VkImageLayout m_ImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
697     uint32_t m_StartValue = 0;
698     union
699     {
700         VkBufferCreateInfo m_BufferInfo;
701         VkImageCreateInfo m_ImageInfo;
702     };
703 
704     // After defragmentation.
705     VkBuffer m_NewBuffer = VK_NULL_HANDLE;
706     VkImage m_NewImage = VK_NULL_HANDLE;
707 
708     void CreateBuffer(
709         const VkBufferCreateInfo& bufCreateInfo,
710         const VmaAllocationCreateInfo& allocCreateInfo);
711     void CreateImage(
712         const VkImageCreateInfo& imageCreateInfo,
713         const VmaAllocationCreateInfo& allocCreateInfo,
714         VkImageLayout layout);
715     void Destroy();
716 };
717 
718 void AllocInfo::CreateBuffer(
719     const VkBufferCreateInfo& bufCreateInfo,
720     const VmaAllocationCreateInfo& allocCreateInfo)
721 {
722     m_BufferInfo = bufCreateInfo;
723     VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &m_Buffer, &m_Allocation, nullptr);
724     TEST(res == VK_SUCCESS);
725 }
726 void AllocInfo::CreateImage(
727     const VkImageCreateInfo& imageCreateInfo,
728     const VmaAllocationCreateInfo& allocCreateInfo,
729     VkImageLayout layout)
730 {
731     m_ImageInfo = imageCreateInfo;
732     m_ImageLayout = layout;
733     VkResult res = vmaCreateImage(g_hAllocator, &imageCreateInfo, &allocCreateInfo, &m_Image, &m_Allocation, nullptr);
734     TEST(res == VK_SUCCESS);
735 }
736 
737 void AllocInfo::Destroy()
738 {
739     if(m_Image)
740     {
741         assert(!m_Buffer);
742         vkDestroyImage(g_hDevice, m_Image, g_Allocs);
743         m_Image = VK_NULL_HANDLE;
744     }
745     if(m_Buffer)
746     {
747         assert(!m_Image);
748         vkDestroyBuffer(g_hDevice, m_Buffer, g_Allocs);
749         m_Buffer = VK_NULL_HANDLE;
750     }
751     if(m_Allocation)
752     {
753         vmaFreeMemory(g_hAllocator, m_Allocation);
754         m_Allocation = VK_NULL_HANDLE;
755     }
756 }
757 
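// Small cache of persistently mapped staging buffers, reused across transfers,
// limited to MAX_TOTAL_SIZE in total.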
758 class StagingBufferCollection
759 {
760 public:
761     StagingBufferCollection() { }
762     ~StagingBufferCollection();
763     // Returns false if maximum total size of buffers would be exceeded.
764     bool AcquireBuffer(VkDeviceSize size, VkBuffer& outBuffer, void*& outMappedPtr);
765     void ReleaseAllBuffers();
766 
767 private:
768     static const VkDeviceSize MAX_TOTAL_SIZE = 256ull * 1024 * 1024;
769     struct BufInfo
770     {
771         VmaAllocation Allocation = VK_NULL_HANDLE;
772         VkBuffer Buffer = VK_NULL_HANDLE;
773         VkDeviceSize Size = VK_WHOLE_SIZE;
774         void* MappedPtr = nullptr;
775         bool Used = false;
776     };
777     std::vector<BufInfo> m_Bufs;
778     // Including both used and unused.
779     VkDeviceSize m_TotalSize = 0;
780 };
781 
782 StagingBufferCollection::~StagingBufferCollection()
783 {
784     for(size_t i = m_Bufs.size(); i--; )
785     {
786         vmaDestroyBuffer(g_hAllocator, m_Bufs[i].Buffer, m_Bufs[i].Allocation);
787     }
788 }
789 
790 bool StagingBufferCollection::AcquireBuffer(VkDeviceSize size, VkBuffer& outBuffer, void*& outMappedPtr)
791 {
792     assert(size <= MAX_TOTAL_SIZE);
793 
794     // Try to find existing unused buffer with best size.
795     size_t bestIndex = SIZE_MAX;
796     for(size_t i = 0, count = m_Bufs.size(); i < count; ++i)
797     {
798         BufInfo& currBufInfo = m_Bufs[i];
799         if(!currBufInfo.Used && currBufInfo.Size >= size &&
800             (bestIndex == SIZE_MAX || currBufInfo.Size < m_Bufs[bestIndex].Size))
801         {
802             bestIndex = i;
803         }
804     }
805 
806     if(bestIndex != SIZE_MAX)
807     {
808         m_Bufs[bestIndex].Used = true;
809         outBuffer = m_Bufs[bestIndex].Buffer;
810         outMappedPtr = m_Bufs[bestIndex].MappedPtr;
811         return true;
812     }
813 
814     // Allocate new buffer with requested size.
815     if(m_TotalSize + size <= MAX_TOTAL_SIZE)
816     {
817         BufInfo bufInfo;
818         bufInfo.Size = size;
819         bufInfo.Used = true;
820 
821         VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
822         bufCreateInfo.size = size;
823         bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
824 
825         VmaAllocationCreateInfo allocCreateInfo = {};
826         allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
827         allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
828 
829         VmaAllocationInfo allocInfo;
830         VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &bufInfo.Buffer, &bufInfo.Allocation, &allocInfo);
831         bufInfo.MappedPtr = allocInfo.pMappedData;
832         TEST(res == VK_SUCCESS && bufInfo.MappedPtr);
833 
834         outBuffer = bufInfo.Buffer;
835         outMappedPtr = bufInfo.MappedPtr;
836 
837         m_Bufs.push_back(std::move(bufInfo));
838 
839         m_TotalSize += size;
840 
841         return true;
842     }
843 
844     // If there are unused but smaller buffers, free them and try again.
845     bool hasUnused = false;
846     for(size_t i = 0, count = m_Bufs.size(); i < count; ++i)
847     {
848         if(!m_Bufs[i].Used)
849         {
850             hasUnused = true;
851             break;
852         }
853     }
854     if(hasUnused)
855     {
856         for(size_t i = m_Bufs.size(); i--; )
857         {
858             if(!m_Bufs[i].Used)
859             {
860                 m_TotalSize -= m_Bufs[i].Size;
861                 vmaDestroyBuffer(g_hAllocator, m_Bufs[i].Buffer, m_Bufs[i].Allocation);
862                 m_Bufs.erase(m_Bufs.begin() + i);
863             }
864         }
865 
866         return AcquireBuffer(size, outBuffer, outMappedPtr);
867     }
868 
869     return false;
870 }
871 
872 void StagingBufferCollection::ReleaseAllBuffers()
873 {
874     for(size_t i = 0, count = m_Bufs.size(); i < count; ++i)
875     {
876         m_Bufs[i].Used = false;
877     }
878 }
879 
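// Fills each destination buffer/image with a sequential uint32_t pattern starting at
// m_StartValue, uploading the data through staging buffers and single-time command buffers.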
880 static void UploadGpuData(const AllocInfo* allocInfo, size_t allocInfoCount)
881 {
882     StagingBufferCollection stagingBufs;
883 
884     bool cmdBufferStarted = false;
885     for(size_t allocInfoIndex = 0; allocInfoIndex < allocInfoCount; ++allocInfoIndex)
886     {
887         const AllocInfo& currAllocInfo = allocInfo[allocInfoIndex];
888         if(currAllocInfo.m_Buffer)
889         {
890             const VkDeviceSize size = currAllocInfo.m_BufferInfo.size;
891 
892             VkBuffer stagingBuf = VK_NULL_HANDLE;
893             void* stagingBufMappedPtr = nullptr;
894             if(!stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr))
895             {
896                 TEST(cmdBufferStarted);
897                 EndSingleTimeCommands();
898                 stagingBufs.ReleaseAllBuffers();
899                 cmdBufferStarted = false;
900 
901                 bool ok = stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr);
902                 TEST(ok);
903             }
904 
905             // Fill staging buffer.
906             {
907                 assert(size % sizeof(uint32_t) == 0);
908                 uint32_t* stagingValPtr = (uint32_t*)stagingBufMappedPtr;
909                 uint32_t val = currAllocInfo.m_StartValue;
910                 for(size_t i = 0; i < size / sizeof(uint32_t); ++i)
911                 {
912                     *stagingValPtr = val;
913                     ++stagingValPtr;
914                     ++val;
915                 }
916             }
917 
918             // Issue copy command from staging buffer to destination buffer.
919             if(!cmdBufferStarted)
920             {
921                 cmdBufferStarted = true;
922                 BeginSingleTimeCommands();
923             }
924 
925             VkBufferCopy copy = {};
926             copy.srcOffset = 0;
927             copy.dstOffset = 0;
928             copy.size = size;
929             vkCmdCopyBuffer(g_hTemporaryCommandBuffer, stagingBuf, currAllocInfo.m_Buffer, 1, &copy);
930         }
931         else
932         {
933             TEST(currAllocInfo.m_ImageInfo.format == VK_FORMAT_R8G8B8A8_UNORM && "Only RGBA8 images are currently supported.");
934             TEST(currAllocInfo.m_ImageInfo.mipLevels == 1 && "Only single mip images are currently supported.");
935 
936             const VkDeviceSize size = (VkDeviceSize)currAllocInfo.m_ImageInfo.extent.width * currAllocInfo.m_ImageInfo.extent.height * sizeof(uint32_t);
937 
938             VkBuffer stagingBuf = VK_NULL_HANDLE;
939             void* stagingBufMappedPtr = nullptr;
940             if(!stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr))
941             {
942                 TEST(cmdBufferStarted);
943                 EndSingleTimeCommands();
944                 stagingBufs.ReleaseAllBuffers();
945                 cmdBufferStarted = false;
946 
947                 bool ok = stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr);
948                 TEST(ok);
949             }
950 
951             // Fill staging buffer.
952             {
953                 assert(size % sizeof(uint32_t) == 0);
954                 uint32_t *stagingValPtr = (uint32_t *)stagingBufMappedPtr;
955                 uint32_t val = currAllocInfo.m_StartValue;
956                 for(size_t i = 0; i < size / sizeof(uint32_t); ++i)
957                 {
958                     *stagingValPtr = val;
959                     ++stagingValPtr;
960                     ++val;
961                 }
962             }
963 
964             // Issue copy command from staging buffer to destination image.
965             if(!cmdBufferStarted)
966             {
967                 cmdBufferStarted = true;
968                 BeginSingleTimeCommands();
969             }
970 
971 
972             // Transition image to TRANSFER_DST layout
973             VkImageSubresourceRange subresourceRange = {
974                 VK_IMAGE_ASPECT_COLOR_BIT,
975                 0, VK_REMAINING_MIP_LEVELS,
976                 0, VK_REMAINING_ARRAY_LAYERS
977             };
978 
979             VkImageMemoryBarrier barrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER };
980             barrier.srcAccessMask = 0;
981             barrier.dstAccessMask = 0;
982             barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
983             barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
984             barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
985             barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
986             barrier.image = currAllocInfo.m_Image;
987             barrier.subresourceRange = subresourceRange;
988 
989             vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0,
990                 0, nullptr,
991                 0, nullptr,
992                 1, &barrier);
993 
994             // Copy image data
995             VkBufferImageCopy copy = {};
996             copy.bufferOffset = 0;
997             copy.bufferRowLength = 0;
998             copy.bufferImageHeight = 0;
999             copy.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
1000             copy.imageSubresource.layerCount = 1;
1001             copy.imageExtent = currAllocInfo.m_ImageInfo.extent;
1002 
1003             vkCmdCopyBufferToImage(g_hTemporaryCommandBuffer, stagingBuf, currAllocInfo.m_Image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy);
1004 
1005             // Transition image to the desired final layout
1006             barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
1007             barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
1008             barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
1009             barrier.newLayout = currAllocInfo.m_ImageLayout;
1010 
1011             vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0,
1012                 0, nullptr,
1013                 0, nullptr,
1014                 1, &barrier);
1015         }
1016     }
1017 
1018     if(cmdBufferStarted)
1019     {
1020         EndSingleTimeCommands();
1021         stagingBufs.ReleaseAllBuffers();
1022     }
1023 }
1024 
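// Reads the buffers back through staging buffers and verifies that they still contain
// the sequential pattern written by UploadGpuData. Images are not supported here.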
1025 static void ValidateGpuData(const AllocInfo* allocInfo, size_t allocInfoCount)
1026 {
1027     StagingBufferCollection stagingBufs;
1028 
1029     bool cmdBufferStarted = false;
1030     size_t validateAllocIndexOffset = 0;
1031     std::vector<void*> validateStagingBuffers;
1032     for(size_t allocInfoIndex = 0; allocInfoIndex < allocInfoCount; ++allocInfoIndex)
1033     {
1034         const AllocInfo& currAllocInfo = allocInfo[allocInfoIndex];
1035         if(currAllocInfo.m_Buffer)
1036         {
1037             const VkDeviceSize size = currAllocInfo.m_BufferInfo.size;
1038 
1039             VkBuffer stagingBuf = VK_NULL_HANDLE;
1040             void* stagingBufMappedPtr = nullptr;
1041             if(!stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr))
1042             {
1043                 TEST(cmdBufferStarted);
1044                 EndSingleTimeCommands();
1045                 cmdBufferStarted = false;
1046 
1047                 for(size_t validateIndex = 0;
1048                     validateIndex < validateStagingBuffers.size();
1049                     ++validateIndex)
1050                 {
1051                     const size_t validateAllocIndex = validateIndex + validateAllocIndexOffset;
1052                     const VkDeviceSize validateSize = allocInfo[validateAllocIndex].m_BufferInfo.size;
1053                     TEST(validateSize % sizeof(uint32_t) == 0);
1054                     const uint32_t* stagingValPtr = (const uint32_t*)validateStagingBuffers[validateIndex];
1055                     uint32_t val = allocInfo[validateAllocIndex].m_StartValue;
1056                     bool valid = true;
1057                     for(size_t i = 0; i < validateSize / sizeof(uint32_t); ++i)
1058                     {
1059                         if(*stagingValPtr != val)
1060                         {
1061                             valid = false;
1062                             break;
1063                         }
1064                         ++stagingValPtr;
1065                         ++val;
1066                     }
1067                     TEST(valid);
1068                 }
1069 
1070                 stagingBufs.ReleaseAllBuffers();
1071 
1072                 validateAllocIndexOffset = allocInfoIndex;
1073                 validateStagingBuffers.clear();
1074 
1075                 bool ok = stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr);
1076                 TEST(ok);
1077             }
1078 
1079             // Issue copy command from the destination buffer back to the staging buffer.
1080             if(!cmdBufferStarted)
1081             {
1082                 cmdBufferStarted = true;
1083                 BeginSingleTimeCommands();
1084             }
1085 
1086             VkBufferCopy copy = {};
1087             copy.srcOffset = 0;
1088             copy.dstOffset = 0;
1089             copy.size = size;
1090             vkCmdCopyBuffer(g_hTemporaryCommandBuffer, currAllocInfo.m_Buffer, stagingBuf, 1, &copy);
1091 
1092             // Save mapped pointer for later validation.
1093             validateStagingBuffers.push_back(stagingBufMappedPtr);
1094         }
1095         else
1096         {
1097             TEST(0 && "Images not currently supported.");
1098         }
1099     }
1100 
1101     if(cmdBufferStarted)
1102     {
1103         EndSingleTimeCommands();
1104 
1105         for(size_t validateIndex = 0;
1106             validateIndex < validateStagingBuffers.size();
1107             ++validateIndex)
1108         {
1109             const size_t validateAllocIndex = validateIndex + validateAllocIndexOffset;
1110             const VkDeviceSize validateSize = allocInfo[validateAllocIndex].m_BufferInfo.size;
1111             TEST(validateSize % sizeof(uint32_t) == 0);
1112             const uint32_t* stagingValPtr = (const uint32_t*)validateStagingBuffers[validateIndex];
1113             uint32_t val = allocInfo[validateAllocIndex].m_StartValue;
1114             bool valid = true;
1115             for(size_t i = 0; i < validateSize / sizeof(uint32_t); ++i)
1116             {
1117                 if(*stagingValPtr != val)
1118                 {
1119                     valid = false;
1120                     break;
1121                 }
1122                 ++stagingValPtr;
1123                 ++val;
1124             }
1125             TEST(valid);
1126         }
1127 
1128         stagingBufs.ReleaseAllBuffers();
1129     }
1130 }
1131 
1132 static void GetMemReq(VmaAllocationCreateInfo& outMemReq)
1133 {
1134     outMemReq = {};
1135     outMemReq.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
1136     //outMemReq.flags = VMA_ALLOCATION_CREATE_PERSISTENT_MAP_BIT;
1137 }
1138 
1139 static void CreateBuffer(
1140     VmaPool pool,
1141     const VkBufferCreateInfo& bufCreateInfo,
1142     bool persistentlyMapped,
1143     AllocInfo& outAllocInfo)
1144 {
1145     outAllocInfo = {};
1146     outAllocInfo.m_BufferInfo = bufCreateInfo;
1147 
1148     VmaAllocationCreateInfo allocCreateInfo = {};
1149     allocCreateInfo.pool = pool;
1150     if(persistentlyMapped)
1151         allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
1152 
1153     VmaAllocationInfo vmaAllocInfo = {};
1154     ERR_GUARD_VULKAN( vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &outAllocInfo.m_Buffer, &outAllocInfo.m_Allocation, &vmaAllocInfo) );
1155 
1156     // Setup StartValue and fill.
1157     {
1158         outAllocInfo.m_StartValue = (uint32_t)rand();
1159         uint32_t* data = (uint32_t*)vmaAllocInfo.pMappedData;
1160         TEST((data != nullptr) == persistentlyMapped);
1161         if(!persistentlyMapped)
1162         {
1163             ERR_GUARD_VULKAN( vmaMapMemory(g_hAllocator, outAllocInfo.m_Allocation, (void**)&data) );
1164         }
1165 
1166         uint32_t value = outAllocInfo.m_StartValue;
1167         TEST(bufCreateInfo.size % 4 == 0);
1168         for(size_t i = 0; i < bufCreateInfo.size / sizeof(uint32_t); ++i)
1169             data[i] = value++;
1170 
1171         if(!persistentlyMapped)
1172             vmaUnmapMemory(g_hAllocator, outAllocInfo.m_Allocation);
1173     }
1174 }
1175 
1176 static void CreateAllocation(AllocInfo& outAllocation)
1177 {
1178     outAllocation.m_Allocation = nullptr;
1179     outAllocation.m_Buffer = nullptr;
1180     outAllocation.m_Image = nullptr;
1181     outAllocation.m_StartValue = (uint32_t)rand();
1182 
1183     VmaAllocationCreateInfo vmaMemReq;
1184     GetMemReq(vmaMemReq);
1185 
1186     VmaAllocationInfo allocInfo;
1187 
1188     const bool isBuffer = true;//(rand() & 0x1) != 0;
1189     const bool isLarge = (rand() % 16) == 0;
1190     if(isBuffer)
1191     {
1192         const uint32_t bufferSize = isLarge ?
1193             (rand() % 10 + 1) * (1024 * 1024) : // 1 MB ... 10 MB
1194             (rand() % 1024 + 1) * 1024; // 1 KB ... 1 MB
1195 
1196         VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
1197         bufferInfo.size = bufferSize;
1198         bufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
1199 
1200         VkResult res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &vmaMemReq, &outAllocation.m_Buffer, &outAllocation.m_Allocation, &allocInfo);
1201         outAllocation.m_BufferInfo = bufferInfo;
1202         TEST(res == VK_SUCCESS);
1203     }
1204     else
1205     {
1206         const uint32_t imageSizeX = isLarge ?
1207             1024 + rand() % (4096 - 1024) : // 1024 ... 4096
1208             rand() % 1024 + 1; // 1 ... 1024
1209         const uint32_t imageSizeY = isLarge ?
1210             1024 + rand() % (4096 - 1024) : // 1024 ... 4096
1211             rand() % 1024 + 1; // 1 ... 1024
1212 
1213         VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
1214         imageInfo.imageType = VK_IMAGE_TYPE_2D;
1215         imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
1216         imageInfo.extent.width = imageSizeX;
1217         imageInfo.extent.height = imageSizeY;
1218         imageInfo.extent.depth = 1;
1219         imageInfo.mipLevels = 1;
1220         imageInfo.arrayLayers = 1;
1221         imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
1222         imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
1223         imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
1224         imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
1225 
1226         VkResult res = vmaCreateImage(g_hAllocator, &imageInfo, &vmaMemReq, &outAllocation.m_Image, &outAllocation.m_Allocation, &allocInfo);
1227         outAllocation.m_ImageInfo = imageInfo;
1228         TEST(res == VK_SUCCESS);
1229     }
1230 
1231     uint32_t* data = (uint32_t*)allocInfo.pMappedData;
1232     if(allocInfo.pMappedData == nullptr)
1233     {
1234         VkResult res = vmaMapMemory(g_hAllocator, outAllocation.m_Allocation, (void**)&data);
1235         TEST(res == VK_SUCCESS);
1236     }
1237 
1238     uint32_t value = outAllocation.m_StartValue;
1239     TEST(allocInfo.size % 4 == 0);
1240     for(size_t i = 0; i < allocInfo.size / sizeof(uint32_t); ++i)
1241         data[i] = value++;
1242 
1243     if(allocInfo.pMappedData == nullptr)
1244         vmaUnmapMemory(g_hAllocator, outAllocation.m_Allocation);
1245 }
1246 
1247 static void DestroyAllocation(const AllocInfo& allocation)
1248 {
1249     if(allocation.m_Buffer)
1250         vmaDestroyBuffer(g_hAllocator, allocation.m_Buffer, allocation.m_Allocation);
1251     else
1252         vmaDestroyImage(g_hAllocator, allocation.m_Image, allocation.m_Allocation);
1253 }
1254 
1255 static void DestroyAllAllocations(std::vector<AllocInfo>& allocations)
1256 {
1257     for(size_t i = allocations.size(); i--; )
1258         DestroyAllocation(allocations[i]);
1259     allocations.clear();
1260 }
1261 
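// Maps the allocation (if not persistently mapped) and verifies that it still contains
// the sequential uint32_t pattern starting at m_StartValue.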
1262 static void ValidateAllocationData(const AllocInfo& allocation)
1263 {
1264     VmaAllocationInfo allocInfo;
1265     vmaGetAllocationInfo(g_hAllocator, allocation.m_Allocation, &allocInfo);
1266 
1267     uint32_t* data = (uint32_t*)allocInfo.pMappedData;
1268     if(allocInfo.pMappedData == nullptr)
1269     {
1270         VkResult res = vmaMapMemory(g_hAllocator, allocation.m_Allocation, (void**)&data);
1271         TEST(res == VK_SUCCESS);
1272     }
1273 
1274     uint32_t value = allocation.m_StartValue;
1275     bool ok = true;
1276     size_t i;
1277     TEST(allocInfo.size % 4 == 0);
1278     for(i = 0; i < allocInfo.size / sizeof(uint32_t); ++i)
1279     {
1280         if(data[i] != value++)
1281         {
1282             ok = false;
1283             break;
1284         }
1285     }
1286     TEST(ok);
1287 
1288     if(allocInfo.pMappedData == nullptr)
1289         vmaUnmapMemory(g_hAllocator, allocation.m_Allocation);
1290 }
1291 
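// Destroys and recreates the Vulkan buffer/image and binds it to the same VmaAllocation.
// Used after defragmentation for allocations that have been moved.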
1292 static void RecreateAllocationResource(AllocInfo& allocation)
1293 {
1294     VmaAllocationInfo allocInfo;
1295     vmaGetAllocationInfo(g_hAllocator, allocation.m_Allocation, &allocInfo);
1296 
1297     if(allocation.m_Buffer)
1298     {
1299         vkDestroyBuffer(g_hDevice, allocation.m_Buffer, g_Allocs);
1300 
1301         VkResult res = vkCreateBuffer(g_hDevice, &allocation.m_BufferInfo, g_Allocs, &allocation.m_Buffer);
1302         TEST(res == VK_SUCCESS);
1303 
1304         // Just to silence validation layer warnings.
1305         VkMemoryRequirements vkMemReq;
1306         vkGetBufferMemoryRequirements(g_hDevice, allocation.m_Buffer, &vkMemReq);
1307         TEST(vkMemReq.size >= allocation.m_BufferInfo.size);
1308 
1309         res = vmaBindBufferMemory(g_hAllocator, allocation.m_Allocation, allocation.m_Buffer);
1310         TEST(res == VK_SUCCESS);
1311     }
1312     else
1313     {
1314         vkDestroyImage(g_hDevice, allocation.m_Image, g_Allocs);
1315 
1316         VkResult res = vkCreateImage(g_hDevice, &allocation.m_ImageInfo, g_Allocs, &allocation.m_Image);
1317         TEST(res == VK_SUCCESS);
1318 
1319         // Just to silence validation layer warnings.
1320         VkMemoryRequirements vkMemReq;
1321         vkGetImageMemoryRequirements(g_hDevice, allocation.m_Image, &vkMemReq);
1322 
1323         res = vmaBindImageMemory(g_hAllocator, allocation.m_Allocation, allocation.m_Image);
1324         TEST(res == VK_SUCCESS);
1325     }
1326 }
1327 
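// Runs vmaDefragment() on the given allocations and recreates the resources whose
// allocations were reported as changed.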
1328 static void Defragment(AllocInfo* allocs, size_t allocCount,
1329     const VmaDefragmentationInfo* defragmentationInfo = nullptr,
1330     VmaDefragmentationStats* defragmentationStats = nullptr)
1331 {
1332     std::vector<VmaAllocation> vmaAllocs(allocCount);
1333     for(size_t i = 0; i < allocCount; ++i)
1334         vmaAllocs[i] = allocs[i].m_Allocation;
1335 
1336     std::vector<VkBool32> allocChanged(allocCount);
1337 
1338     ERR_GUARD_VULKAN( vmaDefragment(g_hAllocator, vmaAllocs.data(), allocCount, allocChanged.data(),
1339         defragmentationInfo, defragmentationStats) );
1340 
1341     for(size_t i = 0; i < allocCount; ++i)
1342     {
1343         if(allocChanged[i])
1344         {
1345             RecreateAllocationResource(allocs[i]);
1346         }
1347     }
1348 }
1349 
1350 static void ValidateAllocationsData(const AllocInfo* allocs, size_t allocCount)
1351 {
1352     std::for_each(allocs, allocs + allocCount, [](const AllocInfo& allocInfo) {
1353         ValidateAllocationData(allocInfo);
1354     });
1355 }
1356 
1357 void TestDefragmentationSimple()
1358 {
1359     wprintf(L"Test defragmentation simple\n");
1360 
1361     RandomNumberGenerator rand(667);
1362 
1363     const VkDeviceSize BUF_SIZE = 0x10000;
1364     const VkDeviceSize BLOCK_SIZE = BUF_SIZE * 8;
1365 
1366     const VkDeviceSize MIN_BUF_SIZE = 32;
1367     const VkDeviceSize MAX_BUF_SIZE = BUF_SIZE * 4;
1368     auto RandomBufSize = [&]() -> VkDeviceSize {
1369         return align_up<VkDeviceSize>(rand.Generate() % (MAX_BUF_SIZE - MIN_BUF_SIZE + 1) + MIN_BUF_SIZE, 32);
1370     };
1371 
1372     VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
1373     bufCreateInfo.size = BUF_SIZE;
1374     bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
1375 
1376     VmaAllocationCreateInfo exampleAllocCreateInfo = {};
1377     exampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
1378 
1379     uint32_t memTypeIndex = UINT32_MAX;
1380     vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &bufCreateInfo, &exampleAllocCreateInfo, &memTypeIndex);
1381 
1382     VmaPoolCreateInfo poolCreateInfo = {};
1383     poolCreateInfo.blockSize = BLOCK_SIZE;
1384     poolCreateInfo.memoryTypeIndex = memTypeIndex;
1385 
1386     VmaPool pool;
1387     ERR_GUARD_VULKAN( vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool) );
1388 
1389     // Defragmentation of empty pool.
1390     {
1391         VmaDefragmentationInfo2 defragInfo = {};
1392         defragInfo.maxCpuBytesToMove = VK_WHOLE_SIZE;
1393         defragInfo.maxCpuAllocationsToMove = UINT32_MAX;
1394         defragInfo.poolCount = 1;
1395         defragInfo.pPools = &pool;
1396 
1397         VmaDefragmentationStats defragStats = {};
1398         VmaDefragmentationContext defragCtx = nullptr;
1399         VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &defragStats, &defragCtx);
1400         TEST(res >= VK_SUCCESS);
1401         vmaDefragmentationEnd(g_hAllocator, defragCtx);
1402         TEST(defragStats.allocationsMoved == 0 && defragStats.bytesFreed == 0 &&
1403             defragStats.bytesMoved == 0 && defragStats.deviceMemoryBlocksFreed == 0);
1404     }
1405 
1406     std::vector<AllocInfo> allocations;
1407 
1408     // persistentlyMappedOption = 0 - not persistently mapped.
1409     // persistentlyMappedOption = 1 - persistently mapped.
1410     for(uint32_t persistentlyMappedOption = 0; persistentlyMappedOption < 2; ++persistentlyMappedOption)
1411     {
1412         wprintf(L"  Persistently mapped option = %u\n", persistentlyMappedOption);
1413         const bool persistentlyMapped = persistentlyMappedOption != 0;
1414 
1415         // # Test 1
1416         // Buffers of fixed size.
1417         // Fill 2 blocks. Remove odd buffers. Defragment everything.
1418         // Expected result: at least 1 block freed.
1419         {
1420             for(size_t i = 0; i < BLOCK_SIZE / BUF_SIZE * 2; ++i)
1421             {
1422                 AllocInfo allocInfo;
1423                 CreateBuffer(pool, bufCreateInfo, persistentlyMapped, allocInfo);
1424                 allocations.push_back(allocInfo);
1425             }
1426 
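            // Destroy every second buffer: erase(begin() + i) shifts the next element into slot i,
            // and ++i then skips it, so the originally odd-indexed buffers are removed, leaving holes.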
1427             for(size_t i = 1; i < allocations.size(); ++i)
1428             {
1429                 DestroyAllocation(allocations[i]);
1430                 allocations.erase(allocations.begin() + i);
1431             }
1432 
1433             VmaDefragmentationStats defragStats;
1434             Defragment(allocations.data(), allocations.size(), nullptr, &defragStats);
1435             TEST(defragStats.allocationsMoved > 0 && defragStats.bytesMoved > 0);
1436             TEST(defragStats.deviceMemoryBlocksFreed >= 1);
1437 
1438             ValidateAllocationsData(allocations.data(), allocations.size());
1439 
1440             DestroyAllAllocations(allocations);
1441         }
1442 
1443         // # Test 2
1444         // Buffers of fixed size.
1445         // Fill 2 blocks. Remove odd buffers. Defragment one buffer at a time.
1446         // Expected result: Each of 4 iterations makes some progress.
1447         {
1448             for(size_t i = 0; i < BLOCK_SIZE / BUF_SIZE * 2; ++i)
1449             {
1450                 AllocInfo allocInfo;
1451                 CreateBuffer(pool, bufCreateInfo, persistentlyMapped, allocInfo);
1452                 allocations.push_back(allocInfo);
1453             }
1454 
1455             for(size_t i = 1; i < allocations.size(); ++i)
1456             {
1457                 DestroyAllocation(allocations[i]);
1458                 allocations.erase(allocations.begin() + i);
1459             }
1460 
1461             VmaDefragmentationInfo defragInfo = {};
1462             defragInfo.maxAllocationsToMove = 1;
1463             defragInfo.maxBytesToMove = BUF_SIZE;
1464 
1465             for(size_t i = 0; i < BLOCK_SIZE / BUF_SIZE / 2; ++i)
1466             {
1467                 VmaDefragmentationStats defragStats;
1468                 Defragment(allocations.data(), allocations.size(), &defragInfo, &defragStats);
1469                 TEST(defragStats.allocationsMoved > 0 && defragStats.bytesMoved > 0);
1470             }
1471 
1472             ValidateAllocationsData(allocations.data(), allocations.size());
1473 
1474             DestroyAllAllocations(allocations);
1475         }
1476 
1477         // # Test 3
1478         // Buffers of variable size.
1479         // Create a number of buffers. Remove some percent of them.
1480         // Defragment while having some percent of them unmovable.
1481         // Expected result: Just simple validation.
1482         {
1483             for(size_t i = 0; i < 100; ++i)
1484             {
1485                 VkBufferCreateInfo localBufCreateInfo = bufCreateInfo;
1486                 localBufCreateInfo.size = RandomBufSize();
1487 
1488                 AllocInfo allocInfo;
1489                 CreateBuffer(pool, localBufCreateInfo, persistentlyMapped, allocInfo);
1490                 allocations.push_back(allocInfo);
1491             }
1492 
1493             const uint32_t percentToDelete = 60;
1494             const size_t numberToDelete = allocations.size() * percentToDelete / 100;
1495             for(size_t i = 0; i < numberToDelete; ++i)
1496             {
1497                 size_t indexToDelete = rand.Generate() % (uint32_t)allocations.size();
1498                 DestroyAllocation(allocations[indexToDelete]);
1499                 allocations.erase(allocations.begin() + indexToDelete);
1500             }
1501 
1502             // Non-movable allocations will be at the beginning of the allocations array.
1503             const uint32_t percentNonMovable = 20;
1504             const size_t numberNonMovable = allocations.size() * percentNonMovable / 100;
1505             for(size_t i = 0; i < numberNonMovable; ++i)
1506             {
1507                 size_t indexNonMovable = i + rand.Generate() % (uint32_t)(allocations.size() - i);
1508                 if(indexNonMovable != i)
1509                     std::swap(allocations[i], allocations[indexNonMovable]);
1510             }
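            // (The loop above is a partial Fisher-Yates shuffle: it moves a random subset of
            // numberNonMovable allocations to the front of the array.)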
1511 
1512             VmaDefragmentationStats defragStats;
1513             Defragment(
1514                 allocations.data() + numberNonMovable,
1515                 allocations.size() - numberNonMovable,
1516                 nullptr, &defragStats);
1517 
1518             ValidateAllocationsData(allocations.data(), allocations.size());
1519 
1520             DestroyAllAllocations(allocations);
1521         }
1522     }
1523 
1524     /*
1525     Allocation that must be moved to an overlapping place using memmove().
1526     Create 2 buffers, second slightly bigger than the first. Delete first. Then defragment.
1527     */
1528     if(VMA_DEBUG_MARGIN == 0) // FAST algorithm works only when DEBUG_MARGIN disabled.
1529     {
1530         AllocInfo allocInfo[2];
1531 
1532         bufCreateInfo.size = BUF_SIZE;
1533         CreateBuffer(pool, bufCreateInfo, false, allocInfo[0]);
1534         const VkDeviceSize biggerBufSize = BUF_SIZE + BUF_SIZE / 256;
1535         bufCreateInfo.size = biggerBufSize;
1536         CreateBuffer(pool, bufCreateInfo, false, allocInfo[1]);
1537 
1538         DestroyAllocation(allocInfo[0]);
1539 
1540         VmaDefragmentationStats defragStats;
1541         Defragment(&allocInfo[1], 1, nullptr, &defragStats);
1542         // If this fails, it means we couldn't do memmove with overlapping regions.
1543         TEST(defragStats.allocationsMoved == 1 && defragStats.bytesMoved > 0);
1544 
1545         ValidateAllocationsData(&allocInfo[1], 1);
1546         DestroyAllocation(allocInfo[1]);
1547     }
1548 
1549     vmaDestroyPool(g_hAllocator, pool);
1550 }
1551 
1552 void TestDefragmentationWholePool()
1553 {
1554     wprintf(L"Test defragmentation whole pool\n");
1555 
1556     RandomNumberGenerator rand(668);
1557 
1558     const VkDeviceSize BUF_SIZE = 0x10000;
1559     const VkDeviceSize BLOCK_SIZE = BUF_SIZE * 8;
1560 
1561     VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
1562     bufCreateInfo.size = BUF_SIZE;
1563     bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
1564 
1565     VmaAllocationCreateInfo exampleAllocCreateInfo = {};
1566     exampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
1567 
1568     uint32_t memTypeIndex = UINT32_MAX;
1569     vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &bufCreateInfo, &exampleAllocCreateInfo, &memTypeIndex);
1570 
1571     VmaPoolCreateInfo poolCreateInfo = {};
1572     poolCreateInfo.blockSize = BLOCK_SIZE;
1573     poolCreateInfo.memoryTypeIndex = memTypeIndex;
1574 
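    // Run the same scenario twice: case 0 defragments by passing the whole pool, case 1 by passing
    // an explicit list of all its allocations. Both paths are expected to report identical
    // statistics, which is checked at the end of this function.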
1575     VmaDefragmentationStats defragStats[2];
1576     for(size_t caseIndex = 0; caseIndex < 2; ++caseIndex)
1577     {
1578         VmaPool pool;
1579         ERR_GUARD_VULKAN( vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool) );
1580 
1581         std::vector<AllocInfo> allocations;
1582 
1583         // Buffers of fixed size.
1584         // Fill 2 blocks. Remove odd buffers. Defragment all of them.
1585         for(size_t i = 0; i < BLOCK_SIZE / BUF_SIZE * 2; ++i)
1586         {
1587             AllocInfo allocInfo;
1588             CreateBuffer(pool, bufCreateInfo, false, allocInfo);
1589             allocations.push_back(allocInfo);
1590         }
1591 
1592         for(size_t i = 1; i < allocations.size(); ++i)
1593         {
1594             DestroyAllocation(allocations[i]);
1595             allocations.erase(allocations.begin() + i);
1596         }
1597 
1598         VmaDefragmentationInfo2 defragInfo = {};
1599         defragInfo.maxCpuAllocationsToMove = UINT32_MAX;
1600         defragInfo.maxCpuBytesToMove = VK_WHOLE_SIZE;
1601         std::vector<VmaAllocation> allocationsToDefrag;
1602         if(caseIndex == 0)
1603         {
1604             defragInfo.poolCount = 1;
1605             defragInfo.pPools = &pool;
1606         }
1607         else
1608         {
1609             const size_t allocCount = allocations.size();
1610             allocationsToDefrag.resize(allocCount);
1611             std::transform(
1612                 allocations.begin(), allocations.end(),
1613                 allocationsToDefrag.begin(),
1614                 [](const AllocInfo& allocInfo) { return allocInfo.m_Allocation; });
1615             defragInfo.allocationCount = (uint32_t)allocCount;
1616             defragInfo.pAllocations = allocationsToDefrag.data();
1617         }
1618 
1619         VmaDefragmentationContext defragCtx = VK_NULL_HANDLE;
1620         VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &defragStats[caseIndex], &defragCtx);
1621         TEST(res >= VK_SUCCESS);
1622         vmaDefragmentationEnd(g_hAllocator, defragCtx);
1623 
1624         TEST(defragStats[caseIndex].allocationsMoved > 0 && defragStats[caseIndex].bytesMoved > 0);
1625 
1626         ValidateAllocationsData(allocations.data(), allocations.size());
1627 
1628         DestroyAllAllocations(allocations);
1629 
1630         vmaDestroyPool(g_hAllocator, pool);
1631     }
1632 
1633     TEST(defragStats[0].bytesMoved == defragStats[1].bytesMoved);
1634     TEST(defragStats[0].allocationsMoved == defragStats[1].allocationsMoved);
1635     TEST(defragStats[0].bytesFreed == defragStats[1].bytesFreed);
1636     TEST(defragStats[0].deviceMemoryBlocksFreed == defragStats[1].deviceMemoryBlocksFreed);
1637 }
1638 
1639 void TestDefragmentationFull()
1640 {
1641     std::vector<AllocInfo> allocations;
1642 
1643     // Create initial allocations.
1644     for(size_t i = 0; i < 400; ++i)
1645     {
1646         AllocInfo allocation;
1647         CreateAllocation(allocation);
1648         allocations.push_back(allocation);
1649     }
1650 
1651     // Delete random allocations
1652     const size_t allocationsToDeletePercent = 80;
1653     size_t allocationsToDelete = allocations.size() * allocationsToDeletePercent / 100;
1654     for(size_t i = 0; i < allocationsToDelete; ++i)
1655     {
1656         size_t index = (size_t)rand() % allocations.size();
1657         DestroyAllocation(allocations[index]);
1658         allocations.erase(allocations.begin() + index);
1659     }
1660 
1661     for(size_t i = 0; i < allocations.size(); ++i)
1662         ValidateAllocationData(allocations[i]);
1663 
1664     //SaveAllocatorStatsToFile(L"Before.csv");
1665 
1666     {
1667         std::vector<VmaAllocation> vmaAllocations(allocations.size());
1668         for(size_t i = 0; i < allocations.size(); ++i)
1669             vmaAllocations[i] = allocations[i].m_Allocation;
1670 
1671         const size_t nonMovablePercent = 0;
1672         size_t nonMovableCount = vmaAllocations.size() * nonMovablePercent / 100;
1673         for(size_t i = 0; i < nonMovableCount; ++i)
1674         {
1675             size_t index = (size_t)rand() % vmaAllocations.size();
1676             vmaAllocations.erase(vmaAllocations.begin() + index);
1677         }
1678 
1679         const uint32_t defragCount = 1;
1680         for(uint32_t defragIndex = 0; defragIndex < defragCount; ++defragIndex)
1681         {
1682             std::vector<VkBool32> allocationsChanged(vmaAllocations.size());
1683 
1684             VmaDefragmentationInfo defragmentationInfo;
1685             defragmentationInfo.maxAllocationsToMove = UINT_MAX;
1686             defragmentationInfo.maxBytesToMove = SIZE_MAX;
1687 
1688             wprintf(L"Defragmentation #%u\n", defragIndex);
1689 
1690             time_point begTime = std::chrono::high_resolution_clock::now();
1691 
1692             VmaDefragmentationStats stats;
1693             VkResult res = vmaDefragment(g_hAllocator, vmaAllocations.data(), vmaAllocations.size(), allocationsChanged.data(), &defragmentationInfo, &stats);
1694             TEST(res >= 0);
1695 
1696             float defragmentDuration = ToFloatSeconds(std::chrono::high_resolution_clock::now() - begTime);
1697 
1698             wprintf(L"Moved allocations %u, bytes %llu\n", stats.allocationsMoved, stats.bytesMoved);
1699             wprintf(L"Freed blocks %u, bytes %llu\n", stats.deviceMemoryBlocksFreed, stats.bytesFreed);
1700             wprintf(L"Time: %.2f s\n", defragmentDuration);
1701 
1702             for(size_t i = 0; i < vmaAllocations.size(); ++i)
1703             {
1704                 if(allocationsChanged[i])
1705                 {
1706                     RecreateAllocationResource(allocations[i]);
1707                 }
1708             }
1709 
1710             for(size_t i = 0; i < allocations.size(); ++i)
1711                 ValidateAllocationData(allocations[i]);
1712 
1713             //wchar_t fileName[MAX_PATH];
1714             //swprintf(fileName, MAX_PATH, L"After_%02u.csv", defragIndex);
1715             //SaveAllocatorStatsToFile(fileName);
1716         }
1717     }
1718 
1719     // Destroy all remaining allocations.
1720     DestroyAllAllocations(allocations);
1721 }
1722 
1723 static void TestDefragmentationGpu()
1724 {
1725     wprintf(L"Test defragmentation GPU\n");
1726 
1727     std::vector<AllocInfo> allocations;
1728 
1729     // Create enough allocations to reliably fill 3 new blocks of 256 MB.
1730     const VkDeviceSize bufSizeMin = 5ull * 1024 * 1024;
1731     const VkDeviceSize bufSizeMax = 10ull * 1024 * 1024;
1732     const VkDeviceSize totalSize = 3ull * 256 * 1024 * 1024;
1733     const size_t bufCount = (size_t)(totalSize / bufSizeMin);
1734     const size_t percentToLeave = 30;
1735     const size_t percentNonMovable = 3;
1736     RandomNumberGenerator rand = { 234522 };
1737 
1738     VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
1739 
1740     VmaAllocationCreateInfo allocCreateInfo = {};
1741     allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
1742     allocCreateInfo.flags = 0;
1743 
1744     // Create all intended buffers.
1745     for(size_t i = 0; i < bufCount; ++i)
1746     {
1747         bufCreateInfo.size = align_up(rand.Generate() % (bufSizeMax - bufSizeMin) + bufSizeMin, 32ull);
1748 
1749         if(rand.Generate() % 100 < percentNonMovable)
1750         {
1751             bufCreateInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
1752                 VK_BUFFER_USAGE_TRANSFER_DST_BIT |
1753                 VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
1754             allocCreateInfo.pUserData = (void*)(uintptr_t)2;
1755         }
1756         else
1757         {
1758             // Different usage just to see different color in output from VmaDumpVis.
1759             bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
1760                 VK_BUFFER_USAGE_TRANSFER_DST_BIT |
1761                 VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
1762             // And in JSON dump.
1763             allocCreateInfo.pUserData = (void*)(uintptr_t)1;
1764         }
1765 
1766         AllocInfo alloc;
1767         alloc.CreateBuffer(bufCreateInfo, allocCreateInfo);
1768         alloc.m_StartValue = rand.Generate();
1769         allocations.push_back(alloc);
1770     }
1771 
1772     // Destroy some percentage of them.
1773     {
1774         const size_t buffersToDestroy = round_div<size_t>(bufCount * (100 - percentToLeave), 100);
1775         for(size_t i = 0; i < buffersToDestroy; ++i)
1776         {
1777             const size_t index = rand.Generate() % allocations.size();
1778             allocations[index].Destroy();
1779             allocations.erase(allocations.begin() + index);
1780         }
1781     }
1782 
1783     // Fill them with meaningful data.
1784     UploadGpuData(allocations.data(), allocations.size());
1785 
1786     wchar_t fileName[MAX_PATH];
1787     swprintf_s(fileName, L"GPU_defragmentation_A_before.json");
1788     SaveAllocatorStatsToFile(fileName);
1789 
1790     // Defragment using GPU only.
1791     {
1792         const size_t allocCount = allocations.size();
1793 
1794         std::vector<VmaAllocation> allocationPtrs;
1795         std::vector<VkBool32> allocationChanged;
1796         std::vector<size_t> allocationOriginalIndex;
1797 
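        // Only allocations tagged with pUserData == 1 are handed to the defragmentation below;
        // allocations tagged 2 are effectively non-movable simply because they are left out.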
1798         for(size_t i = 0; i < allocCount; ++i)
1799         {
1800             VmaAllocationInfo allocInfo = {};
1801             vmaGetAllocationInfo(g_hAllocator, allocations[i].m_Allocation, &allocInfo);
1802             if((uintptr_t)allocInfo.pUserData == 1) // Movable
1803             {
1804                 allocationPtrs.push_back(allocations[i].m_Allocation);
1805                 allocationChanged.push_back(VK_FALSE);
1806                 allocationOriginalIndex.push_back(i);
1807             }
1808         }
1809 
1810         const size_t movableAllocCount = allocationPtrs.size();
1811 
1812         BeginSingleTimeCommands();
1813 
1814         VmaDefragmentationInfo2 defragInfo = {};
1815         defragInfo.flags = 0;
1816         defragInfo.allocationCount = (uint32_t)movableAllocCount;
1817         defragInfo.pAllocations = allocationPtrs.data();
1818         defragInfo.pAllocationsChanged = allocationChanged.data();
1819         defragInfo.maxGpuBytesToMove = VK_WHOLE_SIZE;
1820         defragInfo.maxGpuAllocationsToMove = UINT32_MAX;
1821         defragInfo.commandBuffer = g_hTemporaryCommandBuffer;
1822 
1823         VmaDefragmentationStats stats = {};
1824         VmaDefragmentationContext ctx = VK_NULL_HANDLE;
1825         VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &stats, &ctx);
1826         TEST(res >= VK_SUCCESS);
1827 
1828         EndSingleTimeCommands();
1829 
1830         vmaDefragmentationEnd(g_hAllocator, ctx);
1831 
1832         for(size_t i = 0; i < movableAllocCount; ++i)
1833         {
1834             if(allocationChanged[i])
1835             {
1836                 const size_t origAllocIndex = allocationOriginalIndex[i];
1837                 RecreateAllocationResource(allocations[origAllocIndex]);
1838             }
1839         }
1840 
1841         // If corruption detection is enabled, GPU defragmentation may not work on
1842         // memory types that have this detection active, e.g. on Intel.
1843         #if !defined(VMA_DEBUG_DETECT_CORRUPTION) || VMA_DEBUG_DETECT_CORRUPTION == 0
1844             TEST(stats.allocationsMoved > 0 && stats.bytesMoved > 0);
1845             TEST(stats.deviceMemoryBlocksFreed > 0 && stats.bytesFreed > 0);
1846         #endif
1847     }
1848 
1849     ValidateGpuData(allocations.data(), allocations.size());
1850 
1851     swprintf_s(fileName, L"GPU_defragmentation_B_after.json");
1852     SaveAllocatorStatsToFile(fileName);
1853 
1854     // Destroy all remaining buffers.
1855     for(size_t i = allocations.size(); i--; )
1856     {
1857         allocations[i].Destroy();
1858     }
1859 }
1860 
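// Records into g_hTemporaryCommandBuffer the GPU work for one incremental defragmentation pass:
// for every move it creates a new buffer/image at the destination, emits the required layout
// transitions and memory barriers, and copies the old resource's contents into the new one.
// The caller destroys the old resources and adopts the new handles after the pass ends.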
1861 static void ProcessDefragmentationStepInfo(VmaDefragmentationPassInfo &stepInfo)
1862 {
1863     std::vector<VkImageMemoryBarrier> beginImageBarriers;
1864     std::vector<VkImageMemoryBarrier> finalizeImageBarriers;
1865 
1866     VkPipelineStageFlags beginSrcStageMask = 0;
1867     VkPipelineStageFlags beginDstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
1868 
1869     VkPipelineStageFlags finalizeSrcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
1870     VkPipelineStageFlags finalizeDstStageMask = 0;
1871 
1872     bool wantsMemoryBarrier = false;
1873 
1874     VkMemoryBarrier beginMemoryBarrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER };
1875     VkMemoryBarrier finalizeMemoryBarrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER };
1876 
1877     for(uint32_t i = 0; i < stepInfo.moveCount; ++i)
1878     {
1879         VmaAllocationInfo info;
1880         vmaGetAllocationInfo(g_hAllocator, stepInfo.pMoves[i].allocation, &info);
1881 
1882         AllocInfo *allocInfo = (AllocInfo *)info.pUserData;
1883 
1884         if(allocInfo->m_Image)
1885         {
1886             VkImage newImage;
1887 
1888             const VkResult result = vkCreateImage(g_hDevice, &allocInfo->m_ImageInfo, g_Allocs, &newImage);
1889             TEST(result >= VK_SUCCESS);
1890 
1891             vkBindImageMemory(g_hDevice, newImage, stepInfo.pMoves[i].memory, stepInfo.pMoves[i].offset);
1892             allocInfo->m_NewImage = newImage;
1893 
1894             // Keep track of our pipeline stages that we need to wait/signal on
1895             beginSrcStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
1896             finalizeDstStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
1897 
1898             // We need one pipeline barrier and two image layout transitions here
1899             // First we'll have to turn our newly created image into VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
1900             // And the second one is turning the old image into VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
1901 
1902             VkImageSubresourceRange subresourceRange = {
1903                 VK_IMAGE_ASPECT_COLOR_BIT,
1904                 0, VK_REMAINING_MIP_LEVELS,
1905                 0, VK_REMAINING_ARRAY_LAYERS
1906             };
1907 
1908             VkImageMemoryBarrier barrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER };
1909             barrier.srcAccessMask = 0;
1910             barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
1911             barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
1912             barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
1913             barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1914             barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1915             barrier.image = newImage;
1916             barrier.subresourceRange = subresourceRange;
1917 
1918             beginImageBarriers.push_back(barrier);
1919 
1920             // Second barrier to convert the existing image. This one actually needs a real barrier
1921             barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
1922             barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
1923             barrier.oldLayout = allocInfo->m_ImageLayout;
1924             barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
1925             barrier.image = allocInfo->m_Image;
1926 
1927             beginImageBarriers.push_back(barrier);
1928 
1929             // And lastly we need a barrier that turns our new image into the layout of the old one
1930             barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
1931             barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
1932             barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
1933             barrier.newLayout = allocInfo->m_ImageLayout;
1934             barrier.image = newImage;
1935 
1936             finalizeImageBarriers.push_back(barrier);
1937         }
1938         else if(allocInfo->m_Buffer)
1939         {
1940             VkBuffer newBuffer;
1941 
1942             const VkResult result = vkCreateBuffer(g_hDevice, &allocInfo->m_BufferInfo, g_Allocs, &newBuffer);
1943             TEST(result >= VK_SUCCESS);
1944 
1945             vkBindBufferMemory(g_hDevice, newBuffer, stepInfo.pMoves[i].memory, stepInfo.pMoves[i].offset);
1946             allocInfo->m_NewBuffer = newBuffer;
1947 
1948             // Keep track of our pipeline stages that we need to wait/signal on
1949             beginSrcStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
1950             finalizeDstStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
1951 
1952             beginMemoryBarrier.srcAccessMask |= VK_ACCESS_MEMORY_WRITE_BIT;
1953             beginMemoryBarrier.dstAccessMask |= VK_ACCESS_TRANSFER_READ_BIT;
1954 
1955             finalizeMemoryBarrier.srcAccessMask |= VK_ACCESS_TRANSFER_WRITE_BIT;
1956             finalizeMemoryBarrier.dstAccessMask |= VK_ACCESS_MEMORY_READ_BIT;
1957 
1958             wantsMemoryBarrier = true;
1959         }
1960     }
1961 
1962     if(!beginImageBarriers.empty() || wantsMemoryBarrier)
1963     {
1964         const uint32_t memoryBarrierCount = wantsMemoryBarrier ? 1 : 0;
1965 
1966         vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, beginSrcStageMask, beginDstStageMask, 0,
1967             memoryBarrierCount, &beginMemoryBarrier,
1968             0, nullptr,
1969             (uint32_t)beginImageBarriers.size(), beginImageBarriers.data());
1970     }
1971 
1972     for(uint32_t i = 0; i < stepInfo.moveCount; ++ i)
1973     {
1974         VmaAllocationInfo info;
1975         vmaGetAllocationInfo(g_hAllocator, stepInfo.pMoves[i].allocation, &info);
1976 
1977         AllocInfo *allocInfo = (AllocInfo *)info.pUserData;
1978 
1979         if(allocInfo->m_Image)
1980         {
1981             std::vector<VkImageCopy> imageCopies;
1982 
1983             // Copy all mips of the source image into the target image
1984             VkOffset3D offset = { 0, 0, 0 };
1985             VkExtent3D extent = allocInfo->m_ImageInfo.extent;
1986 
1987             VkImageSubresourceLayers subresourceLayers = {
1988                 VK_IMAGE_ASPECT_COLOR_BIT,
1989                 0,
1990                 0, 1
1991             };
1992 
1993             for(uint32_t mip = 0; mip < allocInfo->m_ImageInfo.mipLevels; ++ mip)
1994             {
1995                 subresourceLayers.mipLevel = mip;
1996 
1997                 VkImageCopy imageCopy{
1998                     subresourceLayers,
1999                     offset,
2000                     subresourceLayers,
2001                     offset,
2002                     extent
2003                 };
2004 
2005                 imageCopies.push_back(imageCopy);
2006 
2007                 extent.width = std::max(uint32_t(1), extent.width >> 1);
2008                 extent.height = std::max(uint32_t(1), extent.height >> 1);
2009                 extent.depth = std::max(uint32_t(1), extent.depth >> 1);
2010             }
2011 
2012             vkCmdCopyImage(
2013                 g_hTemporaryCommandBuffer,
2014                 allocInfo->m_Image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
2015                 allocInfo->m_NewImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
2016                 (uint32_t)imageCopies.size(), imageCopies.data());
2017         }
2018         else if(allocInfo->m_Buffer)
2019         {
2020             VkBufferCopy region = {
2021                 0,
2022                 0,
2023                 allocInfo->m_BufferInfo.size };
2024 
2025             vkCmdCopyBuffer(g_hTemporaryCommandBuffer,
2026                 allocInfo->m_Buffer, allocInfo->m_NewBuffer,
2027                 1, &region);
2028         }
2029     }
2030 
2031     if(!finalizeImageBarriers.empty() || wantsMemoryBarrier)
2032     {
2033         const uint32_t memoryBarrierCount = wantsMemoryBarrier ? 1 : 0;
2034 
2035         vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, finalizeSrcStageMask, finalizeDstStageMask, 0,
2036             memoryBarrierCount, &finalizeMemoryBarrier,
2037             0, nullptr,
2038             (uint32_t)finalizeImageBarriers.size(), finalizeImageBarriers.data());
2039     }
2040 }
2041 
2042 
2043 static void TestDefragmentationIncrementalBasic()
2044 {
2045     wprintf(L"Test defragmentation incremental basic\n");
2046 
2047     std::vector<AllocInfo> allocations;
2048 
2049     // Create enough allocations to reliably fill 3 new blocks of 256 MB.
2050     const std::array<uint32_t, 3> imageSizes = { 256, 512, 1024 };
2051     const VkDeviceSize bufSizeMin = 5ull * 1024 * 1024;
2052     const VkDeviceSize bufSizeMax = 10ull * 1024 * 1024;
2053     const VkDeviceSize totalSize = 3ull * 256 * 1024 * 1024;
2054     const size_t imageCount = totalSize / ((size_t)imageSizes[0] * imageSizes[0] * 4) / 2;
2055     const size_t bufCount = (size_t)(totalSize / bufSizeMin) / 2;
2056     const size_t percentToLeave = 30;
2057     RandomNumberGenerator rand = { 234522 };
2058 
2059     VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
2060     imageInfo.imageType = VK_IMAGE_TYPE_2D;
2061     imageInfo.extent.depth = 1;
2062     imageInfo.mipLevels = 1;
2063     imageInfo.arrayLayers = 1;
2064     imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
2065     imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
2066     imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
2067     imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
2068     imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
2069 
2070     VmaAllocationCreateInfo allocCreateInfo = {};
2071     allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
2072     allocCreateInfo.flags = 0;
2073 
2074     // Create all intended images.
2075     for(size_t i = 0; i < imageCount; ++i)
2076     {
2077         const uint32_t size = imageSizes[rand.Generate() % 3];
2078 
2079         imageInfo.extent.width = size;
2080         imageInfo.extent.height = size;
2081 
2082         AllocInfo alloc;
2083         alloc.CreateImage(imageInfo, allocCreateInfo, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
2084         alloc.m_StartValue = 0;
2085 
2086         allocations.push_back(alloc);
2087     }
2088 
2089     // And all buffers
2090     VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2091 
2092     for(size_t i = 0; i < bufCount; ++i)
2093     {
2094         bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
2095         bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
2096 
2097         AllocInfo alloc;
2098         alloc.CreateBuffer(bufCreateInfo, allocCreateInfo);
2099         alloc.m_StartValue = 0;
2100 
2101         allocations.push_back(alloc);
2102     }
2103 
2104     // Destroy some percentage of them.
2105     {
2106         const size_t allocationsToDestroy = round_div<size_t>((imageCount + bufCount) * (100 - percentToLeave), 100);
2107         for(size_t i = 0; i < allocationsToDestroy; ++i)
2108         {
2109             const size_t index = rand.Generate() % allocations.size();
2110             allocations[index].Destroy();
2111             allocations.erase(allocations.begin() + index);
2112         }
2113     }
2114 
2115     {
2116         // Set our user data pointers. A real application should probably be more clever here
2117         const size_t allocationCount = allocations.size();
2118         for(size_t i = 0; i < allocationCount; ++i)
2119         {
2120             AllocInfo &alloc = allocations[i];
2121             vmaSetAllocationUserData(g_hAllocator, alloc.m_Allocation, &alloc);
2122         }
2123     }
2124 
2125     // Fill them with meaningful data.
2126     UploadGpuData(allocations.data(), allocations.size());
2127 
2128     wchar_t fileName[MAX_PATH];
2129     swprintf_s(fileName, L"GPU_defragmentation_incremental_basic_A_before.json");
2130     SaveAllocatorStatsToFile(fileName);
2131 
2132     // Defragment using GPU only.
2133     {
2134         const size_t allocCount = allocations.size();
2135 
2136         std::vector<VmaAllocation> allocationPtrs;
2137 
2138         for(size_t i = 0; i < allocCount; ++i)
2139         {
2140             allocationPtrs.push_back(allocations[i].m_Allocation);
2141         }
2142 
2143         const size_t movableAllocCount = allocationPtrs.size();
2144 
2145         VmaDefragmentationInfo2 defragInfo = {};
2146         defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_INCREMENTAL;
2147         defragInfo.allocationCount = (uint32_t)movableAllocCount;
2148         defragInfo.pAllocations = allocationPtrs.data();
2149         defragInfo.maxGpuBytesToMove = VK_WHOLE_SIZE;
2150         defragInfo.maxGpuAllocationsToMove = UINT32_MAX;
2151 
2152         VmaDefragmentationStats stats = {};
2153         VmaDefragmentationContext ctx = VK_NULL_HANDLE;
2154         VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &stats, &ctx);
2155         TEST(res >= VK_SUCCESS);
2156 
2157         res = VK_NOT_READY;
2158 
2159         std::vector<VmaDefragmentationPassMoveInfo> moveInfo;
2160         moveInfo.resize(movableAllocCount);
2161 
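        // Incremental defragmentation loop: each vmaBeginDefragmentationPass() fills stepInfo.pMoves
        // with the moves planned for this pass, ProcessDefragmentationStepInfo() records the copies
        // on the GPU, and the loop repeats until vmaEndDefragmentationPass() stops returning
        // VK_NOT_READY, i.e. no further passes are needed.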
2162         while(res == VK_NOT_READY)
2163         {
2164             VmaDefragmentationPassInfo stepInfo = {};
2165             stepInfo.pMoves = moveInfo.data();
2166             stepInfo.moveCount = (uint32_t)moveInfo.size();
2167 
2168             res = vmaBeginDefragmentationPass(g_hAllocator, ctx, &stepInfo);
2169             TEST(res >= VK_SUCCESS);
2170 
2171             BeginSingleTimeCommands();
2172             std::vector<void*> newHandles;
2173             ProcessDefragmentationStepInfo(stepInfo);
2174             EndSingleTimeCommands();
2175 
2176             res = vmaEndDefragmentationPass(g_hAllocator, ctx);
2177 
2178             // Destroy old buffers/images and replace them with new handles.
2179             for(size_t i = 0; i < stepInfo.moveCount; ++i)
2180             {
2181                 VmaAllocation const alloc = stepInfo.pMoves[i].allocation;
2182                 VmaAllocationInfo vmaAllocInfo;
2183                 vmaGetAllocationInfo(g_hAllocator, alloc, &vmaAllocInfo);
2184                 AllocInfo* allocInfo = (AllocInfo*)vmaAllocInfo.pUserData;
2185                 if(allocInfo->m_Buffer)
2186                 {
2187                     assert(allocInfo->m_NewBuffer && !allocInfo->m_Image && !allocInfo->m_NewImage);
2188                     vkDestroyBuffer(g_hDevice, allocInfo->m_Buffer, g_Allocs);
2189                     allocInfo->m_Buffer = allocInfo->m_NewBuffer;
2190                     allocInfo->m_NewBuffer = VK_NULL_HANDLE;
2191                 }
2192                 else if(allocInfo->m_Image)
2193                 {
2194                     assert(allocInfo->m_NewImage && !allocInfo->m_Buffer && !allocInfo->m_NewBuffer);
2195                     vkDestroyImage(g_hDevice, allocInfo->m_Image, g_Allocs);
2196                     allocInfo->m_Image = allocInfo->m_NewImage;
2197                     allocInfo->m_NewImage = VK_NULL_HANDLE;
2198                 }
2199                 else
2200                     assert(0);
2201             }
2202         }
2203 
2204         TEST(res >= VK_SUCCESS);
2205         vmaDefragmentationEnd(g_hAllocator, ctx);
2206 
2207         // If corruption detection is enabled, GPU defragmentation may not work on
2208         // memory types that have this detection active, e.g. on Intel.
2209 #if !defined(VMA_DEBUG_DETECT_CORRUPTION) || VMA_DEBUG_DETECT_CORRUPTION == 0
2210         TEST(stats.allocationsMoved > 0 && stats.bytesMoved > 0);
2211         TEST(stats.deviceMemoryBlocksFreed > 0 && stats.bytesFreed > 0);
2212 #endif
2213     }
2214 
2215     //ValidateGpuData(allocations.data(), allocations.size());
2216 
2217     swprintf_s(fileName, L"GPU_defragmentation_incremental_basic_B_after.json");
2218     SaveAllocatorStatsToFile(fileName);
2219 
2220     // Destroy all remaining buffers and images.
2221     for(size_t i = allocations.size(); i--; )
2222     {
2223         allocations[i].Destroy();
2224     }
2225 }
2226 
2227 void TestDefragmentationIncrementalComplex()
2228 {
2229     wprintf(L"Test defragmentation incremental complex\n");
2230 
2231     std::vector<AllocInfo> allocations;
2232 
2233     // Create enough allocations to reliably fill 3 new blocks of 256 MB.
2234     const std::array<uint32_t, 3> imageSizes = { 256, 512, 1024 };
2235     const VkDeviceSize bufSizeMin = 5ull * 1024 * 1024;
2236     const VkDeviceSize bufSizeMax = 10ull * 1024 * 1024;
2237     const VkDeviceSize totalSize = 3ull * 256 * 1024 * 1024;
2238     const size_t imageCount = (size_t)(totalSize / (imageSizes[0] * imageSizes[0] * 4)) / 2;
2239     const size_t bufCount = (size_t)(totalSize / bufSizeMin) / 2;
2240     const size_t percentToLeave = 30;
2241     RandomNumberGenerator rand = { 234522 };
2242 
2243     VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
2244     imageInfo.imageType = VK_IMAGE_TYPE_2D;
2245     imageInfo.extent.depth = 1;
2246     imageInfo.mipLevels = 1;
2247     imageInfo.arrayLayers = 1;
2248     imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
2249     imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
2250     imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
2251     imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
2252     imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
2253 
2254     VmaAllocationCreateInfo allocCreateInfo = {};
2255     allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
2256     allocCreateInfo.flags = 0;
2257 
2258     // Create all intended images.
2259     for(size_t i = 0; i < imageCount; ++i)
2260     {
2261         const uint32_t size = imageSizes[rand.Generate() % 3];
2262 
2263         imageInfo.extent.width = size;
2264         imageInfo.extent.height = size;
2265 
2266         AllocInfo alloc;
2267         alloc.CreateImage(imageInfo, allocCreateInfo, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
2268         alloc.m_StartValue = 0;
2269 
2270         allocations.push_back(alloc);
2271     }
2272 
2273     // And all buffers
2274     VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2275 
2276     for(size_t i = 0; i < bufCount; ++i)
2277     {
2278         bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
2279         bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
2280 
2281         AllocInfo alloc;
2282         alloc.CreateBuffer(bufCreateInfo, allocCreateInfo);
2283         alloc.m_StartValue = 0;
2284 
2285         allocations.push_back(alloc);
2286     }
2287 
2288     // Destroy some percentage of them.
2289     {
2290         const size_t allocationsToDestroy = round_div<size_t>((imageCount + bufCount) * (100 - percentToLeave), 100);
2291         for(size_t i = 0; i < allocationsToDestroy; ++i)
2292         {
2293             const size_t index = rand.Generate() % allocations.size();
2294             allocations[index].Destroy();
2295             allocations.erase(allocations.begin() + index);
2296         }
2297     }
2298 
2299     {
2300         // Set our user data pointers. A real application should probably be more clever here
2301         const size_t allocationCount = allocations.size();
2302         for(size_t i = 0; i < allocationCount; ++i)
2303         {
2304             AllocInfo &alloc = allocations[i];
2305             vmaSetAllocationUserData(g_hAllocator, alloc.m_Allocation, &alloc);
2306         }
2307     }
2308 
2309     // Fill them with meaningful data.
2310     UploadGpuData(allocations.data(), allocations.size());
2311 
2312     wchar_t fileName[MAX_PATH];
2313     swprintf_s(fileName, L"GPU_defragmentation_incremental_complex_A_before.json");
2314     SaveAllocatorStatsToFile(fileName);
2315 
2316     std::vector<AllocInfo> additionalAllocations;
2317 
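// Creates one extra buffer and stores it in additionalAllocations. Used to interleave fresh
// allocations with the defragmentation passes below, which the incremental API has to tolerate.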
2318 #define MakeAdditionalAllocation() \
2319     do { \
2320         { \
2321             bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16); \
2322             bufCreateInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT; \
2323             \
2324             AllocInfo alloc; \
2325             alloc.CreateBuffer(bufCreateInfo, allocCreateInfo); \
2326             \
2327             additionalAllocations.push_back(alloc); \
2328         } \
2329     } while(0)
2330 
2331     // Defragment using GPU only.
2332     {
2333         const size_t allocCount = allocations.size();
2334 
2335         std::vector<VmaAllocation> allocationPtrs;
2336 
2337         for(size_t i = 0; i < allocCount; ++i)
2338         {
2339             VmaAllocationInfo allocInfo = {};
2340             vmaGetAllocationInfo(g_hAllocator, allocations[i].m_Allocation, &allocInfo);
2341 
2342             allocationPtrs.push_back(allocations[i].m_Allocation);
2343         }
2344 
2345         const size_t movableAllocCount = allocationPtrs.size();
2346 
2347         VmaDefragmentationInfo2 defragInfo = {};
2348         defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_INCREMENTAL;
2349         defragInfo.allocationCount = (uint32_t)movableAllocCount;
2350         defragInfo.pAllocations = allocationPtrs.data();
2351         defragInfo.maxGpuBytesToMove = VK_WHOLE_SIZE;
2352         defragInfo.maxGpuAllocationsToMove = UINT32_MAX;
2353 
2354         VmaDefragmentationStats stats = {};
2355         VmaDefragmentationContext ctx = VK_NULL_HANDLE;
2356         VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &stats, &ctx);
2357         TEST(res >= VK_SUCCESS);
2358 
2359         res = VK_NOT_READY;
2360 
2361         std::vector<VmaDefragmentationPassMoveInfo> moveInfo;
2362         moveInfo.resize(movableAllocCount);
2363 
2364         MakeAdditionalAllocation();
2365 
2366         while(res == VK_NOT_READY)
2367         {
2368             VmaDefragmentationPassInfo stepInfo = {};
2369             stepInfo.pMoves = moveInfo.data();
2370             stepInfo.moveCount = (uint32_t)moveInfo.size();
2371 
2372             res = vmaBeginDefragmentationPass(g_hAllocator, ctx, &stepInfo);
2373             TEST(res >= VK_SUCCESS);
2374 
2375             MakeAdditionalAllocation();
2376 
2377             BeginSingleTimeCommands();
2378             ProcessDefragmentationStepInfo(stepInfo);
2379             EndSingleTimeCommands();
2380 
2381             res = vmaEndDefragmentationPass(g_hAllocator, ctx);
2382 
2383             // Destroy old buffers/images and replace them with new handles.
2384             for(size_t i = 0; i < stepInfo.moveCount; ++i)
2385             {
2386                 VmaAllocation const alloc = stepInfo.pMoves[i].allocation;
2387                 VmaAllocationInfo vmaAllocInfo;
2388                 vmaGetAllocationInfo(g_hAllocator, alloc, &vmaAllocInfo);
2389                 AllocInfo* allocInfo = (AllocInfo*)vmaAllocInfo.pUserData;
2390                 if(allocInfo->m_Buffer)
2391                 {
2392                     assert(allocInfo->m_NewBuffer && !allocInfo->m_Image && !allocInfo->m_NewImage);
2393                     vkDestroyBuffer(g_hDevice, allocInfo->m_Buffer, g_Allocs);
2394                     allocInfo->m_Buffer = allocInfo->m_NewBuffer;
2395                     allocInfo->m_NewBuffer = VK_NULL_HANDLE;
2396                 }
2397                 else if(allocInfo->m_Image)
2398                 {
2399                     assert(allocInfo->m_NewImage && !allocInfo->m_Buffer && !allocInfo->m_NewBuffer);
2400                     vkDestroyImage(g_hDevice, allocInfo->m_Image, g_Allocs);
2401                     allocInfo->m_Image = allocInfo->m_NewImage;
2402                     allocInfo->m_NewImage = VK_NULL_HANDLE;
2403                 }
2404                 else
2405                     assert(0);
2406             }
2407 
2408             MakeAdditionalAllocation();
2409         }
2410 
2411         TEST(res >= VK_SUCCESS);
2412         vmaDefragmentationEnd(g_hAllocator, ctx);
2413 
2414         // If corruption detection is enabled, GPU defragmentation may not work on
2415         // memory types that have this detection active, e.g. on Intel.
2416 #if !defined(VMA_DEBUG_DETECT_CORRUPTION) || VMA_DEBUG_DETECT_CORRUPTION == 0
2417         TEST(stats.allocationsMoved > 0 && stats.bytesMoved > 0);
2418         TEST(stats.deviceMemoryBlocksFreed > 0 && stats.bytesFreed > 0);
2419 #endif
2420     }
2421 
2422     //ValidateGpuData(allocations.data(), allocations.size());
2423 
2424     swprintf_s(fileName, L"GPU_defragmentation_incremental_complex_B_after.json");
2425     SaveAllocatorStatsToFile(fileName);
2426 
2427     // Destroy all remaining buffers and images.
2428     for(size_t i = allocations.size(); i--; )
2429     {
2430         allocations[i].Destroy();
2431     }
2432 
2433     for(size_t i = additionalAllocations.size(); i--; )
2434     {
2435         additionalAllocations[i].Destroy();
2436     }
2437 }
2438 
2439 
2440 static void TestUserData()
2441 {
2442     VkResult res;
2443 
2444     VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2445     bufCreateInfo.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
2446     bufCreateInfo.size = 0x10000;
2447 
2448     for(uint32_t testIndex = 0; testIndex < 2; ++testIndex)
2449     {
2450         // Opaque pointer
2451         {
2452 
2453             void* numberAsPointer = (void*)(size_t)0xC2501FF3u;
2454             void* pointerToSomething = &res;
2455 
2456             VmaAllocationCreateInfo allocCreateInfo = {};
2457             allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
2458             allocCreateInfo.pUserData = numberAsPointer;
2459             if(testIndex == 1)
2460                 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
2461 
2462             VkBuffer buf; VmaAllocation alloc; VmaAllocationInfo allocInfo;
2463             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2464             TEST(res == VK_SUCCESS);
2465             TEST(allocInfo.pUserData == numberAsPointer);
2466 
2467             vmaGetAllocationInfo(g_hAllocator, alloc, &allocInfo);
2468             TEST(allocInfo.pUserData == numberAsPointer);
2469 
2470             vmaSetAllocationUserData(g_hAllocator, alloc, pointerToSomething);
2471             vmaGetAllocationInfo(g_hAllocator, alloc, &allocInfo);
2472             TEST(allocInfo.pUserData == pointerToSomething);
2473 
2474             vmaDestroyBuffer(g_hAllocator, buf, alloc);
2475         }
2476 
2477         // String
2478         {
2479             const char* name1 = "Buffer name \\\"\'<>&% \nSecond line .,;=";
2480             const char* name2 = "2";
2481             const size_t name1Len = strlen(name1);
2482 
2483             char* name1Buf = new char[name1Len + 1];
2484             strcpy_s(name1Buf, name1Len + 1, name1);
2485 
2486             VmaAllocationCreateInfo allocCreateInfo = {};
2487             allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
2488             allocCreateInfo.flags = VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT;
2489             allocCreateInfo.pUserData = name1Buf;
2490             if(testIndex == 1)
2491                 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
2492 
2493             VkBuffer buf; VmaAllocation alloc; VmaAllocationInfo allocInfo;
2494             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2495             TEST(res == VK_SUCCESS);
2496             TEST(allocInfo.pUserData != nullptr && allocInfo.pUserData != name1Buf);
2497             TEST(strcmp(name1, (const char*)allocInfo.pUserData) == 0);
2498 
2499             delete[] name1Buf;
2500 
2501             vmaGetAllocationInfo(g_hAllocator, alloc, &allocInfo);
2502             TEST(strcmp(name1, (const char*)allocInfo.pUserData) == 0);
2503 
2504             vmaSetAllocationUserData(g_hAllocator, alloc, (void*)name2);
2505             vmaGetAllocationInfo(g_hAllocator, alloc, &allocInfo);
2506             TEST(strcmp(name2, (const char*)allocInfo.pUserData) == 0);
2507 
2508             vmaSetAllocationUserData(g_hAllocator, alloc, nullptr);
2509             vmaGetAllocationInfo(g_hAllocator, alloc, &allocInfo);
2510             TEST(allocInfo.pUserData == nullptr);
2511 
2512             vmaDestroyBuffer(g_hAllocator, buf, alloc);
2513         }
2514     }
2515 }
2516 
2517 static void TestInvalidAllocations()
2518 {
2519     VkResult res;
2520 
2521     VmaAllocationCreateInfo allocCreateInfo = {};
2522     allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
2523 
2524     // Try to allocate 0 bytes.
2525     {
2526         VkMemoryRequirements memReq = {};
2527         memReq.size = 0; // !!!
2528         memReq.alignment = 4;
2529         memReq.memoryTypeBits = UINT32_MAX;
2530         VmaAllocation alloc = VK_NULL_HANDLE;
2531         res = vmaAllocateMemory(g_hAllocator, &memReq, &allocCreateInfo, &alloc, nullptr);
2532         TEST(res == VK_ERROR_INITIALIZATION_FAILED && alloc == VK_NULL_HANDLE);
2533     }
2534 
2535     // Try to create buffer with size = 0.
2536     {
2537         VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2538         bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
2539         bufCreateInfo.size = 0; // !!!
2540         VkBuffer buf = VK_NULL_HANDLE;
2541         VmaAllocation alloc = VK_NULL_HANDLE;
2542         res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr);
2543         TEST(res == VK_ERROR_INITIALIZATION_FAILED && buf == VK_NULL_HANDLE && alloc == VK_NULL_HANDLE);
2544     }
2545 
2546     // Try to create image with one dimension = 0.
2547     {
2548         VkImageCreateInfo imageCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
2549         imageCreateInfo.imageType = VK_IMAGE_TYPE_2D;
2550         imageCreateInfo.format = VK_FORMAT_B8G8R8A8_UNORM;
2551         imageCreateInfo.extent.width = 128;
2552         imageCreateInfo.extent.height = 0; // !!!
2553         imageCreateInfo.extent.depth = 1;
2554         imageCreateInfo.mipLevels = 1;
2555         imageCreateInfo.arrayLayers = 1;
2556         imageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
2557         imageCreateInfo.tiling = VK_IMAGE_TILING_LINEAR;
2558         imageCreateInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
2559         imageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
2560         VkImage image = VK_NULL_HANDLE;
2561         VmaAllocation alloc = VK_NULL_HANDLE;
2562         res = vmaCreateImage(g_hAllocator, &imageCreateInfo, &allocCreateInfo, &image, &alloc, nullptr);
2563         TEST(res == VK_ERROR_INITIALIZATION_FAILED && image == VK_NULL_HANDLE && alloc == VK_NULL_HANDLE);
2564     }
2565 }
2566 
2567 static void TestMemoryRequirements()
2568 {
2569     VkResult res;
2570     VkBuffer buf;
2571     VmaAllocation alloc;
2572     VmaAllocationInfo allocInfo;
2573 
2574     const VkPhysicalDeviceMemoryProperties* memProps;
2575     vmaGetMemoryProperties(g_hAllocator, &memProps);
2576 
2577     VkBufferCreateInfo bufInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2578     bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
2579     bufInfo.size = 128;
2580 
2581     VmaAllocationCreateInfo allocCreateInfo = {};
2582 
2583     // No requirements.
2584     res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2585     TEST(res == VK_SUCCESS);
2586     vmaDestroyBuffer(g_hAllocator, buf, alloc);
2587 
2588     // Usage.
2589     allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
2590     allocCreateInfo.requiredFlags = 0;
2591     allocCreateInfo.preferredFlags = 0;
2592     allocCreateInfo.memoryTypeBits = UINT32_MAX;
2593 
2594     res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2595     TEST(res == VK_SUCCESS);
2596     TEST(memProps->memoryTypes[allocInfo.memoryType].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
2597     vmaDestroyBuffer(g_hAllocator, buf, alloc);
2598 
2599     // Required flags, preferred flags.
2600     allocCreateInfo.usage = VMA_MEMORY_USAGE_UNKNOWN;
2601     allocCreateInfo.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
2602     allocCreateInfo.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
2603     allocCreateInfo.memoryTypeBits = 0;
2604 
2605     res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2606     TEST(res == VK_SUCCESS);
2607     TEST(memProps->memoryTypes[allocInfo.memoryType].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
2608     TEST(memProps->memoryTypes[allocInfo.memoryType].propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
2609     vmaDestroyBuffer(g_hAllocator, buf, alloc);
2610 
2611     // memoryTypeBits.
2612     const uint32_t memType = allocInfo.memoryType;
2613     allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
2614     allocCreateInfo.requiredFlags = 0;
2615     allocCreateInfo.preferredFlags = 0;
2616     allocCreateInfo.memoryTypeBits = 1u << memType;
2617 
2618     res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2619     TEST(res == VK_SUCCESS);
2620     TEST(allocInfo.memoryType == memType);
2621     vmaDestroyBuffer(g_hAllocator, buf, alloc);
2622 
2623 }
2624 
2625 static void TestGetAllocatorInfo()
2626 {
2627     wprintf(L"Test vnaGetAllocatorInfo\n");
2628 
2629     VmaAllocatorInfo allocInfo = {};
2630     vmaGetAllocatorInfo(g_hAllocator, &allocInfo);
2631     TEST(allocInfo.instance == g_hVulkanInstance);
2632     TEST(allocInfo.physicalDevice == g_hPhysicalDevice);
2633     TEST(allocInfo.device == g_hDevice);
2634 }
2635 
2636 static void TestBasics()
2637 {
2638     wprintf(L"Test basics\n");
2639 
2640     VkResult res;
2641 
2642     TestGetAllocatorInfo();
2643 
2644     TestMemoryRequirements();
2645 
2646     // Lost allocation
2647     {
2648         VmaAllocation alloc = VK_NULL_HANDLE;
2649         vmaCreateLostAllocation(g_hAllocator, &alloc);
2650         TEST(alloc != VK_NULL_HANDLE);
2651 
2652         VmaAllocationInfo allocInfo;
2653         vmaGetAllocationInfo(g_hAllocator, alloc, &allocInfo);
2654         TEST(allocInfo.deviceMemory == VK_NULL_HANDLE);
2655         TEST(allocInfo.size == 0);
2656 
2657         vmaFreeMemory(g_hAllocator, alloc);
2658     }
2659 
2660     // Allocation that is MAPPED and not necessarily HOST_VISIBLE.
2661     {
2662         VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2663         bufCreateInfo.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
2664         bufCreateInfo.size = 128;
2665 
2666         VmaAllocationCreateInfo allocCreateInfo = {};
2667         allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
2668         allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
2669 
2670         VkBuffer buf; VmaAllocation alloc; VmaAllocationInfo allocInfo;
2671         res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2672         TEST(res == VK_SUCCESS);
2673 
2674         vmaDestroyBuffer(g_hAllocator, buf, alloc);
2675 
2676         // Same with OWN_MEMORY.
2677         allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
2678 
2679         res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2680         TEST(res == VK_SUCCESS);
2681 
2682         vmaDestroyBuffer(g_hAllocator, buf, alloc);
2683     }
2684 
2685     TestUserData();
2686 
2687     TestInvalidAllocations();
2688 }
2689 
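// Exercises the virtual allocation API (VmaVirtualBlock): CPU-side sub-allocation bookkeeping
// that uses the allocator's algorithms without any real Vulkan memory behind it.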
2690 static void TestVirtualBlocks()
2691 {
2692     wprintf(L"Test virtual blocks\n");
2693 
2694     const VkDeviceSize blockSize = 16 * MEGABYTE;
2695     const VkDeviceSize alignment = 256;
2696 
2697     // # Create block 16 MB
2698 
2699     VmaVirtualBlockCreateInfo blockCreateInfo = {};
2700     blockCreateInfo.pAllocationCallbacks = g_Allocs;
2701     blockCreateInfo.size = blockSize;
2702     VmaVirtualBlock block;
2703     TEST(vmaCreateVirtualBlock(&blockCreateInfo, &block) == VK_SUCCESS && block);
2704 
2705     // # Allocate 8 MB
2706 
2707     VmaVirtualAllocationCreateInfo allocCreateInfo = {};
2708     allocCreateInfo.alignment = alignment;
2709     allocCreateInfo.pUserData = (void*)(uintptr_t)1;
2710     allocCreateInfo.size = 8 * MEGABYTE;
2711     VkDeviceSize alloc0Offset;
2712     TEST(vmaVirtualAllocate(block, &allocCreateInfo, &alloc0Offset) == VK_SUCCESS);
2713     TEST(alloc0Offset < blockSize);
2714 
2715     // # Validate the allocation
2716 
2717     VmaVirtualAllocationInfo allocInfo = {};
2718     vmaGetVirtualAllocationInfo(block, alloc0Offset, &allocInfo);
2719     TEST(allocInfo.size == allocCreateInfo.size);
2720     TEST(allocInfo.pUserData == allocCreateInfo.pUserData);
2721 
2722     // # Check SetUserData
2723 
2724     vmaSetVirtualAllocationUserData(block, alloc0Offset, (void*)(uintptr_t)2);
2725     vmaGetVirtualAllocationInfo(block, alloc0Offset, &allocInfo);
2726     TEST(allocInfo.pUserData == (void*)(uintptr_t)2);
2727 
2728     // # Allocate 4 MB
2729 
2730     allocCreateInfo.size = 4 * MEGABYTE;
2731     UINT64 alloc1Offset;
2732     TEST(vmaVirtualAllocate(block, &allocCreateInfo, &alloc1Offset) == VK_SUCCESS);
2733     TEST(alloc1Offset < blockSize);
2734     TEST(alloc1Offset + 4 * MEGABYTE <= alloc0Offset || alloc0Offset + 8 * MEGABYTE <= alloc1Offset); // Check if they don't overlap.
2735 
2736     // # Allocate another 8 MB - it should fail
2737 
2738     allocCreateInfo.size = 8 * MEGABYTE;
2739     UINT64 alloc2Offset;
2740     TEST(vmaVirtualAllocate(block, &allocCreateInfo, &alloc2Offset) < 0);
2741     TEST(alloc2Offset == VK_WHOLE_SIZE);
2742 
2743     // # Free the 4 MB block. Now allocation of 8 MB should succeed.
2744 
2745     vmaVirtualFree(block, alloc1Offset);
2746     TEST(vmaVirtualAllocate(block, &allocCreateInfo, &alloc2Offset) == VK_SUCCESS);
2747     TEST(alloc2Offset < blockSize);
2748     TEST(alloc2Offset + 4 * MEGABYTE <= alloc0Offset || alloc0Offset + 8 * MEGABYTE <= alloc2Offset); // Check if they don't overlap.
2749 
2750     // # Calculate statistics
2751 
2752     VmaStatInfo statInfo = {};
2753     vmaCalculateVirtualBlockStats(block, &statInfo);
2754     TEST(statInfo.allocationCount == 2);
2755     TEST(statInfo.blockCount == 1);
2756     TEST(statInfo.usedBytes == blockSize);
2757     TEST(statInfo.unusedBytes + statInfo.usedBytes == blockSize);
2758 
2759     // # Generate JSON dump
2760 
2761     char* json = nullptr;
2762     vmaBuildVirtualBlockStatsString(block, &json, VK_TRUE);
2763     {
2764         std::string str(json);
2765         TEST( str.find("\"UserData\": \"0000000000000001\"") != std::string::npos );
2766         TEST( str.find("\"UserData\": \"0000000000000002\"") != std::string::npos );
2767     }
2768     vmaFreeVirtualBlockStatsString(block, json);
2769 
2770     // # Free alloc0, leave alloc2 unfreed.
2771 
2772     vmaVirtualFree(block, alloc0Offset);
2773 
2774     // # Test alignment
2775 
2776     {
2777         constexpr size_t allocCount = 10;
2778         VkDeviceSize allocOffset[allocCount] = {};
2779         for(size_t i = 0; i < allocCount; ++i)
2780         {
2781             const bool alignment0 = i == allocCount - 1;
2782             allocCreateInfo.size = i * 3 + 15;
2783             allocCreateInfo.alignment = alignment0 ? 0 : 8;
2784             TEST(vmaVirtualAllocate(block, &allocCreateInfo, &allocOffset[i]) == VK_SUCCESS);
2785             if(!alignment0)
2786             {
2787                 TEST(allocOffset[i] % allocCreateInfo.alignment == 0);
2788             }
2789         }
2790 
2791         for(size_t i = allocCount; i--; )
2792         {
2793             vmaVirtualFree(block, allocOffset[i]);
2794         }
2795     }
2796 
2797     // # Final cleanup
2798 
2799     vmaVirtualFree(block, alloc2Offset);
2800     vmaDestroyVirtualBlock(block);
2801 
2802     {
2803         // Another virtual block, using Clear this time.
2804         TEST(vmaCreateVirtualBlock(&blockCreateInfo, &block) == VK_SUCCESS);
2805 
2806         allocCreateInfo = VmaVirtualAllocationCreateInfo{};
2807         allocCreateInfo.size = MEGABYTE;
2808 
2809         for(size_t i = 0; i < 8; ++i)
2810         {
2811             VkDeviceSize offset = 0;
2812             TEST(vmaVirtualAllocate(block, &allocCreateInfo, &offset) == VK_SUCCESS);
2813         }
2814 
2815         vmaClearVirtualBlock(block);
2816         vmaDestroyVirtualBlock(block);
2817     }
2818 }
2819 
2820 static void TestVirtualBlocksAlgorithms()
2821 {
2822     wprintf(L"Test virtual blocks algorithms\n");
2823 
2824     RandomNumberGenerator rand{3454335};
2825     auto calcRandomAllocSize = [&rand]() -> VkDeviceSize { return rand.Generate() % 20 + 5; };
2826 
2827     for(size_t algorithmIndex = 0; algorithmIndex < 3; ++algorithmIndex)
2828     {
2829         // Create the block
2830         VmaVirtualBlockCreateInfo blockCreateInfo = {};
2831         blockCreateInfo.pAllocationCallbacks = g_Allocs;
2832         blockCreateInfo.size = 10'000;
2833         switch(algorithmIndex)
2834         {
2835         case 1: blockCreateInfo.flags = VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT; break;
2836         case 2: blockCreateInfo.flags = VMA_VIRTUAL_BLOCK_CREATE_BUDDY_ALGORITHM_BIT; break;
2837         }
2838         VmaVirtualBlock block = nullptr;
2839         VkResult res = vmaCreateVirtualBlock(&blockCreateInfo, &block);
2840         TEST(res == VK_SUCCESS);
2841 
2842         struct AllocData
2843         {
2844             VkDeviceSize offset, requestedSize, allocationSize;
2845         };
2846         std::vector<AllocData> allocations;
2847 
2848         // Make some allocations
2849         for(size_t i = 0; i < 20; ++i)
2850         {
2851             VmaVirtualAllocationCreateInfo allocCreateInfo = {};
2852             allocCreateInfo.size = calcRandomAllocSize();
2853             allocCreateInfo.pUserData = (void*)(uintptr_t)(allocCreateInfo.size * 10);
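            // The first 10 allocations use the default strategy; the following ones exercise the
            // MIN_MEMORY / MIN_TIME / MIN_FRAGMENTATION strategies and, for the linear algorithm
            // only, upper-address placement.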
2854             if(i < 10) { }
2855             else if(i < 12) allocCreateInfo.flags = VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT;
2856             else if(i < 14) allocCreateInfo.flags = VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT;
2857             else if(i < 16) allocCreateInfo.flags = VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_FRAGMENTATION_BIT;
2858             else if(i < 18 && algorithmIndex == 1) allocCreateInfo.flags = VMA_VIRTUAL_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
2859 
2860             AllocData alloc = {};
2861             alloc.requestedSize = allocCreateInfo.size;
2862             res = vmaVirtualAllocate(block, &allocCreateInfo, &alloc.offset);
2863             TEST(res == VK_SUCCESS);
2864 
2865             VmaVirtualAllocationInfo allocInfo;
2866             vmaGetVirtualAllocationInfo(block, alloc.offset, &allocInfo);
2867             TEST(allocInfo.size >= allocCreateInfo.size);
2868             alloc.allocationSize = allocInfo.size;
2869 
2870             allocations.push_back(alloc);
2871         }
2872 
2873         // Free some of the allocations
2874         for(size_t i = 0; i < 5; ++i)
2875         {
2876             const size_t index = rand.Generate() % allocations.size();
2877             vmaVirtualFree(block, allocations[index].offset);
2878             allocations.erase(allocations.begin() + index);
2879         }
2880 
2881         // Allocate some more
2882         for(size_t i = 0; i < 6; ++i)
2883         {
2884             VmaVirtualAllocationCreateInfo allocCreateInfo = {};
2885             allocCreateInfo.size = calcRandomAllocSize();
2886             allocCreateInfo.pUserData = (void*)(uintptr_t)(allocCreateInfo.size * 10);
2887 
2888             AllocData alloc = {};
2889             alloc.requestedSize = allocCreateInfo.size;
2890             res = vmaVirtualAllocate(block, &allocCreateInfo, &alloc.offset);
2891             TEST(res == VK_SUCCESS);
2892 
2893             VmaVirtualAllocationInfo allocInfo;
2894             vmaGetVirtualAllocationInfo(block, alloc.offset, &allocInfo);
2895             TEST(allocInfo.size >= allocCreateInfo.size);
2896             alloc.allocationSize = allocInfo.size;
2897 
2898             allocations.push_back(alloc);
2899         }
2900 
2901         // Allocate some with extra alignment
2902         for(size_t i = 0; i < 3; ++i)
2903         {
2904             VmaVirtualAllocationCreateInfo allocCreateInfo = {};
2905             allocCreateInfo.size = calcRandomAllocSize();
2906             allocCreateInfo.alignment = 16;
2907             allocCreateInfo.pUserData = (void*)(uintptr_t)(allocCreateInfo.size * 10);
2908 
2909             AllocData alloc = {};
2910             alloc.requestedSize = allocCreateInfo.size;
2911             res = vmaVirtualAllocate(block, &allocCreateInfo, &alloc.offset);
2912             TEST(res == VK_SUCCESS);
2913             TEST(alloc.offset % 16 == 0);
2914 
2915             VmaVirtualAllocationInfo allocInfo;
2916             vmaGetVirtualAllocationInfo(block, alloc.offset, &allocInfo);
2917             TEST(allocInfo.size >= allocCreateInfo.size);
2918             alloc.allocationSize = allocInfo.size;
2919 
2920             allocations.push_back(alloc);
2921         }
2922 
2923         // Check if the allocations don't overlap
2924         std::sort(allocations.begin(), allocations.end(), [](const AllocData& lhs, const AllocData& rhs) {
2925             return lhs.offset < rhs.offset; });
2926         for(size_t i = 0; i < allocations.size() - 1; ++i)
2927         {
2928             TEST(allocations[i+1].offset >= allocations[i].offset + allocations[i].allocationSize);
2929         }
2930 
2931         // Check pUserData
2932         {
2933             const AllocData& alloc = allocations.back();
2934             VmaVirtualAllocationInfo allocInfo = {};
2935             vmaGetVirtualAllocationInfo(block, alloc.offset, &allocInfo);
2936             TEST((uintptr_t)allocInfo.pUserData == alloc.requestedSize * 10);
2937 
2938             vmaSetVirtualAllocationUserData(block, alloc.offset, (void*)(uintptr_t)666);
2939             vmaGetVirtualAllocationInfo(block, alloc.offset, &allocInfo);
2940             TEST((uintptr_t)allocInfo.pUserData == 666);
2941         }
2942 
2943         // Calculate statistics
2944         {
2945             VkDeviceSize actualAllocSizeMin = VK_WHOLE_SIZE, actualAllocSizeMax = 0, actualAllocSizeSum = 0;
2946             std::for_each(allocations.begin(), allocations.end(), [&](const AllocData& a) {
2947                 actualAllocSizeMin = std::min(actualAllocSizeMin, a.allocationSize);
2948                 actualAllocSizeMax = std::max(actualAllocSizeMax, a.allocationSize);
2949                 actualAllocSizeSum += a.allocationSize;
2950             });
2951 
2952             VmaStatInfo statInfo = {};
2953             vmaCalculateVirtualBlockStats(block, &statInfo);
2954             TEST(statInfo.allocationCount == allocations.size());
2955             TEST(statInfo.blockCount == 1);
2956             TEST(statInfo.usedBytes + statInfo.unusedBytes == blockCreateInfo.size);
2957             TEST(statInfo.allocationSizeMax == actualAllocSizeMax);
2958             TEST(statInfo.allocationSizeMin == actualAllocSizeMin);
2959             TEST(statInfo.usedBytes >= actualAllocSizeSum);
2960         }
2961 
2962         // Build JSON dump string
2963         {
2964             char* json = nullptr;
2965             vmaBuildVirtualBlockStatsString(block, &json, VK_TRUE);
2966             int I = 0; // put a breakpoint here to debug
2967             vmaFreeVirtualBlockStatsString(block, json);
2968         }
2969 
2970         // Final cleanup
2971         vmaClearVirtualBlock(block);
2972         vmaDestroyVirtualBlock(block);
2973     }
2974 }
2975 
2976 static void TestAllocationVersusResourceSize()
2977 {
2978     wprintf(L"Test allocation versus resource size\n");
2979 
2980     VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2981     bufCreateInfo.size = 22921; // Prime number
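    // An "odd" prime size makes it likely that the memory requirements, and therefore
    // VmaAllocationInfo::size, get rounded up past the buffer size, which is what this test probes.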
2982     bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
2983 
2984     VmaAllocationCreateInfo allocCreateInfo = {};
2985     allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
2986 
2987     for(uint32_t i = 0; i < 2; ++i)
2988     {
2989         allocCreateInfo.flags = (i == 1) ? VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT : 0;
2990 
2991         AllocInfo info;
2992         info.CreateBuffer(bufCreateInfo, allocCreateInfo);
2993 
2994         VmaAllocationInfo allocInfo = {};
2995         vmaGetAllocationInfo(g_hAllocator, info.m_Allocation, &allocInfo);
2996         //wprintf(L"  Buffer size = %llu, allocation size = %llu\n", bufCreateInfo.size, allocInfo.size);
2997 
2998         // Map and test accessing entire area of the allocation, not only the buffer.
2999         void* mappedPtr = nullptr;
3000         VkResult res = vmaMapMemory(g_hAllocator, info.m_Allocation, &mappedPtr);
3001         TEST(res == VK_SUCCESS);
3002 
3003         memset(mappedPtr, 0xCC, (size_t)allocInfo.size);
3004 
3005         vmaUnmapMemory(g_hAllocator, info.m_Allocation);
3006 
3007         info.Destroy();
3008     }
3009 }
3010 
3011 static void TestPool_MinBlockCount()
3012 {
3013 #if defined(VMA_DEBUG_MARGIN) && VMA_DEBUG_MARGIN > 0
3014     return;
3015 #endif
3016 
3017     wprintf(L"Test Pool MinBlockCount\n");
3018     VkResult res;
3019 
3020     static const VkDeviceSize ALLOC_SIZE = 512ull * 1024;
3021     static const VkDeviceSize BLOCK_SIZE = ALLOC_SIZE * 2; // Each block can fit 2 allocations.
3022 
3023     VmaAllocationCreateInfo allocCreateInfo = {};
3024     allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_COPY;
3025 
3026     VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
3027     bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
3028     bufCreateInfo.size = ALLOC_SIZE;
3029 
3030     VmaPoolCreateInfo poolCreateInfo = {};
3031     poolCreateInfo.blockSize = BLOCK_SIZE;
3032     poolCreateInfo.minBlockCount = 2; // At least 2 blocks always present.
3033     res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &poolCreateInfo.memoryTypeIndex);
3034     TEST(res == VK_SUCCESS);
3035 
3036     VmaPool pool = VK_NULL_HANDLE;
3037     res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
3038     TEST(res == VK_SUCCESS && pool != VK_NULL_HANDLE);
3039 
3040     // Check that there are 2 blocks preallocated as requested.
3041     VmaPoolStats begPoolStats = {};
3042     vmaGetPoolStats(g_hAllocator, pool, &begPoolStats);
3043     TEST(begPoolStats.blockCount == 2 && begPoolStats.allocationCount == 0 && begPoolStats.size == BLOCK_SIZE * 2);
3044 
3045     // Allocate 5 buffers to create 3 blocks.
3046     static const uint32_t BUF_COUNT = 5;
3047     allocCreateInfo.pool = pool;
3048     std::vector<AllocInfo> allocs(BUF_COUNT);
3049     for(uint32_t i = 0; i < BUF_COUNT; ++i)
3050     {
3051         res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &allocs[i].m_Buffer, &allocs[i].m_Allocation, nullptr);
3052         TEST(res == VK_SUCCESS && allocs[i].m_Buffer != VK_NULL_HANDLE && allocs[i].m_Allocation != VK_NULL_HANDLE);
3053     }
3054 
3055     // Check that there are really 3 blocks.
3056     VmaPoolStats poolStats2 = {};
3057     vmaGetPoolStats(g_hAllocator, pool, &poolStats2);
3058     TEST(poolStats2.blockCount == 3 && poolStats2.allocationCount == BUF_COUNT && poolStats2.size == BLOCK_SIZE * 3);
3059 
3060     // Free the first two allocations to make one block empty.
3061     allocs[0].Destroy();
3062     allocs[1].Destroy();
3063 
3064     // Check that there are still 3 blocks due to hysteresis.
3065     VmaPoolStats poolStats3 = {};
3066     vmaGetPoolStats(g_hAllocator, pool, &poolStats3);
3067     TEST(poolStats3.blockCount == 3 && poolStats3.allocationCount == BUF_COUNT - 2 && poolStats3.size == BLOCK_SIZE * 3);
3068 
3069     // Free the last allocation to make the second block empty.
3070     allocs[BUF_COUNT - 1].Destroy();
3071 
3072     // Check that there are now 2 blocks only.
3073     VmaPoolStats poolStats4 = {};
3074     vmaGetPoolStats(g_hAllocator, pool, &poolStats4);
3075     TEST(poolStats4.blockCount == 2 && poolStats4.allocationCount == BUF_COUNT - 3 && poolStats4.size == BLOCK_SIZE * 2);
3076 
3077     // Cleanup.
3078     for(size_t i = allocs.size(); i--; )
3079     {
3080         allocs[i].Destroy();
3081     }
3082     vmaDestroyPool(g_hAllocator, pool);
3083 }
3084 
3085 static void TestPool_MinAllocationAlignment()
3086 {
3087     wprintf(L"Test Pool MinAllocationAlignment\n");
3088     VkResult res;
3089 
3090     static const VkDeviceSize ALLOC_SIZE = 32;
3091     static const VkDeviceSize BLOCK_SIZE = 1024 * 1024;
3092     static const VkDeviceSize MIN_ALLOCATION_ALIGNMENT = 64 * 1024;
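    // With minAllocationAlignment set on the pool, every allocation offset should be a multiple
    // of 64 KB even though the 32-byte buffers themselves require far smaller alignment.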
3093 
3094     VmaAllocationCreateInfo allocCreateInfo = {};
3095     allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_COPY;
3096 
3097     VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
3098     bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
3099     bufCreateInfo.size = ALLOC_SIZE;
3100 
3101     VmaPoolCreateInfo poolCreateInfo = {};
3102     poolCreateInfo.blockSize = BLOCK_SIZE;
3103     poolCreateInfo.minAllocationAlignment = MIN_ALLOCATION_ALIGNMENT;
3104     res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &poolCreateInfo.memoryTypeIndex);
3105     TEST(res == VK_SUCCESS);
3106 
3107     VmaPool pool = VK_NULL_HANDLE;
3108     res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
3109     TEST(res == VK_SUCCESS && pool != VK_NULL_HANDLE);
3110 
3111     static const uint32_t BUF_COUNT = 4;
3112     allocCreateInfo = {};
3113     allocCreateInfo.pool = pool;
3114     std::vector<AllocInfo> allocs(BUF_COUNT);
3115     for(uint32_t i = 0; i < BUF_COUNT; ++i)
3116     {
3117         VmaAllocationInfo allocInfo = {};
3118         res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &allocs[i].m_Buffer, &allocs[i].m_Allocation, &allocInfo);
3119         TEST(res == VK_SUCCESS && allocs[i].m_Buffer != VK_NULL_HANDLE && allocs[i].m_Allocation != VK_NULL_HANDLE);
3120         TEST(allocInfo.offset % MIN_ALLOCATION_ALIGNMENT == 0);
3121     }
3122 
3123     // Cleanup.
3124     for(size_t i = allocs.size(); i--; )
3125     {
3126         allocs[i].Destroy();
3127     }
3128     vmaDestroyPool(g_hAllocator, pool);
3129 }
3130 
3131 void TestHeapSizeLimit()
3132 {
3133     const VkDeviceSize HEAP_SIZE_LIMIT = 100ull * 1024 * 1024; // 100 MB
3134     const VkDeviceSize BLOCK_SIZE      =  10ull * 1024 * 1024; // 10 MB
3135 
3136     VkDeviceSize heapSizeLimit[VK_MAX_MEMORY_HEAPS];
3137     for(uint32_t i = 0; i < VK_MAX_MEMORY_HEAPS; ++i)
3138     {
3139         heapSizeLimit[i] = HEAP_SIZE_LIMIT;
3140     }
3141 
3142     VmaAllocatorCreateInfo allocatorCreateInfo = {};
3143     allocatorCreateInfo.physicalDevice = g_hPhysicalDevice;
3144     allocatorCreateInfo.device = g_hDevice;
3145     allocatorCreateInfo.instance = g_hVulkanInstance;
3146     allocatorCreateInfo.pHeapSizeLimit = heapSizeLimit;
3147 
3148     VmaAllocator hAllocator;
3149     VkResult res = vmaCreateAllocator(&allocatorCreateInfo, &hAllocator);
3150     TEST(res == VK_SUCCESS);
3151 
3152     struct Item
3153     {
3154         VkBuffer hBuf;
3155         VmaAllocation hAlloc;
3156     };
3157     std::vector<Item> items;
3158 
3159     VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
3160     bufCreateInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
3161 
3162     // 1. Allocate two blocks of dedicated memory, half the size of BLOCK_SIZE.
3163     VmaAllocationInfo dedicatedAllocInfo;
3164     {
3165         VmaAllocationCreateInfo allocCreateInfo = {};
3166         allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
3167         allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
3168 
3169         bufCreateInfo.size = BLOCK_SIZE / 2;
3170 
3171         for(size_t i = 0; i < 2; ++i)
3172         {
3173             Item item;
3174             res = vmaCreateBuffer(hAllocator, &bufCreateInfo, &allocCreateInfo, &item.hBuf, &item.hAlloc, &dedicatedAllocInfo);
3175             TEST(res == VK_SUCCESS);
3176             items.push_back(item);
3177         }
3178     }
3179 
3180     // Create a pool to make sure further allocations must come from this memory type.
3181     VmaPoolCreateInfo poolCreateInfo = {};
3182     poolCreateInfo.memoryTypeIndex = dedicatedAllocInfo.memoryType;
3183     poolCreateInfo.blockSize = BLOCK_SIZE;
3184 
3185     VmaPool hPool;
3186     res = vmaCreatePool(hAllocator, &poolCreateInfo, &hPool);
3187     TEST(res == VK_SUCCESS);
3188 
3189     // 2. Allocate normal buffers from all the remaining memory.
3190     {
3191         VmaAllocationCreateInfo allocCreateInfo = {};
3192         allocCreateInfo.pool = hPool;
3193 
3194         bufCreateInfo.size = BLOCK_SIZE / 2;
3195 
3196         const size_t bufCount = ((HEAP_SIZE_LIMIT / BLOCK_SIZE) - 1) * 2;
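        // 100 MB heap limit / 10 MB blocks = 10 blocks; one block's worth is already taken by the
        // two dedicated 5 MB buffers, so the remaining 9 blocks hold 18 half-block-sized buffers.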
3197         for(size_t i = 0; i < bufCount; ++i)
3198         {
3199             Item item;
3200             res = vmaCreateBuffer(hAllocator, &bufCreateInfo, &allocCreateInfo, &item.hBuf, &item.hAlloc, nullptr);
3201             TEST(res == VK_SUCCESS);
3202             items.push_back(item);
3203         }
3204     }
3205 
3206     // 3. Allocation of one more (even small) buffer should fail.
3207     {
3208         VmaAllocationCreateInfo allocCreateInfo = {};
3209         allocCreateInfo.pool = hPool;
3210 
3211         bufCreateInfo.size = 128;
3212 
3213         VkBuffer hBuf;
3214         VmaAllocation hAlloc;
3215         res = vmaCreateBuffer(hAllocator, &bufCreateInfo, &allocCreateInfo, &hBuf, &hAlloc, nullptr);
3216         TEST(res == VK_ERROR_OUT_OF_DEVICE_MEMORY);
3217     }
3218 
3219     // Destroy everything.
3220     for(size_t i = items.size(); i--; )
3221     {
3222         vmaDestroyBuffer(hAllocator, items[i].hBuf, items[i].hAlloc);
3223     }
3224 
3225     vmaDestroyPool(hAllocator, hPool);
3226 
3227     vmaDestroyAllocator(hAllocator);
3228 }
3229 
3230 #if VMA_DEBUG_MARGIN
3231 static void TestDebugMargin()
3232 {
3233     if(VMA_DEBUG_MARGIN == 0)
3234     {
3235         return;
3236     }
3237 
3238     VkBufferCreateInfo bufInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
3239     bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
3240 
3241     VmaAllocationCreateInfo allocCreateInfo = {};
3242     allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
3243 
3244     // Create a few buffers of different sizes.
3245     const size_t BUF_COUNT = 10;
3246     BufferInfo buffers[BUF_COUNT];
3247     VmaAllocationInfo allocInfo[BUF_COUNT];
3248     for(size_t i = 0; i < BUF_COUNT; ++i)
3249     {
3250         bufInfo.size = (VkDeviceSize)(i + 1) * 64;
3251         // Last one will be mapped.
3252         allocCreateInfo.flags = (i == BUF_COUNT - 1) ? VMA_ALLOCATION_CREATE_MAPPED_BIT : 0;
3253 
3254         VkResult res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo, &buffers[i].Buffer, &buffers[i].Allocation, &allocInfo[i]);
3255         TEST(res == VK_SUCCESS);
3256         // Margin is preserved also at the beginning of a block.
3257         TEST(allocInfo[i].offset >= VMA_DEBUG_MARGIN);
3258 
3259         if(i == BUF_COUNT - 1)
3260         {
3261             // Fill with data.
3262             TEST(allocInfo[i].pMappedData != nullptr);
3263             // Uncomment this "+ 1" to overwrite past end of allocation and check corruption detection.
3264             memset(allocInfo[i].pMappedData, 0xFF, bufInfo.size /* + 1 */);
3265         }
3266     }
3267 
3268     // Check if their offsets preserve margin between them.
3269     std::sort(allocInfo, allocInfo + BUF_COUNT, [](const VmaAllocationInfo& lhs, const VmaAllocationInfo& rhs) -> bool
3270     {
3271         if(lhs.deviceMemory != rhs.deviceMemory)
3272         {
3273             return lhs.deviceMemory < rhs.deviceMemory;
3274         }
3275         return lhs.offset < rhs.offset;
3276     });
3277     for(size_t i = 1; i < BUF_COUNT; ++i)
3278     {
3279         if(allocInfo[i].deviceMemory == allocInfo[i - 1].deviceMemory)
3280         {
3281             TEST(allocInfo[i].offset >= allocInfo[i - 1].offset + VMA_DEBUG_MARGIN);
3282         }
3283     }
3284 
3285     VkResult res = vmaCheckCorruption(g_hAllocator, UINT32_MAX);
3286     TEST(res == VK_SUCCESS);
3287 
3288     // Destroy all buffers.
3289     for(size_t i = BUF_COUNT; i--; )
3290     {
3291         vmaDestroyBuffer(g_hAllocator, buffers[i].Buffer, buffers[i].Allocation);
3292     }
3293 }
3294 #endif
3295 
3296 static void TestLinearAllocator()
3297 {
3298     wprintf(L"Test linear allocator\n");
3299 
3300     RandomNumberGenerator rand{645332};
3301 
3302     VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
3303     sampleBufCreateInfo.size = 1024; // Whatever.
3304     sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
3305 
3306     VmaAllocationCreateInfo sampleAllocCreateInfo = {};
3307     sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
3308 
3309     VmaPoolCreateInfo poolCreateInfo = {};
3310     VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
3311     TEST(res == VK_SUCCESS);
3312 
3313     poolCreateInfo.blockSize = 1024 * 300;
3314     poolCreateInfo.flags = VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT;
3315     poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1;
3316 
3317     VmaPool pool = nullptr;
3318     res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
3319     TEST(res == VK_SUCCESS);
3320 
3321     VkBufferCreateInfo bufCreateInfo = sampleBufCreateInfo;
3322 
3323     VmaAllocationCreateInfo allocCreateInfo = {};
3324     allocCreateInfo.pool = pool;
3325 
3326     constexpr size_t maxBufCount = 100;
3327     std::vector<BufferInfo> bufInfo;
3328 
3329     constexpr VkDeviceSize bufSizeMin = 16;
3330     constexpr VkDeviceSize bufSizeMax = 1024;
3331     VmaAllocationInfo allocInfo;
3332     VkDeviceSize prevOffset = 0;
3333 
3334     // Test one-time free.
3335     for(size_t i = 0; i < 2; ++i)
3336     {
3337         // Allocate a number of buffers of varying size that surely fit into this block.
3338         VkDeviceSize bufSumSize = 0;
3339         for(size_t i = 0; i < maxBufCount; ++i)
3340         {
3341             bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3342             BufferInfo newBufInfo;
3343             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3344                 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3345             TEST(res == VK_SUCCESS);
3346             TEST(i == 0 || allocInfo.offset > prevOffset);
3347             bufInfo.push_back(newBufInfo);
3348             prevOffset = allocInfo.offset;
3349             bufSumSize += bufCreateInfo.size;
3350         }
3351 
3352         // Validate pool stats.
3353         VmaPoolStats stats;
3354         vmaGetPoolStats(g_hAllocator, pool, &stats);
3355         TEST(stats.size == poolCreateInfo.blockSize);
3356         TEST(stats.unusedSize == poolCreateInfo.blockSize - bufSumSize);
3357         TEST(stats.allocationCount == bufInfo.size());
3358 
3359         // Destroy the buffers in random order.
3360         while(!bufInfo.empty())
3361         {
3362             const size_t indexToDestroy = rand.Generate() % bufInfo.size();
3363             const BufferInfo& currBufInfo = bufInfo[indexToDestroy];
3364             vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3365             bufInfo.erase(bufInfo.begin() + indexToDestroy);
3366         }
3367     }
3368 
3369     // Test stack.
3370     {
3371         // Allocate a number of buffers of varying size that surely fit into this block.
3372         for(size_t i = 0; i < maxBufCount; ++i)
3373         {
3374             bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3375             BufferInfo newBufInfo;
3376             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3377                 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3378             TEST(res == VK_SUCCESS);
3379             TEST(i == 0 || allocInfo.offset > prevOffset);
3380             bufInfo.push_back(newBufInfo);
3381             prevOffset = allocInfo.offset;
3382         }
3383 
3384         // Destroy a few buffers from the top of the stack.
3385         for(size_t i = 0; i < maxBufCount / 5; ++i)
3386         {
3387             const BufferInfo& currBufInfo = bufInfo.back();
3388             vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3389             bufInfo.pop_back();
3390         }
3391 
3392         // Create some more
3393         for(size_t i = 0; i < maxBufCount / 5; ++i)
3394         {
3395             bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3396             BufferInfo newBufInfo;
3397             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3398                 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3399             TEST(res == VK_SUCCESS);
3400             TEST(i == 0 || allocInfo.offset > prevOffset);
3401             bufInfo.push_back(newBufInfo);
3402             prevOffset = allocInfo.offset;
3403         }
3404 
3405         // Destroy the buffers in reverse order.
3406         while(!bufInfo.empty())
3407         {
3408             const BufferInfo& currBufInfo = bufInfo.back();
3409             vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3410             bufInfo.pop_back();
3411         }
3412     }
3413 
3414     // Test ring buffer.
3415     {
3416         // Allocate a number of buffers that surely fit into this block.
3417         bufCreateInfo.size = bufSizeMax;
3418         for(size_t i = 0; i < maxBufCount; ++i)
3419         {
3420             BufferInfo newBufInfo;
3421             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3422                 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3423             TEST(res == VK_SUCCESS);
3424             TEST(i == 0 || allocInfo.offset > prevOffset);
3425             bufInfo.push_back(newBufInfo);
3426             prevOffset = allocInfo.offset;
3427         }
3428 
3429         // Free and allocate new buffers enough times to make sure we wrap around at least once.
3430         const size_t buffersPerIter = maxBufCount / 10 - 1;
3431         const size_t iterCount = poolCreateInfo.blockSize / bufCreateInfo.size / buffersPerIter * 2;
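        // blockSize / bufSize is roughly how many buffers fit in the block, so iterating twice
        // that many (in groups of buffersPerIter) guarantees at least one wrap-around.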
3432         for(size_t iter = 0; iter < iterCount; ++iter)
3433         {
3434             for(size_t bufPerIter = 0; bufPerIter < buffersPerIter; ++bufPerIter)
3435             {
3436                 const BufferInfo& currBufInfo = bufInfo.front();
3437                 vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3438                 bufInfo.erase(bufInfo.begin());
3439             }
3440             for(size_t bufPerIter = 0; bufPerIter < buffersPerIter; ++bufPerIter)
3441             {
3442                 BufferInfo newBufInfo;
3443                 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3444                     &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3445                 TEST(res == VK_SUCCESS);
3446                 bufInfo.push_back(newBufInfo);
3447             }
3448         }
3449 
3450         // Allocate buffers until we reach out-of-memory.
3451         uint32_t debugIndex = 0;
3452         while(res == VK_SUCCESS)
3453         {
3454             BufferInfo newBufInfo;
3455             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3456                 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3457             if(res == VK_SUCCESS)
3458             {
3459                 bufInfo.push_back(newBufInfo);
3460             }
3461             else
3462             {
3463                 TEST(res == VK_ERROR_OUT_OF_DEVICE_MEMORY);
3464             }
3465             ++debugIndex;
3466         }
3467 
3468         // Destroy the buffers in random order.
3469         while(!bufInfo.empty())
3470         {
3471             const size_t indexToDestroy = rand.Generate() % bufInfo.size();
3472             const BufferInfo& currBufInfo = bufInfo[indexToDestroy];
3473             vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3474             bufInfo.erase(bufInfo.begin() + indexToDestroy);
3475         }
3476     }
3477 
3478     // Test double stack.
3479     {
3480         // Allocate a number of buffers of varying size that surely fit into this block, alternating between bottom and top.
3481         VkDeviceSize prevOffsetLower = 0;
3482         VkDeviceSize prevOffsetUpper = poolCreateInfo.blockSize;
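        // Lower-stack allocations should get non-decreasing offsets from the start of the block,
        // upper-stack ones decreasing offsets from its end, and the two sides must never cross -
        // which is what the TESTs inside the loop verify.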
3483         for(size_t i = 0; i < maxBufCount; ++i)
3484         {
3485             const bool upperAddress = (i % 2) != 0;
3486             if(upperAddress)
3487                 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3488             else
3489                 allocCreateInfo.flags &= ~VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3490             bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3491             BufferInfo newBufInfo;
3492             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3493                 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3494             TEST(res == VK_SUCCESS);
3495             if(upperAddress)
3496             {
3497                 TEST(allocInfo.offset < prevOffsetUpper);
3498                 prevOffsetUpper = allocInfo.offset;
3499             }
3500             else
3501             {
3502                 TEST(allocInfo.offset >= prevOffsetLower);
3503                 prevOffsetLower = allocInfo.offset;
3504             }
3505             TEST(prevOffsetLower < prevOffsetUpper);
3506             bufInfo.push_back(newBufInfo);
3507         }
3508 
3509         // Destroy a few buffers from the top of the stack.
3510         for(size_t i = 0; i < maxBufCount / 5; ++i)
3511         {
3512             const BufferInfo& currBufInfo = bufInfo.back();
3513             vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3514             bufInfo.pop_back();
3515         }
3516 
3517         // Create some more
3518         for(size_t i = 0; i < maxBufCount / 5; ++i)
3519         {
3520             const bool upperAddress = (i % 2) != 0;
3521             if(upperAddress)
3522                 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3523             else
3524                 allocCreateInfo.flags &= ~VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3525             bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3526             BufferInfo newBufInfo;
3527             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3528                 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3529             TEST(res == VK_SUCCESS);
3530             bufInfo.push_back(newBufInfo);
3531         }
3532 
3533         // Destroy the buffers in reverse order.
3534         while(!bufInfo.empty())
3535         {
3536             const BufferInfo& currBufInfo = bufInfo.back();
3537             vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3538             bufInfo.pop_back();
3539         }
3540 
3541         // Create buffers on both sides until we reach out of memory.
3542         prevOffsetLower = 0;
3543         prevOffsetUpper = poolCreateInfo.blockSize;
3544         res = VK_SUCCESS;
3545         for(size_t i = 0; res == VK_SUCCESS; ++i)
3546         {
3547             const bool upperAddress = (i % 2) != 0;
3548             if(upperAddress)
3549                 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3550             else
3551                 allocCreateInfo.flags &= ~VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3552             bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3553             BufferInfo newBufInfo;
3554             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3555                 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3556             if(res == VK_SUCCESS)
3557             {
3558                 if(upperAddress)
3559                 {
3560                     TEST(allocInfo.offset < prevOffsetUpper);
3561                     prevOffsetUpper = allocInfo.offset;
3562                 }
3563                 else
3564                 {
3565                     TEST(allocInfo.offset >= prevOffsetLower);
3566                     prevOffsetLower = allocInfo.offset;
3567                 }
3568                 TEST(prevOffsetLower < prevOffsetUpper);
3569                 bufInfo.push_back(newBufInfo);
3570             }
3571         }
3572 
3573         // Destroy the buffers in random order.
3574         while(!bufInfo.empty())
3575         {
3576             const size_t indexToDestroy = rand.Generate() % bufInfo.size();
3577             const BufferInfo& currBufInfo = bufInfo[indexToDestroy];
3578             vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3579             bufInfo.erase(bufInfo.begin() + indexToDestroy);
3580         }
3581 
3582         // Create buffers on upper side only, constant size, until we reach out of memory.
3583         prevOffsetUpper = poolCreateInfo.blockSize;
3584         res = VK_SUCCESS;
3585         allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3586         bufCreateInfo.size = bufSizeMax;
3587         for(size_t i = 0; res == VK_SUCCESS; ++i)
3588         {
3589             BufferInfo newBufInfo;
3590             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3591                 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3592             if(res == VK_SUCCESS)
3593             {
3594                 TEST(allocInfo.offset < prevOffsetUpper);
3595                 prevOffsetUpper = allocInfo.offset;
3596                 bufInfo.push_back(newBufInfo);
3597             }
3598         }
3599 
3600         // Destroy the buffers in reverse order.
3601         while(!bufInfo.empty())
3602         {
3603             const BufferInfo& currBufInfo = bufInfo.back();
3604             vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3605             bufInfo.pop_back();
3606         }
3607     }
3608 
3609     // Test ring buffer with lost allocations.
3610     {
3611         // Allocate buffers until the pool is full.
3612         // Notice CAN_BECOME_LOST flag and call to vmaSetCurrentFrameIndex.
3613         allocCreateInfo.flags = VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT;
3614         res = VK_SUCCESS;
3615         for(size_t i = 0; res == VK_SUCCESS; ++i)
3616         {
3617             vmaSetCurrentFrameIndex(g_hAllocator, ++g_FrameIndex);
3618 
3619             bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3620 
3621             BufferInfo newBufInfo;
3622             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3623                 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3624             if(res == VK_SUCCESS)
3625                 bufInfo.push_back(newBufInfo);
3626         }
3627 
3628         // Free the first half of it.
3629         {
3630             const size_t buffersToDelete = bufInfo.size() / 2;
3631             for(size_t i = 0; i < buffersToDelete; ++i)
3632             {
3633                 vmaDestroyBuffer(g_hAllocator, bufInfo[i].Buffer, bufInfo[i].Allocation);
3634             }
3635             bufInfo.erase(bufInfo.begin(), bufInfo.begin() + buffersToDelete);
3636         }
3637 
3638         // Allocate buffers until the pool is full again.
3639         // This way we make sure the ring buffer wraps around, with its front now in the middle.
3640         res = VK_SUCCESS;
3641         for(size_t i = 0; res == VK_SUCCESS; ++i)
3642         {
3643             vmaSetCurrentFrameIndex(g_hAllocator, ++g_FrameIndex);
3644 
3645             bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3646 
3647             BufferInfo newBufInfo;
3648             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3649                 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3650             if(res == VK_SUCCESS)
3651                 bufInfo.push_back(newBufInfo);
3652         }
3653 
3654         VkDeviceSize firstNewOffset;
3655         {
3656             vmaSetCurrentFrameIndex(g_hAllocator, ++g_FrameIndex);
3657 
3658             // Allocate a large buffer with CAN_MAKE_OTHER_LOST.
3659             allocCreateInfo.flags = VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT;
3660             bufCreateInfo.size = bufSizeMax;
3661 
3662             BufferInfo newBufInfo;
3663             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3664                 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3665             TEST(res == VK_SUCCESS);
3666             bufInfo.push_back(newBufInfo);
3667             firstNewOffset = allocInfo.offset;
3668 
3669             // Make sure at least one buffer from the beginning became lost.
3670             vmaGetAllocationInfo(g_hAllocator, bufInfo[0].Allocation, &allocInfo);
3671             TEST(allocInfo.deviceMemory == VK_NULL_HANDLE);
3672         }
3673 
3674 #if 0 // TODO Fix and uncomment. Failing on Intel.
3675         // Allocate more buffers that CAN_MAKE_OTHER_LOST until we wrap-around with this.
3676         size_t newCount = 1;
3677         for(;;)
3678         {
3679             vmaSetCurrentFrameIndex(g_hAllocator, ++g_FrameIndex);
3680 
3681             bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3682 
3683             BufferInfo newBufInfo;
3684             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3685                 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3686 
3687             TEST(res == VK_SUCCESS);
3688             bufInfo.push_back(newBufInfo);
3689             ++newCount;
3690             if(allocInfo.offset < firstNewOffset)
3691                 break;
3692         }
3693 #endif
3694 
3695         // Delete buffers that are lost.
3696         for(size_t i = bufInfo.size(); i--; )
3697         {
3698             vmaGetAllocationInfo(g_hAllocator, bufInfo[i].Allocation, &allocInfo);
3699             if(allocInfo.deviceMemory == VK_NULL_HANDLE)
3700             {
3701                 vmaDestroyBuffer(g_hAllocator, bufInfo[i].Buffer, bufInfo[i].Allocation);
3702                 bufInfo.erase(bufInfo.begin() + i);
3703             }
3704         }
3705 
3706         // Test vmaMakePoolAllocationsLost
3707         {
3708             vmaSetCurrentFrameIndex(g_hAllocator, ++g_FrameIndex);
3709 
3710             size_t lostAllocCount = 0;
3711             vmaMakePoolAllocationsLost(g_hAllocator, pool, &lostAllocCount);
3712             TEST(lostAllocCount > 0);
3713 
3714             size_t realLostAllocCount = 0;
3715             for(size_t i = 0; i < bufInfo.size(); ++i)
3716             {
3717                 vmaGetAllocationInfo(g_hAllocator, bufInfo[i].Allocation, &allocInfo);
3718                 if(allocInfo.deviceMemory == VK_NULL_HANDLE)
3719                     ++realLostAllocCount;
3720             }
3721             TEST(realLostAllocCount == lostAllocCount);
3722         }
3723 
3724         // Destroy all the buffers in forward order.
3725         for(size_t i = 0; i < bufInfo.size(); ++i)
3726             vmaDestroyBuffer(g_hAllocator, bufInfo[i].Buffer, bufInfo[i].Allocation);
3727         bufInfo.clear();
3728     }
3729 
3730     vmaDestroyPool(g_hAllocator, pool);
3731 }
3732 
3733 static void TestLinearAllocatorMultiBlock()
3734 {
3735     wprintf(L"Test linear allocator multi block\n");
3736 
3737     RandomNumberGenerator rand{345673};
3738 
3739     VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
3740     sampleBufCreateInfo.size = 1024 * 1024;
3741     sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
3742 
3743     VmaAllocationCreateInfo sampleAllocCreateInfo = {};
3744     sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
3745 
3746     VmaPoolCreateInfo poolCreateInfo = {};
3747     poolCreateInfo.flags = VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT;
3748     VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
3749     TEST(res == VK_SUCCESS);
3750 
3751     VmaPool pool = nullptr;
3752     res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
3753     TEST(res == VK_SUCCESS);
3754 
3755     VkBufferCreateInfo bufCreateInfo = sampleBufCreateInfo;
3756 
3757     VmaAllocationCreateInfo allocCreateInfo = {};
3758     allocCreateInfo.pool = pool;
3759 
3760     std::vector<BufferInfo> bufInfo;
3761     VmaAllocationInfo allocInfo;
3762 
3763     // Test one-time free.
3764     {
3765         // Allocate buffers until we move to a second block.
3766         VkDeviceMemory lastMem = VK_NULL_HANDLE;
3767         for(uint32_t i = 0; ; ++i)
3768         {
3769             BufferInfo newBufInfo;
3770             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3771                 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3772             TEST(res == VK_SUCCESS);
3773             bufInfo.push_back(newBufInfo);
3774             if(lastMem && allocInfo.deviceMemory != lastMem)
3775             {
3776                 break;
3777             }
3778             lastMem = allocInfo.deviceMemory;
3779         }
3780 
3781         TEST(bufInfo.size() > 2);
3782 
3783         // Make sure that pool has now two blocks.
3784         VmaPoolStats poolStats = {};
3785         vmaGetPoolStats(g_hAllocator, pool, &poolStats);
3786         TEST(poolStats.blockCount == 2);
3787 
3788         // Destroy all the buffers in random order.
3789         while(!bufInfo.empty())
3790         {
3791             const size_t indexToDestroy = rand.Generate() % bufInfo.size();
3792             const BufferInfo& currBufInfo = bufInfo[indexToDestroy];
3793             vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3794             bufInfo.erase(bufInfo.begin() + indexToDestroy);
3795         }
3796 
3797         // Make sure that pool has now at most one block.
3798         vmaGetPoolStats(g_hAllocator, pool, &poolStats);
3799         TEST(poolStats.blockCount <= 1);
3800     }
3801 
3802     // Test stack.
3803     {
3804         // Allocate buffers until we move to a second block.
3805         VkDeviceMemory lastMem = VK_NULL_HANDLE;
3806         for(uint32_t i = 0; ; ++i)
3807         {
3808             BufferInfo newBufInfo;
3809             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3810                 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3811             TEST(res == VK_SUCCESS);
3812             bufInfo.push_back(newBufInfo);
3813             if(lastMem && allocInfo.deviceMemory != lastMem)
3814             {
3815                 break;
3816             }
3817             lastMem = allocInfo.deviceMemory;
3818         }
3819 
3820         TEST(bufInfo.size() > 2);
3821 
3822         // Add a few more buffers.
3823         for(uint32_t i = 0; i < 5; ++i)
3824         {
3825             BufferInfo newBufInfo;
3826             res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3827                 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3828             TEST(res == VK_SUCCESS);
3829             bufInfo.push_back(newBufInfo);
3830         }
3831 
3832         // Make sure that pool has now two blocks.
3833         VmaPoolStats poolStats = {};
3834         vmaGetPoolStats(g_hAllocator, pool, &poolStats);
3835         TEST(poolStats.blockCount == 2);
3836 
3837         // Delete half of the buffers, LIFO.
3838         for(size_t i = 0, countToDelete = bufInfo.size() / 2; i < countToDelete; ++i)
3839         {
3840             const BufferInfo& currBufInfo = bufInfo.back();
3841             vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3842             bufInfo.pop_back();
3843         }
3844 
3845         // Add one more buffer.
3846         BufferInfo newBufInfo;
3847         res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3848             &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3849         TEST(res == VK_SUCCESS);
3850         bufInfo.push_back(newBufInfo);
3851 
3852         // Make sure that pool has now one block.
3853         vmaGetPoolStats(g_hAllocator, pool, &poolStats);
3854         TEST(poolStats.blockCount == 1);
3855 
3856         // Delete all the remaining buffers, LIFO.
3857         while(!bufInfo.empty())
3858         {
3859             const BufferInfo& currBufInfo = bufInfo.back();
3860             vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3861             bufInfo.pop_back();
3862         }
3863     }
3864 
3865     vmaDestroyPool(g_hAllocator, pool);
3866 }
3867 
3868 static void ManuallyTestLinearAllocator()
3869 {
3870     VmaStats origStats;
3871     vmaCalculateStats(g_hAllocator, &origStats);
3872 
3873     wprintf(L"Manually test linear allocator\n");
3874 
3875     RandomNumberGenerator rand{645332};
3876 
3877     VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
3878     sampleBufCreateInfo.size = 1024; // Whatever.
3879     sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
3880 
3881     VmaAllocationCreateInfo sampleAllocCreateInfo = {};
3882     sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
3883 
3884     VmaPoolCreateInfo poolCreateInfo = {};
3885     VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
3886     TEST(res == VK_SUCCESS);
3887 
3888     poolCreateInfo.blockSize = 10 * 1024;
3889     poolCreateInfo.flags = VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT;
3890     poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1;
3891 
3892     VmaPool pool = nullptr;
3893     res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
3894     TEST(res == VK_SUCCESS);
3895 
3896     VkBufferCreateInfo bufCreateInfo = sampleBufCreateInfo;
3897 
3898     VmaAllocationCreateInfo allocCreateInfo = {};
3899     allocCreateInfo.pool = pool;
3900 
3901     std::vector<BufferInfo> bufInfo;
3902     VmaAllocationInfo allocInfo;
3903     BufferInfo newBufInfo;
3904 
3905     // Test double stack.
3906     {
3907         /*
3908         Lower: Buffer 32 B, Buffer 1024 B, Buffer 32 B
3909         Upper: Buffer 16 B, Buffer 1024 B, Buffer 128 B
3910 
3911         In total:
3912         1 block allocated
3913         10240 Vulkan bytes
3914         6 new allocations
3915         2256 bytes in allocations
3916         */
3917 
3918         bufCreateInfo.size = 32;
3919         res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3920             &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3921         TEST(res == VK_SUCCESS);
3922         bufInfo.push_back(newBufInfo);
3923 
3924         bufCreateInfo.size = 1024;
3925         res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3926             &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3927         TEST(res == VK_SUCCESS);
3928         bufInfo.push_back(newBufInfo);
3929 
3930         bufCreateInfo.size = 32;
3931         res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3932             &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3933         TEST(res == VK_SUCCESS);
3934         bufInfo.push_back(newBufInfo);
3935 
3936         allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3937 
3938         bufCreateInfo.size = 128;
3939         res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3940             &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3941         TEST(res == VK_SUCCESS);
3942         bufInfo.push_back(newBufInfo);
3943 
3944         bufCreateInfo.size = 1024;
3945         res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3946             &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3947         TEST(res == VK_SUCCESS);
3948         bufInfo.push_back(newBufInfo);
3949 
3950         bufCreateInfo.size = 16;
3951         res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3952             &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3953         TEST(res == VK_SUCCESS);
3954         bufInfo.push_back(newBufInfo);
3955 
3956         VmaStats currStats;
3957         vmaCalculateStats(g_hAllocator, &currStats);
3958         VmaPoolStats poolStats;
3959         vmaGetPoolStats(g_hAllocator, pool, &poolStats);
3960 
3961         char* statsStr = nullptr;
3962         vmaBuildStatsString(g_hAllocator, &statsStr, VK_TRUE);
3963 
3964         // PUT BREAKPOINT HERE TO CHECK.
3965         // Inspect: currStats versus origStats, poolStats, statsStr.
3966         int I = 0;
3967 
3968         vmaFreeStatsString(g_hAllocator, statsStr);
3969 
3970         // Destroy the buffers in reverse order.
3971         while(!bufInfo.empty())
3972         {
3973             const BufferInfo& currBufInfo = bufInfo.back();
3974             vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3975             bufInfo.pop_back();
3976         }
3977     }
3978 
3979     vmaDestroyPool(g_hAllocator, pool);
3980 }
3981 
3982 static void BenchmarkAlgorithmsCase(FILE* file,
3983     uint32_t algorithm,
3984     bool empty,
3985     VmaAllocationCreateFlags allocStrategy,
3986     FREE_ORDER freeOrder)
3987 {
3988     RandomNumberGenerator rand{16223};
3989 
3990     const VkDeviceSize bufSizeMin = 32;
3991     const VkDeviceSize bufSizeMax = 1024;
3992     const size_t maxBufCapacity = 10000;
3993     const uint32_t iterationCount = 10;
3994 
3995     VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
3996     sampleBufCreateInfo.size = bufSizeMax;
3997     sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
3998 
3999     VmaAllocationCreateInfo sampleAllocCreateInfo = {};
4000     sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
4001 
4002     VmaPoolCreateInfo poolCreateInfo = {};
4003     VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
4004     TEST(res == VK_SUCCESS);
4005 
4006     poolCreateInfo.blockSize = bufSizeMax * maxBufCapacity;
4007     poolCreateInfo.flags |= algorithm;
4008     poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1;
4009 
4010     VmaPool pool = nullptr;
4011     res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
4012     TEST(res == VK_SUCCESS);
4013 
4014     // Buffer created just to get memory requirements. Never bound to any memory.
4015     VkBuffer dummyBuffer = VK_NULL_HANDLE;
4016     res = vkCreateBuffer(g_hDevice, &sampleBufCreateInfo, g_Allocs, &dummyBuffer);
4017     TEST(res == VK_SUCCESS && dummyBuffer);
4018 
4019     VkMemoryRequirements memReq = {};
4020     vkGetBufferMemoryRequirements(g_hDevice, dummyBuffer, &memReq);
4021 
4022     vkDestroyBuffer(g_hDevice, dummyBuffer, g_Allocs);
4023 
4024     VmaAllocationCreateInfo allocCreateInfo = {};
4025     allocCreateInfo.pool = pool;
4026     allocCreateInfo.flags = allocStrategy;
4027 
4028     VmaAllocation alloc;
4029     std::vector<VmaAllocation> baseAllocations;
4030 
4031     if(!empty)
4032     {
4033         // Make allocations up to 1/3 of pool size.
4034         VkDeviceSize totalSize = 0;
4035         while(totalSize < poolCreateInfo.blockSize / 3)
4036         {
4037             // This test intentionally allows sizes that are not aligned to 4 or 16 bytes.
4038             // Such sizes are theoretically allowed and already uncovered one bug.
4039             memReq.size = bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin);
4040             res = vmaAllocateMemory(g_hAllocator, &memReq, &allocCreateInfo, &alloc, nullptr);
4041             TEST(res == VK_SUCCESS);
4042             baseAllocations.push_back(alloc);
4043             totalSize += memReq.size;
4044         }
4045 
4046         // Delete half of them, chosen at random.
4047         size_t allocsToDelete = baseAllocations.size() / 2;
4048         for(size_t i = 0; i < allocsToDelete; ++i)
4049         {
4050             const size_t index = (size_t)rand.Generate() % baseAllocations.size();
4051             vmaFreeMemory(g_hAllocator, baseAllocations[index]);
4052             baseAllocations.erase(baseAllocations.begin() + index);
4053         }
4054     }
4055 
4056     // BENCHMARK
4057     const size_t allocCount = maxBufCapacity / 3;
4058     std::vector<VmaAllocation> testAllocations;
4059     testAllocations.reserve(allocCount);
4060     duration allocTotalDuration = duration::zero();
4061     duration freeTotalDuration = duration::zero();
4062     for(uint32_t iterationIndex = 0; iterationIndex < iterationCount; ++iterationIndex)
4063     {
4064         // Allocations
4065         time_point allocTimeBeg = std::chrono::high_resolution_clock::now();
4066         for(size_t i = 0; i < allocCount; ++i)
4067         {
4068             memReq.size = bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin);
4069             res = vmaAllocateMemory(g_hAllocator, &memReq, &allocCreateInfo, &alloc, nullptr);
4070             TEST(res == VK_SUCCESS);
4071             testAllocations.push_back(alloc);
4072         }
4073         allocTotalDuration += std::chrono::high_resolution_clock::now() - allocTimeBeg;
4074 
4075         // Deallocations
4076         switch(freeOrder)
4077         {
4078         case FREE_ORDER::FORWARD:
4079             // Leave testAllocations unchanged.
4080             break;
4081         case FREE_ORDER::BACKWARD:
4082             std::reverse(testAllocations.begin(), testAllocations.end());
4083             break;
4084         case FREE_ORDER::RANDOM:
4085             std::shuffle(testAllocations.begin(), testAllocations.end(), MyUniformRandomNumberGenerator(rand));
4086             break;
4087         default: assert(0);
4088         }
4089 
4090         time_point freeTimeBeg = std::chrono::high_resolution_clock::now();
4091         for(size_t i = 0; i < allocCount; ++i)
4092             vmaFreeMemory(g_hAllocator, testAllocations[i]);
4093         freeTotalDuration += std::chrono::high_resolution_clock::now() - freeTimeBeg;
4094 
4095         testAllocations.clear();
4096     }
4097 
4098     // Delete baseAllocations
4099     while(!baseAllocations.empty())
4100     {
4101         vmaFreeMemory(g_hAllocator, baseAllocations.back());
4102         baseAllocations.pop_back();
4103     }
4104 
4105     vmaDestroyPool(g_hAllocator, pool);
4106 
4107     const float allocTotalSeconds = ToFloatSeconds(allocTotalDuration);
4108     const float freeTotalSeconds  = ToFloatSeconds(freeTotalDuration);
4109 
4110     printf("    Algorithm=%s %s Allocation=%s FreeOrder=%s: allocations %g s, free %g s\n",
4111         AlgorithmToStr(algorithm),
4112         empty ? "Empty" : "Not empty",
4113         GetAllocationStrategyName(allocStrategy),
4114         FREE_ORDER_NAMES[(size_t)freeOrder],
4115         allocTotalSeconds,
4116         freeTotalSeconds);
4117 
4118     if(file)
4119     {
4120         std::string currTime;
4121         CurrentTimeToStr(currTime);
4122 
4123         fprintf(file, "%s,%s,%s,%u,%s,%s,%g,%g\n",
4124             CODE_DESCRIPTION, currTime.c_str(),
4125             AlgorithmToStr(algorithm),
4126             empty ? 1 : 0,
4127             GetAllocationStrategyName(allocStrategy),
4128             FREE_ORDER_NAMES[(uint32_t)freeOrder],
4129             allocTotalSeconds,
4130             freeTotalSeconds);
4131     }
4132 }
4133 
4134 static void TestBufferDeviceAddress()
4135 {
4136     wprintf(L"Test buffer device address\n");
4137 
4138     assert(VK_KHR_buffer_device_address_enabled);
4139 
4140     VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
4141     bufCreateInfo.size = 0x10000;
4142     bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
4143         VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; // !!!
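    // Note: for VMA to request a device-address-capable allocation, the allocator is
    // presumably created elsewhere in this test suite with
    // VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT, which makes it chain
    // VkMemoryAllocateFlagsInfo with VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT.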
4144 
4145     VmaAllocationCreateInfo allocCreateInfo = {};
4146     allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
4147 
4148     for(uint32_t testIndex = 0; testIndex < 2; ++testIndex)
4149     {
4150         // 1st is placed, 2nd is dedicated.
4151         if(testIndex == 1)
4152             allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
4153 
4154         BufferInfo bufInfo = {};
4155         VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
4156             &bufInfo.Buffer, &bufInfo.Allocation, nullptr);
4157         TEST(res == VK_SUCCESS);
4158 
4159         VkBufferDeviceAddressInfoEXT bufferDeviceAddressInfo = { VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_EXT };
4160         bufferDeviceAddressInfo.buffer = bufInfo.Buffer;
4161         TEST(g_vkGetBufferDeviceAddressKHR != nullptr);
4162         VkDeviceAddress addr = g_vkGetBufferDeviceAddressKHR(g_hDevice, &bufferDeviceAddressInfo);
4163         TEST(addr != 0);
4164 
4165         vmaDestroyBuffer(g_hAllocator, bufInfo.Buffer, bufInfo.Allocation);
4166     }
4167 }
4168 
4169 static void TestMemoryPriority()
4170 {
4171     wprintf(L"Test memory priority\n");
4172 
4173     assert(VK_EXT_memory_priority_enabled);
4174 
4175     VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
4176     bufCreateInfo.size = 0x10000;
4177     bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
4178 
4179     VmaAllocationCreateInfo allocCreateInfo = {};
4180     allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
4181     allocCreateInfo.priority = 1.f;
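    // 1.0 is the highest priority defined by VK_EXT_memory_priority (valid range 0.0 to 1.0,
    // default 0.5). VMA is expected to forward it through VkMemoryPriorityAllocateInfoEXT,
    // assuming the allocator was created with VMA_ALLOCATOR_CREATE_EXT_MEMORY_PRIORITY_BIT.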
4182 
4183     for(uint32_t testIndex = 0; testIndex < 2; ++testIndex)
4184     {
4185         // 1st is placed, 2nd is dedicated.
4186         if(testIndex == 1)
4187             allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
4188 
4189         BufferInfo bufInfo = {};
4190         VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
4191             &bufInfo.Buffer, &bufInfo.Allocation, nullptr);
4192         TEST(res == VK_SUCCESS);
4193 
4194         // There is nothing we can do to validate the priority.
4195 
4196         vmaDestroyBuffer(g_hAllocator, bufInfo.Buffer, bufInfo.Allocation);
4197     }
4198 }
4199 
4200 static void BenchmarkAlgorithms(FILE* file)
4201 {
4202     wprintf(L"Benchmark algorithms\n");
4203 
4204     if(file)
4205     {
4206         fprintf(file,
4207             "Code,Time,"
4208             "Algorithm,Empty,Allocation strategy,Free order,"
4209             "Allocation time (s),Deallocation time (s)\n");
4210     }
4211 
4212     uint32_t freeOrderCount = 1;
4213     if(ConfigType >= CONFIG_TYPE::CONFIG_TYPE_LARGE)
4214         freeOrderCount = 3;
4215     else if(ConfigType >= CONFIG_TYPE::CONFIG_TYPE_SMALL)
4216         freeOrderCount = 2;
4217 
4218     const uint32_t emptyCount = ConfigType >= CONFIG_TYPE::CONFIG_TYPE_SMALL ? 2 : 1;
4219     const uint32_t allocStrategyCount = GetAllocationStrategyCount();
4220 
4221     for(uint32_t freeOrderIndex = 0; freeOrderIndex < freeOrderCount; ++freeOrderIndex)
4222     {
4223         FREE_ORDER freeOrder = FREE_ORDER::COUNT;
4224         switch(freeOrderIndex)
4225         {
4226         case 0: freeOrder = FREE_ORDER::BACKWARD; break;
4227         case 1: freeOrder = FREE_ORDER::FORWARD; break;
4228         case 2: freeOrder = FREE_ORDER::RANDOM; break;
4229         default: assert(0);
4230         }
4231 
4232         for(uint32_t emptyIndex = 0; emptyIndex < emptyCount; ++emptyIndex)
4233         {
4234             for(uint32_t algorithmIndex = 0; algorithmIndex < 3; ++algorithmIndex)
4235             {
4236                 uint32_t algorithm = 0;
4237                 switch(algorithmIndex)
4238                 {
4239                 case 0:
4240                     break;
4241                 case 1:
4242                     algorithm = VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT;
4243                     break;
4244                 case 2:
4245                     algorithm = VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT;
4246                     break;
4247                 default:
4248                     assert(0);
4249                 }
4250 
4251                 uint32_t currAllocStrategyCount = algorithm != 0 ? 1 : allocStrategyCount;
4252                 for(uint32_t allocStrategyIndex = 0; allocStrategyIndex < currAllocStrategyCount; ++allocStrategyIndex)
4253                 {
4254                     VmaAllocationCreateFlags strategy = 0;
4255                     if(currAllocStrategyCount > 1)
4256                     {
4257                         switch(allocStrategyIndex)
4258                         {
4259                         case 0: strategy = VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT; break;
4260                         case 1: strategy = VMA_ALLOCATION_CREATE_STRATEGY_WORST_FIT_BIT; break;
4261                         case 2: strategy = VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT; break;
4262                         default: assert(0);
4263                         }
4264                     }
4265 
4266                     BenchmarkAlgorithmsCase(
4267                         file,
4268                         algorithm,
4269                         (emptyIndex == 0), // empty
4270                         strategy,
4271                         freeOrder); // freeOrder
4272                 }
4273             }
4274         }
4275     }
4276 }
4277 
4278 static void TestPool_SameSize()
4279 {
4280     const VkDeviceSize BUF_SIZE = 1024 * 1024;
4281     const size_t BUF_COUNT = 100;
4282     VkResult res;
4283 
4284     RandomNumberGenerator rand{123};
4285 
4286     VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
4287     bufferInfo.size = BUF_SIZE;
4288     bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
4289 
4290     uint32_t memoryTypeBits = UINT32_MAX;
4291     {
4292         VkBuffer dummyBuffer;
4293         res = vkCreateBuffer(g_hDevice, &bufferInfo, g_Allocs, &dummyBuffer);
4294         TEST(res == VK_SUCCESS);
4295 
4296         VkMemoryRequirements memReq;
4297         vkGetBufferMemoryRequirements(g_hDevice, dummyBuffer, &memReq);
4298         memoryTypeBits = memReq.memoryTypeBits;
4299 
4300         vkDestroyBuffer(g_hDevice, dummyBuffer, g_Allocs);
4301     }
4302 
4303     VmaAllocationCreateInfo poolAllocInfo = {};
4304     poolAllocInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
4305     uint32_t memTypeIndex;
4306     res = vmaFindMemoryTypeIndex(
4307         g_hAllocator,
4308         memoryTypeBits,
4309         &poolAllocInfo,
4310         &memTypeIndex);
    TEST(res == VK_SUCCESS);
4311 
4312     VmaPoolCreateInfo poolCreateInfo = {};
4313     poolCreateInfo.memoryTypeIndex = memTypeIndex;
4314     poolCreateInfo.blockSize = BUF_SIZE * BUF_COUNT / 4;
4315     poolCreateInfo.minBlockCount = 1;
4316     poolCreateInfo.maxBlockCount = 4;
4317     poolCreateInfo.frameInUseCount = 0;
4318 
4319     VmaPool pool;
4320     res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
4321     TEST(res == VK_SUCCESS);
4322 
4323     // Test pool name
4324     {
4325         static const char* const POOL_NAME = "Pool name";
4326         vmaSetPoolName(g_hAllocator, pool, POOL_NAME);
4327 
4328         const char* fetchedPoolName = nullptr;
4329         vmaGetPoolName(g_hAllocator, pool, &fetchedPoolName);
4330         TEST(strcmp(fetchedPoolName, POOL_NAME) == 0);
4331 
4332         vmaSetPoolName(g_hAllocator, pool, nullptr);
4333     }
4334 
4335     vmaSetCurrentFrameIndex(g_hAllocator, 1);
4336 
4337     VmaAllocationCreateInfo allocInfo = {};
4338     allocInfo.pool = pool;
4339     allocInfo.flags = VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT |
4340         VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT;
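    // With frameInUseCount = 0 on the pool, an allocation that was not touched (e.g. via
    // vmaGetAllocationInfo) in the current frame may be made lost to satisfy new allocations.
    // The frame-index manipulation later in this test relies on exactly that behavior.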
4341 
4342     struct BufItem
4343     {
4344         VkBuffer Buf;
4345         VmaAllocation Alloc;
4346     };
4347     std::vector<BufItem> items;
4348 
4349     // Fill entire pool.
4350     for(size_t i = 0; i < BUF_COUNT; ++i)
4351     {
4352         BufItem item;
4353         res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocInfo, &item.Buf, &item.Alloc, nullptr);
4354         TEST(res == VK_SUCCESS);
4355         items.push_back(item);
4356     }
4357 
4358     // Make sure that another allocation would fail.
4359     {
4360         BufItem item;
4361         res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocInfo, &item.Buf, &item.Alloc, nullptr);
4362         TEST(res == VK_ERROR_OUT_OF_DEVICE_MEMORY);
4363     }
4364 
4365     // Validate that no buffer is lost. Also check that they are not mapped.
4366     for(size_t i = 0; i < items.size(); ++i)
4367     {
4368         VmaAllocationInfo allocInfo;
4369         vmaGetAllocationInfo(g_hAllocator, items[i].Alloc, &allocInfo);
4370         TEST(allocInfo.deviceMemory != VK_NULL_HANDLE);
4371         TEST(allocInfo.pMappedData == nullptr);
4372     }
4373 
4374     // Free some percent of random items.
4375     {
4376         const size_t PERCENT_TO_FREE = 10;
4377         size_t itemsToFree = items.size() * PERCENT_TO_FREE / 100;
4378         for(size_t i = 0; i < itemsToFree; ++i)
4379         {
4380             size_t index = (size_t)rand.Generate() % items.size();
4381             vmaDestroyBuffer(g_hAllocator, items[index].Buf, items[index].Alloc);
4382             items.erase(items.begin() + index);
4383         }
4384     }
4385 
4386     // Randomly allocate and free items.
4387     {
4388         const size_t OPERATION_COUNT = BUF_COUNT;
4389         for(size_t i = 0; i < OPERATION_COUNT; ++i)
4390         {
4391             bool allocate = rand.Generate() % 2 != 0;
4392             if(allocate)
4393             {
4394                 if(items.size() < BUF_COUNT)
4395                 {
4396                     BufItem item;
4397                     res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocInfo, &item.Buf, &item.Alloc, nullptr);
4398                     TEST(res == VK_SUCCESS);
4399                     items.push_back(item);
4400                }
4401             }
4402             else // Free
4403             {
4404                 if(!items.empty())
4405                 {
4406                     size_t index = (size_t)rand.Generate() % items.size();
4407                     vmaDestroyBuffer(g_hAllocator, items[index].Buf, items[index].Alloc);
4408                     items.erase(items.begin() + index);
4409                 }
4410             }
4411         }
4412     }
4413 
4414     // Allocate up to maximum.
4415     while(items.size() < BUF_COUNT)
4416     {
4417         BufItem item;
4418         res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocInfo, &item.Buf, &item.Alloc, nullptr);
4419         TEST(res == VK_SUCCESS);
4420         items.push_back(item);
4421     }
4422 
4423     // Validate that no buffer is lost.
4424     for(size_t i = 0; i < items.size(); ++i)
4425     {
4426         VmaAllocationInfo allocInfo;
4427         vmaGetAllocationInfo(g_hAllocator, items[i].Alloc, &allocInfo);
4428         TEST(allocInfo.deviceMemory != VK_NULL_HANDLE);
4429     }
4430 
4431     // Next frame.
4432     vmaSetCurrentFrameIndex(g_hAllocator, 2);
4433 
4434     // Allocate another BUF_COUNT buffers.
4435     for(size_t i = 0; i < BUF_COUNT; ++i)
4436     {
4437         BufItem item;
4438         res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocInfo, &item.Buf, &item.Alloc, nullptr);
4439         TEST(res == VK_SUCCESS);
4440         items.push_back(item);
4441     }
4442 
4443     // Make sure that the first BUF_COUNT buffers are now lost, then delete them.
4444     for(size_t i = 0; i < BUF_COUNT; ++i)
4445     {
4446         VmaAllocationInfo allocInfo;
4447         vmaGetAllocationInfo(g_hAllocator, items[i].Alloc, &allocInfo);
4448         TEST(allocInfo.deviceMemory == VK_NULL_HANDLE);
4449         vmaDestroyBuffer(g_hAllocator, items[i].Buf, items[i].Alloc);
4450     }
4451     items.erase(items.begin(), items.begin() + BUF_COUNT);
4452 
4453     // Validate that no buffer is lost.
4454     for(size_t i = 0; i < items.size(); ++i)
4455     {
4456         VmaAllocationInfo allocInfo;
4457         vmaGetAllocationInfo(g_hAllocator, items[i].Alloc, &allocInfo);
4458         TEST(allocInfo.deviceMemory != VK_NULL_HANDLE);
4459     }
4460 
4461     // Free one item.
4462     vmaDestroyBuffer(g_hAllocator, items.back().Buf, items.back().Alloc);
4463     items.pop_back();
4464 
4465     // Validate statistics.
4466     {
4467         VmaPoolStats poolStats = {};
4468         vmaGetPoolStats(g_hAllocator, pool, &poolStats);
4469         TEST(poolStats.allocationCount == items.size());
4470         TEST(poolStats.size == BUF_COUNT * BUF_SIZE);
4471         TEST(poolStats.unusedRangeCount == 1);
4472         TEST(poolStats.unusedRangeSizeMax == BUF_SIZE);
4473         TEST(poolStats.unusedSize == BUF_SIZE);
4474     }
4475 
4476     // Free all remaining items.
4477     for(size_t i = items.size(); i--; )
4478         vmaDestroyBuffer(g_hAllocator, items[i].Buf, items[i].Alloc);
4479     items.clear();
4480 
4481     // Allocate maximum items again.
4482     for(size_t i = 0; i < BUF_COUNT; ++i)
4483     {
4484         BufItem item;
4485         res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocInfo, &item.Buf, &item.Alloc, nullptr);
4486         TEST(res == VK_SUCCESS);
4487         items.push_back(item);
4488     }
4489 
4490     // Delete every other item.
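    // Because erase() shifts the remaining elements down while i keeps incrementing,
    // this loop removes every second buffer, leaving BUF_COUNT / 2 of them scattered
    // across the pool's blocks.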
4491     for(size_t i = 0; i < BUF_COUNT / 2; ++i)
4492     {
4493         vmaDestroyBuffer(g_hAllocator, items[i].Buf, items[i].Alloc);
4494         items.erase(items.begin() + i);
4495     }
4496 
4497     // Defragment!
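    // With half of the buffers gone, the remaining ~50 MB of allocations are scattered across
    // all 4 blocks (25 MB each). Defragmentation should compact them into 2 blocks, so
    // 2 device memory blocks are expected to be freed (checked below).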
4498     {
4499         std::vector<VmaAllocation> allocationsToDefragment(items.size());
4500         for(size_t i = 0; i < items.size(); ++i)
4501             allocationsToDefragment[i] = items[i].Alloc;
4502 
4503         VmaDefragmentationStats defragmentationStats;
4504         res = vmaDefragment(g_hAllocator, allocationsToDefragment.data(), items.size(), nullptr, nullptr, &defragmentationStats);
4505         TEST(res == VK_SUCCESS);
4506         TEST(defragmentationStats.deviceMemoryBlocksFreed == 2);
4507     }
4508 
4509     // Free all remaining items.
4510     for(size_t i = items.size(); i--; )
4511         vmaDestroyBuffer(g_hAllocator, items[i].Buf, items[i].Alloc);
4512     items.clear();
4513 
4514     ////////////////////////////////////////////////////////////////////////////////
4515     // Test for vmaMakePoolAllocationsLost
4516 
4517     // Allocate 4 buffers on frame 10.
4518     vmaSetCurrentFrameIndex(g_hAllocator, 10);
4519     for(size_t i = 0; i < 4; ++i)
4520     {
4521         BufItem item;
4522         res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocInfo, &item.Buf, &item.Alloc, nullptr);
4523         TEST(res == VK_SUCCESS);
4524         items.push_back(item);
4525     }
4526 
4527     // Touch first 2 of them on frame 11.
4528     vmaSetCurrentFrameIndex(g_hAllocator, 11);
4529     for(size_t i = 0; i < 2; ++i)
4530     {
4531         VmaAllocationInfo allocInfo;
4532         vmaGetAllocationInfo(g_hAllocator, items[i].Alloc, &allocInfo);
4533     }
4534 
4535     // vmaMakePoolAllocationsLost. Only remaining 2 should be lost.
4536     size_t lostCount = 0xDEADC0DE;
4537     vmaMakePoolAllocationsLost(g_hAllocator, pool, &lostCount);
4538     TEST(lostCount == 2);
4539 
4540     // Make another call. Now 0 should be lost.
4541     vmaMakePoolAllocationsLost(g_hAllocator, pool, &lostCount);
4542     TEST(lostCount == 0);
4543 
4544     // Make another call, with null count. Should not crash.
4545     vmaMakePoolAllocationsLost(g_hAllocator, pool, nullptr);
4546 
4547     // END: Free all remaining items.
4548     for(size_t i = items.size(); i--; )
4549         vmaDestroyBuffer(g_hAllocator, items[i].Buf, items[i].Alloc);
4550 
4551     items.clear();
4552 
4553     ////////////////////////////////////////////////////////////////////////////////
4554     // Test for allocation too large for pool
4555 
4556     {
4557         VmaAllocationCreateInfo allocCreateInfo = {};
4558         allocCreateInfo.pool = pool;
4559 
4560         VkMemoryRequirements memReq;
4561         memReq.memoryTypeBits = UINT32_MAX;
4562         memReq.alignment = 1;
4563         memReq.size = poolCreateInfo.blockSize + 4;
4564 
4565         VmaAllocation alloc = nullptr;
4566         res = vmaAllocateMemory(g_hAllocator, &memReq, &allocCreateInfo, &alloc, nullptr);
4567         TEST(res == VK_ERROR_OUT_OF_DEVICE_MEMORY && alloc == nullptr);
4568     }
4569 
4570     vmaDestroyPool(g_hAllocator, pool);
4571 }
4572 
4573 static bool ValidatePattern(const void* pMemory, size_t size, uint8_t pattern)
4574 {
4575     const uint8_t* pBytes = (const uint8_t*)pMemory;
4576     for(size_t i = 0; i < size; ++i)
4577     {
4578         if(pBytes[i] != pattern)
4579         {
4580             return false;
4581         }
4582     }
4583     return true;
4584 }
4585 
4586 static void TestAllocationsInitialization()
4587 {
4588     VkResult res;
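    // This test assumes the binary is built with VMA_DEBUG_INITIALIZE_ALLOCATIONS enabled,
    // so that VMA fills newly created allocations with 0xDC and destroyed ones with 0xEF;
    // those are the patterns validated below.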
4589 
4590     const size_t BUF_SIZE = 1024;
4591 
4592     // Create pool.
4593 
4594     VkBufferCreateInfo bufInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
4595     bufInfo.size = BUF_SIZE;
4596     bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
4597 
4598     VmaAllocationCreateInfo dummyBufAllocCreateInfo = {};
4599     dummyBufAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
4600 
4601     VmaPoolCreateInfo poolCreateInfo = {};
4602     poolCreateInfo.blockSize = BUF_SIZE * 10;
4603     poolCreateInfo.minBlockCount = 1; // To keep memory alive while pool exists.
4604     poolCreateInfo.maxBlockCount = 1;
4605     res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &bufInfo, &dummyBufAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
4606     TEST(res == VK_SUCCESS);
4607 
4608     VmaAllocationCreateInfo bufAllocCreateInfo = {};
4609     res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &bufAllocCreateInfo.pool);
4610     TEST(res == VK_SUCCESS);
4611 
4612     // Create one persistently mapped buffer to keep memory of this block mapped,
4613     // so that pointer to mapped data will remain (more or less...) valid even
4614     // after destruction of other allocations.
4615 
4616     bufAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
4617     VkBuffer firstBuf;
4618     VmaAllocation firstAlloc;
4619     res = vmaCreateBuffer(g_hAllocator, &bufInfo, &bufAllocCreateInfo, &firstBuf, &firstAlloc, nullptr);
4620     TEST(res == VK_SUCCESS);
4621 
4622     // Test buffers.
4623 
4624     for(uint32_t i = 0; i < 2; ++i)
4625     {
4626         const bool persistentlyMapped = i == 0;
4627         bufAllocCreateInfo.flags = persistentlyMapped ? VMA_ALLOCATION_CREATE_MAPPED_BIT : 0;
4628         VkBuffer buf;
4629         VmaAllocation alloc;
4630         VmaAllocationInfo allocInfo;
4631         res = vmaCreateBuffer(g_hAllocator, &bufInfo, &bufAllocCreateInfo, &buf, &alloc, &allocInfo);
4632         TEST(res == VK_SUCCESS);
4633 
4634         void* pMappedData;
4635         if(!persistentlyMapped)
4636         {
4637             res = vmaMapMemory(g_hAllocator, alloc, &pMappedData);
4638             TEST(res == VK_SUCCESS);
4639         }
4640         else
4641         {
4642             pMappedData = allocInfo.pMappedData;
4643         }
4644 
4645         // Validate initialized content
4646         bool valid = ValidatePattern(pMappedData, BUF_SIZE, 0xDC);
4647         TEST(valid);
4648 
4649         if(!persistentlyMapped)
4650         {
4651             vmaUnmapMemory(g_hAllocator, alloc);
4652         }
4653 
4654         vmaDestroyBuffer(g_hAllocator, buf, alloc);
4655 
4656         // Validate freed content
4657         valid = ValidatePattern(pMappedData, BUF_SIZE, 0xEF);
4658         TEST(valid);
4659     }
4660 
4661     vmaDestroyBuffer(g_hAllocator, firstBuf, firstAlloc);
4662     vmaDestroyPool(g_hAllocator, bufAllocCreateInfo.pool);
4663 }
4664 
4665 static void TestPool_Benchmark(
4666     PoolTestResult& outResult,
4667     const PoolTestConfig& config)
4668 {
4669     TEST(config.ThreadCount > 0);
4670 
4671     RandomNumberGenerator mainRand{config.RandSeed};
4672 
4673     uint32_t allocationSizeProbabilitySum = std::accumulate(
4674         config.AllocationSizes.begin(),
4675         config.AllocationSizes.end(),
4676         0u,
4677         [](uint32_t sum, const AllocationSize& allocSize) {
4678             return sum + allocSize.Probability;
4679         });
4680 
4681     VkBufferCreateInfo bufferTemplateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
4682     bufferTemplateInfo.size = 256; // Whatever.
4683     bufferTemplateInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
4684 
4685     VkImageCreateInfo imageTemplateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
4686     imageTemplateInfo.imageType = VK_IMAGE_TYPE_2D;
4687     imageTemplateInfo.extent.width = 256; // Whatever.
4688     imageTemplateInfo.extent.height = 256; // Whatever.
4689     imageTemplateInfo.extent.depth = 1;
4690     imageTemplateInfo.mipLevels = 1;
4691     imageTemplateInfo.arrayLayers = 1;
4692     imageTemplateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
4693     imageTemplateInfo.tiling = VK_IMAGE_TILING_OPTIMAL; // LINEAR if CPU memory.
4694     imageTemplateInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
4695     imageTemplateInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; // TRANSFER_SRC if CPU memory.
4696     imageTemplateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
4697 
4698     uint32_t bufferMemoryTypeBits = UINT32_MAX;
4699     {
4700         VkBuffer dummyBuffer;
4701         VkResult res = vkCreateBuffer(g_hDevice, &bufferTemplateInfo, g_Allocs, &dummyBuffer);
4702         TEST(res == VK_SUCCESS);
4703 
4704         VkMemoryRequirements memReq;
4705         vkGetBufferMemoryRequirements(g_hDevice, dummyBuffer, &memReq);
4706         bufferMemoryTypeBits = memReq.memoryTypeBits;
4707 
4708         vkDestroyBuffer(g_hDevice, dummyBuffer, g_Allocs);
4709     }
4710 
4711     uint32_t imageMemoryTypeBits = UINT32_MAX;
4712     {
4713         VkImage dummyImage;
4714         VkResult res = vkCreateImage(g_hDevice, &imageTemplateInfo, g_Allocs, &dummyImage);
4715         TEST(res == VK_SUCCESS);
4716 
4717         VkMemoryRequirements memReq;
4718         vkGetImageMemoryRequirements(g_hDevice, dummyImage, &memReq);
4719         imageMemoryTypeBits = memReq.memoryTypeBits;
4720 
4721         vkDestroyImage(g_hDevice, dummyImage, g_Allocs);
4722     }
4723 
4724     uint32_t memoryTypeBits = 0;
4725     if(config.UsesBuffers() && config.UsesImages())
4726     {
4727         memoryTypeBits = bufferMemoryTypeBits & imageMemoryTypeBits;
4728         if(memoryTypeBits == 0)
4729         {
4730             PrintWarning(L"Cannot test buffers + images in the same memory pool on this GPU.");
4731             return;
4732         }
4733     }
4734     else if(config.UsesBuffers())
4735         memoryTypeBits = bufferMemoryTypeBits;
4736     else if(config.UsesImages())
4737         memoryTypeBits = imageMemoryTypeBits;
4738     else
4739         TEST(0);
4740 
4741     VmaPoolCreateInfo poolCreateInfo = {};
4742     poolCreateInfo.minBlockCount = 1;
4743     poolCreateInfo.maxBlockCount = 1;
4744     poolCreateInfo.blockSize = config.PoolSize;
4745     poolCreateInfo.frameInUseCount = 1;
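    // frameInUseCount = 1: an allocation can become lost only if it was not used in the
    // current frame or the previous one.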
4746 
4747     const VkPhysicalDeviceMemoryProperties* memProps = nullptr;
4748     vmaGetMemoryProperties(g_hAllocator, &memProps);
4749 
4750     VmaPool pool = VK_NULL_HANDLE;
4751     VkResult res;
4752     // Loop over memory types because we sometimes allocate a big block here,
4753     // while the most eligible DEVICE_LOCAL heap may be only 256 MB on some GPUs.
4754     while(memoryTypeBits)
4755     {
4756         VmaAllocationCreateInfo dummyAllocCreateInfo = {};
4757         dummyAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
4758         vmaFindMemoryTypeIndex(g_hAllocator, memoryTypeBits, &dummyAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
4759 
4760         const uint32_t heapIndex = memProps->memoryTypes[poolCreateInfo.memoryTypeIndex].heapIndex;
4761         // Protection against validation layer error when trying to allocate a block larger than entire heap size,
4762         // which may be only 256 MB on some platforms.
4763         if(poolCreateInfo.blockSize * poolCreateInfo.minBlockCount < memProps->memoryHeaps[heapIndex].size)
4764         {
4765             res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
4766             if(res == VK_SUCCESS)
4767                 break;
4768         }
4769         memoryTypeBits &= ~(1u << poolCreateInfo.memoryTypeIndex);
4770     }
4771     TEST(pool);
4772 
4773     // Start time measurement - after creating pool and initializing data structures.
4774     time_point timeBeg = std::chrono::high_resolution_clock::now();
4775 
4776     ////////////////////////////////////////////////////////////////////////////////
4777     // ThreadProc
4778     auto ThreadProc = [&config, allocationSizeProbabilitySum, pool](
4779         PoolTestThreadResult* outThreadResult,
4780         uint32_t randSeed,
4781         HANDLE frameStartEvent,
4782         HANDLE frameEndEvent) -> void
4783     {
4784         RandomNumberGenerator threadRand{randSeed};
4785         VkResult res = VK_SUCCESS;
4786 
4787         VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
4788         bufferInfo.size = 256; // Whatever.
4789         bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
4790 
4791         VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
4792         imageInfo.imageType = VK_IMAGE_TYPE_2D;
4793         imageInfo.extent.width = 256; // Whatever.
4794         imageInfo.extent.height = 256; // Whatever.
4795         imageInfo.extent.depth = 1;
4796         imageInfo.mipLevels = 1;
4797         imageInfo.arrayLayers = 1;
4798         imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
4799         imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL; // LINEAR if CPU memory.
4800         imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
4801         imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; // TRANSFER_SRC if CPU memory.
4802         imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
4803 
4804         outThreadResult->AllocationTimeMin = duration::max();
4805         outThreadResult->AllocationTimeSum = duration::zero();
4806         outThreadResult->AllocationTimeMax = duration::min();
4807         outThreadResult->DeallocationTimeMin = duration::max();
4808         outThreadResult->DeallocationTimeSum = duration::zero();
4809         outThreadResult->DeallocationTimeMax = duration::min();
4810         outThreadResult->AllocationCount = 0;
4811         outThreadResult->DeallocationCount = 0;
4812         outThreadResult->LostAllocationCount = 0;
4813         outThreadResult->LostAllocationTotalSize = 0;
4814         outThreadResult->FailedAllocationCount = 0;
4815         outThreadResult->FailedAllocationTotalSize = 0;
4816 
4817         struct Item
4818         {
4819             VkDeviceSize BufferSize = 0;
4820             VkExtent2D ImageSize = { 0, 0 };
4821             VkBuffer Buf = VK_NULL_HANDLE;
4822             VkImage Image = VK_NULL_HANDLE;
4823             VmaAllocation Alloc = VK_NULL_HANDLE;
4824 
4825             Item() { }
4826             Item(Item&& src) :
4827                 BufferSize(src.BufferSize), ImageSize(src.ImageSize), Buf(src.Buf), Image(src.Image), Alloc(src.Alloc)
4828             {
4829                 src.BufferSize = 0;
4830                 src.ImageSize = {0, 0};
4831                 src.Buf = VK_NULL_HANDLE;
4832                 src.Image = VK_NULL_HANDLE;
4833                 src.Alloc = VK_NULL_HANDLE;
4834             }
4835             Item(const Item& src) = delete;
4836             ~Item()
4837             {
4838                 DestroyResources();
4839             }
4840             Item& operator=(Item&& src)
4841             {
4842                 if(&src != this)
4843                 {
4844                     DestroyResources();
4845                     BufferSize = src.BufferSize; ImageSize = src.ImageSize;
4846                     Buf = src.Buf; Image = src.Image; Alloc = src.Alloc;
4847                     src.BufferSize = 0;
4848                     src.ImageSize = {0, 0};
4849                     src.Buf = VK_NULL_HANDLE;
4850                     src.Image = VK_NULL_HANDLE;
4851                     src.Alloc = VK_NULL_HANDLE;
4852                 }
4853                 return *this;
4854             }
4855             Item& operator=(const Item& src) = delete;
4856             void DestroyResources()
4857             {
4858                 if(Buf)
4859                 {
4860                     assert(Image == VK_NULL_HANDLE);
4861                     vmaDestroyBuffer(g_hAllocator, Buf, Alloc);
4862                     Buf = VK_NULL_HANDLE;
4863                 }
4864                 else
4865                 {
4866                     vmaDestroyImage(g_hAllocator, Image, Alloc);
4867                     Image = VK_NULL_HANDLE;
4868                 }
4869                 Alloc = VK_NULL_HANDLE;
4870             }
4871             VkDeviceSize CalcSizeBytes() const
4872             {
4873                 return BufferSize +
4874                     4ull * ImageSize.width * ImageSize.height;
4875             }
4876         };
4877         std::vector<Item> unusedItems, usedItems;
4878 
4879         const size_t threadTotalItemCount = config.TotalItemCount / config.ThreadCount;
4880 
4881         // Create all items - all unused, not yet allocated.
4882         for(size_t i = 0; i < threadTotalItemCount; ++i)
4883         {
4884             Item item = {};
4885 
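            // Pick an AllocationSize bucket at random, weighted by its Probability field.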
4886             uint32_t allocSizeIndex = 0;
4887             uint32_t r = threadRand.Generate() % allocationSizeProbabilitySum;
4888             while(r >= config.AllocationSizes[allocSizeIndex].Probability)
4889                 r -= config.AllocationSizes[allocSizeIndex++].Probability;
4890 
4891             const AllocationSize& allocSize = config.AllocationSizes[allocSizeIndex];
4892             if(allocSize.BufferSizeMax > 0)
4893             {
4894                 TEST(allocSize.BufferSizeMin > 0);
4895                 TEST(allocSize.ImageSizeMin == 0 && allocSize.ImageSizeMax == 0);
4896                 if(allocSize.BufferSizeMax == allocSize.BufferSizeMin)
4897                     item.BufferSize = allocSize.BufferSizeMin;
4898                 else
4899                 {
4900                     item.BufferSize = allocSize.BufferSizeMin + threadRand.Generate() % (allocSize.BufferSizeMax - allocSize.BufferSizeMin);
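                    // Round the randomized size down to a multiple of 16 bytes.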
4901                     item.BufferSize = item.BufferSize / 16 * 16;
4902                 }
4903             }
4904             else
4905             {
4906                 TEST(allocSize.ImageSizeMin > 0 && allocSize.ImageSizeMax > 0);
4907                 if(allocSize.ImageSizeMax == allocSize.ImageSizeMin)
4908                     item.ImageSize.width = item.ImageSize.height = allocSize.ImageSizeMax;
4909                 else
4910                 {
4911                     item.ImageSize.width  = allocSize.ImageSizeMin + threadRand.Generate() % (allocSize.ImageSizeMax - allocSize.ImageSizeMin);
4912                     item.ImageSize.height = allocSize.ImageSizeMin + threadRand.Generate() % (allocSize.ImageSizeMax - allocSize.ImageSizeMin);
4913                 }
4914             }
4915 
4916             unusedItems.push_back(std::move(item));
4917         }
4918 
4919         auto Allocate = [&](Item& item) -> VkResult
4920         {
4921             assert(item.Buf == VK_NULL_HANDLE && item.Image == VK_NULL_HANDLE && item.Alloc == VK_NULL_HANDLE);
4922 
4923             VmaAllocationCreateInfo allocCreateInfo = {};
4924             allocCreateInfo.pool = pool;
4925             allocCreateInfo.flags = VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT |
4926                 VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT;
4927 
4928             if(item.BufferSize)
4929             {
4930                 bufferInfo.size = item.BufferSize;
4931                 VkResult res = VK_SUCCESS;
4932                 {
4933                     PoolAllocationTimeRegisterObj timeRegisterObj(*outThreadResult);
4934                     res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocCreateInfo, &item.Buf, &item.Alloc, nullptr);
4935                 }
4936                 if(res == VK_SUCCESS)
4937                     SetDebugUtilsObjectName(VK_OBJECT_TYPE_BUFFER, (uint64_t)item.Buf, "TestPool_Benchmark_Buffer");
4938                 return res;
4939             }
4940             else
4941             {
4942                 TEST(item.ImageSize.width && item.ImageSize.height);
4943 
4944                 imageInfo.extent.width = item.ImageSize.width;
4945                 imageInfo.extent.height = item.ImageSize.height;
4946                 VkResult res = VK_SUCCESS;
4947                 {
4948                     PoolAllocationTimeRegisterObj timeRegisterObj(*outThreadResult);
4949                     res = vmaCreateImage(g_hAllocator, &imageInfo, &allocCreateInfo, &item.Image, &item.Alloc, nullptr);
4950                 }
4951                 if(res == VK_SUCCESS)
4952                     SetDebugUtilsObjectName(VK_OBJECT_TYPE_IMAGE, (uint64_t)item.Image, "TestPool_Benchmark_Image");
4953                 return res;
4954             }
4955         };
4956 
4957         ////////////////////////////////////////////////////////////////////////////////
4958         // Frames
4959         for(uint32_t frameIndex = 0; frameIndex < config.FrameCount; ++frameIndex)
4960         {
4961             WaitForSingleObject(frameStartEvent, INFINITE);
4962 
4963             // Always make some percent of used bufs unused, to choose different used ones.
4964             const size_t bufsToMakeUnused = usedItems.size() * config.ItemsToMakeUnusedPercent / 100;
4965             for(size_t i = 0; i < bufsToMakeUnused; ++i)
4966             {
4967                 size_t index = threadRand.Generate() % usedItems.size();
4968                 auto it = usedItems.begin() + index;
4969                 Item item = std::move(*it);
4970                 usedItems.erase(it);
4971                 unusedItems.push_back(std::move(item));
4972             }
4973 
4974             // Determine which bufs we want to use in this frame.
4975             const size_t usedBufCount = (threadRand.Generate() % (config.UsedItemCountMax - config.UsedItemCountMin) + config.UsedItemCountMin)
4976                 / config.ThreadCount;
4977             TEST(usedBufCount < usedItems.size() + unusedItems.size());
4978             // Move some used to unused.
4979             while(usedBufCount < usedItems.size())
4980             {
4981                 size_t index = threadRand.Generate() % usedItems.size();
4982                 auto it = usedItems.begin() + index;
4983                 Item item = std::move(*it);
4984                 usedItems.erase(it);
4985                 unusedItems.push_back(std::move(item));
4986             }
4987             // Move some unused to used.
4988             while(usedBufCount > usedItems.size())
4989             {
4990                 size_t index = threadRand.Generate() % unusedItems.size();
4991                 auto it = unusedItems.begin() + index;
4992                 Item item = std::move(*it);
4993                 unusedItems.erase(it);
4994                 usedItems.push_back(std::move(item));
4995             }
4996 
4997             uint32_t touchExistingCount = 0;
4998             uint32_t touchLostCount = 0;
4999             uint32_t createSucceededCount = 0;
5000             uint32_t createFailedCount = 0;
5001 
5002             // Touch all used bufs. If not created or lost, allocate.
5003             for(size_t i = 0; i < usedItems.size(); ++i)
5004             {
5005                 Item& item = usedItems[i];
5006                 // Not yet created.
5007                 if(item.Alloc == VK_NULL_HANDLE)
5008                 {
5009                     res = Allocate(item);
5010                     ++outThreadResult->AllocationCount;
5011                     if(res != VK_SUCCESS)
5012                     {
5013                         assert(item.Alloc == VK_NULL_HANDLE && item.Buf == VK_NULL_HANDLE && item.Image == VK_NULL_HANDLE);
5014                         ++outThreadResult->FailedAllocationCount;
5015                         outThreadResult->FailedAllocationTotalSize += item.CalcSizeBytes();
5016                         ++createFailedCount;
5017                     }
5018                     else
5019                         ++createSucceededCount;
5020                 }
5021                 else
5022                 {
5023                     // Touch.
5024                     VmaAllocationInfo allocInfo;
5025                     vmaGetAllocationInfo(g_hAllocator, item.Alloc, &allocInfo);
5026                     // Lost.
5027                     if(allocInfo.deviceMemory == VK_NULL_HANDLE)
5028                     {
5029                         ++touchLostCount;
5030 
5031                         // Destroy.
5032                         {
5033                             PoolDeallocationTimeRegisterObj timeRegisterObj(*outThreadResult);
5034                             item.DestroyResources();
5035                             ++outThreadResult->DeallocationCount;
5036                         }
5037 
5038                         ++outThreadResult->LostAllocationCount;
5039                         outThreadResult->LostAllocationTotalSize += item.CalcSizeBytes();
5040 
5041                         // Recreate.
5042                         res = Allocate(item);
5043                         ++outThreadResult->AllocationCount;
5044                         // Creation failed.
5045                         if(res != VK_SUCCESS)
5046                         {
5047                             TEST(item.Alloc == VK_NULL_HANDLE && item.Buf == VK_NULL_HANDLE && item.Image == VK_NULL_HANDLE);
5048                             ++outThreadResult->FailedAllocationCount;
5049                             outThreadResult->FailedAllocationTotalSize += item.CalcSizeBytes();
5050                             ++createFailedCount;
5051                         }
5052                         else
5053                             ++createSucceededCount;
5054                     }
5055                     else
5056                         ++touchExistingCount;
5057                 }
5058             }
5059 
5060             /*
5061             printf("Thread %u frame %u: Touch existing %u lost %u, create succeeded %u failed %u\n",
5062                 randSeed, frameIndex,
5063                 touchExistingCount, touchLostCount,
5064                 createSucceededCount, createFailedCount);
5065             */
5066 
5067             SetEvent(frameEndEvent);
5068         }
5069 
5070         // Free all remaining items.
5071         for(size_t i = usedItems.size(); i--; )
5072         {
5073             PoolDeallocationTimeRegisterObj timeRegisterObj(*outThreadResult);
5074             usedItems[i].DestroyResources();
5075             ++outThreadResult->DeallocationCount;
5076         }
5077         for(size_t i = unusedItems.size(); i--; )
5078         {
5079             PoolDeallocationTimeRegisterObj timeRegisterObj(*outThreadResult);
5080             unusedItems[i].DestroyResources();
5081             ++outThreadResult->DeallocationCount;
5082         }
5083     };
5084 
5085     // Launch threads.
5086     uint32_t threadRandSeed = mainRand.Generate();
5087     std::vector<HANDLE> frameStartEvents{config.ThreadCount};
5088     std::vector<HANDLE> frameEndEvents{config.ThreadCount};
5089     std::vector<std::thread> bkgThreads;
5090     std::vector<PoolTestThreadResult> threadResults{config.ThreadCount};
5091     for(uint32_t threadIndex = 0; threadIndex < config.ThreadCount; ++threadIndex)
5092     {
5093         frameStartEvents[threadIndex] = CreateEvent(NULL, FALSE, FALSE, NULL);
5094         frameEndEvents[threadIndex] = CreateEvent(NULL, FALSE, FALSE, NULL);
5095         bkgThreads.emplace_back(std::bind(
5096             ThreadProc,
5097             &threadResults[threadIndex],
5098             threadRandSeed + threadIndex,
5099             frameStartEvents[threadIndex],
5100             frameEndEvents[threadIndex]));
5101     }
5102 
5103     // Execute frames.
5104     TEST(config.ThreadCount <= MAXIMUM_WAIT_OBJECTS);
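    // WaitForMultipleObjects can wait on at most MAXIMUM_WAIT_OBJECTS (64) handles at once,
    // hence the limit on ThreadCount.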
5105     for(uint32_t frameIndex = 0; frameIndex < config.FrameCount; ++frameIndex)
5106     {
5107         vmaSetCurrentFrameIndex(g_hAllocator, frameIndex);
5108         for(size_t threadIndex = 0; threadIndex < config.ThreadCount; ++threadIndex)
5109             SetEvent(frameStartEvents[threadIndex]);
5110         WaitForMultipleObjects(config.ThreadCount, &frameEndEvents[0], TRUE, INFINITE);
5111     }
5112 
5113     // Wait for threads finished
5114     for(size_t i = 0; i < bkgThreads.size(); ++i)
5115     {
5116         bkgThreads[i].join();
5117         CloseHandle(frameEndEvents[i]);
5118         CloseHandle(frameStartEvents[i]);
5119     }
5120     bkgThreads.clear();
5121 
5122     // Finish time measurement - before destroying pool.
5123     outResult.TotalTime = std::chrono::high_resolution_clock::now() - timeBeg;
5124 
5125     vmaDestroyPool(g_hAllocator, pool);
5126 
5127     outResult.AllocationTimeMin = duration::max();
5128     outResult.AllocationTimeAvg = duration::zero();
5129     outResult.AllocationTimeMax = duration::min();
5130     outResult.DeallocationTimeMin = duration::max();
5131     outResult.DeallocationTimeAvg = duration::zero();
5132     outResult.DeallocationTimeMax = duration::min();
5133     outResult.LostAllocationCount = 0;
5134     outResult.LostAllocationTotalSize = 0;
5135     outResult.FailedAllocationCount = 0;
5136     outResult.FailedAllocationTotalSize = 0;
5137     size_t allocationCount = 0;
5138     size_t deallocationCount = 0;
5139     for(size_t threadIndex = 0; threadIndex < config.ThreadCount; ++threadIndex)
5140     {
5141         const PoolTestThreadResult& threadResult = threadResults[threadIndex];
5142         outResult.AllocationTimeMin = std::min(outResult.AllocationTimeMin, threadResult.AllocationTimeMin);
5143         outResult.AllocationTimeMax = std::max(outResult.AllocationTimeMax, threadResult.AllocationTimeMax);
5144         outResult.AllocationTimeAvg += threadResult.AllocationTimeSum;
5145         outResult.DeallocationTimeMin = std::min(outResult.DeallocationTimeMin, threadResult.DeallocationTimeMin);
5146         outResult.DeallocationTimeMax = std::max(outResult.DeallocationTimeMax, threadResult.DeallocationTimeMax);
5147         outResult.DeallocationTimeAvg += threadResult.DeallocationTimeSum;
5148         allocationCount += threadResult.AllocationCount;
5149         deallocationCount += threadResult.DeallocationCount;
5150         outResult.FailedAllocationCount += threadResult.FailedAllocationCount;
5151         outResult.FailedAllocationTotalSize += threadResult.FailedAllocationTotalSize;
5152         outResult.LostAllocationCount += threadResult.LostAllocationCount;
5153         outResult.LostAllocationTotalSize += threadResult.LostAllocationTotalSize;
5154     }
5155     if(allocationCount)
5156         outResult.AllocationTimeAvg /= allocationCount;
5157     if(deallocationCount)
5158         outResult.DeallocationTimeAvg /= deallocationCount;
5159 }
5160 
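// Returns true if the half-open ranges [ptr1, ptr1+size1) and [ptr2, ptr2+size2) overlap.
// Hypothetical example: regions [p, p+8) and [p+4, p+12) overlap, while adjacent regions
// [p, p+8) and [p+8, p+16) do not.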
5161 static inline bool MemoryRegionsOverlap(char* ptr1, size_t size1, char* ptr2, size_t size2)
5162 {
5163     if(ptr1 < ptr2)
5164         return ptr1 + size1 > ptr2;
5165     else if(ptr2 < ptr1)
5166         return ptr2 + size2 > ptr1;
5167     else
5168         return true;
5169 }
5170 
5171 static void TestMemoryUsage()
5172 {
5173     wprintf(L"Testing memory usage:\n");
5174 
5175     static const VmaMemoryUsage lastUsage = VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED;
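    // Iterate over all VmaMemoryUsage values up to GPU_LAZILY_ALLOCATED and, for each one,
    // print which memory type gets chosen for a few typical resources. Failures are printed
    // rather than asserted, since some combinations (e.g. lazily allocated memory) are
    // legitimately unsupported on many devices.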
5176     for(uint32_t usage = 0; usage <= lastUsage; ++usage)
5177     {
5178         switch(usage)
5179         {
5180         case VMA_MEMORY_USAGE_UNKNOWN: printf("  VMA_MEMORY_USAGE_UNKNOWN:\n"); break;
5181         case VMA_MEMORY_USAGE_GPU_ONLY: printf("  VMA_MEMORY_USAGE_GPU_ONLY:\n"); break;
5182         case VMA_MEMORY_USAGE_CPU_ONLY: printf("  VMA_MEMORY_USAGE_CPU_ONLY:\n"); break;
5183         case VMA_MEMORY_USAGE_CPU_TO_GPU: printf("  VMA_MEMORY_USAGE_CPU_TO_GPU:\n"); break;
5184         case VMA_MEMORY_USAGE_GPU_TO_CPU: printf("  VMA_MEMORY_USAGE_GPU_TO_CPU:\n"); break;
5185         case VMA_MEMORY_USAGE_CPU_COPY: printf("  VMA_MEMORY_USAGE_CPU_COPY:\n"); break;
5186         case VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED: printf("  VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED:\n"); break;
5187         default: assert(0);
5188         }
5189 
5190         auto printResult = [](const char* testName, VkResult res, uint32_t memoryTypeBits, uint32_t memoryTypeIndex)
5191         {
5192             if(res == VK_SUCCESS)
5193                 printf("    %s: memoryTypeBits=0x%X, memoryTypeIndex=%u\n", testName, memoryTypeBits, memoryTypeIndex);
5194             else
5195                 printf("    %s: memoryTypeBits=0x%X, FAILED with res=%d\n", testName, memoryTypeBits, (int32_t)res);
5196         };
5197 
5198         // 1: Buffer for copy
5199         {
5200             VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
5201             bufCreateInfo.size = 65536;
5202             bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
5203 
5204             VkBuffer buf = VK_NULL_HANDLE;
5205             VkResult res = vkCreateBuffer(g_hDevice, &bufCreateInfo, g_Allocs, &buf);
5206             TEST(res == VK_SUCCESS && buf != VK_NULL_HANDLE);
5207 
5208             VkMemoryRequirements memReq = {};
5209             vkGetBufferMemoryRequirements(g_hDevice, buf, &memReq);
5210 
5211             VmaAllocationCreateInfo allocCreateInfo = {};
5212             allocCreateInfo.usage = (VmaMemoryUsage)usage;
5213             VmaAllocation alloc = VK_NULL_HANDLE;
5214             VmaAllocationInfo allocInfo = {};
5215             res = vmaAllocateMemoryForBuffer(g_hAllocator, buf, &allocCreateInfo, &alloc, &allocInfo);
5216             if(res == VK_SUCCESS)
5217             {
5218                 TEST((memReq.memoryTypeBits & (1u << allocInfo.memoryType)) != 0);
5219                 res = vkBindBufferMemory(g_hDevice, buf, allocInfo.deviceMemory, allocInfo.offset);
5220                 TEST(res == VK_SUCCESS);
5221             }
5222             printResult("Buffer TRANSFER_DST + TRANSFER_SRC", res, memReq.memoryTypeBits, allocInfo.memoryType);
5223             vmaDestroyBuffer(g_hAllocator, buf, alloc);
5224         }
5225 
5226         // 2: Vertex buffer
5227         {
5228             VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
5229             bufCreateInfo.size = 65536;
5230             bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
5231 
5232             VkBuffer buf = VK_NULL_HANDLE;
5233             VkResult res = vkCreateBuffer(g_hDevice, &bufCreateInfo, g_Allocs, &buf);
5234             TEST(res == VK_SUCCESS && buf != VK_NULL_HANDLE);
5235 
5236             VkMemoryRequirements memReq = {};
5237             vkGetBufferMemoryRequirements(g_hDevice, buf, &memReq);
5238 
5239             VmaAllocationCreateInfo allocCreateInfo = {};
5240             allocCreateInfo.usage = (VmaMemoryUsage)usage;
5241             VmaAllocation alloc = VK_NULL_HANDLE;
5242             VmaAllocationInfo allocInfo = {};
5243             res = vmaAllocateMemoryForBuffer(g_hAllocator, buf, &allocCreateInfo, &alloc, &allocInfo);
5244             if(res == VK_SUCCESS)
5245             {
5246                 TEST((memReq.memoryTypeBits & (1u << allocInfo.memoryType)) != 0);
5247                 res = vkBindBufferMemory(g_hDevice, buf, allocInfo.deviceMemory, allocInfo.offset);
5248                 TEST(res == VK_SUCCESS);
5249             }
5250             printResult("Buffer TRANSFER_DST + VERTEX_BUFFER", res, memReq.memoryTypeBits, allocInfo.memoryType);
5251             vmaDestroyBuffer(g_hAllocator, buf, alloc);
5252         }
5253 
5254         // 3: Image for copy, OPTIMAL
5255         {
5256             VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
5257             imgCreateInfo.imageType = VK_IMAGE_TYPE_2D;
5258             imgCreateInfo.extent.width = 256;
5259             imgCreateInfo.extent.height = 256;
5260             imgCreateInfo.extent.depth = 1;
5261             imgCreateInfo.mipLevels = 1;
5262             imgCreateInfo.arrayLayers = 1;
5263             imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
5264             imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
5265             imgCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
5266             imgCreateInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
5267             imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
5268 
5269             VkImage img = VK_NULL_HANDLE;
5270             VkResult res = vkCreateImage(g_hDevice, &imgCreateInfo, g_Allocs, &img);
5271             TEST(res == VK_SUCCESS && img != VK_NULL_HANDLE);
5272 
5273             VkMemoryRequirements memReq = {};
5274             vkGetImageMemoryRequirements(g_hDevice, img, &memReq);
5275 
5276             VmaAllocationCreateInfo allocCreateInfo = {};
5277             allocCreateInfo.usage = (VmaMemoryUsage)usage;
5278             VmaAllocation alloc = VK_NULL_HANDLE;
5279             VmaAllocationInfo allocInfo = {};
5280             res = vmaAllocateMemoryForImage(g_hAllocator, img, &allocCreateInfo, &alloc, &allocInfo);
5281             if(res == VK_SUCCESS)
5282             {
5283                 TEST((memReq.memoryTypeBits & (1u << allocInfo.memoryType)) != 0);
5284                 res = vkBindImageMemory(g_hDevice, img, allocInfo.deviceMemory, allocInfo.offset);
5285                 TEST(res == VK_SUCCESS);
5286             }
5287             printResult("Image OPTIMAL TRANSFER_DST + TRANSFER_SRC", res, memReq.memoryTypeBits, allocInfo.memoryType);
5288 
5289             vmaDestroyImage(g_hAllocator, img, alloc);
5290         }
5291 
5292         // 4: Image SAMPLED, OPTIMAL
5293         {
5294             VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
5295             imgCreateInfo.imageType = VK_IMAGE_TYPE_2D;
5296             imgCreateInfo.extent.width = 256;
5297             imgCreateInfo.extent.height = 256;
5298             imgCreateInfo.extent.depth = 1;
5299             imgCreateInfo.mipLevels = 1;
5300             imgCreateInfo.arrayLayers = 1;
5301             imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
5302             imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
5303             imgCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
5304             imgCreateInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
5305             imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
5306 
5307             VkImage img = VK_NULL_HANDLE;
5308             VkResult res = vkCreateImage(g_hDevice, &imgCreateInfo, g_Allocs, &img);
5309             TEST(res == VK_SUCCESS && img != VK_NULL_HANDLE);
5310 
5311             VkMemoryRequirements memReq = {};
5312             vkGetImageMemoryRequirements(g_hDevice, img, &memReq);
5313 
5314             VmaAllocationCreateInfo allocCreateInfo = {};
5315             allocCreateInfo.usage = (VmaMemoryUsage)usage;
5316             VmaAllocation alloc = VK_NULL_HANDLE;
5317             VmaAllocationInfo allocInfo = {};
5318             res = vmaAllocateMemoryForImage(g_hAllocator, img, &allocCreateInfo, &alloc, &allocInfo);
5319             if(res == VK_SUCCESS)
5320             {
5321                 TEST((memReq.memoryTypeBits & (1u << allocInfo.memoryType)) != 0);
5322                 res = vkBindImageMemory(g_hDevice, img, allocInfo.deviceMemory, allocInfo.offset);
5323                 TEST(res == VK_SUCCESS);
5324             }
5325             printResult("Image OPTIMAL TRANSFER_DST + SAMPLED", res, memReq.memoryTypeBits, allocInfo.memoryType);
5326             vmaDestroyImage(g_hAllocator, img, alloc);
5327         }
5328 
5329         // 5: Image COLOR_ATTACHMENT, OPTIMAL
5330         {
5331             VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
5332             imgCreateInfo.imageType = VK_IMAGE_TYPE_2D;
5333             imgCreateInfo.extent.width = 256;
5334             imgCreateInfo.extent.height = 256;
5335             imgCreateInfo.extent.depth = 1;
5336             imgCreateInfo.mipLevels = 1;
5337             imgCreateInfo.arrayLayers = 1;
5338             imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
5339             imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
5340             imgCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
5341             imgCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
5342             imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
5343 
5344             VkImage img = VK_NULL_HANDLE;
5345             VkResult res = vkCreateImage(g_hDevice, &imgCreateInfo, g_Allocs, &img);
5346             TEST(res == VK_SUCCESS && img != VK_NULL_HANDLE);
5347 
5348             VkMemoryRequirements memReq = {};
5349             vkGetImageMemoryRequirements(g_hDevice, img, &memReq);
5350 
5351             VmaAllocationCreateInfo allocCreateInfo = {};
5352             allocCreateInfo.usage = (VmaMemoryUsage)usage;
5353             VmaAllocation alloc = VK_NULL_HANDLE;
5354             VmaAllocationInfo allocInfo = {};
5355             res = vmaAllocateMemoryForImage(g_hAllocator, img, &allocCreateInfo, &alloc, &allocInfo);
5356             if(res == VK_SUCCESS)
5357             {
5358                 TEST((memReq.memoryTypeBits & (1u << allocInfo.memoryType)) != 0);
5359                 res = vkBindImageMemory(g_hDevice, img, allocInfo.deviceMemory, allocInfo.offset);
5360                 TEST(res == VK_SUCCESS);
5361             }
5362             printResult("Image OPTIMAL SAMPLED + COLOR_ATTACHMENT", res, memReq.memoryTypeBits, allocInfo.memoryType);
5363             vmaDestroyImage(g_hAllocator, img, alloc);
5364         }
5365     }
5366 }
5367 
5368 static uint32_t FindDeviceCoherentMemoryTypeBits()
5369 {
5370     VkPhysicalDeviceMemoryProperties memProps;
5371     vkGetPhysicalDeviceMemoryProperties(g_hPhysicalDevice, &memProps);
5372 
5373     uint32_t memTypeBits = 0;
5374     for(uint32_t i = 0; i < memProps.memoryTypeCount; ++i)
5375     {
5376         if(memProps.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD)
5377             memTypeBits |= 1u << i;
5378     }
5379     return memTypeBits;
5380 }
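// Note: the returned mask could also be passed as VmaAllocationCreateInfo::memoryTypeBits to
// restrict an allocation to DEVICE_COHERENT memory types; TestDeviceCoherentMemory below uses
// requiredFlags to achieve the same effect.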
5381 
5382 static void TestDeviceCoherentMemory()
5383 {
5384     if(!VK_AMD_device_coherent_memory_enabled)
5385         return;
5386 
5387     uint32_t deviceCoherentMemoryTypeBits = FindDeviceCoherentMemoryTypeBits();
5388     // Extension is enabled, feature is enabled, and the device still doesn't support any such memory type?
5389     // OK then, so it's just fake!
5390     if(deviceCoherentMemoryTypeBits == 0)
5391         return;
5392 
5393     wprintf(L"Testing device coherent memory...\n");
5394 
5395     // 1. Try to allocate buffer from a memory type that is DEVICE_COHERENT.
5396 
5397     VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
5398     bufCreateInfo.size = 0x10000;
5399     bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
5400 
5401     VmaAllocationCreateInfo allocCreateInfo = {};
5402     allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
5403     allocCreateInfo.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD;
5404 
5405     AllocInfo alloc = {};
5406     VmaAllocationInfo allocInfo = {};
5407     VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &alloc.m_Buffer, &alloc.m_Allocation, &allocInfo);
5408 
5409     // Make sure it succeeded and was really created in such memory type.
5410     TEST(res == VK_SUCCESS);
5411     TEST((1u << allocInfo.memoryType) & deviceCoherentMemoryTypeBits);
5412 
5413     alloc.Destroy();
5414 
5415     // 2. Try to create a pool in such memory type.
5416     {
5417         VmaPoolCreateInfo poolCreateInfo = {};
5418 
5419         res = vmaFindMemoryTypeIndex(g_hAllocator, UINT32_MAX, &allocCreateInfo, &poolCreateInfo.memoryTypeIndex);
5420         TEST(res == VK_SUCCESS);
5421         TEST((1u << poolCreateInfo.memoryTypeIndex) & deviceCoherentMemoryTypeBits);
5422 
5423         VmaPool pool = VK_NULL_HANDLE;
5424         res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
5425         TEST(res == VK_SUCCESS);
5426 
5427         vmaDestroyPool(g_hAllocator, pool);
5428     }
5429 
5430     // 3. Try the same with a local allocator created without VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT.
5431 
5432     VmaAllocatorCreateInfo allocatorCreateInfo = {};
5433     SetAllocatorCreateInfo(allocatorCreateInfo);
5434     allocatorCreateInfo.flags &= ~VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT;
5435 
5436     VmaAllocator localAllocator = VK_NULL_HANDLE;
5437     res = vmaCreateAllocator(&allocatorCreateInfo, &localAllocator);
5438     TEST(res == VK_SUCCESS && localAllocator);
5439 
5440     res = vmaCreateBuffer(localAllocator, &bufCreateInfo, &allocCreateInfo, &alloc.m_Buffer, &alloc.m_Allocation, &allocInfo);
5441 
5442     // Make sure it failed.
5443     TEST(res != VK_SUCCESS && !alloc.m_Buffer && !alloc.m_Allocation);
5444 
5445     // 4. Try to find memory type.
5446     {
5447         uint32_t memTypeIndex = UINT_MAX;
5448         res = vmaFindMemoryTypeIndex(localAllocator, UINT32_MAX, &allocCreateInfo, &memTypeIndex);
5449         TEST(res != VK_SUCCESS);
5450     }
5451 
5452     vmaDestroyAllocator(localAllocator);
5453 }
5454 
5455 static void TestBudget()
5456 {
5457     wprintf(L"Testing budget...\n");
5458 
5459     static const VkDeviceSize BUF_SIZE = 10ull * 1024 * 1024;
5460     static const uint32_t BUF_COUNT = 4;
5461 
5462     const VkPhysicalDeviceMemoryProperties* memProps = {};
5463     vmaGetMemoryProperties(g_hAllocator, &memProps);
5464 
5465     for(uint32_t testIndex = 0; testIndex < 2; ++testIndex)
5466     {
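        // Iteration 0 uses dedicated allocations, iteration 1 uses default block allocations,
        // so both paths of the budget accounting are exercised.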
5467         vmaSetCurrentFrameIndex(g_hAllocator, ++g_FrameIndex);
5468 
5469         VmaBudget budgetBeg[VK_MAX_MEMORY_HEAPS] = {};
5470         vmaGetBudget(g_hAllocator, budgetBeg);
5471 
5472         for(uint32_t i = 0; i < memProps->memoryHeapCount; ++i)
5473         {
5474             TEST(budgetBeg[i].budget > 0);
5475             TEST(budgetBeg[i].budget <= memProps->memoryHeaps[i].size);
5476             TEST(budgetBeg[i].allocationBytes <= budgetBeg[i].blockBytes);
5477         }
5478 
5479         VkBufferCreateInfo bufInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
5480         bufInfo.size = BUF_SIZE;
5481         bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
5482 
5483         VmaAllocationCreateInfo allocCreateInfo = {};
5484         allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
5485         if(testIndex == 0)
5486         {
5487             allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
5488         }
5489 
5490         // CREATE BUFFERS
5491         uint32_t heapIndex = 0;
5492         BufferInfo bufInfos[BUF_COUNT] = {};
5493         for(uint32_t bufIndex = 0; bufIndex < BUF_COUNT; ++bufIndex)
5494         {
5495             VmaAllocationInfo allocInfo;
5496             VkResult res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo,
5497                 &bufInfos[bufIndex].Buffer, &bufInfos[bufIndex].Allocation, &allocInfo);
5498             TEST(res == VK_SUCCESS);
5499             if(bufIndex == 0)
5500             {
5501                 heapIndex = MemoryTypeToHeap(allocInfo.memoryType);
5502             }
5503             else
5504             {
5505                 // All buffers need to fall into the same heap.
5506                 TEST(MemoryTypeToHeap(allocInfo.memoryType) == heapIndex);
5507             }
5508         }
5509 
5510         VmaBudget budgetWithBufs[VK_MAX_MEMORY_HEAPS] = {};
5511         vmaGetBudget(g_hAllocator, budgetWithBufs);
5512 
5513         // DESTROY BUFFERS
5514         for(size_t bufIndex = BUF_COUNT; bufIndex--; )
5515         {
5516             vmaDestroyBuffer(g_hAllocator, bufInfos[bufIndex].Buffer, bufInfos[bufIndex].Allocation);
5517         }
5518 
5519         VmaBudget budgetEnd[VK_MAX_MEMORY_HEAPS] = {};
5520         vmaGetBudget(g_hAllocator, budgetEnd);
5521 
5522         // CHECK
5523         for(uint32_t i = 0; i < memProps->memoryHeapCount; ++i)
5524         {
5525             TEST(budgetEnd[i].allocationBytes <= budgetEnd[i].blockBytes);
5526             if(i == heapIndex)
5527             {
5528                 TEST(budgetEnd[i].allocationBytes == budgetBeg[i].allocationBytes);
5529                 TEST(budgetWithBufs[i].allocationBytes == budgetBeg[i].allocationBytes + BUF_SIZE * BUF_COUNT);
5530                 TEST(budgetWithBufs[i].blockBytes >= budgetEnd[i].blockBytes);
5531             }
5532             else
5533             {
5534                 TEST(budgetEnd[i].allocationBytes == budgetBeg[i].allocationBytes &&
5535                     budgetEnd[i].allocationBytes == budgetWithBufs[i].allocationBytes);
5536                 TEST(budgetEnd[i].blockBytes == budgetBeg[i].blockBytes &&
5537                     budgetEnd[i].blockBytes == budgetWithBufs[i].blockBytes);
5538             }
5539         }
5540     }
5541 }
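
// A minimal sketch, not part of the test suite, of how vmaGetBudget could drive allocation
// decisions in an application: before creating a resource of a given size, compare the heap's
// current usage plus that size against the heap's budget. The helper name is hypothetical and
// the check is only a heuristic - the budget can change between the query and the allocation.
static bool HypotheticalFitsInBudget(VkDeviceSize size, uint32_t memTypeIndex)
{
    VmaBudget budget[VK_MAX_MEMORY_HEAPS] = {};
    vmaGetBudget(g_hAllocator, budget);
    const uint32_t heapIndex = MemoryTypeToHeap(memTypeIndex);
    // usage = bytes this process currently uses in the heap (as estimated by VMA),
    // budget = bytes the process can use without oversubscribing, as reported by the driver.
    return budget[heapIndex].usage + size <= budget[heapIndex].budget;
}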
5542 
5543 static void TestAliasing()
5544 {
5545     wprintf(L"Testing aliasing...\n");
5546 
5547     /*
5548     This is just a simple test, more of a code sample demonstrating that memory aliasing is possible.
5549     */
5550 
5551     // A 512x512 texture to be sampled.
5552     VkImageCreateInfo img1CreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
5553     img1CreateInfo.imageType = VK_IMAGE_TYPE_2D;
5554     img1CreateInfo.extent.width = 512;
5555     img1CreateInfo.extent.height = 512;
5556     img1CreateInfo.extent.depth = 1;
5557     img1CreateInfo.mipLevels = 10;
5558     img1CreateInfo.arrayLayers = 1;
5559     img1CreateInfo.format = VK_FORMAT_R8G8B8A8_SRGB;
5560     img1CreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
5561     img1CreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
5562     img1CreateInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
5563     img1CreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
5564 
5565     // A full screen texture to be used as color attachment.
5566     VkImageCreateInfo img2CreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
5567     img2CreateInfo.imageType = VK_IMAGE_TYPE_2D;
5568     img2CreateInfo.extent.width = 1920;
5569     img2CreateInfo.extent.height = 1080;
5570     img2CreateInfo.extent.depth = 1;
5571     img2CreateInfo.mipLevels = 1;
5572     img2CreateInfo.arrayLayers = 1;
5573     img2CreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
5574     img2CreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
5575     img2CreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
5576     img2CreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
5577     img2CreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
5578 
5579     VkImage img1 = VK_NULL_HANDLE;
5580     ERR_GUARD_VULKAN(vkCreateImage(g_hDevice, &img1CreateInfo, g_Allocs, &img1));
5581     VkImage img2 = VK_NULL_HANDLE;
5582     ERR_GUARD_VULKAN(vkCreateImage(g_hDevice, &img2CreateInfo, g_Allocs, &img2));
5583 
5584     VkMemoryRequirements img1MemReq = {};
5585     vkGetImageMemoryRequirements(g_hDevice, img1, &img1MemReq);
5586     VkMemoryRequirements img2MemReq = {};
5587     vkGetImageMemoryRequirements(g_hDevice, img2, &img2MemReq);
5588 
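    // Memory that can back both images must satisfy the stricter requirement of each:
    // at least the larger size and alignment, and a memory type acceptable to both images.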
5589     VkMemoryRequirements finalMemReq = {};
5590     finalMemReq.size = std::max(img1MemReq.size, img2MemReq.size);
5591     finalMemReq.alignment = std::max(img1MemReq.alignment, img2MemReq.alignment);
5592     finalMemReq.memoryTypeBits = img1MemReq.memoryTypeBits & img2MemReq.memoryTypeBits;
5593     if(finalMemReq.memoryTypeBits != 0)
5594     {
5595         wprintf(L"  size: max(%llu, %llu) = %llu\n",
5596             img1MemReq.size, img2MemReq.size, finalMemReq.size);
5597         wprintf(L"  alignment: max(%llu, %llu) = %llu\n",
5598             img1MemReq.alignment, img2MemReq.alignment, finalMemReq.alignment);
5599         wprintf(L"  memoryTypeBits: %u & %u = %u\n",
5600             img1MemReq.memoryTypeBits, img2MemReq.memoryTypeBits, finalMemReq.memoryTypeBits);
5601 
5602         VmaAllocationCreateInfo allocCreateInfo = {};
5603         allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
5604 
5605         VmaAllocation alloc = VK_NULL_HANDLE;
5606         ERR_GUARD_VULKAN(vmaAllocateMemory(g_hAllocator, &finalMemReq, &allocCreateInfo, &alloc, nullptr));
5607 
5608         ERR_GUARD_VULKAN(vmaBindImageMemory(g_hAllocator, alloc, img1));
5609         ERR_GUARD_VULKAN(vmaBindImageMemory(g_hAllocator, alloc, img2));
5610 
5611         // You can use img1, img2 here, but not at the same time!
5612 
5613         vmaFreeMemory(g_hAllocator, alloc);
5614     }
5615     else
5616     {
5617         wprintf(L"  Textures cannot alias!\n");
5618     }
5619 
5620     vkDestroyImage(g_hDevice, img2, g_Allocs);
5621     vkDestroyImage(g_hDevice, img1, g_Allocs);
5622 }
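
// A minimal sketch, not used by the tests, of the synchronization an application would typically
// record when switching the aliased memory from img1 to img2: because aliased contents are
// undefined, the newly used image has to be transitioned from VK_IMAGE_LAYOUT_UNDEFINED, and the
// same barrier orders the accesses to the shared memory. The function name and the chosen access
// masks / stages are illustrative assumptions, not part of the test above.
static void HypotheticalRecordAliasingBarrier(VkCommandBuffer cmdBuf, VkImage aliasedImg)
{
    VkImageMemoryBarrier barrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER };
    barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;            // last assumed use of the previous alias
    barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; // first assumed use of the new alias
    barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;                // aliased contents are undefined
    barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
    barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
    barrier.image = aliasedImg;
    barrier.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
    vkCmdPipelineBarrier(cmdBuf,
        VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
        0, 0, nullptr, 0, nullptr, 1, &barrier);
}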
5623 
5624 static void TestMapping()
5625 {
5626     wprintf(L"Testing mapping...\n");
5627 
5628     VkResult res;
5629     uint32_t memTypeIndex = UINT32_MAX;
5630 
5631     enum TEST
5632     {
5633         TEST_NORMAL,
5634         TEST_POOL,
5635         TEST_DEDICATED,
5636         TEST_COUNT
5637     };
5638     for(uint32_t testIndex = 0; testIndex < TEST_COUNT; ++testIndex)
5639     {
5640         VmaPool pool = nullptr;
5641         if(testIndex == TEST_POOL)
5642         {
5643             TEST(memTypeIndex != UINT32_MAX);
5644             VmaPoolCreateInfo poolInfo = {};
5645             poolInfo.memoryTypeIndex = memTypeIndex;
5646             res = vmaCreatePool(g_hAllocator, &poolInfo, &pool);
5647             TEST(res == VK_SUCCESS);
5648         }
5649 
5650         VkBufferCreateInfo bufInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
5651         bufInfo.size = 0x10000;
5652         bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
5653 
5654         VmaAllocationCreateInfo allocCreateInfo = {};
5655         allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
5656         allocCreateInfo.pool = pool;
5657         if(testIndex == TEST_DEDICATED)
5658             allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
5659 
5660         VmaAllocationInfo allocInfo;
5661 
5662         // Mapped manually
5663 
5664         // Create 2 buffers.
5665         BufferInfo bufferInfos[3];
5666         for(size_t i = 0; i < 2; ++i)
5667         {
5668             res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo,
5669                 &bufferInfos[i].Buffer, &bufferInfos[i].Allocation, &allocInfo);
5670             TEST(res == VK_SUCCESS);
5671             TEST(allocInfo.pMappedData == nullptr);
5672             memTypeIndex = allocInfo.memoryType;
5673         }
5674 
5675         // Map buffer 0.
5676         char* data00 = nullptr;
5677         res = vmaMapMemory(g_hAllocator, bufferInfos[0].Allocation, (void**)&data00);
5678         TEST(res == VK_SUCCESS && data00 != nullptr);
5679         data00[0xFFFF] = data00[0];
5680 
5681         // Map buffer 0 second time.
5682         char* data01 = nullptr;
5683         res = vmaMapMemory(g_hAllocator, bufferInfos[0].Allocation, (void**)&data01);
5684         TEST(res == VK_SUCCESS && data01 == data00);
5685 
5686         // Map buffer 1.
5687         char* data1 = nullptr;
5688         res = vmaMapMemory(g_hAllocator, bufferInfos[1].Allocation, (void**)&data1);
5689         TEST(res == VK_SUCCESS && data1 != nullptr);
5690         TEST(!MemoryRegionsOverlap(data00, (size_t)bufInfo.size, data1, (size_t)bufInfo.size));
5691         data1[0xFFFF] = data1[0];
5692 
5693         // Unmap buffer 0 two times.
5694         vmaUnmapMemory(g_hAllocator, bufferInfos[0].Allocation);
5695         vmaUnmapMemory(g_hAllocator, bufferInfos[0].Allocation);
5696         vmaGetAllocationInfo(g_hAllocator, bufferInfos[0].Allocation, &allocInfo);
5697         TEST(allocInfo.pMappedData == nullptr);
5698 
5699         // Unmap buffer 1.
5700         vmaUnmapMemory(g_hAllocator, bufferInfos[1].Allocation);
5701         vmaGetAllocationInfo(g_hAllocator, bufferInfos[1].Allocation, &allocInfo);
5702         TEST(allocInfo.pMappedData == nullptr);
5703 
5704         // Create 3rd buffer - persistently mapped.
5705         allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT;
5706         res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo,
5707             &bufferInfos[2].Buffer, &bufferInfos[2].Allocation, &allocInfo);
5708         TEST(res == VK_SUCCESS && allocInfo.pMappedData != nullptr);
5709 
5710         // Map buffer 2.
5711         char* data2 = nullptr;
5712         res = vmaMapMemory(g_hAllocator, bufferInfos[2].Allocation, (void**)&data2);
5713         TEST(res == VK_SUCCESS && data2 == allocInfo.pMappedData);
5714         data2[0xFFFF] = data2[0];
5715 
5716         // Unmap buffer 2.
5717         vmaUnmapMemory(g_hAllocator, bufferInfos[2].Allocation);
5718         vmaGetAllocationInfo(g_hAllocator, bufferInfos[2].Allocation, &allocInfo);
5719         TEST(allocInfo.pMappedData == data2);
5720 
5721         // Destroy all buffers.
5722         for(size_t i = 3; i--; )
5723             vmaDestroyBuffer(g_hAllocator, bufferInfos[i].Buffer, bufferInfos[i].Allocation);
5724 
5725         vmaDestroyPool(g_hAllocator, pool);
5726     }
5727 }
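
// A minimal sketch, not part of the test suite, of the persistently mapped pattern exercised
// above: create the buffer with VMA_ALLOCATION_CREATE_MAPPED_BIT, write through
// VmaAllocationInfo::pMappedData without any vmaMapMemory/vmaUnmapMemory calls, and flush in case
// the chosen memory type is not HOST_COHERENT. The function name is hypothetical.
static void HypotheticalWritePersistentlyMapped(const void* srcData, VkDeviceSize size)
{
    VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
    bufCreateInfo.size = size;
    bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;

    VmaAllocationCreateInfo allocCreateInfo = {};
    allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
    allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;

    VkBuffer buf = VK_NULL_HANDLE;
    VmaAllocation alloc = VK_NULL_HANDLE;
    VmaAllocationInfo allocInfo = {};
    VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
    TEST(res == VK_SUCCESS && allocInfo.pMappedData != nullptr);

    // The allocation stays mapped for its whole lifetime, so it can be written directly.
    const char* src = (const char*)srcData;
    char* dst = (char*)allocInfo.pMappedData;
    for(VkDeviceSize i = 0; i < size; ++i)
        dst[i] = src[i];
    vmaFlushAllocation(g_hAllocator, alloc, 0, size); // no-op if the memory type is HOST_COHERENT

    vmaDestroyBuffer(g_hAllocator, buf, alloc);
}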
5728 
5729 // Test CREATE_MAPPED with required DEVICE_LOCAL. There was a bug with it.
5730 static void TestDeviceLocalMapped()
5731 {
5732     VkResult res;
5733 
5734     for(uint32_t testIndex = 0; testIndex < 3; ++testIndex)
5735     {
5736         VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
5737         bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
5738         bufCreateInfo.size = 4096;
5739 
5740         VmaPool pool = VK_NULL_HANDLE;
5741         VmaAllocationCreateInfo allocCreateInfo = {};
5742         allocCreateInfo.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
5743         allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
5744         if(testIndex == 2)
5745         {
5746             VmaPoolCreateInfo poolCreateInfo = {};
5747             res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &poolCreateInfo.memoryTypeIndex);
5748             TEST(res == VK_SUCCESS);
5749             res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
5750             TEST(res == VK_SUCCESS);
5751             allocCreateInfo.pool = pool;
5752         }
5753         else if(testIndex == 1)
5754         {
5755             allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT;
5756         }
5757 
5758         VkBuffer buf = VK_NULL_HANDLE;
5759         VmaAllocation alloc = VK_NULL_HANDLE;
5760         VmaAllocationInfo allocInfo = {};
5761         res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
5762         TEST(res == VK_SUCCESS && alloc);
5763 
5764         VkMemoryPropertyFlags memTypeFlags = 0;
5765         vmaGetMemoryTypeProperties(g_hAllocator, allocInfo.memoryType, &memTypeFlags);
5766         const bool shouldBeMapped = (memTypeFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0;
5767         TEST((allocInfo.pMappedData != nullptr) == shouldBeMapped);
5768 
5769         vmaDestroyBuffer(g_hAllocator, buf, alloc);
5770         vmaDestroyPool(g_hAllocator, pool);
5771     }
5772 }
5773 
5774 static void TestMappingMultithreaded()
5775 {
5776     wprintf(L"Testing mapping multithreaded...\n");
5777 
5778     static const uint32_t threadCount = 16;
5779     static const uint32_t bufferCount = 1024;
5780     static const uint32_t threadBufferCount = bufferCount / threadCount;
5781 
5782     VkResult res;
5783     volatile uint32_t memTypeIndex = UINT32_MAX;
5784 
5785     enum TEST
5786     {
5787         TEST_NORMAL,
5788         TEST_POOL,
5789         TEST_DEDICATED,
5790         TEST_COUNT
5791     };
5792     for(uint32_t testIndex = 0; testIndex < TEST_COUNT; ++testIndex)
5793     {
5794         VmaPool pool = nullptr;
5795         if(testIndex == TEST_POOL)
5796         {
5797             TEST(memTypeIndex != UINT32_MAX);
5798             VmaPoolCreateInfo poolInfo = {};
5799             poolInfo.memoryTypeIndex = memTypeIndex;
5800             res = vmaCreatePool(g_hAllocator, &poolInfo, &pool);
5801             TEST(res == VK_SUCCESS);
5802         }
5803 
5804         VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
5805         bufCreateInfo.size = 0x10000;
5806         bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
5807 
5808         VmaAllocationCreateInfo allocCreateInfo = {};
5809         allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
5810         allocCreateInfo.pool = pool;
5811         if(testIndex == TEST_DEDICATED)
5812             allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
5813 
5814         std::thread threads[threadCount];
5815         for(uint32_t threadIndex = 0; threadIndex < threadCount; ++threadIndex)
5816         {
5817             threads[threadIndex] = std::thread([=, &memTypeIndex](){
5818                 // ======== THREAD FUNCTION ========
5819 
5820                 RandomNumberGenerator rand{threadIndex};
5821 
5822                 enum class MODE
5823                 {
5824                     // Don't map this buffer at all.
5825                     DONT_MAP,
5826                     // Map and quickly unmap.
5827                     MAP_FOR_MOMENT,
5828                     // Map and unmap before destruction.
5829                     MAP_FOR_LONGER,
5830                     // Map two times. Quickly unmap, second unmap before destruction.
5831                     MAP_TWO_TIMES,
5832                     // Create this buffer as persistently mapped.
5833                     PERSISTENTLY_MAPPED,
5834                     COUNT
5835                 };
5836                 std::vector<BufferInfo> bufInfos{threadBufferCount};
5837                 std::vector<MODE> bufModes{threadBufferCount};
5838 
5839                 for(uint32_t bufferIndex = 0; bufferIndex < threadBufferCount; ++bufferIndex)
5840                 {
5841                     BufferInfo& bufInfo = bufInfos[bufferIndex];
5842                     const MODE mode = (MODE)(rand.Generate() % (uint32_t)MODE::COUNT);
5843                     bufModes[bufferIndex] = mode;
5844 
5845                     VmaAllocationCreateInfo localAllocCreateInfo = allocCreateInfo;
5846                     if(mode == MODE::PERSISTENTLY_MAPPED)
5847                         localAllocCreateInfo.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT;
5848 
5849                     VmaAllocationInfo allocInfo;
5850                     VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &localAllocCreateInfo,
5851                         &bufInfo.Buffer, &bufInfo.Allocation, &allocInfo);
5852                     TEST(res == VK_SUCCESS);
5853 
5854                     if(memTypeIndex == UINT32_MAX)
5855                         memTypeIndex = allocInfo.memoryType;
5856 
5857                     char* data = nullptr;
5858 
5859                     if(mode == MODE::PERSISTENTLY_MAPPED)
5860                     {
5861                         data = (char*)allocInfo.pMappedData;
5862                         TEST(data != nullptr);
5863                     }
5864                     else if(mode == MODE::MAP_FOR_MOMENT || mode == MODE::MAP_FOR_LONGER ||
5865                         mode == MODE::MAP_TWO_TIMES)
5866                     {
5867                         TEST(data == nullptr);
5868                         res = vmaMapMemory(g_hAllocator, bufInfo.Allocation, (void**)&data);
5869                         TEST(res == VK_SUCCESS && data != nullptr);
5870 
5871                         if(mode == MODE::MAP_TWO_TIMES)
5872                         {
5873                             char* data2 = nullptr;
5874                             res = vmaMapMemory(g_hAllocator, bufInfo.Allocation, (void**)&data2);
5875                             TEST(res == VK_SUCCESS && data2 == data);
5876                         }
5877                     }
5878                     else if(mode == MODE::DONT_MAP)
5879                     {
5880                         TEST(allocInfo.pMappedData == nullptr);
5881                     }
5882                     else
5883                         TEST(0);
5884 
5885                     // Make sure that reading from and writing to the beginning and end of the mapped memory doesn't crash.
5886                     if(data)
5887                         data[0xFFFF] = data[0];
5888 
5889                     if(mode == MODE::MAP_FOR_MOMENT || mode == MODE::MAP_TWO_TIMES)
5890                     {
5891                         vmaUnmapMemory(g_hAllocator, bufInfo.Allocation);
5892 
5893                         VmaAllocationInfo allocInfo;
5894                         vmaGetAllocationInfo(g_hAllocator, bufInfo.Allocation, &allocInfo);
5895                         if(mode == MODE::MAP_FOR_MOMENT)
5896                             TEST(allocInfo.pMappedData == nullptr);
5897                         else
5898                             TEST(allocInfo.pMappedData == data);
5899                     }
5900 
5901                     switch(rand.Generate() % 3)
5902                     {
5903                     case 0: Sleep(0); break; // Yield.
5904                     case 1: Sleep(10); break; // 10 ms
5905                     // default: No sleep.
5906                     }
5907 
5908                     // Make sure that reading from and writing to the beginning and end of the mapped memory doesn't crash.
5909                     if(data)
5910                         data[0xFFFF] = data[0];
5911                 }
5912 
5913                 for(size_t bufferIndex = threadBufferCount; bufferIndex--; )
5914                 {
5915                     if(bufModes[bufferIndex] == MODE::MAP_FOR_LONGER ||
5916                         bufModes[bufferIndex] == MODE::MAP_TWO_TIMES)
5917                     {
5918                         vmaUnmapMemory(g_hAllocator, bufInfos[bufferIndex].Allocation);
5919 
5920                         VmaAllocationInfo allocInfo;
5921                         vmaGetAllocationInfo(g_hAllocator, bufInfos[bufferIndex].Allocation, &allocInfo);
5922                         TEST(allocInfo.pMappedData == nullptr);
5923                     }
5924 
5925                     vmaDestroyBuffer(g_hAllocator, bufInfos[bufferIndex].Buffer, bufInfos[bufferIndex].Allocation);
5926                 }
5927             });
5928         }
5929 
5930         for(uint32_t threadIndex = 0; threadIndex < threadCount; ++threadIndex)
5931             threads[threadIndex].join();
5932 
5933         vmaDestroyPool(g_hAllocator, pool);
5934     }
5935 }
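// Note: concurrent vmaMapMemory/vmaUnmapMemory calls from the worker threads above are safe
// because VMA reference-counts mappings per allocation and per memory block internally, so a
// VkDeviceMemory block shared by several allocations is never mapped more than once at a time.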
5936 
5937 static void WriteMainTestResultHeader(FILE* file)
5938 {
5939     fprintf(file,
5940         "Code,Time,"
5941         "Threads,Buffers and images,Sizes,Operations,Allocation strategy,Free order,"
5942         "Total Time (us),"
5943         "Allocation Time Min (us),"
5944         "Allocation Time Avg (us),"
5945         "Allocation Time Max (us),"
5946         "Deallocation Time Min (us),"
5947         "Deallocation Time Avg (us),"
5948         "Deallocation Time Max (us),"
5949         "Total Memory Allocated (B),"
5950         "Free Range Size Avg (B),"
5951         "Free Range Size Max (B)\n");
5952 }
5953 
5954 static void WriteMainTestResult(
5955     FILE* file,
5956     const char* codeDescription,
5957     const char* testDescription,
5958     const Config& config, const Result& result)
5959 {
5960     float totalTimeSeconds = ToFloatSeconds(result.TotalTime);
5961     float allocationTimeMinSeconds = ToFloatSeconds(result.AllocationTimeMin);
5962     float allocationTimeAvgSeconds = ToFloatSeconds(result.AllocationTimeAvg);
5963     float allocationTimeMaxSeconds = ToFloatSeconds(result.AllocationTimeMax);
5964     float deallocationTimeMinSeconds = ToFloatSeconds(result.DeallocationTimeMin);
5965     float deallocationTimeAvgSeconds = ToFloatSeconds(result.DeallocationTimeAvg);
5966     float deallocationTimeMaxSeconds = ToFloatSeconds(result.DeallocationTimeMax);
5967 
5968     std::string currTime;
5969     CurrentTimeToStr(currTime);
5970 
5971     fprintf(file,
5972         "%s,%s,%s,"
5973         "%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%I64u,%I64u,%I64u\n",
5974         codeDescription,
5975         currTime.c_str(),
5976         testDescription,
5977         totalTimeSeconds * 1e6f,
5978         allocationTimeMinSeconds * 1e6f,
5979         allocationTimeAvgSeconds * 1e6f,
5980         allocationTimeMaxSeconds * 1e6f,
5981         deallocationTimeMinSeconds * 1e6f,
5982         deallocationTimeAvgSeconds * 1e6f,
5983         deallocationTimeMaxSeconds * 1e6f,
5984         result.TotalMemoryAllocated,
5985         result.FreeRangeSizeAvg,
5986         result.FreeRangeSizeMax);
5987 }
5988 
5989 static void WritePoolTestResultHeader(FILE* file)
5990 {
5991     fprintf(file,
5992         "Code,Test,Time,"
5993         "Config,"
5994         "Total Time (us),"
5995         "Allocation Time Min (us),"
5996         "Allocation Time Avg (us),"
5997         "Allocation Time Max (us),"
5998         "Deallocation Time Min (us),"
5999         "Deallocation Time Avg (us),"
6000         "Deallocation Time Max (us),"
6001         "Lost Allocation Count,"
6002         "Lost Allocation Total Size (B),"
6003         "Failed Allocation Count,"
6004         "Failed Allocation Total Size (B)\n");
6005 }
6006 
6007 static void WritePoolTestResult(
6008     FILE* file,
6009     const char* codeDescription,
6010     const char* testDescription,
6011     const PoolTestConfig& config,
6012     const PoolTestResult& result)
6013 {
6014     float totalTimeSeconds = ToFloatSeconds(result.TotalTime);
6015     float allocationTimeMinSeconds = ToFloatSeconds(result.AllocationTimeMin);
6016     float allocationTimeAvgSeconds = ToFloatSeconds(result.AllocationTimeAvg);
6017     float allocationTimeMaxSeconds = ToFloatSeconds(result.AllocationTimeMax);
6018     float deallocationTimeMinSeconds = ToFloatSeconds(result.DeallocationTimeMin);
6019     float deallocationTimeAvgSeconds = ToFloatSeconds(result.DeallocationTimeAvg);
6020     float deallocationTimeMaxSeconds = ToFloatSeconds(result.DeallocationTimeMax);
6021 
6022     std::string currTime;
6023     CurrentTimeToStr(currTime);
6024 
6025     fprintf(file,
6026         "%s,%s,%s,"
6027         "ThreadCount=%u PoolSize=%llu FrameCount=%u TotalItemCount=%u UsedItemCount=%u...%u ItemsToMakeUnusedPercent=%u,"
6028         "%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%I64u,%I64u,%I64u,%I64u\n",
6029         // General
6030         codeDescription,
6031         testDescription,
6032         currTime.c_str(),
6033         // Config
6034         config.ThreadCount,
6035         (unsigned long long)config.PoolSize,
6036         config.FrameCount,
6037         config.TotalItemCount,
6038         config.UsedItemCountMin,
6039         config.UsedItemCountMax,
6040         config.ItemsToMakeUnusedPercent,
6041         // Results
6042         totalTimeSeconds * 1e6f,
6043         allocationTimeMinSeconds * 1e6f,
6044         allocationTimeAvgSeconds * 1e6f,
6045         allocationTimeMaxSeconds * 1e6f,
6046         deallocationTimeMinSeconds * 1e6f,
6047         deallocationTimeAvgSeconds * 1e6f,
6048         deallocationTimeMaxSeconds * 1e6f,
6049         result.LostAllocationCount,
6050         result.LostAllocationTotalSize,
6051         result.FailedAllocationCount,
6052         result.FailedAllocationTotalSize);
6053 }
6054 
6055 static void PerformCustomMainTest(FILE* file)
6056 {
6057     Config config{};
6058     config.RandSeed = 65735476;
6059     //config.MaxBytesToAllocate = 4ull * 1024 * 1024; // 4 MB
6060     config.MaxBytesToAllocate = 4ull * 1024 * 1024 * 1024; // 4 GB
6061     config.MemUsageProbability[0] = 1; // VMA_MEMORY_USAGE_GPU_ONLY
6062     config.FreeOrder = FREE_ORDER::FORWARD;
6063     config.ThreadCount = 16;
6064     config.ThreadsUsingCommonAllocationsProbabilityPercent = 50;
6065     config.AllocationStrategy = 0;
6066 
6067     // Buffers
6068     //config.AllocationSizes.push_back({4, 16, 1024});
6069     config.AllocationSizes.push_back({4, 0x10000, 0xA00000}); // 64 KB ... 10 MB
6070 
6071     // Images
6072     //config.AllocationSizes.push_back({4, 0, 0, 4, 32});
6073     //config.AllocationSizes.push_back({4, 0, 0, 256, 2048});
6074 
6075     config.BeginBytesToAllocate = config.MaxBytesToAllocate * 5 / 100;
6076     config.AdditionalOperationCount = 1024;
6077 
6078     Result result{};
6079     VkResult res = MainTest(result, config);
6080     TEST(res == VK_SUCCESS);
6081     WriteMainTestResult(file, "Foo", "CustomTest", config, result);
6082 }
6083 
6084 static void PerformCustomPoolTest(FILE* file)
6085 {
6086     PoolTestConfig config;
6087     config.PoolSize = 100 * 1024 * 1024;
6088     config.RandSeed = 2345764;
6089     config.ThreadCount = 1;
6090     config.FrameCount = 200;
6091     config.ItemsToMakeUnusedPercent = 2;
6092 
6093     AllocationSize allocSize = {};
6094     allocSize.BufferSizeMin = 1024;
6095     allocSize.BufferSizeMax = 1024 * 1024;
6096     allocSize.Probability = 1;
6097     config.AllocationSizes.push_back(allocSize);
6098 
6099     allocSize.BufferSizeMin = 0;
6100     allocSize.BufferSizeMax = 0;
6101     allocSize.ImageSizeMin = 128;
6102     allocSize.ImageSizeMax = 1024;
6103     allocSize.Probability = 1;
6104     config.AllocationSizes.push_back(allocSize);
6105 
6106     config.PoolSize = config.CalcAvgResourceSize() * 200;
6107     config.UsedItemCountMax = 160;
6108     config.TotalItemCount = config.UsedItemCountMax * 10;
6109     config.UsedItemCountMin = config.UsedItemCountMax * 80 / 100;
6110 
6111     PoolTestResult result = {};
6112     TestPool_Benchmark(result, config);
6113 
6114     WritePoolTestResult(file, "Code desc", "Test desc", config, result);
6115 }
6116 
6117 static void PerformMainTests(FILE* file)
6118 {
6119     wprintf(L"MAIN TESTS:\n");
6120 
6121     uint32_t repeatCount = 1;
6122     if(ConfigType >= CONFIG_TYPE_MAXIMUM) repeatCount = 3;
6123 
6124     Config config{};
6125     config.RandSeed = 65735476;
6126     config.MemUsageProbability[0] = 1; // VMA_MEMORY_USAGE_GPU_ONLY
6127     config.FreeOrder = FREE_ORDER::FORWARD;
6128 
6129     size_t threadCountCount = 1;
6130     switch(ConfigType)
6131     {
6132     case CONFIG_TYPE_MINIMUM: threadCountCount = 1; break;
6133     case CONFIG_TYPE_SMALL:   threadCountCount = 2; break;
6134     case CONFIG_TYPE_AVERAGE: threadCountCount = 3; break;
6135     case CONFIG_TYPE_LARGE:   threadCountCount = 5; break;
6136     case CONFIG_TYPE_MAXIMUM: threadCountCount = 7; break;
6137     default: assert(0);
6138     }
6139 
6140     const size_t strategyCount = GetAllocationStrategyCount();
6141 
6142     for(size_t threadCountIndex = 0; threadCountIndex < threadCountCount; ++threadCountIndex)
6143     {
6144         std::string desc1;
6145 
6146         switch(threadCountIndex)
6147         {
6148         case 0:
6149             desc1 += "1_thread";
6150             config.ThreadCount = 1;
6151             config.ThreadsUsingCommonAllocationsProbabilityPercent = 0;
6152             break;
6153         case 1:
6154             desc1 += "16_threads+0%_common";
6155             config.ThreadCount = 16;
6156             config.ThreadsUsingCommonAllocationsProbabilityPercent = 0;
6157             break;
6158         case 2:
6159             desc1 += "16_threads+50%_common";
6160             config.ThreadCount = 16;
6161             config.ThreadsUsingCommonAllocationsProbabilityPercent = 50;
6162             break;
6163         case 3:
6164             desc1 += "16_threads+100%_common";
6165             config.ThreadCount = 16;
6166             config.ThreadsUsingCommonAllocationsProbabilityPercent = 100;
6167             break;
6168         case 4:
6169             desc1 += "2_threads+0%_common";
6170             config.ThreadCount = 2;
6171             config.ThreadsUsingCommonAllocationsProbabilityPercent = 0;
6172             break;
6173         case 5:
6174             desc1 += "2_threads+50%_common";
6175             config.ThreadCount = 2;
6176             config.ThreadsUsingCommonAllocationsProbabilityPercent = 50;
6177             break;
6178         case 6:
6179             desc1 += "2_threads+100%_common";
6180             config.ThreadCount = 2;
6181             config.ThreadsUsingCommonAllocationsProbabilityPercent = 100;
6182             break;
6183         default:
6184             assert(0);
6185         }
6186 
6187         // 0 = buffers, 1 = images, 2 = buffers and images
6188         size_t buffersVsImagesCount = 2;
6189         if(ConfigType >= CONFIG_TYPE_LARGE) ++buffersVsImagesCount;
6190         for(size_t buffersVsImagesIndex = 0; buffersVsImagesIndex < buffersVsImagesCount; ++buffersVsImagesIndex)
6191         {
6192             std::string desc2 = desc1;
6193             switch(buffersVsImagesIndex)
6194             {
6195             case 0: desc2 += ",Buffers"; break;
6196             case 1: desc2 += ",Images"; break;
6197             case 2: desc2 += ",Buffers+Images"; break;
6198             default: assert(0);
6199             }
6200 
6201             // 0 = small, 1 = large, 2 = small and large
6202             size_t smallVsLargeCount = 2;
6203             if(ConfigType >= CONFIG_TYPE_LARGE) ++smallVsLargeCount;
6204             for(size_t smallVsLargeIndex = 0; smallVsLargeIndex < smallVsLargeCount; ++smallVsLargeIndex)
6205             {
6206                 std::string desc3 = desc2;
6207                 switch(smallVsLargeIndex)
6208                 {
6209                 case 0: desc3 += ",Small"; break;
6210                 case 1: desc3 += ",Large"; break;
6211                 case 2: desc3 += ",Small+Large"; break;
6212                 default: assert(0);
6213                 }
6214 
6215                 if(smallVsLargeIndex == 1 || smallVsLargeIndex == 2)
6216                     config.MaxBytesToAllocate = 4ull * 1024 * 1024 * 1024; // 4 GB
6217                 else
6218                     config.MaxBytesToAllocate = 4ull * 1024 * 1024;
6219 
6220                 // 0 = varying sizes min...max, 1 = set of constant sizes
6221                 size_t constantSizesCount = 1;
6222                 if(ConfigType >= CONFIG_TYPE_SMALL) ++constantSizesCount;
6223                 for(size_t constantSizesIndex = 0; constantSizesIndex < constantSizesCount; ++constantSizesIndex)
6224                 {
6225                     std::string desc4 = desc3;
6226                     switch(constantSizesIndex)
6227                     {
6228                     case 0: desc4 += " Varying_sizes"; break;
6229                     case 1: desc4 += " Constant_sizes"; break;
6230                     default: assert(0);
6231                     }
6232 
6233                     config.AllocationSizes.clear();
6234                     // Buffers present
6235                     if(buffersVsImagesIndex == 0 || buffersVsImagesIndex == 2)
6236                     {
6237                         // Small
6238                         if(smallVsLargeIndex == 0 || smallVsLargeIndex == 2)
6239                         {
6240                             // Varying size
6241                             if(constantSizesIndex == 0)
6242                                 config.AllocationSizes.push_back({4, 16, 1024});
6243                             // Constant sizes
6244                             else
6245                             {
6246                                 config.AllocationSizes.push_back({1, 16, 16});
6247                                 config.AllocationSizes.push_back({1, 64, 64});
6248                                 config.AllocationSizes.push_back({1, 256, 256});
6249                                 config.AllocationSizes.push_back({1, 1024, 1024});
6250                             }
6251                         }
6252                         // Large
6253                         if(smallVsLargeIndex == 1 || smallVsLargeIndex == 2)
6254                         {
6255                             // Varying size
6256                             if(constantSizesIndex == 0)
6257                                 config.AllocationSizes.push_back({4, 0x10000, 0xA00000}); // 64 KB ... 10 MB
6258                             // Constant sizes
6259                             else
6260                             {
6261                                 config.AllocationSizes.push_back({1, 0x10000, 0x10000});
6262                                 config.AllocationSizes.push_back({1, 0x80000, 0x80000});
6263                                 config.AllocationSizes.push_back({1, 0x200000, 0x200000});
6264                                 config.AllocationSizes.push_back({1, 0xA00000, 0xA00000});
6265                             }
6266                         }
6267                     }
6268                     // Images present
6269                     if(buffersVsImagesIndex == 1 || buffersVsImagesIndex == 2)
6270                     {
6271                         // Small
6272                         if(smallVsLargeIndex == 0 || smallVsLargeIndex == 2)
6273                         {
6274                             // Varying size
6275                             if(constantSizesIndex == 0)
6276                                 config.AllocationSizes.push_back({4, 0, 0, 4, 32});
6277                             // Constant sizes
6278                             else
6279                             {
6280                                 config.AllocationSizes.push_back({1, 0, 0,  4,  4});
6281                                 config.AllocationSizes.push_back({1, 0, 0,  8,  8});
6282                                 config.AllocationSizes.push_back({1, 0, 0, 16, 16});
6283                                 config.AllocationSizes.push_back({1, 0, 0, 32, 32});
6284                             }
6285                         }
6286                         // Large
6287                         if(smallVsLargeIndex == 1 || smallVsLargeIndex == 2)
6288                         {
6289                             // Varying size
6290                             if(constantSizesIndex == 0)
6291                                 config.AllocationSizes.push_back({4, 0, 0, 256, 2048});
6292                             // Constant sizes
6293                             else
6294                             {
6295                                 config.AllocationSizes.push_back({1, 0, 0,  256,  256});
6296                                 config.AllocationSizes.push_back({1, 0, 0,  512,  512});
6297                                 config.AllocationSizes.push_back({1, 0, 0, 1024, 1024});
6298                                 config.AllocationSizes.push_back({1, 0, 0, 2048, 2048});
6299                             }
6300                         }
6301                     }
6302 
6303                     // 0 = allocate 100% up front with no additional operations; 1 = 50%, 2 = 5%, 3 = 95%, each followed by many additional operations.
6304                     size_t beginBytesToAllocateCount = 1;
6305                     if(ConfigType >= CONFIG_TYPE_SMALL) ++beginBytesToAllocateCount;
6306                     if(ConfigType >= CONFIG_TYPE_AVERAGE) ++beginBytesToAllocateCount;
6307                     if(ConfigType >= CONFIG_TYPE_LARGE) ++beginBytesToAllocateCount;
6308                     for(size_t beginBytesToAllocateIndex = 0; beginBytesToAllocateIndex < beginBytesToAllocateCount; ++beginBytesToAllocateIndex)
6309                     {
6310                         std::string desc5 = desc4;
6311 
6312                         switch(beginBytesToAllocateIndex)
6313                         {
6314                         case 0:
6315                             desc5 += ",Allocate_100%";
6316                             config.BeginBytesToAllocate = config.MaxBytesToAllocate;
6317                             config.AdditionalOperationCount = 0;
6318                             break;
6319                         case 1:
6320                             desc5 += ",Allocate_50%+Operations";
6321                             config.BeginBytesToAllocate = config.MaxBytesToAllocate * 50 / 100;
6322                             config.AdditionalOperationCount = 1024;
6323                             break;
6324                         case 2:
6325                             desc5 += ",Allocate_5%+Operations";
6326                             config.BeginBytesToAllocate = config.MaxBytesToAllocate *  5 / 100;
6327                             config.AdditionalOperationCount = 1024;
6328                             break;
6329                         case 3:
6330                             desc5 += ",Allocate_95%+Operations";
6331                             config.BeginBytesToAllocate = config.MaxBytesToAllocate * 95 / 100;
6332                             config.AdditionalOperationCount = 1024;
6333                             break;
6334                         default:
6335                             assert(0);
6336                         }
6337 
6338                         for(size_t strategyIndex = 0; strategyIndex < strategyCount; ++strategyIndex)
6339                         {
6340                             std::string desc6 = desc5;
6341                             switch(strategyIndex)
6342                             {
6343                             case 0:
6344                                 desc6 += ",BestFit";
6345                                 config.AllocationStrategy = VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT;
6346                                 break;
6347                             case 1:
6348                                 desc6 += ",WorstFit";
6349                                 config.AllocationStrategy = VMA_ALLOCATION_CREATE_STRATEGY_WORST_FIT_BIT;
6350                                 break;
6351                             case 2:
6352                                 desc6 += ",FirstFit";
6353                                 config.AllocationStrategy = VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT;
6354                                 break;
6355                             default:
6356                                 assert(0);
6357                             }
6358 
6359                             desc6 += ',';
6360                             desc6 += FREE_ORDER_NAMES[(uint32_t)config.FreeOrder];
6361 
6362                             const char* testDescription = desc6.c_str();
6363 
6364                             for(size_t repeat = 0; repeat < repeatCount; ++repeat)
6365                             {
6366                                 printf("%s #%u\n", testDescription, (uint32_t)repeat);
6367 
6368                                 Result result{};
6369                                 VkResult res = MainTest(result, config);
6370                                 TEST(res == VK_SUCCESS);
6371                                 if(file)
6372                                 {
6373                                     WriteMainTestResult(file, CODE_DESCRIPTION, testDescription, config, result);
6374                                 }
6375                             }
6376                         }
6377                     }
6378                 }
6379             }
6380         }
6381     }
6382 }
6383 
6384 static void PerformPoolTests(FILE* file)
6385 {
6386     wprintf(L"POOL TESTS:\n");
6387 
6388     const size_t AVG_RESOURCES_PER_POOL = 300;
6389 
6390     uint32_t repeatCount = 1;
6391     if(ConfigType >= CONFIG_TYPE_MAXIMUM) repeatCount = 3;
6392 
6393     PoolTestConfig config{};
6394     config.RandSeed = 2346343;
6395     config.FrameCount = 200;
6396     config.ItemsToMakeUnusedPercent = 2;
6397 
6398     size_t threadCountCount = 1;
6399     switch(ConfigType)
6400     {
6401     case CONFIG_TYPE_MINIMUM: threadCountCount = 1; break;
6402     case CONFIG_TYPE_SMALL:   threadCountCount = 2; break;
6403     case CONFIG_TYPE_AVERAGE: threadCountCount = 2; break;
6404     case CONFIG_TYPE_LARGE:   threadCountCount = 3; break;
6405     case CONFIG_TYPE_MAXIMUM: threadCountCount = 3; break;
6406     default: assert(0);
6407     }
6408     for(size_t threadCountIndex = 0; threadCountIndex < threadCountCount; ++threadCountIndex)
6409     {
6410         std::string desc1;
6411 
6412         switch(threadCountIndex)
6413         {
6414         case 0:
6415             desc1 += "1_thread";
6416             config.ThreadCount = 1;
6417             break;
6418         case 1:
6419             desc1 += "16_threads";
6420             config.ThreadCount = 16;
6421             break;
6422         case 2:
6423             desc1 += "2_threads";
6424             config.ThreadCount = 2;
6425             break;
6426         default:
6427             assert(0);
6428         }
6429 
6430         // 0 = buffers, 1 = images, 2 = buffers and images
6431         size_t buffersVsImagesCount = 2;
6432         if(ConfigType >= CONFIG_TYPE_LARGE) ++buffersVsImagesCount;
6433         for(size_t buffersVsImagesIndex = 0; buffersVsImagesIndex < buffersVsImagesCount; ++buffersVsImagesIndex)
6434         {
6435             std::string desc2 = desc1;
6436             switch(buffersVsImagesIndex)
6437             {
6438             case 0: desc2 += " Buffers"; break;
6439             case 1: desc2 += " Images"; break;
6440             case 2: desc2 += " Buffers+Images"; break;
6441             default: assert(0);
6442             }
6443 
6444             // 0 = small, 1 = large, 2 = small and large
6445             size_t smallVsLargeCount = 2;
6446             if(ConfigType >= CONFIG_TYPE_LARGE) ++smallVsLargeCount;
6447             for(size_t smallVsLargeIndex = 0; smallVsLargeIndex < smallVsLargeCount; ++smallVsLargeIndex)
6448             {
6449                 std::string desc3 = desc2;
6450                 switch(smallVsLargeIndex)
6451                 {
6452                 case 0: desc3 += " Small"; break;
6453                 case 1: desc3 += " Large"; break;
6454                 case 2: desc3 += " Small+Large"; break;
6455                 default: assert(0);
6456                 }
6457 
6458                 if(smallVsLargeIndex == 1 || smallVsLargeIndex == 2)
6459                     config.PoolSize = 6ull * 1024 * 1024 * 1024; // 6 GB
6460                 else
6461                     config.PoolSize = 4ull * 1024 * 1024;
6462 
6463                 // 0 = varying sizes min...max, 1 = set of constant sizes
6464                 size_t constantSizesCount = 1;
6465                 if(ConfigType >= CONFIG_TYPE_SMALL) ++constantSizesCount;
6466                 for(size_t constantSizesIndex = 0; constantSizesIndex < constantSizesCount; ++constantSizesIndex)
6467                 {
6468                     std::string desc4 = desc3;
6469                     switch(constantSizesIndex)
6470                     {
6471                     case 0: desc4 += " Varying_sizes"; break;
6472                     case 1: desc4 += " Constant_sizes"; break;
6473                     default: assert(0);
6474                     }
6475 
6476                     config.AllocationSizes.clear();
6477                     // Buffers present
6478                     if(buffersVsImagesIndex == 0 || buffersVsImagesIndex == 2)
6479                     {
6480                         // Small
6481                         if(smallVsLargeIndex == 0 || smallVsLargeIndex == 2)
6482                         {
6483                             // Varying size
6484                             if(constantSizesIndex == 0)
6485                                 config.AllocationSizes.push_back({4, 16, 1024});
6486                             // Constant sizes
6487                             else
6488                             {
6489                                 config.AllocationSizes.push_back({1, 16, 16});
6490                                 config.AllocationSizes.push_back({1, 64, 64});
6491                                 config.AllocationSizes.push_back({1, 256, 256});
6492                                 config.AllocationSizes.push_back({1, 1024, 1024});
6493                             }
6494                         }
6495                         // Large
6496                         if(smallVsLargeIndex == 1 || smallVsLargeIndex == 2)
6497                         {
6498                             // Varying size
6499                             if(constantSizesIndex == 0)
6500                                 config.AllocationSizes.push_back({4, 0x10000, 0xA00000}); // 64 KB ... 10 MB
6501                             // Constant sizes
6502                             else
6503                             {
6504                                 config.AllocationSizes.push_back({1, 0x10000, 0x10000});
6505                                 config.AllocationSizes.push_back({1, 0x80000, 0x80000});
6506                                 config.AllocationSizes.push_back({1, 0x200000, 0x200000});
6507                                 config.AllocationSizes.push_back({1, 0xA00000, 0xA00000});
6508                             }
6509                         }
6510                     }
6511                     // Images present
6512                     if(buffersVsImagesIndex == 1 || buffersVsImagesIndex == 2)
6513                     {
6514                         // Small
6515                         if(smallVsLargeIndex == 0 || smallVsLargeIndex == 2)
6516                         {
6517                             // Varying size
6518                             if(constantSizesIndex == 0)
6519                                 config.AllocationSizes.push_back({4, 0, 0, 4, 32});
6520                             // Constant sizes
6521                             else
6522                             {
6523                                 config.AllocationSizes.push_back({1, 0, 0,  4,  4});
6524                                 config.AllocationSizes.push_back({1, 0, 0,  8,  8});
6525                                 config.AllocationSizes.push_back({1, 0, 0, 16, 16});
6526                                 config.AllocationSizes.push_back({1, 0, 0, 32, 32});
6527                             }
6528                         }
6529                         // Large
6530                         if(smallVsLargeIndex == 1 || smallVsLargeIndex == 2)
6531                         {
6532                             // Varying size
6533                             if(constantSizesIndex == 0)
6534                                 config.AllocationSizes.push_back({4, 0, 0, 256, 2048});
6535                             // Constant sizes
6536                             else
6537                             {
6538                                 config.AllocationSizes.push_back({1, 0, 0,  256,  256});
6539                                 config.AllocationSizes.push_back({1, 0, 0,  512,  512});
6540                                 config.AllocationSizes.push_back({1, 0, 0, 1024, 1024});
6541                                 config.AllocationSizes.push_back({1, 0, 0, 2048, 2048});
6542                             }
6543                         }
6544                     }
6545 
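                    // Size each pool to hold roughly AVG_RESOURCES_PER_POOL resources of average size.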
6546                     const VkDeviceSize avgResourceSize = config.CalcAvgResourceSize();
6547                     config.PoolSize = avgResourceSize * AVG_RESOURCES_PER_POOL;
6548 
6549                     // 0 = 66%, 1 = 133%, 2 = 100%, 3 = 33%, 4 = 166%
6550                     size_t subscriptionModeCount = 0;
6551                     switch(ConfigType)
6552                     {
6553                     case CONFIG_TYPE_MINIMUM: subscriptionModeCount = 2; break;
6554                     case CONFIG_TYPE_SMALL:   subscriptionModeCount = 2; break;
6555                     case CONFIG_TYPE_AVERAGE: subscriptionModeCount = 3; break;
6556                     case CONFIG_TYPE_LARGE:   subscriptionModeCount = 5; break;
6557                     case CONFIG_TYPE_MAXIMUM: subscriptionModeCount = 5; break;
6558                     default: assert(0);
6559                     }
6560                     for(size_t subscriptionModeIndex = 0; subscriptionModeIndex < subscriptionModeCount; ++subscriptionModeIndex)
6561                     {
6562                         std::string desc5 = desc4;
6563 
6564                         switch(subscriptionModeIndex)
6565                         {
6566                         case 0:
6567                             desc5 += " Subscription_66%";
6568                             config.UsedItemCountMax = AVG_RESOURCES_PER_POOL * 66 / 100;
6569                             break;
6570                         case 1:
6571                             desc5 += " Subscription_133%";
6572                             config.UsedItemCountMax = AVG_RESOURCES_PER_POOL * 133 / 100;
6573                             break;
6574                         case 2:
6575                             desc5 += " Subscription_100%";
6576                             config.UsedItemCountMax = AVG_RESOURCES_PER_POOL;
6577                             break;
6578                         case 3:
6579                             desc5 += " Subscription_33%";
6580                             config.UsedItemCountMax = AVG_RESOURCES_PER_POOL * 33 / 100;
6581                             break;
6582                         case 4:
6583                             desc5 += " Subscription_166%";
6584                             config.UsedItemCountMax = AVG_RESOURCES_PER_POOL * 166 / 100;
6585                             break;
6586                         default:
6587                             assert(0);
6588                         }
6589 
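                        // Derive the remaining limits from the chosen maximum: total item count is 5x, minimum used count is 80%.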
6590                         config.TotalItemCount = config.UsedItemCountMax * 5;
6591                         config.UsedItemCountMin = config.UsedItemCountMax * 80 / 100;
6592 
6593                         const char* testDescription = desc5.c_str();
6594 
6595                         for(size_t repeat = 0; repeat < repeatCount; ++repeat)
6596                         {
6597                             printf("%s #%u\n", testDescription, (uint32_t)repeat);
6598 
6599                             PoolTestResult result{};
6600                             TestPool_Benchmark(result, config);
6601                             WritePoolTestResult(file, CODE_DESCRIPTION, testDescription, config, result);
6602                         }
6603                     }
6604                 }
6605             }
6606         }
6607     }
6608 }
6609 
6610 static void BasicTestBuddyAllocator()
6611 {
6612     wprintf(L"Basic test buddy allocator\n");
6613 
6614     RandomNumberGenerator rand{76543};
6615 
6616     VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
6617     sampleBufCreateInfo.size = 1024; // Arbitrary size; only used here to find a compatible memory type.
6618     sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
6619 
6620     VmaAllocationCreateInfo sampleAllocCreateInfo = {};
6621     sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
6622 
6623     VmaPoolCreateInfo poolCreateInfo = {};
6624     VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
6625     TEST(res == VK_SUCCESS);
6626 
6627     // Deliberately add 1023 to test a usable size smaller than the memory block size.
6628     poolCreateInfo.blockSize = 1024 * 1024 + 1023;
6629     poolCreateInfo.flags = VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT;
6630     //poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1;
6631 
6632     VmaPool pool = nullptr;
6633     res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
6634     TEST(res == VK_SUCCESS);
6635 
6636     VkBufferCreateInfo bufCreateInfo = sampleBufCreateInfo;
6637 
6638     VmaAllocationCreateInfo allocCreateInfo = {};
6639     allocCreateInfo.pool = pool;
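    // All buffers below are allocated from the custom buddy pool.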
6640 
6641     std::vector<BufferInfo> bufInfo;
6642     BufferInfo newBufInfo;
6643     VmaAllocationInfo allocInfo;
6644 
6645     bufCreateInfo.size = 1024 * 256;
6646     res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
6647         &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
6648     TEST(res == VK_SUCCESS);
6649     bufInfo.push_back(newBufInfo);
6650 
6651     bufCreateInfo.size = 1024 * 512;
6652     res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
6653         &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
6654     TEST(res == VK_SUCCESS);
6655     bufInfo.push_back(newBufInfo);
6656 
6657     bufCreateInfo.size = 1024 * 128;
6658     res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
6659         &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
6660     TEST(res == VK_SUCCESS);
6661     bufInfo.push_back(newBufInfo);
6662 
6663     // Test very small allocation, smaller than minimum node size.
6664     bufCreateInfo.size = 1;
6665     res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
6666         &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
6667     TEST(res == VK_SUCCESS);
6668     bufInfo.push_back(newBufInfo);
6669 
6670     // Test some small allocation with alignment requirement.
6671     {
6672         VkMemoryRequirements memReq;
6673         memReq.alignment = 256;
6674         memReq.memoryTypeBits = UINT32_MAX;
6675         memReq.size = 32;
6676 
6677         newBufInfo.Buffer = VK_NULL_HANDLE;
6678         res = vmaAllocateMemory(g_hAllocator, &memReq, &allocCreateInfo,
6679             &newBufInfo.Allocation, &allocInfo);
6680         TEST(res == VK_SUCCESS);
6681         TEST(allocInfo.offset % memReq.alignment == 0);
6682         bufInfo.push_back(newBufInfo);
6683     }
6684 
6685     //SaveAllocatorStatsToFile(L"TEST.json");
6686 
6687     VmaPoolStats stats = {};
6688     vmaGetPoolStats(g_hAllocator, pool, &stats);
6689     int DBG = 0; // Set breakpoint here to inspect `stats`.
6690 
6691     // Allocate enough new buffers to be sure they spill into a second memory block.
6692     for(uint32_t i = 0; i < 32; ++i)
6693     {
6694         bufCreateInfo.size = 1024 * (rand.Generate() % 32 + 1);
6695         res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
6696             &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
6697         TEST(res == VK_SUCCESS);
6698         bufInfo.push_back(newBufInfo);
6699     }
6700 
6701     SaveAllocatorStatsToFile(L"BuddyTest01.json");
6702 
6703     // Destroy the buffers in random order.
6704     while(!bufInfo.empty())
6705     {
6706         const size_t indexToDestroy = rand.Generate() % bufInfo.size();
6707         const BufferInfo& currBufInfo = bufInfo[indexToDestroy];
6708         vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
6709         bufInfo.erase(bufInfo.begin() + indexToDestroy);
6710     }
6711 
6712     vmaDestroyPool(g_hAllocator, pool);
6713 }
6714 
6715 static void BasicTestAllocatePages()
6716 {
6717     wprintf(L"Basic test allocate pages\n");
6718 
6719     RandomNumberGenerator rand{765461};
6720 
6721     VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
6722     sampleBufCreateInfo.size = 1024; // Arbitrary size; only used here to find a compatible memory type.
6723     sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
6724 
6725     VmaAllocationCreateInfo sampleAllocCreateInfo = {};
6726     sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
6727 
6728     VmaPoolCreateInfo poolCreateInfo = {};
6729     VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
6730     TEST(res == VK_SUCCESS);
6731 
6732     // 1 block of 1 MB.
6733     poolCreateInfo.blockSize = 1024 * 1024;
6734     poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1;
6735 
6736     // Create pool.
6737     VmaPool pool = nullptr;
6738     res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
6739     TEST(res == VK_SUCCESS);
6740 
6741     // Make 100 allocations of 4 KB - they should fit into the pool.
6742     VkMemoryRequirements memReq;
6743     memReq.memoryTypeBits = UINT32_MAX;
6744     memReq.alignment = 4 * 1024;
6745     memReq.size = 4 * 1024;
6746 
6747     VmaAllocationCreateInfo allocCreateInfo = {};
6748     allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
6749     allocCreateInfo.pool = pool;
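    // VMA_ALLOCATION_CREATE_MAPPED_BIT keeps every allocation persistently mapped, so pMappedData can be checked below.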
6750 
6751     constexpr uint32_t allocCount = 100;
6752 
6753     std::vector<VmaAllocation> alloc(allocCount);
6754     std::vector<VmaAllocationInfo> allocInfo(allocCount);
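    // A single vmaAllocateMemoryPages call creates all allocCount allocations at once.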
6755     res = vmaAllocateMemoryPages(g_hAllocator, &memReq, &allocCreateInfo, allocCount, alloc.data(), allocInfo.data());
6756     TEST(res == VK_SUCCESS);
6757     for(uint32_t i = 0; i < allocCount; ++i)
6758     {
6759         TEST(alloc[i] != VK_NULL_HANDLE &&
6760             allocInfo[i].pMappedData != nullptr &&
6761             allocInfo[i].deviceMemory == allocInfo[0].deviceMemory &&
6762             allocInfo[i].memoryType == allocInfo[0].memoryType);
6763     }
6764 
6765     // Free the allocations.
6766     vmaFreeMemoryPages(g_hAllocator, allocCount, alloc.data());
6767     std::fill(alloc.begin(), alloc.end(), nullptr);
6768     std::fill(allocInfo.begin(), allocInfo.end(), VmaAllocationInfo{});
6769 
6770     // Try to make 100 allocations of 100 KB each. This call should fail because the pool does not have enough memory.
6771     // Also test optional allocationInfo = null.
6772     memReq.size = 100 * 1024;
6773     res = vmaAllocateMemoryPages(g_hAllocator, &memReq, &allocCreateInfo, allocCount, alloc.data(), nullptr);
6774     TEST(res != VK_SUCCESS);
6775     TEST(std::find_if(alloc.begin(), alloc.end(), [](VmaAllocation a){ return a != VK_NULL_HANDLE; }) == alloc.end());
6776 
6777     // Make 100 allocations of 4 KB, but with required alignment of 128 KB. This should also fail.
6778     memReq.size = 4 * 1024;
6779     memReq.alignment = 128 * 1024;
6780     res = vmaAllocateMemoryPages(g_hAllocator, &memReq, &allocCreateInfo, allocCount, alloc.data(), allocInfo.data());
6781     TEST(res != VK_SUCCESS);
6782 
6783     // Make 100 dedicated allocations of 4 KB.
6784     memReq.alignment = 4 * 1024;
6785     memReq.size = 4 * 1024;
6786 
6787     VmaAllocationCreateInfo dedicatedAllocCreateInfo = {};
6788     dedicatedAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
6789     dedicatedAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT | VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
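    // The dedicated-memory flag requests a separate VkDeviceMemory block per allocation; the loop below verifies that the memory handles differ.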
6790     res = vmaAllocateMemoryPages(g_hAllocator, &memReq, &dedicatedAllocCreateInfo, allocCount, alloc.data(), allocInfo.data());
6791     TEST(res == VK_SUCCESS);
6792     for(uint32_t i = 0; i < allocCount; ++i)
6793     {
6794         TEST(alloc[i] != VK_NULL_HANDLE &&
6795             allocInfo[i].pMappedData != nullptr &&
6796             allocInfo[i].memoryType == allocInfo[0].memoryType &&
6797             allocInfo[i].offset == 0);
6798         if(i > 0)
6799         {
6800             TEST(allocInfo[i].deviceMemory != allocInfo[0].deviceMemory);
6801         }
6802     }
6803 
6804     // Free the allocations.
6805     vmaFreeMemoryPages(g_hAllocator, allocCount, alloc.data());
6806     std::fill(alloc.begin(), alloc.end(), nullptr);
6807     std::fill(allocInfo.begin(), allocInfo.end(), VmaAllocationInfo{});
6808 
6809     vmaDestroyPool(g_hAllocator, pool);
6810 }
6811 
6812 // Test the testing environment.
6813 static void TestGpuData()
6814 {
6815     RandomNumberGenerator rand = { 53434 };
6816 
6817     std::vector<AllocInfo> allocInfo;
6818 
6819     for(size_t i = 0; i < 100; ++i)
6820     {
6821         AllocInfo info = {};
6822 
6823         info.m_BufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
6824         info.m_BufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT |
6825             VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
6826             VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
6827         info.m_BufferInfo.size = 1024 * 1024 * (rand.Generate() % 9 + 1);
6828 
6829         VmaAllocationCreateInfo allocCreateInfo = {};
6830         allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
6831 
6832         VkResult res = vmaCreateBuffer(g_hAllocator, &info.m_BufferInfo, &allocCreateInfo, &info.m_Buffer, &info.m_Allocation, nullptr);
6833         TEST(res == VK_SUCCESS);
6834 
6835         info.m_StartValue = rand.Generate();
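        // The start value presumably seeds the data pattern that UploadGpuData writes and ValidateGpuData later verifies.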
6836 
6837         allocInfo.push_back(std::move(info));
6838     }
6839 
6840     UploadGpuData(allocInfo.data(), allocInfo.size());
6841 
6842     ValidateGpuData(allocInfo.data(), allocInfo.size());
6843 
6844     DestroyAllAllocations(allocInfo);
6845 }
6846 
6847 void Test()
6848 {
6849     wprintf(L"TESTING:\n");
6850 
6851     if(false)
6852     {
6853         ////////////////////////////////////////////////////////////////////////////////
6854         // Temporarily insert custom tests here:
6855         TestVirtualBlocks();
6856         TestVirtualBlocksAlgorithms();
6857         return;
6858     }
6859 
6860     // # Simple tests
6861 
6862     TestBasics();
6863     TestVirtualBlocks();
6864     TestVirtualBlocksAlgorithms();
6865     TestAllocationVersusResourceSize();
6866     //TestGpuData(); // Not calling this because it's just testing the testing environment.
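    // With a debug margin configured, run only the margin test; the pool and heap size limit tests are skipped.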
6867 #if VMA_DEBUG_MARGIN
6868     TestDebugMargin();
6869 #else
6870     TestPool_SameSize();
6871     TestPool_MinBlockCount();
6872     TestPool_MinAllocationAlignment();
6873     TestHeapSizeLimit();
6874 #endif
6875 #if VMA_DEBUG_INITIALIZE_ALLOCATIONS
6876     TestAllocationsInitialization();
6877 #endif
6878     TestMemoryUsage();
6879     TestDeviceCoherentMemory();
6880     TestBudget();
6881     TestAliasing();
6882     TestMapping();
6883     TestDeviceLocalMapped();
6884     TestMappingMultithreaded();
6885     TestLinearAllocator();
6886     ManuallyTestLinearAllocator();
6887     TestLinearAllocatorMultiBlock();
6888 
6889     BasicTestBuddyAllocator();
6890     BasicTestAllocatePages();
6891 
6892     if(VK_KHR_buffer_device_address_enabled)
6893         TestBufferDeviceAddress();
6894     if(VK_EXT_memory_priority_enabled)
6895         TestMemoryPriority();
6896 
6897     {
6898         FILE* file;
6899         fopen_s(&file, "Algorithms.csv", "w");
6900         assert(file != NULL);
6901         BenchmarkAlgorithms(file);
6902         fclose(file);
6903     }
6904 
6905     TestDefragmentationSimple();
6906     TestDefragmentationFull();
6907     TestDefragmentationWholePool();
6908     TestDefragmentationGpu();
6909     TestDefragmentationIncrementalBasic();
6910     TestDefragmentationIncrementalComplex();
6911 
6912     // # Detailed tests
6913     FILE* file;
6914     fopen_s(&file, "Results.csv", "w");
6915     assert(file != NULL);
6916 
6917     WriteMainTestResultHeader(file);
6918     PerformMainTests(file);
6919     //PerformCustomMainTest(file);
6920 
6921     WritePoolTestResultHeader(file);
6922     PerformPoolTests(file);
6923     //PerformCustomPoolTest(file);
6924 
6925     fclose(file);
6926 
6927     wprintf(L"Done, all PASSED.\n");
6928 }
6929 
6930 #endif // #ifdef _WIN32
6931