1 //
2 // Copyright (c) 2017-2021 Advanced Micro Devices, Inc. All rights reserved.
3 //
4 // Permission is hereby granted, free of charge, to any person obtaining a copy
5 // of this software and associated documentation files (the "Software"), to deal
6 // in the Software without restriction, including without limitation the rights
7 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 // copies of the Software, and to permit persons to whom the Software is
9 // furnished to do so, subject to the following conditions:
10 //
11 // The above copyright notice and this permission notice shall be included in
12 // all copies or substantial portions of the Software.
13 //
14 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 // THE SOFTWARE.
21 //
22
23 #include "Tests.h"
24 #include "VmaUsage.h"
25 #include "Common.h"
26 #include <atomic>
27 #include <thread>
28 #include <mutex>
29 #include <functional>
30
31 #ifdef _WIN32
32
// Human-readable tag for this test code, emitted into JSON dumps / debug names.
static const char* CODE_DESCRIPTION = "Foo";
static constexpr VkDeviceSize MEGABYTE = 1024 * 1024;

// Shared state and helpers defined in the main test-harness translation unit.
extern VkCommandBuffer g_hTemporaryCommandBuffer;
extern const VkAllocationCallbacks* g_Allocs;
extern bool VK_KHR_buffer_device_address_enabled;
extern bool VK_EXT_memory_priority_enabled;
extern PFN_vkGetBufferDeviceAddressKHR g_vkGetBufferDeviceAddressKHR;
void BeginSingleTimeCommands();
void EndSingleTimeCommands();
void SetDebugUtilsObjectName(VkObjectType type, uint64_t handle, const char* name);

// Default to no debug margin when the library configuration didn't define one.
#ifndef VMA_DEBUG_MARGIN
#define VMA_DEBUG_MARGIN 0
#endif
48
// Overall scale of the test run: the configurations built from ConfigType
// below choose allocation counts / sizes / thread counts accordingly.
enum CONFIG_TYPE {
    CONFIG_TYPE_MINIMUM,
    CONFIG_TYPE_SMALL,
    CONFIG_TYPE_AVERAGE,
    CONFIG_TYPE_LARGE,
    CONFIG_TYPE_MAXIMUM,
    CONFIG_TYPE_COUNT
};

// Globally selected test scale.
static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_SMALL;
//static constexpr CONFIG_TYPE ConfigType = CONFIG_TYPE_LARGE;

// Order in which surviving allocations are freed at the end of a test.
enum class FREE_ORDER { FORWARD, BACKWARD, RANDOM, COUNT };

// Printable names, indexed by FREE_ORDER value (COUNT excluded).
static const char* FREE_ORDER_NAMES[] = {
    "FORWARD",
    "BACKWARD",
    "RANDOM",
};
68
69 // Copy of internal VmaAlgorithmToStr.
AlgorithmToStr(uint32_t algorithm)70 static const char* AlgorithmToStr(uint32_t algorithm)
71 {
72 switch(algorithm)
73 {
74 case VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT:
75 return "Linear";
76 case VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT:
77 return "Buddy";
78 case 0:
79 return "Default";
80 default:
81 assert(0);
82 return "";
83 }
84 }
85
// One weighted entry in a size-distribution table. Exactly one of the buffer
// range or the image range is expected to be non-zero (see usage in MainTest).
struct AllocationSize
{
    uint32_t Probability;                      // Relative weight of this entry.
    VkDeviceSize BufferSizeMin, BufferSizeMax; // Buffer size range, bytes.
    uint32_t ImageSizeMin, ImageSizeMax;       // Image width/height range, pixels.
};
92
// Parameters of one MainTest() run.
struct Config
{
    uint32_t RandSeed;
    // Bytes allocated up-front, before the random alloc/free phase.
    VkDeviceSize BeginBytesToAllocate;
    // Number of random allocate-or-free operations after the initial phase.
    uint32_t AdditionalOperationCount;
    // Upper bound on bytes held at any moment during the additional phase.
    VkDeviceSize MaxBytesToAllocate;
    uint32_t MemUsageProbability[4]; // For VMA_MEMORY_USAGE_*
    std::vector<AllocationSize> AllocationSizes;
    uint32_t ThreadCount;
    // Chance (0..100) that an allocation goes to the shared cross-thread list.
    uint32_t ThreadsUsingCommonAllocationsProbabilityPercent;
    FREE_ORDER FreeOrder;
    VmaAllocationCreateFlags AllocationStrategy; // For VMA_ALLOCATION_CREATE_STRATEGY_*
};
106
// Aggregated timing and memory statistics of one MainTest() run.
struct Result
{
    duration TotalTime;
    duration AllocationTimeMin, AllocationTimeAvg, AllocationTimeMax;
    duration DeallocationTimeMin, DeallocationTimeAvg, DeallocationTimeMax;
    VkDeviceSize TotalMemoryAllocated;
    VkDeviceSize FreeRangeSizeAvg, FreeRangeSizeMax;
};

// Defragmentation tests implemented further down in this file.
void TestDefragmentationSimple();
void TestDefragmentationFull();
118
119 struct PoolTestConfig
120 {
121 uint32_t RandSeed;
122 uint32_t ThreadCount;
123 VkDeviceSize PoolSize;
124 uint32_t FrameCount;
125 uint32_t TotalItemCount;
126 // Range for number of items used in each frame.
127 uint32_t UsedItemCountMin, UsedItemCountMax;
128 // Percent of items to make unused, and possibly make some others used in each frame.
129 uint32_t ItemsToMakeUnusedPercent;
130 std::vector<AllocationSize> AllocationSizes;
131
CalcAvgResourceSizePoolTestConfig132 VkDeviceSize CalcAvgResourceSize() const
133 {
134 uint32_t probabilitySum = 0;
135 VkDeviceSize sizeSum = 0;
136 for(size_t i = 0; i < AllocationSizes.size(); ++i)
137 {
138 const AllocationSize& allocSize = AllocationSizes[i];
139 if(allocSize.BufferSizeMax > 0)
140 sizeSum += (allocSize.BufferSizeMin + allocSize.BufferSizeMax) / 2 * allocSize.Probability;
141 else
142 {
143 const VkDeviceSize avgDimension = (allocSize.ImageSizeMin + allocSize.ImageSizeMax) / 2;
144 sizeSum += avgDimension * avgDimension * 4 * allocSize.Probability;
145 }
146 probabilitySum += allocSize.Probability;
147 }
148 return sizeSum / probabilitySum;
149 }
150
UsesBuffersPoolTestConfig151 bool UsesBuffers() const
152 {
153 for(size_t i = 0; i < AllocationSizes.size(); ++i)
154 if(AllocationSizes[i].BufferSizeMax > 0)
155 return true;
156 return false;
157 }
158
UsesImagesPoolTestConfig159 bool UsesImages() const
160 {
161 for(size_t i = 0; i < AllocationSizes.size(); ++i)
162 if(AllocationSizes[i].ImageSizeMax > 0)
163 return true;
164 return false;
165 }
166 };
167
// Aggregated results of the custom-pool stress test across all threads.
struct PoolTestResult
{
    duration TotalTime;
    duration AllocationTimeMin, AllocationTimeAvg, AllocationTimeMax;
    duration DeallocationTimeMin, DeallocationTimeAvg, DeallocationTimeMax;
    size_t LostAllocationCount, LostAllocationTotalSize;
    size_t FailedAllocationCount, FailedAllocationTotalSize;
};
176
// Bytes per pixel assumed when estimating image memory footprint in the tests.
static const uint32_t IMAGE_BYTES_PER_PIXEL = 1;

// Global frame counter; its consumers are outside this chunk.
uint32_t g_FrameIndex = 0;

// A buffer handle paired with the VMA allocation that backs it.
struct BufferInfo
{
    VkBuffer Buffer = VK_NULL_HANDLE;
    VmaAllocation Allocation = VK_NULL_HANDLE;
};
186
MemoryTypeToHeap(uint32_t memoryTypeIndex)187 static uint32_t MemoryTypeToHeap(uint32_t memoryTypeIndex)
188 {
189 const VkPhysicalDeviceMemoryProperties* props;
190 vmaGetMemoryProperties(g_hAllocator, &props);
191 return props->memoryTypes[memoryTypeIndex].heapIndex;
192 }
193
GetAllocationStrategyCount()194 static uint32_t GetAllocationStrategyCount()
195 {
196 uint32_t strategyCount = 0;
197 switch(ConfigType)
198 {
199 case CONFIG_TYPE_MINIMUM: strategyCount = 1; break;
200 case CONFIG_TYPE_SMALL: strategyCount = 1; break;
201 case CONFIG_TYPE_AVERAGE: strategyCount = 2; break;
202 case CONFIG_TYPE_LARGE: strategyCount = 2; break;
203 case CONFIG_TYPE_MAXIMUM: strategyCount = 3; break;
204 default: assert(0);
205 }
206 return strategyCount;
207 }
208
GetAllocationStrategyName(VmaAllocationCreateFlags allocStrategy)209 static const char* GetAllocationStrategyName(VmaAllocationCreateFlags allocStrategy)
210 {
211 switch(allocStrategy)
212 {
213 case VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT: return "BEST_FIT"; break;
214 case VMA_ALLOCATION_CREATE_STRATEGY_WORST_FIT_BIT: return "WORST_FIT"; break;
215 case VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT: return "FIRST_FIT"; break;
216 case 0: return "Default"; break;
217 default: assert(0); return "";
218 }
219 }
220
InitResult(Result & outResult)221 static void InitResult(Result& outResult)
222 {
223 outResult.TotalTime = duration::zero();
224 outResult.AllocationTimeMin = duration::max();
225 outResult.AllocationTimeAvg = duration::zero();
226 outResult.AllocationTimeMax = duration::min();
227 outResult.DeallocationTimeMin = duration::max();
228 outResult.DeallocationTimeAvg = duration::zero();
229 outResult.DeallocationTimeMax = duration::min();
230 outResult.TotalMemoryAllocated = 0;
231 outResult.FreeRangeSizeAvg = 0;
232 outResult.FreeRangeSizeMax = 0;
233 }
234
235 class TimeRegisterObj
236 {
237 public:
TimeRegisterObj(duration & min,duration & sum,duration & max)238 TimeRegisterObj(duration& min, duration& sum, duration& max) :
239 m_Min(min),
240 m_Sum(sum),
241 m_Max(max),
242 m_TimeBeg(std::chrono::high_resolution_clock::now())
243 {
244 }
245
~TimeRegisterObj()246 ~TimeRegisterObj()
247 {
248 duration d = std::chrono::high_resolution_clock::now() - m_TimeBeg;
249 m_Sum += d;
250 if(d < m_Min) m_Min = d;
251 if(d > m_Max) m_Max = d;
252 }
253
254 private:
255 duration& m_Min;
256 duration& m_Sum;
257 duration& m_Max;
258 time_point m_TimeBeg;
259 };
260
// Per-thread counters for the custom-pool stress test; merged into a
// PoolTestResult by the caller.
struct PoolTestThreadResult
{
    duration AllocationTimeMin, AllocationTimeSum, AllocationTimeMax;
    duration DeallocationTimeMin, DeallocationTimeSum, DeallocationTimeMax;
    size_t AllocationCount, DeallocationCount;
    size_t LostAllocationCount, LostAllocationTotalSize;
    size_t FailedAllocationCount, FailedAllocationTotalSize;
};
269
// Scoped timer that records into a Result's allocation-time fields.
class AllocationTimeRegisterObj : public TimeRegisterObj
{
public:
    AllocationTimeRegisterObj(Result& result) :
        TimeRegisterObj(result.AllocationTimeMin, result.AllocationTimeAvg, result.AllocationTimeMax)
    {
    }
};
278
// Scoped timer that records into a Result's deallocation-time fields.
class DeallocationTimeRegisterObj : public TimeRegisterObj
{
public:
    DeallocationTimeRegisterObj(Result& result) :
        TimeRegisterObj(result.DeallocationTimeMin, result.DeallocationTimeAvg, result.DeallocationTimeMax)
    {
    }
};
287
// Scoped timer that records into a PoolTestThreadResult's allocation-time fields.
class PoolAllocationTimeRegisterObj : public TimeRegisterObj
{
public:
    PoolAllocationTimeRegisterObj(PoolTestThreadResult& result) :
        TimeRegisterObj(result.AllocationTimeMin, result.AllocationTimeSum, result.AllocationTimeMax)
    {
    }
};
296
// Scoped timer that records into a PoolTestThreadResult's deallocation-time fields.
class PoolDeallocationTimeRegisterObj : public TimeRegisterObj
{
public:
    PoolDeallocationTimeRegisterObj(PoolTestThreadResult& result) :
        TimeRegisterObj(result.DeallocationTimeMin, result.DeallocationTimeSum, result.DeallocationTimeMax)
    {
    }
};
305
CurrentTimeToStr(std::string & out)306 static void CurrentTimeToStr(std::string& out)
307 {
308 time_t rawTime; time(&rawTime);
309 struct tm timeInfo; localtime_s(&timeInfo, &rawTime);
310 char timeStr[128];
311 strftime(timeStr, _countof(timeStr), "%c", &timeInfo);
312 out = timeStr;
313 }
314
// Main allocation stress test.
// Spawns config.ThreadCount worker threads. Each thread first allocates
// resources up to its share of config.BeginBytesToAllocate, then performs its
// share of config.AdditionalOperationCount random allocate/free operations,
// then blocks on a Windows event. The main thread captures memory statistics
// while everything is still alive, signals the event, and the workers free
// their remaining allocations in config.FreeOrder.
// Returns the last VkResult produced by an allocation (VK_SUCCESS if none
// failed; any failure also trips TEST(0)).
// NOTE(review): `res` and the timing fields of `outResult` are written from
// multiple worker threads without synchronization — appears to be a benign
// race tolerated by this test harness; confirm before reusing the pattern.
VkResult MainTest(Result& outResult, const Config& config)
{
    assert(config.ThreadCount > 0);

    InitResult(outResult);

    RandomNumberGenerator mainRand{config.RandSeed};

    time_point timeBeg = std::chrono::high_resolution_clock::now();

    std::atomic<size_t> allocationCount = 0;
    VkResult res = VK_SUCCESS;

    // Total weight for choosing one of the 4 consecutive VMA_MEMORY_USAGE_*
    // values starting at VMA_MEMORY_USAGE_GPU_ONLY.
    uint32_t memUsageProbabilitySum =
        config.MemUsageProbability[0] + config.MemUsageProbability[1] +
        config.MemUsageProbability[2] + config.MemUsageProbability[3];
    assert(memUsageProbabilitySum > 0);

    // Total weight for choosing one entry of config.AllocationSizes.
    uint32_t allocationSizeProbabilitySum = std::accumulate(
        config.AllocationSizes.begin(),
        config.AllocationSizes.end(),
        0u,
        [](uint32_t sum, const AllocationSize& allocSize) {
            return sum + allocSize.Probability;
        });

    // One created resource: either Buffer or Image is set, plus its allocation.
    struct Allocation
    {
        VkBuffer Buffer;
        VkImage Image;
        VmaAllocation Alloc;
    };

    // Allocations shared between threads, guarded by commonAllocationsMutex.
    std::vector<Allocation> commonAllocations;
    std::mutex commonAllocationsMutex;

    // Creates one buffer (bufferSize > 0) or one image (non-zero imageExtent)
    // with a weighted-random memory usage, times the creation, and stores the
    // result either in the shared list or the caller's local list.
    auto Allocate = [&](
        VkDeviceSize bufferSize,
        const VkExtent2D imageExtent,
        RandomNumberGenerator& localRand,
        VkDeviceSize& totalAllocatedBytes,
        std::vector<Allocation>& allocations) -> VkResult
    {
        // Exactly one of buffer / image must be requested.
        assert((bufferSize == 0) != (imageExtent.width == 0 && imageExtent.height == 0));

        // Weighted random choice of memory usage.
        uint32_t memUsageIndex = 0;
        uint32_t memUsageRand = localRand.Generate() % memUsageProbabilitySum;
        while(memUsageRand >= config.MemUsageProbability[memUsageIndex])
            memUsageRand -= config.MemUsageProbability[memUsageIndex++];

        VmaAllocationCreateInfo memReq = {};
        memReq.usage = (VmaMemoryUsage)(VMA_MEMORY_USAGE_GPU_ONLY + memUsageIndex);
        memReq.flags |= config.AllocationStrategy;

        Allocation allocation = {};
        VmaAllocationInfo allocationInfo;

        // Buffer
        if(bufferSize > 0)
        {
            assert(imageExtent.width == 0);
            VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
            bufferInfo.size = bufferSize;
            bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;

            {
                AllocationTimeRegisterObj timeRegisterObj{outResult};
                res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &memReq, &allocation.Buffer, &allocation.Alloc, &allocationInfo);
            }
        }
        // Image
        else
        {
            VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
            imageInfo.imageType = VK_IMAGE_TYPE_2D;
            imageInfo.extent.width = imageExtent.width;
            imageInfo.extent.height = imageExtent.height;
            imageInfo.extent.depth = 1;
            imageInfo.mipLevels = 1;
            imageInfo.arrayLayers = 1;
            imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
            // GPU-only images use optimal tiling; host-accessible usages use
            // linear tiling.
            imageInfo.tiling = memReq.usage == VMA_MEMORY_USAGE_GPU_ONLY ?
                VK_IMAGE_TILING_OPTIMAL :
                VK_IMAGE_TILING_LINEAR;
            imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
            // Pick usage flags appropriate for the chosen memory usage.
            switch(memReq.usage)
            {
            case VMA_MEMORY_USAGE_GPU_ONLY:
                switch(localRand.Generate() % 3)
                {
                case 0:
                    imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
                    break;
                case 1:
                    imageInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
                    break;
                case 2:
                    imageInfo.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
                    break;
                }
                break;
            case VMA_MEMORY_USAGE_CPU_ONLY:
            case VMA_MEMORY_USAGE_CPU_TO_GPU:
                imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
                break;
            case VMA_MEMORY_USAGE_GPU_TO_CPU:
                imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT;
                break;
            }
            imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
            imageInfo.flags = 0;

            {
                AllocationTimeRegisterObj timeRegisterObj{outResult};
                res = vmaCreateImage(g_hAllocator, &imageInfo, &memReq, &allocation.Image, &allocation.Alloc, &allocationInfo);
            }
        }

        if(res == VK_SUCCESS)
        {
            ++allocationCount;
            totalAllocatedBytes += allocationInfo.size;
            bool useCommonAllocations = localRand.Generate() % 100 < config.ThreadsUsingCommonAllocationsProbabilityPercent;
            if(useCommonAllocations)
            {
                std::unique_lock<std::mutex> lock(commonAllocationsMutex);
                commonAllocations.push_back(allocation);
            }
            else
                allocations.push_back(allocation);
        }
        else
        {
            TEST(0);
        }
        return res;
    };

    // Picks a random buffer size OR image extent (never both), chosen from
    // the weighted entries of config.AllocationSizes.
    auto GetNextAllocationSize = [&](
        VkDeviceSize& outBufSize,
        VkExtent2D& outImageSize,
        RandomNumberGenerator& localRand)
    {
        outBufSize = 0;
        outImageSize = {0, 0};

        uint32_t allocSizeIndex = 0;
        uint32_t r = localRand.Generate() % allocationSizeProbabilitySum;
        while(r >= config.AllocationSizes[allocSizeIndex].Probability)
            r -= config.AllocationSizes[allocSizeIndex++].Probability;

        const AllocationSize& allocSize = config.AllocationSizes[allocSizeIndex];
        if(allocSize.BufferSizeMax > 0)
        {
            assert(allocSize.ImageSizeMax == 0);
            if(allocSize.BufferSizeMax == allocSize.BufferSizeMin)
                outBufSize = allocSize.BufferSizeMin;
            else
            {
                outBufSize = allocSize.BufferSizeMin + localRand.Generate() % (allocSize.BufferSizeMax - allocSize.BufferSizeMin);
                // Round down to a multiple of 16 bytes.
                outBufSize = outBufSize / 16 * 16;
            }
        }
        else
        {
            if(allocSize.ImageSizeMax == allocSize.ImageSizeMin)
                outImageSize.width = outImageSize.height = allocSize.ImageSizeMax;
            else
            {
                outImageSize.width = allocSize.ImageSizeMin + localRand.Generate() % (allocSize.ImageSizeMax - allocSize.ImageSizeMin);
                outImageSize.height = allocSize.ImageSizeMin + localRand.Generate() % (allocSize.ImageSizeMax - allocSize.ImageSizeMin);
            }
        }
    };

    std::atomic<uint32_t> numThreadsReachedMaxAllocations = 0;
    // Manual-reset event: signaled by the main thread once statistics have
    // been captured, releasing all workers into their deallocation phase.
    HANDLE threadsFinishEvent = CreateEvent(NULL, TRUE, FALSE, NULL);

    auto ThreadProc = [&](uint32_t randSeed) -> void
    {
        RandomNumberGenerator threadRand(randSeed);
        VkDeviceSize threadTotalAllocatedBytes = 0;
        std::vector<Allocation> threadAllocations;
        // Per-thread shares of the global budgets.
        VkDeviceSize threadBeginBytesToAllocate = config.BeginBytesToAllocate / config.ThreadCount;
        VkDeviceSize threadMaxBytesToAllocate = config.MaxBytesToAllocate / config.ThreadCount;
        uint32_t threadAdditionalOperationCount = config.AdditionalOperationCount / config.ThreadCount;

        // BEGIN ALLOCATIONS
        for(;;)
        {
            VkDeviceSize bufferSize = 0;
            VkExtent2D imageExtent = {};
            GetNextAllocationSize(bufferSize, imageExtent, threadRand);
            if(threadTotalAllocatedBytes + bufferSize + imageExtent.width * imageExtent.height * IMAGE_BYTES_PER_PIXEL <
                threadBeginBytesToAllocate)
            {
                if(Allocate(bufferSize, imageExtent, threadRand, threadTotalAllocatedBytes, threadAllocations) != VK_SUCCESS)
                    break;
            }
            else
                break;
        }

        // ADDITIONAL ALLOCATIONS AND FREES
        for(size_t i = 0; i < threadAdditionalOperationCount; ++i)
        {
            VkDeviceSize bufferSize = 0;
            VkExtent2D imageExtent = {};
            GetNextAllocationSize(bufferSize, imageExtent, threadRand);

            // true = allocate, false = free
            bool allocate = threadRand.Generate() % 2 != 0;

            if(allocate)
            {
                if(threadTotalAllocatedBytes +
                    bufferSize +
                    imageExtent.width * imageExtent.height * IMAGE_BYTES_PER_PIXEL <
                    threadMaxBytesToAllocate)
                {
                    if(Allocate(bufferSize, imageExtent, threadRand, threadTotalAllocatedBytes, threadAllocations) != VK_SUCCESS)
                        break;
                }
            }
            else
            {
                // Free either a shared allocation or one of this thread's own.
                bool useCommonAllocations = threadRand.Generate() % 100 < config.ThreadsUsingCommonAllocationsProbabilityPercent;
                if(useCommonAllocations)
                {
                    std::unique_lock<std::mutex> lock(commonAllocationsMutex);
                    if(!commonAllocations.empty())
                    {
                        size_t indexToFree = threadRand.Generate() % commonAllocations.size();
                        VmaAllocationInfo allocationInfo;
                        vmaGetAllocationInfo(g_hAllocator, commonAllocations[indexToFree].Alloc, &allocationInfo);
                        // Only free if it keeps this thread's byte counter non-negative.
                        if(threadTotalAllocatedBytes >= allocationInfo.size)
                        {
                            DeallocationTimeRegisterObj timeRegisterObj{outResult};
                            if(commonAllocations[indexToFree].Buffer != VK_NULL_HANDLE)
                                vmaDestroyBuffer(g_hAllocator, commonAllocations[indexToFree].Buffer, commonAllocations[indexToFree].Alloc);
                            else
                                vmaDestroyImage(g_hAllocator, commonAllocations[indexToFree].Image, commonAllocations[indexToFree].Alloc);
                            threadTotalAllocatedBytes -= allocationInfo.size;
                            commonAllocations.erase(commonAllocations.begin() + indexToFree);
                        }
                    }
                }
                else
                {
                    if(!threadAllocations.empty())
                    {
                        size_t indexToFree = threadRand.Generate() % threadAllocations.size();
                        VmaAllocationInfo allocationInfo;
                        vmaGetAllocationInfo(g_hAllocator, threadAllocations[indexToFree].Alloc, &allocationInfo);
                        if(threadTotalAllocatedBytes >= allocationInfo.size)
                        {
                            DeallocationTimeRegisterObj timeRegisterObj{outResult};
                            if(threadAllocations[indexToFree].Buffer != VK_NULL_HANDLE)
                                vmaDestroyBuffer(g_hAllocator, threadAllocations[indexToFree].Buffer, threadAllocations[indexToFree].Alloc);
                            else
                                vmaDestroyImage(g_hAllocator, threadAllocations[indexToFree].Image, threadAllocations[indexToFree].Alloc);
                            threadTotalAllocatedBytes -= allocationInfo.size;
                            threadAllocations.erase(threadAllocations.begin() + indexToFree);
                        }
                    }
                }
            }
        }

        ++numThreadsReachedMaxAllocations;

        // Block until the main thread has captured memory statistics.
        WaitForSingleObject(threadsFinishEvent, INFINITE);

        // DEALLOCATION
        // NOTE(review): RANDOM order draws from mainRand, which is shared by
        // all workers without synchronization — another tolerated race.
        while(!threadAllocations.empty())
        {
            size_t indexToFree = 0;
            switch(config.FreeOrder)
            {
            case FREE_ORDER::FORWARD:
                indexToFree = 0;
                break;
            case FREE_ORDER::BACKWARD:
                indexToFree = threadAllocations.size() - 1;
                break;
            case FREE_ORDER::RANDOM:
                indexToFree = mainRand.Generate() % threadAllocations.size();
                break;
            }

            {
                DeallocationTimeRegisterObj timeRegisterObj{outResult};
                if(threadAllocations[indexToFree].Buffer != VK_NULL_HANDLE)
                    vmaDestroyBuffer(g_hAllocator, threadAllocations[indexToFree].Buffer, threadAllocations[indexToFree].Alloc);
                else
                    vmaDestroyImage(g_hAllocator, threadAllocations[indexToFree].Image, threadAllocations[indexToFree].Alloc);
            }
            threadAllocations.erase(threadAllocations.begin() + indexToFree);
        }
    };

    uint32_t threadRandSeed = mainRand.Generate();
    std::vector<std::thread> bkgThreads;
    for(size_t i = 0; i < config.ThreadCount; ++i)
    {
        bkgThreads.emplace_back(std::bind(ThreadProc, threadRandSeed + (uint32_t)i));
    }

    // Wait for threads reached max allocations
    while(numThreadsReachedMaxAllocations < config.ThreadCount)
        Sleep(0);

    // CALCULATE MEMORY STATISTICS ON FINAL USAGE
    VmaStats vmaStats = {};
    vmaCalculateStats(g_hAllocator, &vmaStats);
    outResult.TotalMemoryAllocated = vmaStats.total.usedBytes + vmaStats.total.unusedBytes;
    outResult.FreeRangeSizeMax = vmaStats.total.unusedRangeSizeMax;
    outResult.FreeRangeSizeAvg = vmaStats.total.unusedRangeSizeAvg;

    // Signal threads to deallocate
    SetEvent(threadsFinishEvent);

    // Wait for threads finished
    for(size_t i = 0; i < bkgThreads.size(); ++i)
        bkgThreads[i].join();
    bkgThreads.clear();

    CloseHandle(threadsFinishEvent);

    // Deallocate remaining common resources
    while(!commonAllocations.empty())
    {
        size_t indexToFree = 0;
        switch(config.FreeOrder)
        {
        case FREE_ORDER::FORWARD:
            indexToFree = 0;
            break;
        case FREE_ORDER::BACKWARD:
            indexToFree = commonAllocations.size() - 1;
            break;
        case FREE_ORDER::RANDOM:
            indexToFree = mainRand.Generate() % commonAllocations.size();
            break;
        }

        {
            DeallocationTimeRegisterObj timeRegisterObj{outResult};
            if(commonAllocations[indexToFree].Buffer != VK_NULL_HANDLE)
                vmaDestroyBuffer(g_hAllocator, commonAllocations[indexToFree].Buffer, commonAllocations[indexToFree].Alloc);
            else
                vmaDestroyImage(g_hAllocator, commonAllocations[indexToFree].Image, commonAllocations[indexToFree].Alloc);
        }
        commonAllocations.erase(commonAllocations.begin() + indexToFree);
    }

    // Convert accumulated time sums into averages.
    if(allocationCount)
    {
        outResult.AllocationTimeAvg /= allocationCount;
        outResult.DeallocationTimeAvg /= allocationCount;
    }

    outResult.TotalTime = std::chrono::high_resolution_clock::now() - timeBeg;

    return res;
}
681
SaveAllocatorStatsToFile(const wchar_t * filePath)682 void SaveAllocatorStatsToFile(const wchar_t* filePath)
683 {
684 wprintf(L"Saving JSON dump to file \"%s\"\n", filePath);
685 char* stats;
686 vmaBuildStatsString(g_hAllocator, &stats, VK_TRUE);
687 SaveFile(filePath, stats, strlen(stats));
688 vmaFreeStatsString(g_hAllocator, stats);
689 }
690
// One tracked resource — either a buffer OR an image, never both — together
// with its VMA allocation, its creation info, and the deterministic start
// value used to fill and later verify its contents.
struct AllocInfo
{
    VmaAllocation m_Allocation = VK_NULL_HANDLE;
    VkBuffer m_Buffer = VK_NULL_HANDLE;
    VkImage m_Image = VK_NULL_HANDLE;
    VkImageLayout m_ImageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
    // First uint32 of the content pattern; subsequent words increment by 1.
    uint32_t m_StartValue = 0;
    // Active member is implied by which of m_Buffer / m_Image is non-null.
    union
    {
        VkBufferCreateInfo m_BufferInfo;
        VkImageCreateInfo m_ImageInfo;
    };

    // After defragmentation.
    VkBuffer m_NewBuffer = VK_NULL_HANDLE;
    VkImage m_NewImage = VK_NULL_HANDLE;

    // Create the resource through VMA and record its create info.
    void CreateBuffer(
        const VkBufferCreateInfo& bufCreateInfo,
        const VmaAllocationCreateInfo& allocCreateInfo);
    void CreateImage(
        const VkImageCreateInfo& imageCreateInfo,
        const VmaAllocationCreateInfo& allocCreateInfo,
        VkImageLayout layout);
    // Destroy the Vulkan object and free the allocation.
    void Destroy();
};
717
// Creates the buffer and its allocation through VMA, remembering the create
// info for later validation/re-creation. Fails the test on error.
void AllocInfo::CreateBuffer(
    const VkBufferCreateInfo& bufCreateInfo,
    const VmaAllocationCreateInfo& allocCreateInfo)
{
    m_BufferInfo = bufCreateInfo;
    VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &m_Buffer, &m_Allocation, nullptr);
    TEST(res == VK_SUCCESS);
}
// Creates the image and its allocation through VMA, remembering the create
// info and the layout the image will be transitioned to after upload.
// Fails the test on error.
void AllocInfo::CreateImage(
    const VkImageCreateInfo& imageCreateInfo,
    const VmaAllocationCreateInfo& allocCreateInfo,
    VkImageLayout layout)
{
    m_ImageInfo = imageCreateInfo;
    m_ImageLayout = layout;
    VkResult res = vmaCreateImage(g_hAllocator, &imageCreateInfo, &allocCreateInfo, &m_Image, &m_Allocation, nullptr);
    TEST(res == VK_SUCCESS);
}
736
Destroy()737 void AllocInfo::Destroy()
738 {
739 if(m_Image)
740 {
741 assert(!m_Buffer);
742 vkDestroyImage(g_hDevice, m_Image, g_Allocs);
743 m_Image = VK_NULL_HANDLE;
744 }
745 if(m_Buffer)
746 {
747 assert(!m_Image);
748 vkDestroyBuffer(g_hDevice, m_Buffer, g_Allocs);
749 m_Buffer = VK_NULL_HANDLE;
750 }
751 if(m_Allocation)
752 {
753 vmaFreeMemory(g_hAllocator, m_Allocation);
754 m_Allocation = VK_NULL_HANDLE;
755 }
756 }
757
// Pool of persistently-mapped CPU-side staging buffers reused across upload
// and validation passes. The combined size of all buffers (used and unused)
// is capped at MAX_TOTAL_SIZE.
class StagingBufferCollection
{
public:
    StagingBufferCollection() { }
    ~StagingBufferCollection();
    // Returns false if maximum total size of buffers would be exceeded.
    bool AcquireBuffer(VkDeviceSize size, VkBuffer& outBuffer, void*& outMappedPtr);
    // Marks all buffers as available again; destroys nothing.
    void ReleaseAllBuffers();

private:
    static const VkDeviceSize MAX_TOTAL_SIZE = 256ull * 1024 * 1024;
    struct BufInfo
    {
        VmaAllocation Allocation = VK_NULL_HANDLE;
        VkBuffer Buffer = VK_NULL_HANDLE;
        VkDeviceSize Size = VK_WHOLE_SIZE;
        void* MappedPtr = nullptr; // Persistently mapped host pointer.
        bool Used = false;         // Currently handed out to a caller.
    };
    std::vector<BufInfo> m_Bufs;
    // Including both used and unused.
    VkDeviceSize m_TotalSize = 0;
};
781
~StagingBufferCollection()782 StagingBufferCollection::~StagingBufferCollection()
783 {
784 for(size_t i = m_Bufs.size(); i--; )
785 {
786 vmaDestroyBuffer(g_hAllocator, m_Bufs[i].Buffer, m_Bufs[i].Allocation);
787 }
788 }
789
// Hands out a staging buffer of at least `size` bytes. Preference order:
// (1) the smallest existing unused buffer that fits; (2) a newly created
// buffer, if MAX_TOTAL_SIZE allows; (3) destroy all unused buffers and retry
// once recursively. Returns false only when none of these succeed.
bool StagingBufferCollection::AcquireBuffer(VkDeviceSize size, VkBuffer& outBuffer, void*& outMappedPtr)
{
    assert(size <= MAX_TOTAL_SIZE);

    // Try to find existing unused buffer with best size.
    size_t bestIndex = SIZE_MAX;
    for(size_t i = 0, count = m_Bufs.size(); i < count; ++i)
    {
        BufInfo& currBufInfo = m_Bufs[i];
        if(!currBufInfo.Used && currBufInfo.Size >= size &&
            (bestIndex == SIZE_MAX || currBufInfo.Size < m_Bufs[bestIndex].Size))
        {
            bestIndex = i;
        }
    }

    if(bestIndex != SIZE_MAX)
    {
        m_Bufs[bestIndex].Used = true;
        outBuffer = m_Bufs[bestIndex].Buffer;
        outMappedPtr = m_Bufs[bestIndex].MappedPtr;
        return true;
    }

    // Allocate new buffer with requested size.
    if(m_TotalSize + size <= MAX_TOTAL_SIZE)
    {
        BufInfo bufInfo;
        bufInfo.Size = size;
        bufInfo.Used = true;

        VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
        bufCreateInfo.size = size;
        bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;

        // CPU-only + persistently mapped: MappedPtr remains valid for the
        // buffer's whole lifetime.
        VmaAllocationCreateInfo allocCreateInfo = {};
        allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
        allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;

        VmaAllocationInfo allocInfo;
        VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &bufInfo.Buffer, &bufInfo.Allocation, &allocInfo);
        bufInfo.MappedPtr = allocInfo.pMappedData;
        TEST(res == VK_SUCCESS && bufInfo.MappedPtr);

        outBuffer = bufInfo.Buffer;
        outMappedPtr = bufInfo.MappedPtr;

        m_Bufs.push_back(std::move(bufInfo));

        m_TotalSize += size;

        return true;
    }

    // There are some unused but smaller buffers: Free them and try again.
    bool hasUnused = false;
    for(size_t i = 0, count = m_Bufs.size(); i < count; ++i)
    {
        if(!m_Bufs[i].Used)
        {
            hasUnused = true;
            break;
        }
    }
    if(hasUnused)
    {
        // Iterate backwards so erase() doesn't shift not-yet-visited entries.
        for(size_t i = m_Bufs.size(); i--; )
        {
            if(!m_Bufs[i].Used)
            {
                m_TotalSize -= m_Bufs[i].Size;
                vmaDestroyBuffer(g_hAllocator, m_Bufs[i].Buffer, m_Bufs[i].Allocation);
                m_Bufs.erase(m_Bufs.begin() + i);
            }
        }

        // Retry with the freed capacity. The recursion terminates because all
        // unused buffers were just destroyed, so this branch can't repeat.
        return AcquireBuffer(size, outBuffer, outMappedPtr);
    }

    return false;
}
871
ReleaseAllBuffers()872 void StagingBufferCollection::ReleaseAllBuffers()
873 {
874 for(size_t i = 0, count = m_Bufs.size(); i < count; ++i)
875 {
876 m_Bufs[i].Used = false;
877 }
878 }
879
// Uploads a deterministic pattern (sequential uint32s starting at each
// entry's m_StartValue) into every buffer and image of
// allocInfo[0..allocInfoCount), using recycled staging buffers and a
// single-time command buffer. When staging capacity runs out, pending copies
// are flushed and the staging pool is recycled. Images are transitioned
// UNDEFINED -> TRANSFER_DST_OPTIMAL, filled, then moved to their recorded
// m_ImageLayout.
static void UploadGpuData(const AllocInfo* allocInfo, size_t allocInfoCount)
{
    StagingBufferCollection stagingBufs;

    bool cmdBufferStarted = false;
    for(size_t allocInfoIndex = 0; allocInfoIndex < allocInfoCount; ++allocInfoIndex)
    {
        const AllocInfo& currAllocInfo = allocInfo[allocInfoIndex];
        if(currAllocInfo.m_Buffer)
        {
            const VkDeviceSize size = currAllocInfo.m_BufferInfo.size;

            VkBuffer stagingBuf = VK_NULL_HANDLE;
            void* stagingBufMappedPtr = nullptr;
            if(!stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr))
            {
                // Staging pool exhausted: submit recorded copies so the
                // staging buffers can be recycled, then retry once.
                TEST(cmdBufferStarted);
                EndSingleTimeCommands();
                stagingBufs.ReleaseAllBuffers();
                cmdBufferStarted = false;

                bool ok = stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr);
                TEST(ok);
            }

            // Fill staging buffer.
            {
                assert(size % sizeof(uint32_t) == 0);
                uint32_t* stagingValPtr = (uint32_t*)stagingBufMappedPtr;
                uint32_t val = currAllocInfo.m_StartValue;
                for(size_t i = 0; i < size / sizeof(uint32_t); ++i)
                {
                    *stagingValPtr = val;
                    ++stagingValPtr;
                    ++val;
                }
            }

            // Issue copy command from staging buffer to destination buffer.
            if(!cmdBufferStarted)
            {
                cmdBufferStarted = true;
                BeginSingleTimeCommands();
            }

            VkBufferCopy copy = {};
            copy.srcOffset = 0;
            copy.dstOffset = 0;
            copy.size = size;
            vkCmdCopyBuffer(g_hTemporaryCommandBuffer, stagingBuf, currAllocInfo.m_Buffer, 1, &copy);
        }
        else
        {
            TEST(currAllocInfo.m_ImageInfo.format == VK_FORMAT_R8G8B8A8_UNORM && "Only RGBA8 images are currently supported.");
            TEST(currAllocInfo.m_ImageInfo.mipLevels == 1 && "Only single mip images are currently supported.");

            // 4 bytes per pixel (RGBA8).
            const VkDeviceSize size = (VkDeviceSize)currAllocInfo.m_ImageInfo.extent.width * currAllocInfo.m_ImageInfo.extent.height * sizeof(uint32_t);

            VkBuffer stagingBuf = VK_NULL_HANDLE;
            void* stagingBufMappedPtr = nullptr;
            if(!stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr))
            {
                // Same recycle-and-retry path as the buffer branch above.
                TEST(cmdBufferStarted);
                EndSingleTimeCommands();
                stagingBufs.ReleaseAllBuffers();
                cmdBufferStarted = false;

                bool ok = stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr);
                TEST(ok);
            }

            // Fill staging buffer.
            {
                assert(size % sizeof(uint32_t) == 0);
                uint32_t *stagingValPtr = (uint32_t *)stagingBufMappedPtr;
                uint32_t val = currAllocInfo.m_StartValue;
                for(size_t i = 0; i < size / sizeof(uint32_t); ++i)
                {
                    *stagingValPtr = val;
                    ++stagingValPtr;
                    ++val;
                }
            }

            // Issue copy command from staging buffer to destination buffer.
            if(!cmdBufferStarted)
            {
                cmdBufferStarted = true;
                BeginSingleTimeCommands();
            }


            // Transfer to transfer dst layout
            VkImageSubresourceRange subresourceRange = {
                VK_IMAGE_ASPECT_COLOR_BIT,
                0, VK_REMAINING_MIP_LEVELS,
                0, VK_REMAINING_ARRAY_LAYERS
            };

            VkImageMemoryBarrier barrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER };
            barrier.srcAccessMask = 0;
            barrier.dstAccessMask = 0;
            barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
            barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
            barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
            barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
            barrier.image = currAllocInfo.m_Image;
            barrier.subresourceRange = subresourceRange;

            // NOTE(review): src=BOTTOM_OF_PIPE / dst=TOP_OF_PIPE is an unusual
            // stage pairing for a pre-copy layout transition — confirm it is
            // intentional for this test harness.
            vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0,
                0, nullptr,
                0, nullptr,
                1, &barrier);

            // Copy image data
            VkBufferImageCopy copy = {};
            copy.bufferOffset = 0;
            copy.bufferRowLength = 0;
            copy.bufferImageHeight = 0;
            copy.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
            copy.imageSubresource.layerCount = 1;
            copy.imageExtent = currAllocInfo.m_ImageInfo.extent;

            vkCmdCopyBufferToImage(g_hTemporaryCommandBuffer, stagingBuf, currAllocInfo.m_Image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &copy);

            // Transfer to desired layout
            barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
            barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
            barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
            barrier.newLayout = currAllocInfo.m_ImageLayout;

            vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0,
                0, nullptr,
                0, nullptr,
                1, &barrier);
        }
    }

    // Submit any copies still recorded in the open command buffer.
    if(cmdBufferStarted)
    {
        EndSingleTimeCommands();
        stagingBufs.ReleaseAllBuffers();
    }
}
1024
ValidateGpuData(const AllocInfo * allocInfo,size_t allocInfoCount)1025 static void ValidateGpuData(const AllocInfo* allocInfo, size_t allocInfoCount)
1026 {
1027 StagingBufferCollection stagingBufs;
1028
1029 bool cmdBufferStarted = false;
1030 size_t validateAllocIndexOffset = 0;
1031 std::vector<void*> validateStagingBuffers;
1032 for(size_t allocInfoIndex = 0; allocInfoIndex < allocInfoCount; ++allocInfoIndex)
1033 {
1034 const AllocInfo& currAllocInfo = allocInfo[allocInfoIndex];
1035 if(currAllocInfo.m_Buffer)
1036 {
1037 const VkDeviceSize size = currAllocInfo.m_BufferInfo.size;
1038
1039 VkBuffer stagingBuf = VK_NULL_HANDLE;
1040 void* stagingBufMappedPtr = nullptr;
1041 if(!stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr))
1042 {
1043 TEST(cmdBufferStarted);
1044 EndSingleTimeCommands();
1045 cmdBufferStarted = false;
1046
1047 for(size_t validateIndex = 0;
1048 validateIndex < validateStagingBuffers.size();
1049 ++validateIndex)
1050 {
1051 const size_t validateAllocIndex = validateIndex + validateAllocIndexOffset;
1052 const VkDeviceSize validateSize = allocInfo[validateAllocIndex].m_BufferInfo.size;
1053 TEST(validateSize % sizeof(uint32_t) == 0);
1054 const uint32_t* stagingValPtr = (const uint32_t*)validateStagingBuffers[validateIndex];
1055 uint32_t val = allocInfo[validateAllocIndex].m_StartValue;
1056 bool valid = true;
1057 for(size_t i = 0; i < validateSize / sizeof(uint32_t); ++i)
1058 {
1059 if(*stagingValPtr != val)
1060 {
1061 valid = false;
1062 break;
1063 }
1064 ++stagingValPtr;
1065 ++val;
1066 }
1067 TEST(valid);
1068 }
1069
1070 stagingBufs.ReleaseAllBuffers();
1071
1072 validateAllocIndexOffset = allocInfoIndex;
1073 validateStagingBuffers.clear();
1074
1075 bool ok = stagingBufs.AcquireBuffer(size, stagingBuf, stagingBufMappedPtr);
1076 TEST(ok);
1077 }
1078
1079 // Issue copy command from staging buffer to destination buffer.
1080 if(!cmdBufferStarted)
1081 {
1082 cmdBufferStarted = true;
1083 BeginSingleTimeCommands();
1084 }
1085
1086 VkBufferCopy copy = {};
1087 copy.srcOffset = 0;
1088 copy.dstOffset = 0;
1089 copy.size = size;
1090 vkCmdCopyBuffer(g_hTemporaryCommandBuffer, currAllocInfo.m_Buffer, stagingBuf, 1, ©);
1091
1092 // Sava mapped pointer for later validation.
1093 validateStagingBuffers.push_back(stagingBufMappedPtr);
1094 }
1095 else
1096 {
1097 TEST(0 && "Images not currently supported.");
1098 }
1099 }
1100
1101 if(cmdBufferStarted)
1102 {
1103 EndSingleTimeCommands();
1104
1105 for(size_t validateIndex = 0;
1106 validateIndex < validateStagingBuffers.size();
1107 ++validateIndex)
1108 {
1109 const size_t validateAllocIndex = validateIndex + validateAllocIndexOffset;
1110 const VkDeviceSize validateSize = allocInfo[validateAllocIndex].m_BufferInfo.size;
1111 TEST(validateSize % sizeof(uint32_t) == 0);
1112 const uint32_t* stagingValPtr = (const uint32_t*)validateStagingBuffers[validateIndex];
1113 uint32_t val = allocInfo[validateAllocIndex].m_StartValue;
1114 bool valid = true;
1115 for(size_t i = 0; i < validateSize / sizeof(uint32_t); ++i)
1116 {
1117 if(*stagingValPtr != val)
1118 {
1119 valid = false;
1120 break;
1121 }
1122 ++stagingValPtr;
1123 ++val;
1124 }
1125 TEST(valid);
1126 }
1127
1128 stagingBufs.ReleaseAllBuffers();
1129 }
1130 }
1131
GetMemReq(VmaAllocationCreateInfo & outMemReq)1132 static void GetMemReq(VmaAllocationCreateInfo& outMemReq)
1133 {
1134 outMemReq = {};
1135 outMemReq.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
1136 //outMemReq.flags = VMA_ALLOCATION_CREATE_PERSISTENT_MAP_BIT;
1137 }
1138
CreateBuffer(VmaPool pool,const VkBufferCreateInfo & bufCreateInfo,bool persistentlyMapped,AllocInfo & outAllocInfo)1139 static void CreateBuffer(
1140 VmaPool pool,
1141 const VkBufferCreateInfo& bufCreateInfo,
1142 bool persistentlyMapped,
1143 AllocInfo& outAllocInfo)
1144 {
1145 outAllocInfo = {};
1146 outAllocInfo.m_BufferInfo = bufCreateInfo;
1147
1148 VmaAllocationCreateInfo allocCreateInfo = {};
1149 allocCreateInfo.pool = pool;
1150 if(persistentlyMapped)
1151 allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
1152
1153 VmaAllocationInfo vmaAllocInfo = {};
1154 ERR_GUARD_VULKAN( vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &outAllocInfo.m_Buffer, &outAllocInfo.m_Allocation, &vmaAllocInfo) );
1155
1156 // Setup StartValue and fill.
1157 {
1158 outAllocInfo.m_StartValue = (uint32_t)rand();
1159 uint32_t* data = (uint32_t*)vmaAllocInfo.pMappedData;
1160 TEST((data != nullptr) == persistentlyMapped);
1161 if(!persistentlyMapped)
1162 {
1163 ERR_GUARD_VULKAN( vmaMapMemory(g_hAllocator, outAllocInfo.m_Allocation, (void**)&data) );
1164 }
1165
1166 uint32_t value = outAllocInfo.m_StartValue;
1167 TEST(bufCreateInfo.size % 4 == 0);
1168 for(size_t i = 0; i < bufCreateInfo.size / sizeof(uint32_t); ++i)
1169 data[i] = value++;
1170
1171 if(!persistentlyMapped)
1172 vmaUnmapMemory(g_hAllocator, outAllocInfo.m_Allocation);
1173 }
1174 }
1175
CreateAllocation(AllocInfo & outAllocation)1176 static void CreateAllocation(AllocInfo& outAllocation)
1177 {
1178 outAllocation.m_Allocation = nullptr;
1179 outAllocation.m_Buffer = nullptr;
1180 outAllocation.m_Image = nullptr;
1181 outAllocation.m_StartValue = (uint32_t)rand();
1182
1183 VmaAllocationCreateInfo vmaMemReq;
1184 GetMemReq(vmaMemReq);
1185
1186 VmaAllocationInfo allocInfo;
1187
1188 const bool isBuffer = true;//(rand() & 0x1) != 0;
1189 const bool isLarge = (rand() % 16) == 0;
1190 if(isBuffer)
1191 {
1192 const uint32_t bufferSize = isLarge ?
1193 (rand() % 10 + 1) * (1024 * 1024) : // 1 MB ... 10 MB
1194 (rand() % 1024 + 1) * 1024; // 1 KB ... 1 MB
1195
1196 VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
1197 bufferInfo.size = bufferSize;
1198 bufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
1199
1200 VkResult res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &vmaMemReq, &outAllocation.m_Buffer, &outAllocation.m_Allocation, &allocInfo);
1201 outAllocation.m_BufferInfo = bufferInfo;
1202 TEST(res == VK_SUCCESS);
1203 }
1204 else
1205 {
1206 const uint32_t imageSizeX = isLarge ?
1207 1024 + rand() % (4096 - 1024) : // 1024 ... 4096
1208 rand() % 1024 + 1; // 1 ... 1024
1209 const uint32_t imageSizeY = isLarge ?
1210 1024 + rand() % (4096 - 1024) : // 1024 ... 4096
1211 rand() % 1024 + 1; // 1 ... 1024
1212
1213 VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
1214 imageInfo.imageType = VK_IMAGE_TYPE_2D;
1215 imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
1216 imageInfo.extent.width = imageSizeX;
1217 imageInfo.extent.height = imageSizeY;
1218 imageInfo.extent.depth = 1;
1219 imageInfo.mipLevels = 1;
1220 imageInfo.arrayLayers = 1;
1221 imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
1222 imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
1223 imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
1224 imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
1225
1226 VkResult res = vmaCreateImage(g_hAllocator, &imageInfo, &vmaMemReq, &outAllocation.m_Image, &outAllocation.m_Allocation, &allocInfo);
1227 outAllocation.m_ImageInfo = imageInfo;
1228 TEST(res == VK_SUCCESS);
1229 }
1230
1231 uint32_t* data = (uint32_t*)allocInfo.pMappedData;
1232 if(allocInfo.pMappedData == nullptr)
1233 {
1234 VkResult res = vmaMapMemory(g_hAllocator, outAllocation.m_Allocation, (void**)&data);
1235 TEST(res == VK_SUCCESS);
1236 }
1237
1238 uint32_t value = outAllocation.m_StartValue;
1239 TEST(allocInfo.size % 4 == 0);
1240 for(size_t i = 0; i < allocInfo.size / sizeof(uint32_t); ++i)
1241 data[i] = value++;
1242
1243 if(allocInfo.pMappedData == nullptr)
1244 vmaUnmapMemory(g_hAllocator, outAllocation.m_Allocation);
1245 }
1246
DestroyAllocation(const AllocInfo & allocation)1247 static void DestroyAllocation(const AllocInfo& allocation)
1248 {
1249 if(allocation.m_Buffer)
1250 vmaDestroyBuffer(g_hAllocator, allocation.m_Buffer, allocation.m_Allocation);
1251 else
1252 vmaDestroyImage(g_hAllocator, allocation.m_Image, allocation.m_Allocation);
1253 }
1254
DestroyAllAllocations(std::vector<AllocInfo> & allocations)1255 static void DestroyAllAllocations(std::vector<AllocInfo>& allocations)
1256 {
1257 for(size_t i = allocations.size(); i--; )
1258 DestroyAllocation(allocations[i]);
1259 allocations.clear();
1260 }
1261
ValidateAllocationData(const AllocInfo & allocation)1262 static void ValidateAllocationData(const AllocInfo& allocation)
1263 {
1264 VmaAllocationInfo allocInfo;
1265 vmaGetAllocationInfo(g_hAllocator, allocation.m_Allocation, &allocInfo);
1266
1267 uint32_t* data = (uint32_t*)allocInfo.pMappedData;
1268 if(allocInfo.pMappedData == nullptr)
1269 {
1270 VkResult res = vmaMapMemory(g_hAllocator, allocation.m_Allocation, (void**)&data);
1271 TEST(res == VK_SUCCESS);
1272 }
1273
1274 uint32_t value = allocation.m_StartValue;
1275 bool ok = true;
1276 size_t i;
1277 TEST(allocInfo.size % 4 == 0);
1278 for(i = 0; i < allocInfo.size / sizeof(uint32_t); ++i)
1279 {
1280 if(data[i] != value++)
1281 {
1282 ok = false;
1283 break;
1284 }
1285 }
1286 TEST(ok);
1287
1288 if(allocInfo.pMappedData == nullptr)
1289 vmaUnmapMemory(g_hAllocator, allocation.m_Allocation);
1290 }
1291
RecreateAllocationResource(AllocInfo & allocation)1292 static void RecreateAllocationResource(AllocInfo& allocation)
1293 {
1294 VmaAllocationInfo allocInfo;
1295 vmaGetAllocationInfo(g_hAllocator, allocation.m_Allocation, &allocInfo);
1296
1297 if(allocation.m_Buffer)
1298 {
1299 vkDestroyBuffer(g_hDevice, allocation.m_Buffer, g_Allocs);
1300
1301 VkResult res = vkCreateBuffer(g_hDevice, &allocation.m_BufferInfo, g_Allocs, &allocation.m_Buffer);
1302 TEST(res == VK_SUCCESS);
1303
1304 // Just to silence validation layer warnings.
1305 VkMemoryRequirements vkMemReq;
1306 vkGetBufferMemoryRequirements(g_hDevice, allocation.m_Buffer, &vkMemReq);
1307 TEST(vkMemReq.size >= allocation.m_BufferInfo.size);
1308
1309 res = vmaBindBufferMemory(g_hAllocator, allocation.m_Allocation, allocation.m_Buffer);
1310 TEST(res == VK_SUCCESS);
1311 }
1312 else
1313 {
1314 vkDestroyImage(g_hDevice, allocation.m_Image, g_Allocs);
1315
1316 VkResult res = vkCreateImage(g_hDevice, &allocation.m_ImageInfo, g_Allocs, &allocation.m_Image);
1317 TEST(res == VK_SUCCESS);
1318
1319 // Just to silence validation layer warnings.
1320 VkMemoryRequirements vkMemReq;
1321 vkGetImageMemoryRequirements(g_hDevice, allocation.m_Image, &vkMemReq);
1322
1323 res = vmaBindImageMemory(g_hAllocator, allocation.m_Allocation, allocation.m_Image);
1324 TEST(res == VK_SUCCESS);
1325 }
1326 }
1327
Defragment(AllocInfo * allocs,size_t allocCount,const VmaDefragmentationInfo * defragmentationInfo=nullptr,VmaDefragmentationStats * defragmentationStats=nullptr)1328 static void Defragment(AllocInfo* allocs, size_t allocCount,
1329 const VmaDefragmentationInfo* defragmentationInfo = nullptr,
1330 VmaDefragmentationStats* defragmentationStats = nullptr)
1331 {
1332 std::vector<VmaAllocation> vmaAllocs(allocCount);
1333 for(size_t i = 0; i < allocCount; ++i)
1334 vmaAllocs[i] = allocs[i].m_Allocation;
1335
1336 std::vector<VkBool32> allocChanged(allocCount);
1337
1338 ERR_GUARD_VULKAN( vmaDefragment(g_hAllocator, vmaAllocs.data(), allocCount, allocChanged.data(),
1339 defragmentationInfo, defragmentationStats) );
1340
1341 for(size_t i = 0; i < allocCount; ++i)
1342 {
1343 if(allocChanged[i])
1344 {
1345 RecreateAllocationResource(allocs[i]);
1346 }
1347 }
1348 }
1349
ValidateAllocationsData(const AllocInfo * allocs,size_t allocCount)1350 static void ValidateAllocationsData(const AllocInfo* allocs, size_t allocCount)
1351 {
1352 std::for_each(allocs, allocs + allocCount, [](const AllocInfo& allocInfo) {
1353 ValidateAllocationData(allocInfo);
1354 });
1355 }
1356
TestDefragmentationSimple()1357 void TestDefragmentationSimple()
1358 {
1359 wprintf(L"Test defragmentation simple\n");
1360
1361 RandomNumberGenerator rand(667);
1362
1363 const VkDeviceSize BUF_SIZE = 0x10000;
1364 const VkDeviceSize BLOCK_SIZE = BUF_SIZE * 8;
1365
1366 const VkDeviceSize MIN_BUF_SIZE = 32;
1367 const VkDeviceSize MAX_BUF_SIZE = BUF_SIZE * 4;
1368 auto RandomBufSize = [&]() -> VkDeviceSize {
1369 return align_up<VkDeviceSize>(rand.Generate() % (MAX_BUF_SIZE - MIN_BUF_SIZE + 1) + MIN_BUF_SIZE, 32);
1370 };
1371
1372 VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
1373 bufCreateInfo.size = BUF_SIZE;
1374 bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
1375
1376 VmaAllocationCreateInfo exampleAllocCreateInfo = {};
1377 exampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
1378
1379 uint32_t memTypeIndex = UINT32_MAX;
1380 vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &bufCreateInfo, &exampleAllocCreateInfo, &memTypeIndex);
1381
1382 VmaPoolCreateInfo poolCreateInfo = {};
1383 poolCreateInfo.blockSize = BLOCK_SIZE;
1384 poolCreateInfo.memoryTypeIndex = memTypeIndex;
1385
1386 VmaPool pool;
1387 ERR_GUARD_VULKAN( vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool) );
1388
1389 // Defragmentation of empty pool.
1390 {
1391 VmaDefragmentationInfo2 defragInfo = {};
1392 defragInfo.maxCpuBytesToMove = VK_WHOLE_SIZE;
1393 defragInfo.maxCpuAllocationsToMove = UINT32_MAX;
1394 defragInfo.poolCount = 1;
1395 defragInfo.pPools = &pool;
1396
1397 VmaDefragmentationStats defragStats = {};
1398 VmaDefragmentationContext defragCtx = nullptr;
1399 VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &defragStats, &defragCtx);
1400 TEST(res >= VK_SUCCESS);
1401 vmaDefragmentationEnd(g_hAllocator, defragCtx);
1402 TEST(defragStats.allocationsMoved == 0 && defragStats.bytesFreed == 0 &&
1403 defragStats.bytesMoved == 0 && defragStats.deviceMemoryBlocksFreed == 0);
1404 }
1405
1406 std::vector<AllocInfo> allocations;
1407
1408 // persistentlyMappedOption = 0 - not persistently mapped.
1409 // persistentlyMappedOption = 1 - persistently mapped.
1410 for(uint32_t persistentlyMappedOption = 0; persistentlyMappedOption < 2; ++persistentlyMappedOption)
1411 {
1412 wprintf(L" Persistently mapped option = %u\n", persistentlyMappedOption);
1413 const bool persistentlyMapped = persistentlyMappedOption != 0;
1414
1415 // # Test 1
1416 // Buffers of fixed size.
1417 // Fill 2 blocks. Remove odd buffers. Defragment everything.
1418 // Expected result: at least 1 block freed.
1419 {
1420 for(size_t i = 0; i < BLOCK_SIZE / BUF_SIZE * 2; ++i)
1421 {
1422 AllocInfo allocInfo;
1423 CreateBuffer(pool, bufCreateInfo, persistentlyMapped, allocInfo);
1424 allocations.push_back(allocInfo);
1425 }
1426
1427 for(size_t i = 1; i < allocations.size(); ++i)
1428 {
1429 DestroyAllocation(allocations[i]);
1430 allocations.erase(allocations.begin() + i);
1431 }
1432
1433 VmaDefragmentationStats defragStats;
1434 Defragment(allocations.data(), allocations.size(), nullptr, &defragStats);
1435 TEST(defragStats.allocationsMoved > 0 && defragStats.bytesMoved > 0);
1436 TEST(defragStats.deviceMemoryBlocksFreed >= 1);
1437
1438 ValidateAllocationsData(allocations.data(), allocations.size());
1439
1440 DestroyAllAllocations(allocations);
1441 }
1442
1443 // # Test 2
1444 // Buffers of fixed size.
1445 // Fill 2 blocks. Remove odd buffers. Defragment one buffer at time.
1446 // Expected result: Each of 4 interations makes some progress.
1447 {
1448 for(size_t i = 0; i < BLOCK_SIZE / BUF_SIZE * 2; ++i)
1449 {
1450 AllocInfo allocInfo;
1451 CreateBuffer(pool, bufCreateInfo, persistentlyMapped, allocInfo);
1452 allocations.push_back(allocInfo);
1453 }
1454
1455 for(size_t i = 1; i < allocations.size(); ++i)
1456 {
1457 DestroyAllocation(allocations[i]);
1458 allocations.erase(allocations.begin() + i);
1459 }
1460
1461 VmaDefragmentationInfo defragInfo = {};
1462 defragInfo.maxAllocationsToMove = 1;
1463 defragInfo.maxBytesToMove = BUF_SIZE;
1464
1465 for(size_t i = 0; i < BLOCK_SIZE / BUF_SIZE / 2; ++i)
1466 {
1467 VmaDefragmentationStats defragStats;
1468 Defragment(allocations.data(), allocations.size(), &defragInfo, &defragStats);
1469 TEST(defragStats.allocationsMoved > 0 && defragStats.bytesMoved > 0);
1470 }
1471
1472 ValidateAllocationsData(allocations.data(), allocations.size());
1473
1474 DestroyAllAllocations(allocations);
1475 }
1476
1477 // # Test 3
1478 // Buffers of variable size.
1479 // Create a number of buffers. Remove some percent of them.
1480 // Defragment while having some percent of them unmovable.
1481 // Expected result: Just simple validation.
1482 {
1483 for(size_t i = 0; i < 100; ++i)
1484 {
1485 VkBufferCreateInfo localBufCreateInfo = bufCreateInfo;
1486 localBufCreateInfo.size = RandomBufSize();
1487
1488 AllocInfo allocInfo;
1489 CreateBuffer(pool, bufCreateInfo, persistentlyMapped, allocInfo);
1490 allocations.push_back(allocInfo);
1491 }
1492
1493 const uint32_t percentToDelete = 60;
1494 const size_t numberToDelete = allocations.size() * percentToDelete / 100;
1495 for(size_t i = 0; i < numberToDelete; ++i)
1496 {
1497 size_t indexToDelete = rand.Generate() % (uint32_t)allocations.size();
1498 DestroyAllocation(allocations[indexToDelete]);
1499 allocations.erase(allocations.begin() + indexToDelete);
1500 }
1501
1502 // Non-movable allocations will be at the beginning of allocations array.
1503 const uint32_t percentNonMovable = 20;
1504 const size_t numberNonMovable = allocations.size() * percentNonMovable / 100;
1505 for(size_t i = 0; i < numberNonMovable; ++i)
1506 {
1507 size_t indexNonMovable = i + rand.Generate() % (uint32_t)(allocations.size() - i);
1508 if(indexNonMovable != i)
1509 std::swap(allocations[i], allocations[indexNonMovable]);
1510 }
1511
1512 VmaDefragmentationStats defragStats;
1513 Defragment(
1514 allocations.data() + numberNonMovable,
1515 allocations.size() - numberNonMovable,
1516 nullptr, &defragStats);
1517
1518 ValidateAllocationsData(allocations.data(), allocations.size());
1519
1520 DestroyAllAllocations(allocations);
1521 }
1522 }
1523
1524 /*
1525 Allocation that must be move to an overlapping place using memmove().
1526 Create 2 buffers, second slightly bigger than the first. Delete first. Then defragment.
1527 */
1528 if(VMA_DEBUG_MARGIN == 0) // FAST algorithm works only when DEBUG_MARGIN disabled.
1529 {
1530 AllocInfo allocInfo[2];
1531
1532 bufCreateInfo.size = BUF_SIZE;
1533 CreateBuffer(pool, bufCreateInfo, false, allocInfo[0]);
1534 const VkDeviceSize biggerBufSize = BUF_SIZE + BUF_SIZE / 256;
1535 bufCreateInfo.size = biggerBufSize;
1536 CreateBuffer(pool, bufCreateInfo, false, allocInfo[1]);
1537
1538 DestroyAllocation(allocInfo[0]);
1539
1540 VmaDefragmentationStats defragStats;
1541 Defragment(&allocInfo[1], 1, nullptr, &defragStats);
1542 // If this fails, it means we couldn't do memmove with overlapping regions.
1543 TEST(defragStats.allocationsMoved == 1 && defragStats.bytesMoved > 0);
1544
1545 ValidateAllocationsData(&allocInfo[1], 1);
1546 DestroyAllocation(allocInfo[1]);
1547 }
1548
1549 vmaDestroyPool(g_hAllocator, pool);
1550 }
1551
// Verifies that defragmenting a whole pool (VmaDefragmentationInfo2::pPools)
// produces exactly the same statistics as passing the pool's individual
// allocations via pAllocations. Runs the same scenario twice — once per input
// style — and compares all four stat fields at the end.
void TestDefragmentationWholePool()
{
    wprintf(L"Test defragmentation whole pool\n");

    RandomNumberGenerator rand(668);

    const VkDeviceSize BUF_SIZE = 0x10000;
    const VkDeviceSize BLOCK_SIZE = BUF_SIZE * 8;

    VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
    bufCreateInfo.size = BUF_SIZE;
    bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;

    VmaAllocationCreateInfo exampleAllocCreateInfo = {};
    exampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;

    uint32_t memTypeIndex = UINT32_MAX;
    vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &bufCreateInfo, &exampleAllocCreateInfo, &memTypeIndex);

    VmaPoolCreateInfo poolCreateInfo = {};
    poolCreateInfo.blockSize = BLOCK_SIZE;
    poolCreateInfo.memoryTypeIndex = memTypeIndex;

    // defragStats[0]: whole-pool path; defragStats[1]: explicit-allocations path.
    VmaDefragmentationStats defragStats[2];
    for(size_t caseIndex = 0; caseIndex < 2; ++caseIndex)
    {
        // A fresh pool per case so both runs start from an identical layout
        // (same RNG-independent creation pattern).
        VmaPool pool;
        ERR_GUARD_VULKAN( vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool) );

        std::vector<AllocInfo> allocations;

        // Buffers of fixed size.
        // Fill 2 blocks. Remove odd buffers. Defragment all of them.
        for(size_t i = 0; i < BLOCK_SIZE / BUF_SIZE * 2; ++i)
        {
            AllocInfo allocInfo;
            CreateBuffer(pool, bufCreateInfo, false, allocInfo);
            allocations.push_back(allocInfo);
        }

        // Erasing at every second index removes the "odd" buffers, creating holes.
        for(size_t i = 1; i < allocations.size(); ++i)
        {
            DestroyAllocation(allocations[i]);
            allocations.erase(allocations.begin() + i);
        }

        VmaDefragmentationInfo2 defragInfo = {};
        defragInfo.maxCpuAllocationsToMove = UINT32_MAX;
        defragInfo.maxCpuBytesToMove = VK_WHOLE_SIZE;
        std::vector<VmaAllocation> allocationsToDefrag;
        if(caseIndex == 0)
        {
            // Case 0: hand the whole pool to the defragmenter.
            defragInfo.poolCount = 1;
            defragInfo.pPools = &pool;
        }
        else
        {
            // Case 1: hand over the same set as an explicit allocation list.
            const size_t allocCount = allocations.size();
            allocationsToDefrag.resize(allocCount);
            std::transform(
                allocations.begin(), allocations.end(),
                allocationsToDefrag.begin(),
                [](const AllocInfo& allocInfo) { return allocInfo.m_Allocation; });
            defragInfo.allocationCount = (uint32_t)allocCount;
            defragInfo.pAllocations = allocationsToDefrag.data();
        }

        VmaDefragmentationContext defragCtx = VK_NULL_HANDLE;
        VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &defragStats[caseIndex], &defragCtx);
        TEST(res >= VK_SUCCESS);
        vmaDefragmentationEnd(g_hAllocator, defragCtx);

        TEST(defragStats[caseIndex].allocationsMoved > 0 && defragStats[caseIndex].bytesMoved > 0);

        ValidateAllocationsData(allocations.data(), allocations.size());

        DestroyAllAllocations(allocations);

        vmaDestroyPool(g_hAllocator, pool);
    }

    // Both input styles must have produced identical results.
    TEST(defragStats[0].bytesMoved == defragStats[1].bytesMoved);
    TEST(defragStats[0].allocationsMoved == defragStats[1].allocationsMoved);
    TEST(defragStats[0].bytesFreed == defragStats[1].bytesFreed);
    TEST(defragStats[0].deviceMemoryBlocksFreed == defragStats[1].deviceMemoryBlocksFreed);
}
1638
// Full end-to-end test of legacy vmaDefragment(): create 400 random
// allocations, destroy ~80% of them at random to create fragmentation,
// defragment with no limits, recreate the Vulkan resources of moved
// allocations, and validate that every allocation's data pattern survived.
void TestDefragmentationFull()
{
    std::vector<AllocInfo> allocations;

    // Create initial allocations.
    for(size_t i = 0; i < 400; ++i)
    {
        AllocInfo allocation;
        CreateAllocation(allocation);
        allocations.push_back(allocation);
    }

    // Delete random allocations
    const size_t allocationsToDeletePercent = 80;
    size_t allocationsToDelete = allocations.size() * allocationsToDeletePercent / 100;
    for(size_t i = 0; i < allocationsToDelete; ++i)
    {
        size_t index = (size_t)rand() % allocations.size();
        DestroyAllocation(allocations[index]);
        allocations.erase(allocations.begin() + index);
    }

    // Sanity-check the survivors before defragmenting.
    for(size_t i = 0; i < allocations.size(); ++i)
        ValidateAllocationData(allocations[i]);

    //SaveAllocatorStatsToFile(L"Before.csv");

    {
        std::vector<VmaAllocation> vmaAllocations(allocations.size());
        for(size_t i = 0; i < allocations.size(); ++i)
            vmaAllocations[i] = allocations[i].m_Allocation;

        // NOTE(review): if nonMovablePercent were raised above 0, the random
        // erases below would break the index correspondence between
        // vmaAllocations and allocations, so allocationsChanged[i] would be
        // paired with the wrong allocations[i] further down. Harmless today
        // only because nonMovableCount == 0 — confirm before changing this.
        const size_t nonMovablePercent = 0;
        size_t nonMovableCount = vmaAllocations.size() * nonMovablePercent / 100;
        for(size_t i = 0; i < nonMovableCount; ++i)
        {
            size_t index = (size_t)rand() % vmaAllocations.size();
            vmaAllocations.erase(vmaAllocations.begin() + index);
        }

        const uint32_t defragCount = 1;
        for(uint32_t defragIndex = 0; defragIndex < defragCount; ++defragIndex)
        {
            std::vector<VkBool32> allocationsChanged(vmaAllocations.size());

            // No limits: allow the defragmenter to move everything in one pass.
            VmaDefragmentationInfo defragmentationInfo;
            defragmentationInfo.maxAllocationsToMove = UINT_MAX;
            defragmentationInfo.maxBytesToMove = SIZE_MAX;

            wprintf(L"Defragmentation #%u\n", defragIndex);

            time_point begTime = std::chrono::high_resolution_clock::now();

            VmaDefragmentationStats stats;
            VkResult res = vmaDefragment(g_hAllocator, vmaAllocations.data(), vmaAllocations.size(), allocationsChanged.data(), &defragmentationInfo, &stats);
            TEST(res >= 0);

            float defragmentDuration = ToFloatSeconds(std::chrono::high_resolution_clock::now() - begTime);

            wprintf(L"Moved allocations %u, bytes %llu\n", stats.allocationsMoved, stats.bytesMoved);
            wprintf(L"Freed blocks %u, bytes %llu\n", stats.deviceMemoryBlocksFreed, stats.bytesFreed);
            wprintf(L"Time: %.2f s\n", defragmentDuration);

            // Recreate and rebind the Vulkan resource of every moved allocation.
            for(size_t i = 0; i < vmaAllocations.size(); ++i)
            {
                if(allocationsChanged[i])
                {
                    RecreateAllocationResource(allocations[i]);
                }
            }

            // Data must be intact after the moves.
            for(size_t i = 0; i < allocations.size(); ++i)
                ValidateAllocationData(allocations[i]);

            //wchar_t fileName[MAX_PATH];
            //swprintf(fileName, MAX_PATH, L"After_%02u.csv", defragIndex);
            //SaveAllocatorStatsToFile(fileName);
        }
    }

    // Destroy all remaining allocations.
    DestroyAllAllocations(allocations);
}
1722
// GPU-side defragmentation test: fills ~3 x 256 MB of GPU-only memory with
// randomly sized buffers, frees 70% of them, then defragments using a command
// buffer (vmaDefragmentationBegin with commandBuffer set) so data is moved by
// the GPU. A small percentage of buffers is tagged non-movable via pUserData
// and excluded from the defragmentation set. Data is uploaded before and
// validated after; allocator stats are dumped to JSON before and after.
static void TestDefragmentationGpu()
{
    wprintf(L"Test defragmentation GPU\n");

    std::vector<AllocInfo> allocations;

    // Create that many allocations to surely fill 3 new blocks of 256 MB.
    const VkDeviceSize bufSizeMin = 5ull * 1024 * 1024;
    const VkDeviceSize bufSizeMax = 10ull * 1024 * 1024;
    const VkDeviceSize totalSize = 3ull * 256 * 1024 * 1024;
    const size_t bufCount = (size_t)(totalSize / bufSizeMin);
    const size_t percentToLeave = 30;
    const size_t percentNonMovable = 3;
    RandomNumberGenerator rand = { 234522 };

    VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };

    VmaAllocationCreateInfo allocCreateInfo = {};
    allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
    allocCreateInfo.flags = 0;

    // Create all intended buffers.
    for(size_t i = 0; i < bufCount; ++i)
    {
        // Random size in [bufSizeMin, bufSizeMax), aligned up to 32 B.
        bufCreateInfo.size = align_up(rand.Generate() % (bufSizeMax - bufSizeMin) + bufSizeMin, 32ull);

        if(rand.Generate() % 100 < percentNonMovable)
        {
            // pUserData == 2 marks the buffer as non-movable for this test.
            bufCreateInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
                VK_BUFFER_USAGE_TRANSFER_DST_BIT |
                VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
            allocCreateInfo.pUserData = (void*)(uintptr_t)2;
        }
        else
        {
            // Different usage just to see different color in output from VmaDumpVis.
            bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT |
                VK_BUFFER_USAGE_TRANSFER_DST_BIT |
                VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
            // And in JSON dump. pUserData == 1 marks the buffer as movable.
            allocCreateInfo.pUserData = (void*)(uintptr_t)1;
        }

        AllocInfo alloc;
        alloc.CreateBuffer(bufCreateInfo, allocCreateInfo);
        alloc.m_StartValue = rand.Generate();
        allocations.push_back(alloc);
    }

    // Destroy some percentage of them.
    {
        const size_t buffersToDestroy = round_div<size_t>(bufCount * (100 - percentToLeave), 100);
        for(size_t i = 0; i < buffersToDestroy; ++i)
        {
            const size_t index = rand.Generate() % allocations.size();
            allocations[index].Destroy();
            allocations.erase(allocations.begin() + index);
        }
    }

    // Fill them with meaningful data.
    UploadGpuData(allocations.data(), allocations.size());

    wchar_t fileName[MAX_PATH];
    swprintf_s(fileName, L"GPU_defragmentation_A_before.json");
    SaveAllocatorStatsToFile(fileName);

    // Defragment using GPU only.
    {
        const size_t allocCount = allocations.size();

        std::vector<VmaAllocation> allocationPtrs;
        std::vector<VkBool32> allocationChanged;
        // Maps indices in the movable subset back to indices in `allocations`.
        std::vector<size_t> allocationOriginalIndex;

        // Select only the movable allocations (pUserData == 1).
        for(size_t i = 0; i < allocCount; ++i)
        {
            VmaAllocationInfo allocInfo = {};
            vmaGetAllocationInfo(g_hAllocator, allocations[i].m_Allocation, &allocInfo);
            if((uintptr_t)allocInfo.pUserData == 1) // Movable
            {
                allocationPtrs.push_back(allocations[i].m_Allocation);
                allocationChanged.push_back(VK_FALSE);
                allocationOriginalIndex.push_back(i);
            }
        }

        const size_t movableAllocCount = allocationPtrs.size();

        // GPU defragmentation records its copy commands into the command
        // buffer that is being recorded between Begin/EndSingleTimeCommands.
        BeginSingleTimeCommands();

        VmaDefragmentationInfo2 defragInfo = {};
        defragInfo.flags = 0;
        defragInfo.allocationCount = (uint32_t)movableAllocCount;
        defragInfo.pAllocations = allocationPtrs.data();
        defragInfo.pAllocationsChanged = allocationChanged.data();
        defragInfo.maxGpuBytesToMove = VK_WHOLE_SIZE;
        defragInfo.maxGpuAllocationsToMove = UINT32_MAX;
        defragInfo.commandBuffer = g_hTemporaryCommandBuffer;

        VmaDefragmentationStats stats = {};
        VmaDefragmentationContext ctx = VK_NULL_HANDLE;
        VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &stats, &ctx);
        TEST(res >= VK_SUCCESS);

        // Submit and wait for the recorded GPU copies before ending defragmentation.
        EndSingleTimeCommands();

        vmaDefragmentationEnd(g_hAllocator, ctx);

        for(size_t i = 0; i < movableAllocCount; ++i)
        {
            if(allocationChanged[i])
            {
                const size_t origAllocIndex = allocationOriginalIndex[i];
                RecreateAllocationResource(allocations[origAllocIndex]);
            }
        }

        // If corruption detection is enabled, GPU defragmentation may not work on
        // memory types that have this detection active, e.g. on Intel.
#if !defined(VMA_DEBUG_DETECT_CORRUPTION) || VMA_DEBUG_DETECT_CORRUPTION == 0
        TEST(stats.allocationsMoved > 0 && stats.bytesMoved > 0);
        TEST(stats.deviceMemoryBlocksFreed > 0 && stats.bytesFreed > 0);
#endif
    }

    // Read everything back and verify the data survived the moves.
    ValidateGpuData(allocations.data(), allocations.size());

    swprintf_s(fileName, L"GPU_defragmentation_B_after.json");
    SaveAllocatorStatsToFile(fileName);

    // Destroy all remaining buffers.
    for(size_t i = allocations.size(); i--; )
    {
        allocations[i].Destroy();
    }
}
1860
ProcessDefragmentationStepInfo(VmaDefragmentationPassInfo & stepInfo)1861 static void ProcessDefragmentationStepInfo(VmaDefragmentationPassInfo &stepInfo)
1862 {
1863 std::vector<VkImageMemoryBarrier> beginImageBarriers;
1864 std::vector<VkImageMemoryBarrier> finalizeImageBarriers;
1865
1866 VkPipelineStageFlags beginSrcStageMask = 0;
1867 VkPipelineStageFlags beginDstStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
1868
1869 VkPipelineStageFlags finalizeSrcStageMask = VK_PIPELINE_STAGE_TRANSFER_BIT;
1870 VkPipelineStageFlags finalizeDstStageMask = 0;
1871
1872 bool wantsMemoryBarrier = false;
1873
1874 VkMemoryBarrier beginMemoryBarrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER };
1875 VkMemoryBarrier finalizeMemoryBarrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER };
1876
1877 for(uint32_t i = 0; i < stepInfo.moveCount; ++i)
1878 {
1879 VmaAllocationInfo info;
1880 vmaGetAllocationInfo(g_hAllocator, stepInfo.pMoves[i].allocation, &info);
1881
1882 AllocInfo *allocInfo = (AllocInfo *)info.pUserData;
1883
1884 if(allocInfo->m_Image)
1885 {
1886 VkImage newImage;
1887
1888 const VkResult result = vkCreateImage(g_hDevice, &allocInfo->m_ImageInfo, g_Allocs, &newImage);
1889 TEST(result >= VK_SUCCESS);
1890
1891 vkBindImageMemory(g_hDevice, newImage, stepInfo.pMoves[i].memory, stepInfo.pMoves[i].offset);
1892 allocInfo->m_NewImage = newImage;
1893
1894 // Keep track of our pipeline stages that we need to wait/signal on
1895 beginSrcStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
1896 finalizeDstStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
1897
1898 // We need one pipeline barrier and two image layout transitions here
1899 // First we'll have to turn our newly created image into VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
1900 // And the second one is turning the old image into VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
1901
1902 VkImageSubresourceRange subresourceRange = {
1903 VK_IMAGE_ASPECT_COLOR_BIT,
1904 0, VK_REMAINING_MIP_LEVELS,
1905 0, VK_REMAINING_ARRAY_LAYERS
1906 };
1907
1908 VkImageMemoryBarrier barrier = { VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER };
1909 barrier.srcAccessMask = 0;
1910 barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
1911 barrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
1912 barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
1913 barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1914 barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
1915 barrier.image = newImage;
1916 barrier.subresourceRange = subresourceRange;
1917
1918 beginImageBarriers.push_back(barrier);
1919
1920 // Second barrier to convert the existing image. This one actually needs a real barrier
1921 barrier.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT;
1922 barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
1923 barrier.oldLayout = allocInfo->m_ImageLayout;
1924 barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
1925 barrier.image = allocInfo->m_Image;
1926
1927 beginImageBarriers.push_back(barrier);
1928
1929 // And lastly we need a barrier that turns our new image into the layout of the old one
1930 barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
1931 barrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT;
1932 barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
1933 barrier.newLayout = allocInfo->m_ImageLayout;
1934 barrier.image = newImage;
1935
1936 finalizeImageBarriers.push_back(barrier);
1937 }
1938 else if(allocInfo->m_Buffer)
1939 {
1940 VkBuffer newBuffer;
1941
1942 const VkResult result = vkCreateBuffer(g_hDevice, &allocInfo->m_BufferInfo, g_Allocs, &newBuffer);
1943 TEST(result >= VK_SUCCESS);
1944
1945 vkBindBufferMemory(g_hDevice, newBuffer, stepInfo.pMoves[i].memory, stepInfo.pMoves[i].offset);
1946 allocInfo->m_NewBuffer = newBuffer;
1947
1948 // Keep track of our pipeline stages that we need to wait/signal on
1949 beginSrcStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
1950 finalizeDstStageMask |= VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
1951
1952 beginMemoryBarrier.srcAccessMask |= VK_ACCESS_MEMORY_WRITE_BIT;
1953 beginMemoryBarrier.dstAccessMask |= VK_ACCESS_TRANSFER_READ_BIT;
1954
1955 finalizeMemoryBarrier.srcAccessMask |= VK_ACCESS_TRANSFER_WRITE_BIT;
1956 finalizeMemoryBarrier.dstAccessMask |= VK_ACCESS_MEMORY_READ_BIT;
1957
1958 wantsMemoryBarrier = true;
1959 }
1960 }
1961
1962 if(!beginImageBarriers.empty() || wantsMemoryBarrier)
1963 {
1964 const uint32_t memoryBarrierCount = wantsMemoryBarrier ? 1 : 0;
1965
1966 vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, beginSrcStageMask, beginDstStageMask, 0,
1967 memoryBarrierCount, &beginMemoryBarrier,
1968 0, nullptr,
1969 (uint32_t)beginImageBarriers.size(), beginImageBarriers.data());
1970 }
1971
1972 for(uint32_t i = 0; i < stepInfo.moveCount; ++ i)
1973 {
1974 VmaAllocationInfo info;
1975 vmaGetAllocationInfo(g_hAllocator, stepInfo.pMoves[i].allocation, &info);
1976
1977 AllocInfo *allocInfo = (AllocInfo *)info.pUserData;
1978
1979 if(allocInfo->m_Image)
1980 {
1981 std::vector<VkImageCopy> imageCopies;
1982
1983 // Copy all mips of the source image into the target image
1984 VkOffset3D offset = { 0, 0, 0 };
1985 VkExtent3D extent = allocInfo->m_ImageInfo.extent;
1986
1987 VkImageSubresourceLayers subresourceLayers = {
1988 VK_IMAGE_ASPECT_COLOR_BIT,
1989 0,
1990 0, 1
1991 };
1992
1993 for(uint32_t mip = 0; mip < allocInfo->m_ImageInfo.mipLevels; ++ mip)
1994 {
1995 subresourceLayers.mipLevel = mip;
1996
1997 VkImageCopy imageCopy{
1998 subresourceLayers,
1999 offset,
2000 subresourceLayers,
2001 offset,
2002 extent
2003 };
2004
2005 imageCopies.push_back(imageCopy);
2006
2007 extent.width = std::max(uint32_t(1), extent.width >> 1);
2008 extent.height = std::max(uint32_t(1), extent.height >> 1);
2009 extent.depth = std::max(uint32_t(1), extent.depth >> 1);
2010 }
2011
2012 vkCmdCopyImage(
2013 g_hTemporaryCommandBuffer,
2014 allocInfo->m_Image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
2015 allocInfo->m_NewImage, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
2016 (uint32_t)imageCopies.size(), imageCopies.data());
2017 }
2018 else if(allocInfo->m_Buffer)
2019 {
2020 VkBufferCopy region = {
2021 0,
2022 0,
2023 allocInfo->m_BufferInfo.size };
2024
2025 vkCmdCopyBuffer(g_hTemporaryCommandBuffer,
2026 allocInfo->m_Buffer, allocInfo->m_NewBuffer,
2027 1, ®ion);
2028 }
2029 }
2030
2031 if(!finalizeImageBarriers.empty() || wantsMemoryBarrier)
2032 {
2033 const uint32_t memoryBarrierCount = wantsMemoryBarrier ? 1 : 0;
2034
2035 vkCmdPipelineBarrier(g_hTemporaryCommandBuffer, finalizeSrcStageMask, finalizeDstStageMask, 0,
2036 memoryBarrierCount, &finalizeMemoryBarrier,
2037 0, nullptr,
2038 (uint32_t)finalizeImageBarriers.size(), finalizeImageBarriers.data());
2039 }
2040 }
2041
2042
TestDefragmentationIncrementalBasic()2043 static void TestDefragmentationIncrementalBasic()
2044 {
2045 wprintf(L"Test defragmentation incremental basic\n");
2046
2047 std::vector<AllocInfo> allocations;
2048
2049 // Create that many allocations to surely fill 3 new blocks of 256 MB.
2050 const std::array<uint32_t, 3> imageSizes = { 256, 512, 1024 };
2051 const VkDeviceSize bufSizeMin = 5ull * 1024 * 1024;
2052 const VkDeviceSize bufSizeMax = 10ull * 1024 * 1024;
2053 const VkDeviceSize totalSize = 3ull * 256 * 1024 * 1024;
2054 const size_t imageCount = totalSize / ((size_t)imageSizes[0] * imageSizes[0] * 4) / 2;
2055 const size_t bufCount = (size_t)(totalSize / bufSizeMin) / 2;
2056 const size_t percentToLeave = 30;
2057 RandomNumberGenerator rand = { 234522 };
2058
2059 VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
2060 imageInfo.imageType = VK_IMAGE_TYPE_2D;
2061 imageInfo.extent.depth = 1;
2062 imageInfo.mipLevels = 1;
2063 imageInfo.arrayLayers = 1;
2064 imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
2065 imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
2066 imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
2067 imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
2068 imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
2069
2070 VmaAllocationCreateInfo allocCreateInfo = {};
2071 allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
2072 allocCreateInfo.flags = 0;
2073
2074 // Create all intended images.
2075 for(size_t i = 0; i < imageCount; ++i)
2076 {
2077 const uint32_t size = imageSizes[rand.Generate() % 3];
2078
2079 imageInfo.extent.width = size;
2080 imageInfo.extent.height = size;
2081
2082 AllocInfo alloc;
2083 alloc.CreateImage(imageInfo, allocCreateInfo, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
2084 alloc.m_StartValue = 0;
2085
2086 allocations.push_back(alloc);
2087 }
2088
2089 // And all buffers
2090 VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2091
2092 for(size_t i = 0; i < bufCount; ++i)
2093 {
2094 bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
2095 bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
2096
2097 AllocInfo alloc;
2098 alloc.CreateBuffer(bufCreateInfo, allocCreateInfo);
2099 alloc.m_StartValue = 0;
2100
2101 allocations.push_back(alloc);
2102 }
2103
2104 // Destroy some percentage of them.
2105 {
2106 const size_t allocationsToDestroy = round_div<size_t>((imageCount + bufCount) * (100 - percentToLeave), 100);
2107 for(size_t i = 0; i < allocationsToDestroy; ++i)
2108 {
2109 const size_t index = rand.Generate() % allocations.size();
2110 allocations[index].Destroy();
2111 allocations.erase(allocations.begin() + index);
2112 }
2113 }
2114
2115 {
2116 // Set our user data pointers. A real application should probably be more clever here
2117 const size_t allocationCount = allocations.size();
2118 for(size_t i = 0; i < allocationCount; ++i)
2119 {
2120 AllocInfo &alloc = allocations[i];
2121 vmaSetAllocationUserData(g_hAllocator, alloc.m_Allocation, &alloc);
2122 }
2123 }
2124
2125 // Fill them with meaningful data.
2126 UploadGpuData(allocations.data(), allocations.size());
2127
2128 wchar_t fileName[MAX_PATH];
2129 swprintf_s(fileName, L"GPU_defragmentation_incremental_basic_A_before.json");
2130 SaveAllocatorStatsToFile(fileName);
2131
2132 // Defragment using GPU only.
2133 {
2134 const size_t allocCount = allocations.size();
2135
2136 std::vector<VmaAllocation> allocationPtrs;
2137
2138 for(size_t i = 0; i < allocCount; ++i)
2139 {
2140 allocationPtrs.push_back(allocations[i].m_Allocation);
2141 }
2142
2143 const size_t movableAllocCount = allocationPtrs.size();
2144
2145 VmaDefragmentationInfo2 defragInfo = {};
2146 defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_INCREMENTAL;
2147 defragInfo.allocationCount = (uint32_t)movableAllocCount;
2148 defragInfo.pAllocations = allocationPtrs.data();
2149 defragInfo.maxGpuBytesToMove = VK_WHOLE_SIZE;
2150 defragInfo.maxGpuAllocationsToMove = UINT32_MAX;
2151
2152 VmaDefragmentationStats stats = {};
2153 VmaDefragmentationContext ctx = VK_NULL_HANDLE;
2154 VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &stats, &ctx);
2155 TEST(res >= VK_SUCCESS);
2156
2157 res = VK_NOT_READY;
2158
2159 std::vector<VmaDefragmentationPassMoveInfo> moveInfo;
2160 moveInfo.resize(movableAllocCount);
2161
2162 while(res == VK_NOT_READY)
2163 {
2164 VmaDefragmentationPassInfo stepInfo = {};
2165 stepInfo.pMoves = moveInfo.data();
2166 stepInfo.moveCount = (uint32_t)moveInfo.size();
2167
2168 res = vmaBeginDefragmentationPass(g_hAllocator, ctx, &stepInfo);
2169 TEST(res >= VK_SUCCESS);
2170
2171 BeginSingleTimeCommands();
2172 std::vector<void*> newHandles;
2173 ProcessDefragmentationStepInfo(stepInfo);
2174 EndSingleTimeCommands();
2175
2176 res = vmaEndDefragmentationPass(g_hAllocator, ctx);
2177
2178 // Destroy old buffers/images and replace them with new handles.
2179 for(size_t i = 0; i < stepInfo.moveCount; ++i)
2180 {
2181 VmaAllocation const alloc = stepInfo.pMoves[i].allocation;
2182 VmaAllocationInfo vmaAllocInfo;
2183 vmaGetAllocationInfo(g_hAllocator, alloc, &vmaAllocInfo);
2184 AllocInfo* allocInfo = (AllocInfo*)vmaAllocInfo.pUserData;
2185 if(allocInfo->m_Buffer)
2186 {
2187 assert(allocInfo->m_NewBuffer && !allocInfo->m_Image && !allocInfo->m_NewImage);
2188 vkDestroyBuffer(g_hDevice, allocInfo->m_Buffer, g_Allocs);
2189 allocInfo->m_Buffer = allocInfo->m_NewBuffer;
2190 allocInfo->m_NewBuffer = VK_NULL_HANDLE;
2191 }
2192 else if(allocInfo->m_Image)
2193 {
2194 assert(allocInfo->m_NewImage && !allocInfo->m_Buffer && !allocInfo->m_NewBuffer);
2195 vkDestroyImage(g_hDevice, allocInfo->m_Image, g_Allocs);
2196 allocInfo->m_Image = allocInfo->m_NewImage;
2197 allocInfo->m_NewImage = VK_NULL_HANDLE;
2198 }
2199 else
2200 assert(0);
2201 }
2202 }
2203
2204 TEST(res >= VK_SUCCESS);
2205 vmaDefragmentationEnd(g_hAllocator, ctx);
2206
2207 // If corruption detection is enabled, GPU defragmentation may not work on
2208 // memory types that have this detection active, e.g. on Intel.
2209 #if !defined(VMA_DEBUG_DETECT_CORRUPTION) || VMA_DEBUG_DETECT_CORRUPTION == 0
2210 TEST(stats.allocationsMoved > 0 && stats.bytesMoved > 0);
2211 TEST(stats.deviceMemoryBlocksFreed > 0 && stats.bytesFreed > 0);
2212 #endif
2213 }
2214
2215 //ValidateGpuData(allocations.data(), allocations.size());
2216
2217 swprintf_s(fileName, L"GPU_defragmentation_incremental_basic_B_after.json");
2218 SaveAllocatorStatsToFile(fileName);
2219
2220 // Destroy all remaining buffers and images.
2221 for(size_t i = allocations.size(); i--; )
2222 {
2223 allocations[i].Destroy();
2224 }
2225 }
2226
TestDefragmentationIncrementalComplex()2227 void TestDefragmentationIncrementalComplex()
2228 {
2229 wprintf(L"Test defragmentation incremental complex\n");
2230
2231 std::vector<AllocInfo> allocations;
2232
2233 // Create that many allocations to surely fill 3 new blocks of 256 MB.
2234 const std::array<uint32_t, 3> imageSizes = { 256, 512, 1024 };
2235 const VkDeviceSize bufSizeMin = 5ull * 1024 * 1024;
2236 const VkDeviceSize bufSizeMax = 10ull * 1024 * 1024;
2237 const VkDeviceSize totalSize = 3ull * 256 * 1024 * 1024;
2238 const size_t imageCount = (size_t)(totalSize / (imageSizes[0] * imageSizes[0] * 4)) / 2;
2239 const size_t bufCount = (size_t)(totalSize / bufSizeMin) / 2;
2240 const size_t percentToLeave = 30;
2241 RandomNumberGenerator rand = { 234522 };
2242
2243 VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
2244 imageInfo.imageType = VK_IMAGE_TYPE_2D;
2245 imageInfo.extent.depth = 1;
2246 imageInfo.mipLevels = 1;
2247 imageInfo.arrayLayers = 1;
2248 imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
2249 imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
2250 imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
2251 imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
2252 imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;
2253
2254 VmaAllocationCreateInfo allocCreateInfo = {};
2255 allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
2256 allocCreateInfo.flags = 0;
2257
2258 // Create all intended images.
2259 for(size_t i = 0; i < imageCount; ++i)
2260 {
2261 const uint32_t size = imageSizes[rand.Generate() % 3];
2262
2263 imageInfo.extent.width = size;
2264 imageInfo.extent.height = size;
2265
2266 AllocInfo alloc;
2267 alloc.CreateImage(imageInfo, allocCreateInfo, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
2268 alloc.m_StartValue = 0;
2269
2270 allocations.push_back(alloc);
2271 }
2272
2273 // And all buffers
2274 VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2275
2276 for(size_t i = 0; i < bufCount; ++i)
2277 {
2278 bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
2279 bufCreateInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
2280
2281 AllocInfo alloc;
2282 alloc.CreateBuffer(bufCreateInfo, allocCreateInfo);
2283 alloc.m_StartValue = 0;
2284
2285 allocations.push_back(alloc);
2286 }
2287
2288 // Destroy some percentage of them.
2289 {
2290 const size_t allocationsToDestroy = round_div<size_t>((imageCount + bufCount) * (100 - percentToLeave), 100);
2291 for(size_t i = 0; i < allocationsToDestroy; ++i)
2292 {
2293 const size_t index = rand.Generate() % allocations.size();
2294 allocations[index].Destroy();
2295 allocations.erase(allocations.begin() + index);
2296 }
2297 }
2298
2299 {
2300 // Set our user data pointers. A real application should probably be more clever here
2301 const size_t allocationCount = allocations.size();
2302 for(size_t i = 0; i < allocationCount; ++i)
2303 {
2304 AllocInfo &alloc = allocations[i];
2305 vmaSetAllocationUserData(g_hAllocator, alloc.m_Allocation, &alloc);
2306 }
2307 }
2308
2309 // Fill them with meaningful data.
2310 UploadGpuData(allocations.data(), allocations.size());
2311
2312 wchar_t fileName[MAX_PATH];
2313 swprintf_s(fileName, L"GPU_defragmentation_incremental_complex_A_before.json");
2314 SaveAllocatorStatsToFile(fileName);
2315
2316 std::vector<AllocInfo> additionalAllocations;
2317
2318 #define MakeAdditionalAllocation() \
2319 do { \
2320 { \
2321 bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16); \
2322 bufCreateInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT; \
2323 \
2324 AllocInfo alloc; \
2325 alloc.CreateBuffer(bufCreateInfo, allocCreateInfo); \
2326 \
2327 additionalAllocations.push_back(alloc); \
2328 } \
2329 } while(0)
2330
2331 // Defragment using GPU only.
2332 {
2333 const size_t allocCount = allocations.size();
2334
2335 std::vector<VmaAllocation> allocationPtrs;
2336
2337 for(size_t i = 0; i < allocCount; ++i)
2338 {
2339 VmaAllocationInfo allocInfo = {};
2340 vmaGetAllocationInfo(g_hAllocator, allocations[i].m_Allocation, &allocInfo);
2341
2342 allocationPtrs.push_back(allocations[i].m_Allocation);
2343 }
2344
2345 const size_t movableAllocCount = allocationPtrs.size();
2346
2347 VmaDefragmentationInfo2 defragInfo = {};
2348 defragInfo.flags = VMA_DEFRAGMENTATION_FLAG_INCREMENTAL;
2349 defragInfo.allocationCount = (uint32_t)movableAllocCount;
2350 defragInfo.pAllocations = allocationPtrs.data();
2351 defragInfo.maxGpuBytesToMove = VK_WHOLE_SIZE;
2352 defragInfo.maxGpuAllocationsToMove = UINT32_MAX;
2353
2354 VmaDefragmentationStats stats = {};
2355 VmaDefragmentationContext ctx = VK_NULL_HANDLE;
2356 VkResult res = vmaDefragmentationBegin(g_hAllocator, &defragInfo, &stats, &ctx);
2357 TEST(res >= VK_SUCCESS);
2358
2359 res = VK_NOT_READY;
2360
2361 std::vector<VmaDefragmentationPassMoveInfo> moveInfo;
2362 moveInfo.resize(movableAllocCount);
2363
2364 MakeAdditionalAllocation();
2365
2366 while(res == VK_NOT_READY)
2367 {
2368 VmaDefragmentationPassInfo stepInfo = {};
2369 stepInfo.pMoves = moveInfo.data();
2370 stepInfo.moveCount = (uint32_t)moveInfo.size();
2371
2372 res = vmaBeginDefragmentationPass(g_hAllocator, ctx, &stepInfo);
2373 TEST(res >= VK_SUCCESS);
2374
2375 MakeAdditionalAllocation();
2376
2377 BeginSingleTimeCommands();
2378 ProcessDefragmentationStepInfo(stepInfo);
2379 EndSingleTimeCommands();
2380
2381 res = vmaEndDefragmentationPass(g_hAllocator, ctx);
2382
2383 // Destroy old buffers/images and replace them with new handles.
2384 for(size_t i = 0; i < stepInfo.moveCount; ++i)
2385 {
2386 VmaAllocation const alloc = stepInfo.pMoves[i].allocation;
2387 VmaAllocationInfo vmaAllocInfo;
2388 vmaGetAllocationInfo(g_hAllocator, alloc, &vmaAllocInfo);
2389 AllocInfo* allocInfo = (AllocInfo*)vmaAllocInfo.pUserData;
2390 if(allocInfo->m_Buffer)
2391 {
2392 assert(allocInfo->m_NewBuffer && !allocInfo->m_Image && !allocInfo->m_NewImage);
2393 vkDestroyBuffer(g_hDevice, allocInfo->m_Buffer, g_Allocs);
2394 allocInfo->m_Buffer = allocInfo->m_NewBuffer;
2395 allocInfo->m_NewBuffer = VK_NULL_HANDLE;
2396 }
2397 else if(allocInfo->m_Image)
2398 {
2399 assert(allocInfo->m_NewImage && !allocInfo->m_Buffer && !allocInfo->m_NewBuffer);
2400 vkDestroyImage(g_hDevice, allocInfo->m_Image, g_Allocs);
2401 allocInfo->m_Image = allocInfo->m_NewImage;
2402 allocInfo->m_NewImage = VK_NULL_HANDLE;
2403 }
2404 else
2405 assert(0);
2406 }
2407
2408 MakeAdditionalAllocation();
2409 }
2410
2411 TEST(res >= VK_SUCCESS);
2412 vmaDefragmentationEnd(g_hAllocator, ctx);
2413
2414 // If corruption detection is enabled, GPU defragmentation may not work on
2415 // memory types that have this detection active, e.g. on Intel.
2416 #if !defined(VMA_DEBUG_DETECT_CORRUPTION) || VMA_DEBUG_DETECT_CORRUPTION == 0
2417 TEST(stats.allocationsMoved > 0 && stats.bytesMoved > 0);
2418 TEST(stats.deviceMemoryBlocksFreed > 0 && stats.bytesFreed > 0);
2419 #endif
2420 }
2421
2422 //ValidateGpuData(allocations.data(), allocations.size());
2423
2424 swprintf_s(fileName, L"GPU_defragmentation_incremental_complex_B_after.json");
2425 SaveAllocatorStatsToFile(fileName);
2426
2427 // Destroy all remaining buffers.
2428 for(size_t i = allocations.size(); i--; )
2429 {
2430 allocations[i].Destroy();
2431 }
2432
2433 for(size_t i = additionalAllocations.size(); i--; )
2434 {
2435 additionalAllocations[i].Destroy();
2436 }
2437 }
2438
2439
TestUserData()2440 static void TestUserData()
2441 {
2442 VkResult res;
2443
2444 VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2445 bufCreateInfo.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
2446 bufCreateInfo.size = 0x10000;
2447
2448 for(uint32_t testIndex = 0; testIndex < 2; ++testIndex)
2449 {
2450 // Opaque pointer
2451 {
2452
2453 void* numberAsPointer = (void*)(size_t)0xC2501FF3u;
2454 void* pointerToSomething = &res;
2455
2456 VmaAllocationCreateInfo allocCreateInfo = {};
2457 allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
2458 allocCreateInfo.pUserData = numberAsPointer;
2459 if(testIndex == 1)
2460 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
2461
2462 VkBuffer buf; VmaAllocation alloc; VmaAllocationInfo allocInfo;
2463 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2464 TEST(res == VK_SUCCESS);
2465 TEST(allocInfo.pUserData = numberAsPointer);
2466
2467 vmaGetAllocationInfo(g_hAllocator, alloc, &allocInfo);
2468 TEST(allocInfo.pUserData == numberAsPointer);
2469
2470 vmaSetAllocationUserData(g_hAllocator, alloc, pointerToSomething);
2471 vmaGetAllocationInfo(g_hAllocator, alloc, &allocInfo);
2472 TEST(allocInfo.pUserData == pointerToSomething);
2473
2474 vmaDestroyBuffer(g_hAllocator, buf, alloc);
2475 }
2476
2477 // String
2478 {
2479 const char* name1 = "Buffer name \\\"\'<>&% \nSecond line .,;=";
2480 const char* name2 = "2";
2481 const size_t name1Len = strlen(name1);
2482
2483 char* name1Buf = new char[name1Len + 1];
2484 strcpy_s(name1Buf, name1Len + 1, name1);
2485
2486 VmaAllocationCreateInfo allocCreateInfo = {};
2487 allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
2488 allocCreateInfo.flags = VMA_ALLOCATION_CREATE_USER_DATA_COPY_STRING_BIT;
2489 allocCreateInfo.pUserData = name1Buf;
2490 if(testIndex == 1)
2491 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
2492
2493 VkBuffer buf; VmaAllocation alloc; VmaAllocationInfo allocInfo;
2494 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2495 TEST(res == VK_SUCCESS);
2496 TEST(allocInfo.pUserData != nullptr && allocInfo.pUserData != name1Buf);
2497 TEST(strcmp(name1, (const char*)allocInfo.pUserData) == 0);
2498
2499 delete[] name1Buf;
2500
2501 vmaGetAllocationInfo(g_hAllocator, alloc, &allocInfo);
2502 TEST(strcmp(name1, (const char*)allocInfo.pUserData) == 0);
2503
2504 vmaSetAllocationUserData(g_hAllocator, alloc, (void*)name2);
2505 vmaGetAllocationInfo(g_hAllocator, alloc, &allocInfo);
2506 TEST(strcmp(name2, (const char*)allocInfo.pUserData) == 0);
2507
2508 vmaSetAllocationUserData(g_hAllocator, alloc, nullptr);
2509 vmaGetAllocationInfo(g_hAllocator, alloc, &allocInfo);
2510 TEST(allocInfo.pUserData == nullptr);
2511
2512 vmaDestroyBuffer(g_hAllocator, buf, alloc);
2513 }
2514 }
2515 }
2516
TestInvalidAllocations()2517 static void TestInvalidAllocations()
2518 {
2519 VkResult res;
2520
2521 VmaAllocationCreateInfo allocCreateInfo = {};
2522 allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
2523
2524 // Try to allocate 0 bytes.
2525 {
2526 VkMemoryRequirements memReq = {};
2527 memReq.size = 0; // !!!
2528 memReq.alignment = 4;
2529 memReq.memoryTypeBits = UINT32_MAX;
2530 VmaAllocation alloc = VK_NULL_HANDLE;
2531 res = vmaAllocateMemory(g_hAllocator, &memReq, &allocCreateInfo, &alloc, nullptr);
2532 TEST(res == VK_ERROR_INITIALIZATION_FAILED && alloc == VK_NULL_HANDLE);
2533 }
2534
2535 // Try to create buffer with size = 0.
2536 {
2537 VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2538 bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
2539 bufCreateInfo.size = 0; // !!!
2540 VkBuffer buf = VK_NULL_HANDLE;
2541 VmaAllocation alloc = VK_NULL_HANDLE;
2542 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, nullptr);
2543 TEST(res == VK_ERROR_INITIALIZATION_FAILED && buf == VK_NULL_HANDLE && alloc == VK_NULL_HANDLE);
2544 }
2545
2546 // Try to create image with one dimension = 0.
2547 {
2548 VkImageCreateInfo imageCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2549 imageCreateInfo.imageType = VK_IMAGE_TYPE_2D;
2550 imageCreateInfo.format = VK_FORMAT_B8G8R8A8_UNORM;
2551 imageCreateInfo.extent.width = 128;
2552 imageCreateInfo.extent.height = 0; // !!!
2553 imageCreateInfo.extent.depth = 1;
2554 imageCreateInfo.mipLevels = 1;
2555 imageCreateInfo.arrayLayers = 1;
2556 imageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
2557 imageCreateInfo.tiling = VK_IMAGE_TILING_LINEAR;
2558 imageCreateInfo.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
2559 imageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
2560 VkImage image = VK_NULL_HANDLE;
2561 VmaAllocation alloc = VK_NULL_HANDLE;
2562 res = vmaCreateImage(g_hAllocator, &imageCreateInfo, &allocCreateInfo, &image, &alloc, nullptr);
2563 TEST(res == VK_ERROR_INITIALIZATION_FAILED && image == VK_NULL_HANDLE && alloc == VK_NULL_HANDLE);
2564 }
2565 }
2566
TestMemoryRequirements()2567 static void TestMemoryRequirements()
2568 {
2569 VkResult res;
2570 VkBuffer buf;
2571 VmaAllocation alloc;
2572 VmaAllocationInfo allocInfo;
2573
2574 const VkPhysicalDeviceMemoryProperties* memProps;
2575 vmaGetMemoryProperties(g_hAllocator, &memProps);
2576
2577 VkBufferCreateInfo bufInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2578 bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
2579 bufInfo.size = 128;
2580
2581 VmaAllocationCreateInfo allocCreateInfo = {};
2582
2583 // No requirements.
2584 res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2585 TEST(res == VK_SUCCESS);
2586 vmaDestroyBuffer(g_hAllocator, buf, alloc);
2587
2588 // Usage.
2589 allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
2590 allocCreateInfo.requiredFlags = 0;
2591 allocCreateInfo.preferredFlags = 0;
2592 allocCreateInfo.memoryTypeBits = UINT32_MAX;
2593
2594 res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2595 TEST(res == VK_SUCCESS);
2596 TEST(memProps->memoryTypes[allocInfo.memoryType].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
2597 vmaDestroyBuffer(g_hAllocator, buf, alloc);
2598
2599 // Required flags, preferred flags.
2600 allocCreateInfo.usage = VMA_MEMORY_USAGE_UNKNOWN;
2601 allocCreateInfo.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
2602 allocCreateInfo.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
2603 allocCreateInfo.memoryTypeBits = 0;
2604
2605 res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2606 TEST(res == VK_SUCCESS);
2607 TEST(memProps->memoryTypes[allocInfo.memoryType].propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
2608 TEST(memProps->memoryTypes[allocInfo.memoryType].propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
2609 vmaDestroyBuffer(g_hAllocator, buf, alloc);
2610
2611 // memoryTypeBits.
2612 const uint32_t memType = allocInfo.memoryType;
2613 allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
2614 allocCreateInfo.requiredFlags = 0;
2615 allocCreateInfo.preferredFlags = 0;
2616 allocCreateInfo.memoryTypeBits = 1u << memType;
2617
2618 res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2619 TEST(res == VK_SUCCESS);
2620 TEST(allocInfo.memoryType == memType);
2621 vmaDestroyBuffer(g_hAllocator, buf, alloc);
2622
2623 }
2624
TestGetAllocatorInfo()2625 static void TestGetAllocatorInfo()
2626 {
2627 wprintf(L"Test vnaGetAllocatorInfo\n");
2628
2629 VmaAllocatorInfo allocInfo = {};
2630 vmaGetAllocatorInfo(g_hAllocator, &allocInfo);
2631 TEST(allocInfo.instance == g_hVulkanInstance);
2632 TEST(allocInfo.physicalDevice == g_hPhysicalDevice);
2633 TEST(allocInfo.device == g_hDevice);
2634 }
2635
TestBasics()2636 static void TestBasics()
2637 {
2638 wprintf(L"Test basics\n");
2639
2640 VkResult res;
2641
2642 TestGetAllocatorInfo();
2643
2644 TestMemoryRequirements();
2645
2646 // Lost allocation
2647 {
2648 VmaAllocation alloc = VK_NULL_HANDLE;
2649 vmaCreateLostAllocation(g_hAllocator, &alloc);
2650 TEST(alloc != VK_NULL_HANDLE);
2651
2652 VmaAllocationInfo allocInfo;
2653 vmaGetAllocationInfo(g_hAllocator, alloc, &allocInfo);
2654 TEST(allocInfo.deviceMemory == VK_NULL_HANDLE);
2655 TEST(allocInfo.size == 0);
2656
2657 vmaFreeMemory(g_hAllocator, alloc);
2658 }
2659
2660 // Allocation that is MAPPED and not necessarily HOST_VISIBLE.
2661 {
2662 VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2663 bufCreateInfo.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
2664 bufCreateInfo.size = 128;
2665
2666 VmaAllocationCreateInfo allocCreateInfo = {};
2667 allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
2668 allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
2669
2670 VkBuffer buf; VmaAllocation alloc; VmaAllocationInfo allocInfo;
2671 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2672 TEST(res == VK_SUCCESS);
2673
2674 vmaDestroyBuffer(g_hAllocator, buf, alloc);
2675
2676 // Same with OWN_MEMORY.
2677 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
2678
2679 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
2680 TEST(res == VK_SUCCESS);
2681
2682 vmaDestroyBuffer(g_hAllocator, buf, alloc);
2683 }
2684
2685 TestUserData();
2686
2687 TestInvalidAllocations();
2688 }
2689
TestVirtualBlocks()2690 static void TestVirtualBlocks()
2691 {
2692 wprintf(L"Test virtual blocks\n");
2693
2694 const VkDeviceSize blockSize = 16 * MEGABYTE;
2695 const VkDeviceSize alignment = 256;
2696
2697 // # Create block 16 MB
2698
2699 VmaVirtualBlockCreateInfo blockCreateInfo = {};
2700 blockCreateInfo.pAllocationCallbacks = g_Allocs;
2701 blockCreateInfo.size = blockSize;
2702 VmaVirtualBlock block;
2703 TEST(vmaCreateVirtualBlock(&blockCreateInfo, &block) == VK_SUCCESS && block);
2704
2705 // # Allocate 8 MB
2706
2707 VmaVirtualAllocationCreateInfo allocCreateInfo = {};
2708 allocCreateInfo.alignment = alignment;
2709 allocCreateInfo.pUserData = (void*)(uintptr_t)1;
2710 allocCreateInfo.size = 8 * MEGABYTE;
2711 VkDeviceSize alloc0Offset;
2712 TEST(vmaVirtualAllocate(block, &allocCreateInfo, &alloc0Offset) == VK_SUCCESS);
2713 TEST(alloc0Offset < blockSize);
2714
2715 // # Validate the allocation
2716
2717 VmaVirtualAllocationInfo allocInfo = {};
2718 vmaGetVirtualAllocationInfo(block, alloc0Offset, &allocInfo);
2719 TEST(allocInfo.size == allocCreateInfo.size);
2720 TEST(allocInfo.pUserData = allocCreateInfo.pUserData);
2721
2722 // # Check SetUserData
2723
2724 vmaSetVirtualAllocationUserData(block, alloc0Offset, (void*)(uintptr_t)2);
2725 vmaGetVirtualAllocationInfo(block, alloc0Offset, &allocInfo);
2726 TEST(allocInfo.pUserData = (void*)(uintptr_t)2);
2727
2728 // # Allocate 4 MB
2729
2730 allocCreateInfo.size = 4 * MEGABYTE;
2731 UINT64 alloc1Offset;
2732 TEST(vmaVirtualAllocate(block, &allocCreateInfo, &alloc1Offset) == VK_SUCCESS);
2733 TEST(alloc1Offset < blockSize);
2734 TEST(alloc1Offset + 4 * MEGABYTE <= alloc0Offset || alloc0Offset + 8 * MEGABYTE <= alloc1Offset); // Check if they don't overlap.
2735
2736 // # Allocate another 8 MB - it should fail
2737
2738 allocCreateInfo.size = 8 * MEGABYTE;
2739 UINT64 alloc2Offset;
2740 TEST(vmaVirtualAllocate(block, &allocCreateInfo, &alloc2Offset) < 0);
2741 TEST(alloc2Offset == VK_WHOLE_SIZE);
2742
2743 // # Free the 4 MB block. Now allocation of 8 MB should succeed.
2744
2745 vmaVirtualFree(block, alloc1Offset);
2746 TEST(vmaVirtualAllocate(block, &allocCreateInfo, &alloc2Offset) == VK_SUCCESS);
2747 TEST(alloc2Offset < blockSize);
2748 TEST(alloc2Offset + 4 * MEGABYTE <= alloc0Offset || alloc0Offset + 8 * MEGABYTE <= alloc2Offset); // Check if they don't overlap.
2749
2750 // # Calculate statistics
2751
2752 VmaStatInfo statInfo = {};
2753 vmaCalculateVirtualBlockStats(block, &statInfo);
2754 TEST(statInfo.allocationCount == 2);
2755 TEST(statInfo.blockCount == 1);
2756 TEST(statInfo.usedBytes == blockSize);
2757 TEST(statInfo.unusedBytes + statInfo.usedBytes == blockSize);
2758
2759 // # Generate JSON dump
2760
2761 char* json = nullptr;
2762 vmaBuildVirtualBlockStatsString(block, &json, VK_TRUE);
2763 {
2764 std::string str(json);
2765 TEST( str.find("\"UserData\": \"0000000000000001\"") != std::string::npos );
2766 TEST( str.find("\"UserData\": \"0000000000000002\"") != std::string::npos );
2767 }
2768 vmaFreeVirtualBlockStatsString(block, json);
2769
2770 // # Free alloc0, leave alloc2 unfreed.
2771
2772 vmaVirtualFree(block, alloc0Offset);
2773
2774 // # Test alignment
2775
2776 {
2777 constexpr size_t allocCount = 10;
2778 VkDeviceSize allocOffset[allocCount] = {};
2779 for(size_t i = 0; i < allocCount; ++i)
2780 {
2781 const bool alignment0 = i == allocCount - 1;
2782 allocCreateInfo.size = i * 3 + 15;
2783 allocCreateInfo.alignment = alignment0 ? 0 : 8;
2784 TEST(vmaVirtualAllocate(block, &allocCreateInfo, &allocOffset[i]) == VK_SUCCESS);
2785 if(!alignment0)
2786 {
2787 TEST(allocOffset[i] % allocCreateInfo.alignment == 0);
2788 }
2789 }
2790
2791 for(size_t i = allocCount; i--; )
2792 {
2793 vmaVirtualFree(block, allocOffset[i]);
2794 }
2795 }
2796
2797 // # Final cleanup
2798
2799 vmaVirtualFree(block, alloc2Offset);
2800 vmaDestroyVirtualBlock(block);
2801
2802 {
2803 // Another virtual block, using Clear this time.
2804 TEST(vmaCreateVirtualBlock(&blockCreateInfo, &block) == VK_SUCCESS);
2805
2806 allocCreateInfo = VmaVirtualAllocationCreateInfo{};
2807 allocCreateInfo.size = MEGABYTE;
2808
2809 for(size_t i = 0; i < 8; ++i)
2810 {
2811 VkDeviceSize offset = 0;
2812 TEST(vmaVirtualAllocate(block, &allocCreateInfo, &offset) == VK_SUCCESS);
2813 }
2814
2815 vmaClearVirtualBlock(block);
2816 vmaDestroyVirtualBlock(block);
2817 }
2818 }
2819
// Runs a randomized allocate/free workload against each virtual-block algorithm
// (index 0 = default, 1 = linear, 2 = buddy) and validates, for every one of
// them: non-overlapping offsets, alignment, pUserData round-tripping,
// statistics consistency, and that a JSON dump can be built.
static void TestVirtualBlocksAlgorithms()
{
    wprintf(L"Test virtual blocks algorithms\n");

    // Fixed seed keeps the test deterministic across runs.
    RandomNumberGenerator rand{3454335};
    // Random sizes in [5, 24] - small relative to the 10'000-unit block.
    auto calcRandomAllocSize = [&rand]() -> VkDeviceSize { return rand.Generate() % 20 + 5; };

    for(size_t algorithmIndex = 0; algorithmIndex < 3; ++algorithmIndex)
    {
        // Create the block
        VmaVirtualBlockCreateInfo blockCreateInfo = {};
        blockCreateInfo.pAllocationCallbacks = g_Allocs;
        blockCreateInfo.size = 10'000;
        switch(algorithmIndex)
        {
        case 1: blockCreateInfo.flags = VMA_VIRTUAL_BLOCK_CREATE_LINEAR_ALGORITHM_BIT; break;
        case 2: blockCreateInfo.flags = VMA_VIRTUAL_BLOCK_CREATE_BUDDY_ALGORITHM_BIT; break;
        }
        VmaVirtualBlock block = nullptr;
        VkResult res = vmaCreateVirtualBlock(&blockCreateInfo, &block);
        TEST(res == VK_SUCCESS);

        // Records what we asked for vs. what the allocator actually reserved
        // (allocationSize may exceed requestedSize, e.g. in the buddy algorithm).
        struct AllocData
        {
            VkDeviceSize offset, requestedSize, allocationSize;
        };
        std::vector<AllocData> allocations;

        // Make some allocations
        for(size_t i = 0; i < 20; ++i)
        {
            VmaVirtualAllocationCreateInfo allocCreateInfo = {};
            allocCreateInfo.size = calcRandomAllocSize();
            // pUserData encodes size * 10 so it can be cross-checked later.
            allocCreateInfo.pUserData = (void*)(uintptr_t)(allocCreateInfo.size * 10);
            // Exercise the different allocation strategies across the batch;
            // UPPER_ADDRESS is only used with the linear algorithm (index 1),
            // which is the one that supports it.
            if(i < 10) { }
            else if(i < 12) allocCreateInfo.flags = VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_MEMORY_BIT;
            else if(i < 14) allocCreateInfo.flags = VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_TIME_BIT;
            else if(i < 16) allocCreateInfo.flags = VMA_VIRTUAL_ALLOCATION_CREATE_STRATEGY_MIN_FRAGMENTATION_BIT;
            else if(i < 18 && algorithmIndex == 1) allocCreateInfo.flags = VMA_VIRTUAL_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;

            AllocData alloc = {};
            alloc.requestedSize = allocCreateInfo.size;
            res = vmaVirtualAllocate(block, &allocCreateInfo, &alloc.offset);
            TEST(res == VK_SUCCESS);

            VmaVirtualAllocationInfo allocInfo;
            vmaGetVirtualAllocationInfo(block, alloc.offset, &allocInfo);
            TEST(allocInfo.size >= allocCreateInfo.size);
            alloc.allocationSize = allocInfo.size;

            allocations.push_back(alloc);
        }

        // Free some of the allocations
        for(size_t i = 0; i < 5; ++i)
        {
            const size_t index = rand.Generate() % allocations.size();
            vmaVirtualFree(block, allocations[index].offset);
            allocations.erase(allocations.begin() + index);
        }

        // Allocate some more
        for(size_t i = 0; i < 6; ++i)
        {
            VmaVirtualAllocationCreateInfo allocCreateInfo = {};
            allocCreateInfo.size = calcRandomAllocSize();
            allocCreateInfo.pUserData = (void*)(uintptr_t)(allocCreateInfo.size * 10);

            AllocData alloc = {};
            alloc.requestedSize = allocCreateInfo.size;
            res = vmaVirtualAllocate(block, &allocCreateInfo, &alloc.offset);
            TEST(res == VK_SUCCESS);

            VmaVirtualAllocationInfo allocInfo;
            vmaGetVirtualAllocationInfo(block, alloc.offset, &allocInfo);
            TEST(allocInfo.size >= allocCreateInfo.size);
            alloc.allocationSize = allocInfo.size;

            allocations.push_back(alloc);
        }

        // Allocate some with extra alignment
        for(size_t i = 0; i < 3; ++i)
        {
            VmaVirtualAllocationCreateInfo allocCreateInfo = {};
            allocCreateInfo.size = calcRandomAllocSize();
            allocCreateInfo.alignment = 16;
            allocCreateInfo.pUserData = (void*)(uintptr_t)(allocCreateInfo.size * 10);

            AllocData alloc = {};
            alloc.requestedSize = allocCreateInfo.size;
            res = vmaVirtualAllocate(block, &allocCreateInfo, &alloc.offset);
            TEST(res == VK_SUCCESS);
            TEST(alloc.offset % 16 == 0);

            VmaVirtualAllocationInfo allocInfo;
            vmaGetVirtualAllocationInfo(block, alloc.offset, &allocInfo);
            TEST(allocInfo.size >= allocCreateInfo.size);
            alloc.allocationSize = allocInfo.size;

            allocations.push_back(alloc);
        }

        // Check if the allocations don't overlap
        // (sorting by offset makes the pairwise check linear).
        std::sort(allocations.begin(), allocations.end(), [](const AllocData& lhs, const AllocData& rhs) {
            return lhs.offset < rhs.offset; });
        for(size_t i = 0; i < allocations.size() - 1; ++i)
        {
            TEST(allocations[i+1].offset >= allocations[i].offset + allocations[i].allocationSize);
        }

        // Check pUserData
        {
            const AllocData& alloc = allocations.back();
            VmaVirtualAllocationInfo allocInfo = {};
            vmaGetVirtualAllocationInfo(block, alloc.offset, &allocInfo);
            // Must still be the size*10 value stored at allocation time.
            TEST((uintptr_t)allocInfo.pUserData == alloc.requestedSize * 10);

            // Overwrite via the setter and read it back.
            vmaSetVirtualAllocationUserData(block, alloc.offset, (void*)(uintptr_t)666);
            vmaGetVirtualAllocationInfo(block, alloc.offset, &allocInfo);
            TEST((uintptr_t)allocInfo.pUserData == 666);
        }

        // Calculate statistics
        {
            VkDeviceSize actualAllocSizeMin = VK_WHOLE_SIZE, actualAllocSizeMax = 0, actualAllocSizeSum = 0;
            std::for_each(allocations.begin(), allocations.end(), [&](const AllocData& a) {
                actualAllocSizeMin = std::min(actualAllocSizeMin, a.allocationSize);
                actualAllocSizeMax = std::max(actualAllocSizeMax, a.allocationSize);
                actualAllocSizeSum += a.allocationSize;
            });

            VmaStatInfo statInfo = {};
            vmaCalculateVirtualBlockStats(block, &statInfo);
            TEST(statInfo.allocationCount == allocations.size());
            TEST(statInfo.blockCount == 1);
            TEST(statInfo.usedBytes + statInfo.unusedBytes == blockCreateInfo.size);
            TEST(statInfo.allocationSizeMax == actualAllocSizeMax);
            TEST(statInfo.allocationSizeMin == actualAllocSizeMin);
            // usedBytes may exceed the sum due to internal rounding/overhead,
            // so only a lower bound is asserted.
            TEST(statInfo.usedBytes >= actualAllocSizeSum);
        }

        // Build JSON dump string
        {
            char* json = nullptr;
            vmaBuildVirtualBlockStatsString(block, &json, VK_TRUE);
            int I = 0; // put a breakpoint here to debug
            vmaFreeVirtualBlockStatsString(block, json);
        }

        // Final cleanup
        vmaClearVirtualBlock(block);
        vmaDestroyVirtualBlock(block);
    }
}
2975
TestAllocationVersusResourceSize()2976 static void TestAllocationVersusResourceSize()
2977 {
2978 wprintf(L"Test allocation versus resource size\n");
2979
2980 VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
2981 bufCreateInfo.size = 22921; // Prime number
2982 bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
2983
2984 VmaAllocationCreateInfo allocCreateInfo = {};
2985 allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
2986
2987 for(uint32_t i = 0; i < 2; ++i)
2988 {
2989 allocCreateInfo.flags = (i == 1) ? VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT : 0;
2990
2991 AllocInfo info;
2992 info.CreateBuffer(bufCreateInfo, allocCreateInfo);
2993
2994 VmaAllocationInfo allocInfo = {};
2995 vmaGetAllocationInfo(g_hAllocator, info.m_Allocation, &allocInfo);
2996 //wprintf(L" Buffer size = %llu, allocation size = %llu\n", bufCreateInfo.size, allocInfo.size);
2997
2998 // Map and test accessing entire area of the allocation, not only the buffer.
2999 void* mappedPtr = nullptr;
3000 VkResult res = vmaMapMemory(g_hAllocator, info.m_Allocation, &mappedPtr);
3001 TEST(res == VK_SUCCESS);
3002
3003 memset(mappedPtr, 0xCC, (size_t)allocInfo.size);
3004
3005 vmaUnmapMemory(g_hAllocator, info.m_Allocation);
3006
3007 info.Destroy();
3008 }
3009 }
3010
TestPool_MinBlockCount()3011 static void TestPool_MinBlockCount()
3012 {
3013 #if defined(VMA_DEBUG_MARGIN) && VMA_DEBUG_MARGIN > 0
3014 return;
3015 #endif
3016
3017 wprintf(L"Test Pool MinBlockCount\n");
3018 VkResult res;
3019
3020 static const VkDeviceSize ALLOC_SIZE = 512ull * 1024;
3021 static const VkDeviceSize BLOCK_SIZE = ALLOC_SIZE * 2; // Each block can fit 2 allocations.
3022
3023 VmaAllocationCreateInfo allocCreateInfo = {};
3024 allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_COPY;
3025
3026 VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
3027 bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
3028 bufCreateInfo.size = ALLOC_SIZE;
3029
3030 VmaPoolCreateInfo poolCreateInfo = {};
3031 poolCreateInfo.blockSize = BLOCK_SIZE;
3032 poolCreateInfo.minBlockCount = 2; // At least 2 blocks always present.
3033 res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &poolCreateInfo.memoryTypeIndex);
3034 TEST(res == VK_SUCCESS);
3035
3036 VmaPool pool = VK_NULL_HANDLE;
3037 res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
3038 TEST(res == VK_SUCCESS && pool != VK_NULL_HANDLE);
3039
3040 // Check that there are 2 blocks preallocated as requested.
3041 VmaPoolStats begPoolStats = {};
3042 vmaGetPoolStats(g_hAllocator, pool, &begPoolStats);
3043 TEST(begPoolStats.blockCount == 2 && begPoolStats.allocationCount == 0 && begPoolStats.size == BLOCK_SIZE * 2);
3044
3045 // Allocate 5 buffers to create 3 blocks.
3046 static const uint32_t BUF_COUNT = 5;
3047 allocCreateInfo.pool = pool;
3048 std::vector<AllocInfo> allocs(BUF_COUNT);
3049 for(uint32_t i = 0; i < BUF_COUNT; ++i)
3050 {
3051 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &allocs[i].m_Buffer, &allocs[i].m_Allocation, nullptr);
3052 TEST(res == VK_SUCCESS && allocs[i].m_Buffer != VK_NULL_HANDLE && allocs[i].m_Allocation != VK_NULL_HANDLE);
3053 }
3054
3055 // Check that there are really 3 blocks.
3056 VmaPoolStats poolStats2 = {};
3057 vmaGetPoolStats(g_hAllocator, pool, &poolStats2);
3058 TEST(poolStats2.blockCount == 3 && poolStats2.allocationCount == BUF_COUNT && poolStats2.size == BLOCK_SIZE * 3);
3059
3060 // Free two first allocations to make one block empty.
3061 allocs[0].Destroy();
3062 allocs[1].Destroy();
3063
3064 // Check that there are still 3 blocks due to hysteresis.
3065 VmaPoolStats poolStats3 = {};
3066 vmaGetPoolStats(g_hAllocator, pool, &poolStats3);
3067 TEST(poolStats3.blockCount == 3 && poolStats3.allocationCount == BUF_COUNT - 2 && poolStats2.size == BLOCK_SIZE * 3);
3068
3069 // Free the last allocation to make second block empty.
3070 allocs[BUF_COUNT - 1].Destroy();
3071
3072 // Check that there are now 2 blocks only.
3073 VmaPoolStats poolStats4 = {};
3074 vmaGetPoolStats(g_hAllocator, pool, &poolStats4);
3075 TEST(poolStats4.blockCount == 2 && poolStats4.allocationCount == BUF_COUNT - 3 && poolStats4.size == BLOCK_SIZE * 2);
3076
3077 // Cleanup.
3078 for(size_t i = allocs.size(); i--; )
3079 {
3080 allocs[i].Destroy();
3081 }
3082 vmaDestroyPool(g_hAllocator, pool);
3083 }
3084
TestPool_MinAllocationAlignment()3085 static void TestPool_MinAllocationAlignment()
3086 {
3087 wprintf(L"Test Pool MinAllocationAlignment\n");
3088 VkResult res;
3089
3090 static const VkDeviceSize ALLOC_SIZE = 32;
3091 static const VkDeviceSize BLOCK_SIZE = 1024 * 1024;
3092 static const VkDeviceSize MIN_ALLOCATION_ALIGNMENT = 64 * 1024;
3093
3094 VmaAllocationCreateInfo allocCreateInfo = {};
3095 allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_COPY;
3096
3097 VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
3098 bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
3099 bufCreateInfo.size = ALLOC_SIZE;
3100
3101 VmaPoolCreateInfo poolCreateInfo = {};
3102 poolCreateInfo.blockSize = BLOCK_SIZE;
3103 poolCreateInfo.minAllocationAlignment = MIN_ALLOCATION_ALIGNMENT;
3104 res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &poolCreateInfo.memoryTypeIndex);
3105 TEST(res == VK_SUCCESS);
3106
3107 VmaPool pool = VK_NULL_HANDLE;
3108 res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
3109 TEST(res == VK_SUCCESS && pool != VK_NULL_HANDLE);
3110
3111 static const uint32_t BUF_COUNT = 4;
3112 allocCreateInfo = {};
3113 allocCreateInfo.pool = pool;
3114 std::vector<AllocInfo> allocs(BUF_COUNT);
3115 for(uint32_t i = 0; i < BUF_COUNT; ++i)
3116 {
3117 VmaAllocationInfo allocInfo = {};
3118 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &allocs[i].m_Buffer, &allocs[i].m_Allocation, &allocInfo);
3119 TEST(res == VK_SUCCESS && allocs[i].m_Buffer != VK_NULL_HANDLE && allocs[i].m_Allocation != VK_NULL_HANDLE);
3120 TEST(allocInfo.offset % MIN_ALLOCATION_ALIGNMENT == 0);
3121 }
3122
3123 // Cleanup.
3124 for(size_t i = allocs.size(); i--; )
3125 {
3126 allocs[i].Destroy();
3127 }
3128 vmaDestroyPool(g_hAllocator, pool);
3129 }
3130
TestHeapSizeLimit()3131 void TestHeapSizeLimit()
3132 {
3133 const VkDeviceSize HEAP_SIZE_LIMIT = 100ull * 1024 * 1024; // 100 MB
3134 const VkDeviceSize BLOCK_SIZE = 10ull * 1024 * 1024; // 10 MB
3135
3136 VkDeviceSize heapSizeLimit[VK_MAX_MEMORY_HEAPS];
3137 for(uint32_t i = 0; i < VK_MAX_MEMORY_HEAPS; ++i)
3138 {
3139 heapSizeLimit[i] = HEAP_SIZE_LIMIT;
3140 }
3141
3142 VmaAllocatorCreateInfo allocatorCreateInfo = {};
3143 allocatorCreateInfo.physicalDevice = g_hPhysicalDevice;
3144 allocatorCreateInfo.device = g_hDevice;
3145 allocatorCreateInfo.instance = g_hVulkanInstance;
3146 allocatorCreateInfo.pHeapSizeLimit = heapSizeLimit;
3147
3148 VmaAllocator hAllocator;
3149 VkResult res = vmaCreateAllocator(&allocatorCreateInfo, &hAllocator);
3150 TEST(res == VK_SUCCESS);
3151
3152 struct Item
3153 {
3154 VkBuffer hBuf;
3155 VmaAllocation hAlloc;
3156 };
3157 std::vector<Item> items;
3158
3159 VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
3160 bufCreateInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
3161
3162 // 1. Allocate two blocks of dedicated memory, half the size of BLOCK_SIZE.
3163 VmaAllocationInfo dedicatedAllocInfo;
3164 {
3165 VmaAllocationCreateInfo allocCreateInfo = {};
3166 allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
3167 allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
3168
3169 bufCreateInfo.size = BLOCK_SIZE / 2;
3170
3171 for(size_t i = 0; i < 2; ++i)
3172 {
3173 Item item;
3174 res = vmaCreateBuffer(hAllocator, &bufCreateInfo, &allocCreateInfo, &item.hBuf, &item.hAlloc, &dedicatedAllocInfo);
3175 TEST(res == VK_SUCCESS);
3176 items.push_back(item);
3177 }
3178 }
3179
3180 // Create pool to make sure allocations must be out of this memory type.
3181 VmaPoolCreateInfo poolCreateInfo = {};
3182 poolCreateInfo.memoryTypeIndex = dedicatedAllocInfo.memoryType;
3183 poolCreateInfo.blockSize = BLOCK_SIZE;
3184
3185 VmaPool hPool;
3186 res = vmaCreatePool(hAllocator, &poolCreateInfo, &hPool);
3187 TEST(res == VK_SUCCESS);
3188
3189 // 2. Allocate normal buffers from all the remaining memory.
3190 {
3191 VmaAllocationCreateInfo allocCreateInfo = {};
3192 allocCreateInfo.pool = hPool;
3193
3194 bufCreateInfo.size = BLOCK_SIZE / 2;
3195
3196 const size_t bufCount = ((HEAP_SIZE_LIMIT / BLOCK_SIZE) - 1) * 2;
3197 for(size_t i = 0; i < bufCount; ++i)
3198 {
3199 Item item;
3200 res = vmaCreateBuffer(hAllocator, &bufCreateInfo, &allocCreateInfo, &item.hBuf, &item.hAlloc, nullptr);
3201 TEST(res == VK_SUCCESS);
3202 items.push_back(item);
3203 }
3204 }
3205
3206 // 3. Allocation of one more (even small) buffer should fail.
3207 {
3208 VmaAllocationCreateInfo allocCreateInfo = {};
3209 allocCreateInfo.pool = hPool;
3210
3211 bufCreateInfo.size = 128;
3212
3213 VkBuffer hBuf;
3214 VmaAllocation hAlloc;
3215 res = vmaCreateBuffer(hAllocator, &bufCreateInfo, &allocCreateInfo, &hBuf, &hAlloc, nullptr);
3216 TEST(res == VK_ERROR_OUT_OF_DEVICE_MEMORY);
3217 }
3218
3219 // Destroy everything.
3220 for(size_t i = items.size(); i--; )
3221 {
3222 vmaDestroyBuffer(hAllocator, items[i].hBuf, items[i].hAlloc);
3223 }
3224
3225 vmaDestroyPool(hAllocator, hPool);
3226
3227 vmaDestroyAllocator(hAllocator);
3228 }
3229
3230 #if VMA_DEBUG_MARGIN
TestDebugMargin()3231 static void TestDebugMargin()
3232 {
3233 if(VMA_DEBUG_MARGIN == 0)
3234 {
3235 return;
3236 }
3237
3238 VkBufferCreateInfo bufInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
3239 bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
3240
3241 VmaAllocationCreateInfo allocCreateInfo = {};
3242 allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
3243
3244 // Create few buffers of different size.
3245 const size_t BUF_COUNT = 10;
3246 BufferInfo buffers[BUF_COUNT];
3247 VmaAllocationInfo allocInfo[BUF_COUNT];
3248 for(size_t i = 0; i < 10; ++i)
3249 {
3250 bufInfo.size = (VkDeviceSize)(i + 1) * 64;
3251 // Last one will be mapped.
3252 allocCreateInfo.flags = (i == BUF_COUNT - 1) ? VMA_ALLOCATION_CREATE_MAPPED_BIT : 0;
3253
3254 VkResult res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo, &buffers[i].Buffer, &buffers[i].Allocation, &allocInfo[i]);
3255 TEST(res == VK_SUCCESS);
3256 // Margin is preserved also at the beginning of a block.
3257 TEST(allocInfo[i].offset >= VMA_DEBUG_MARGIN);
3258
3259 if(i == BUF_COUNT - 1)
3260 {
3261 // Fill with data.
3262 TEST(allocInfo[i].pMappedData != nullptr);
3263 // Uncomment this "+ 1" to overwrite past end of allocation and check corruption detection.
3264 memset(allocInfo[i].pMappedData, 0xFF, bufInfo.size /* + 1 */);
3265 }
3266 }
3267
3268 // Check if their offsets preserve margin between them.
3269 std::sort(allocInfo, allocInfo + BUF_COUNT, [](const VmaAllocationInfo& lhs, const VmaAllocationInfo& rhs) -> bool
3270 {
3271 if(lhs.deviceMemory != rhs.deviceMemory)
3272 {
3273 return lhs.deviceMemory < rhs.deviceMemory;
3274 }
3275 return lhs.offset < rhs.offset;
3276 });
3277 for(size_t i = 1; i < BUF_COUNT; ++i)
3278 {
3279 if(allocInfo[i].deviceMemory == allocInfo[i - 1].deviceMemory)
3280 {
3281 TEST(allocInfo[i].offset >= allocInfo[i - 1].offset + VMA_DEBUG_MARGIN);
3282 }
3283 }
3284
3285 VkResult res = vmaCheckCorruption(g_hAllocator, UINT32_MAX);
3286 TEST(res == VK_SUCCESS);
3287
3288 // Destroy all buffers.
3289 for(size_t i = BUF_COUNT; i--; )
3290 {
3291 vmaDestroyBuffer(g_hAllocator, buffers[i].Buffer, buffers[i].Allocation);
3292 }
3293 }
3294 #endif
3295
TestLinearAllocator()3296 static void TestLinearAllocator()
3297 {
3298 wprintf(L"Test linear allocator\n");
3299
3300 RandomNumberGenerator rand{645332};
3301
3302 VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
3303 sampleBufCreateInfo.size = 1024; // Whatever.
3304 sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
3305
3306 VmaAllocationCreateInfo sampleAllocCreateInfo = {};
3307 sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
3308
3309 VmaPoolCreateInfo poolCreateInfo = {};
3310 VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
3311 TEST(res == VK_SUCCESS);
3312
3313 poolCreateInfo.blockSize = 1024 * 300;
3314 poolCreateInfo.flags = VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT;
3315 poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1;
3316
3317 VmaPool pool = nullptr;
3318 res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
3319 TEST(res == VK_SUCCESS);
3320
3321 VkBufferCreateInfo bufCreateInfo = sampleBufCreateInfo;
3322
3323 VmaAllocationCreateInfo allocCreateInfo = {};
3324 allocCreateInfo.pool = pool;
3325
3326 constexpr size_t maxBufCount = 100;
3327 std::vector<BufferInfo> bufInfo;
3328
3329 constexpr VkDeviceSize bufSizeMin = 16;
3330 constexpr VkDeviceSize bufSizeMax = 1024;
3331 VmaAllocationInfo allocInfo;
3332 VkDeviceSize prevOffset = 0;
3333
3334 // Test one-time free.
3335 for(size_t i = 0; i < 2; ++i)
3336 {
3337 // Allocate number of buffers of varying size that surely fit into this block.
3338 VkDeviceSize bufSumSize = 0;
3339 for(size_t i = 0; i < maxBufCount; ++i)
3340 {
3341 bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3342 BufferInfo newBufInfo;
3343 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3344 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3345 TEST(res == VK_SUCCESS);
3346 TEST(i == 0 || allocInfo.offset > prevOffset);
3347 bufInfo.push_back(newBufInfo);
3348 prevOffset = allocInfo.offset;
3349 bufSumSize += bufCreateInfo.size;
3350 }
3351
3352 // Validate pool stats.
3353 VmaPoolStats stats;
3354 vmaGetPoolStats(g_hAllocator, pool, &stats);
3355 TEST(stats.size == poolCreateInfo.blockSize);
3356 TEST(stats.unusedSize = poolCreateInfo.blockSize - bufSumSize);
3357 TEST(stats.allocationCount == bufInfo.size());
3358
3359 // Destroy the buffers in random order.
3360 while(!bufInfo.empty())
3361 {
3362 const size_t indexToDestroy = rand.Generate() % bufInfo.size();
3363 const BufferInfo& currBufInfo = bufInfo[indexToDestroy];
3364 vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3365 bufInfo.erase(bufInfo.begin() + indexToDestroy);
3366 }
3367 }
3368
3369 // Test stack.
3370 {
3371 // Allocate number of buffers of varying size that surely fit into this block.
3372 for(size_t i = 0; i < maxBufCount; ++i)
3373 {
3374 bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3375 BufferInfo newBufInfo;
3376 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3377 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3378 TEST(res == VK_SUCCESS);
3379 TEST(i == 0 || allocInfo.offset > prevOffset);
3380 bufInfo.push_back(newBufInfo);
3381 prevOffset = allocInfo.offset;
3382 }
3383
3384 // Destroy few buffers from top of the stack.
3385 for(size_t i = 0; i < maxBufCount / 5; ++i)
3386 {
3387 const BufferInfo& currBufInfo = bufInfo.back();
3388 vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3389 bufInfo.pop_back();
3390 }
3391
3392 // Create some more
3393 for(size_t i = 0; i < maxBufCount / 5; ++i)
3394 {
3395 bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3396 BufferInfo newBufInfo;
3397 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3398 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3399 TEST(res == VK_SUCCESS);
3400 TEST(i == 0 || allocInfo.offset > prevOffset);
3401 bufInfo.push_back(newBufInfo);
3402 prevOffset = allocInfo.offset;
3403 }
3404
3405 // Destroy the buffers in reverse order.
3406 while(!bufInfo.empty())
3407 {
3408 const BufferInfo& currBufInfo = bufInfo.back();
3409 vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3410 bufInfo.pop_back();
3411 }
3412 }
3413
3414 // Test ring buffer.
3415 {
3416 // Allocate number of buffers that surely fit into this block.
3417 bufCreateInfo.size = bufSizeMax;
3418 for(size_t i = 0; i < maxBufCount; ++i)
3419 {
3420 BufferInfo newBufInfo;
3421 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3422 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3423 TEST(res == VK_SUCCESS);
3424 TEST(i == 0 || allocInfo.offset > prevOffset);
3425 bufInfo.push_back(newBufInfo);
3426 prevOffset = allocInfo.offset;
3427 }
3428
3429 // Free and allocate new buffers so many times that we make sure we wrap-around at least once.
3430 const size_t buffersPerIter = maxBufCount / 10 - 1;
3431 const size_t iterCount = poolCreateInfo.blockSize / bufCreateInfo.size / buffersPerIter * 2;
3432 for(size_t iter = 0; iter < iterCount; ++iter)
3433 {
3434 for(size_t bufPerIter = 0; bufPerIter < buffersPerIter; ++bufPerIter)
3435 {
3436 const BufferInfo& currBufInfo = bufInfo.front();
3437 vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3438 bufInfo.erase(bufInfo.begin());
3439 }
3440 for(size_t bufPerIter = 0; bufPerIter < buffersPerIter; ++bufPerIter)
3441 {
3442 BufferInfo newBufInfo;
3443 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3444 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3445 TEST(res == VK_SUCCESS);
3446 bufInfo.push_back(newBufInfo);
3447 }
3448 }
3449
3450 // Allocate buffers until we reach out-of-memory.
3451 uint32_t debugIndex = 0;
3452 while(res == VK_SUCCESS)
3453 {
3454 BufferInfo newBufInfo;
3455 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3456 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3457 if(res == VK_SUCCESS)
3458 {
3459 bufInfo.push_back(newBufInfo);
3460 }
3461 else
3462 {
3463 TEST(res == VK_ERROR_OUT_OF_DEVICE_MEMORY);
3464 }
3465 ++debugIndex;
3466 }
3467
3468 // Destroy the buffers in random order.
3469 while(!bufInfo.empty())
3470 {
3471 const size_t indexToDestroy = rand.Generate() % bufInfo.size();
3472 const BufferInfo& currBufInfo = bufInfo[indexToDestroy];
3473 vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3474 bufInfo.erase(bufInfo.begin() + indexToDestroy);
3475 }
3476 }
3477
3478 // Test double stack.
3479 {
3480 // Allocate number of buffers of varying size that surely fit into this block, alternate from bottom/top.
3481 VkDeviceSize prevOffsetLower = 0;
3482 VkDeviceSize prevOffsetUpper = poolCreateInfo.blockSize;
3483 for(size_t i = 0; i < maxBufCount; ++i)
3484 {
3485 const bool upperAddress = (i % 2) != 0;
3486 if(upperAddress)
3487 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3488 else
3489 allocCreateInfo.flags &= ~VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3490 bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3491 BufferInfo newBufInfo;
3492 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3493 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3494 TEST(res == VK_SUCCESS);
3495 if(upperAddress)
3496 {
3497 TEST(allocInfo.offset < prevOffsetUpper);
3498 prevOffsetUpper = allocInfo.offset;
3499 }
3500 else
3501 {
3502 TEST(allocInfo.offset >= prevOffsetLower);
3503 prevOffsetLower = allocInfo.offset;
3504 }
3505 TEST(prevOffsetLower < prevOffsetUpper);
3506 bufInfo.push_back(newBufInfo);
3507 }
3508
3509 // Destroy few buffers from top of the stack.
3510 for(size_t i = 0; i < maxBufCount / 5; ++i)
3511 {
3512 const BufferInfo& currBufInfo = bufInfo.back();
3513 vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3514 bufInfo.pop_back();
3515 }
3516
3517 // Create some more
3518 for(size_t i = 0; i < maxBufCount / 5; ++i)
3519 {
3520 const bool upperAddress = (i % 2) != 0;
3521 if(upperAddress)
3522 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3523 else
3524 allocCreateInfo.flags &= ~VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3525 bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3526 BufferInfo newBufInfo;
3527 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3528 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3529 TEST(res == VK_SUCCESS);
3530 bufInfo.push_back(newBufInfo);
3531 }
3532
3533 // Destroy the buffers in reverse order.
3534 while(!bufInfo.empty())
3535 {
3536 const BufferInfo& currBufInfo = bufInfo.back();
3537 vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3538 bufInfo.pop_back();
3539 }
3540
3541 // Create buffers on both sides until we reach out of memory.
3542 prevOffsetLower = 0;
3543 prevOffsetUpper = poolCreateInfo.blockSize;
3544 res = VK_SUCCESS;
3545 for(size_t i = 0; res == VK_SUCCESS; ++i)
3546 {
3547 const bool upperAddress = (i % 2) != 0;
3548 if(upperAddress)
3549 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3550 else
3551 allocCreateInfo.flags &= ~VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3552 bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3553 BufferInfo newBufInfo;
3554 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3555 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3556 if(res == VK_SUCCESS)
3557 {
3558 if(upperAddress)
3559 {
3560 TEST(allocInfo.offset < prevOffsetUpper);
3561 prevOffsetUpper = allocInfo.offset;
3562 }
3563 else
3564 {
3565 TEST(allocInfo.offset >= prevOffsetLower);
3566 prevOffsetLower = allocInfo.offset;
3567 }
3568 TEST(prevOffsetLower < prevOffsetUpper);
3569 bufInfo.push_back(newBufInfo);
3570 }
3571 }
3572
3573 // Destroy the buffers in random order.
3574 while(!bufInfo.empty())
3575 {
3576 const size_t indexToDestroy = rand.Generate() % bufInfo.size();
3577 const BufferInfo& currBufInfo = bufInfo[indexToDestroy];
3578 vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3579 bufInfo.erase(bufInfo.begin() + indexToDestroy);
3580 }
3581
3582 // Create buffers on upper side only, constant size, until we reach out of memory.
3583 prevOffsetUpper = poolCreateInfo.blockSize;
3584 res = VK_SUCCESS;
3585 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3586 bufCreateInfo.size = bufSizeMax;
3587 for(size_t i = 0; res == VK_SUCCESS; ++i)
3588 {
3589 BufferInfo newBufInfo;
3590 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3591 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3592 if(res == VK_SUCCESS)
3593 {
3594 TEST(allocInfo.offset < prevOffsetUpper);
3595 prevOffsetUpper = allocInfo.offset;
3596 bufInfo.push_back(newBufInfo);
3597 }
3598 }
3599
3600 // Destroy the buffers in reverse order.
3601 while(!bufInfo.empty())
3602 {
3603 const BufferInfo& currBufInfo = bufInfo.back();
3604 vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3605 bufInfo.pop_back();
3606 }
3607 }
3608
3609 // Test ring buffer with lost allocations.
3610 {
3611 // Allocate number of buffers until pool is full.
3612 // Notice CAN_BECOME_LOST flag and call to vmaSetCurrentFrameIndex.
3613 allocCreateInfo.flags = VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT;
3614 res = VK_SUCCESS;
3615 for(size_t i = 0; res == VK_SUCCESS; ++i)
3616 {
3617 vmaSetCurrentFrameIndex(g_hAllocator, ++g_FrameIndex);
3618
3619 bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3620
3621 BufferInfo newBufInfo;
3622 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3623 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3624 if(res == VK_SUCCESS)
3625 bufInfo.push_back(newBufInfo);
3626 }
3627
3628 // Free first half of it.
3629 {
3630 const size_t buffersToDelete = bufInfo.size() / 2;
3631 for(size_t i = 0; i < buffersToDelete; ++i)
3632 {
3633 vmaDestroyBuffer(g_hAllocator, bufInfo[i].Buffer, bufInfo[i].Allocation);
3634 }
3635 bufInfo.erase(bufInfo.begin(), bufInfo.begin() + buffersToDelete);
3636 }
3637
3638 // Allocate number of buffers until pool is full again.
3639 // This way we make sure ring buffers wraps around, front in in the middle.
3640 res = VK_SUCCESS;
3641 for(size_t i = 0; res == VK_SUCCESS; ++i)
3642 {
3643 vmaSetCurrentFrameIndex(g_hAllocator, ++g_FrameIndex);
3644
3645 bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3646
3647 BufferInfo newBufInfo;
3648 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3649 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3650 if(res == VK_SUCCESS)
3651 bufInfo.push_back(newBufInfo);
3652 }
3653
3654 VkDeviceSize firstNewOffset;
3655 {
3656 vmaSetCurrentFrameIndex(g_hAllocator, ++g_FrameIndex);
3657
3658 // Allocate a large buffer with CAN_MAKE_OTHER_LOST.
3659 allocCreateInfo.flags = VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT;
3660 bufCreateInfo.size = bufSizeMax;
3661
3662 BufferInfo newBufInfo;
3663 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3664 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3665 TEST(res == VK_SUCCESS);
3666 bufInfo.push_back(newBufInfo);
3667 firstNewOffset = allocInfo.offset;
3668
3669 // Make sure at least one buffer from the beginning became lost.
3670 vmaGetAllocationInfo(g_hAllocator, bufInfo[0].Allocation, &allocInfo);
3671 TEST(allocInfo.deviceMemory == VK_NULL_HANDLE);
3672 }
3673
3674 #if 0 // TODO Fix and uncomment. Failing on Intel.
3675 // Allocate more buffers that CAN_MAKE_OTHER_LOST until we wrap-around with this.
3676 size_t newCount = 1;
3677 for(;;)
3678 {
3679 vmaSetCurrentFrameIndex(g_hAllocator, ++g_FrameIndex);
3680
3681 bufCreateInfo.size = align_up<VkDeviceSize>(bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin), 16);
3682
3683 BufferInfo newBufInfo;
3684 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3685 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3686
3687 TEST(res == VK_SUCCESS);
3688 bufInfo.push_back(newBufInfo);
3689 ++newCount;
3690 if(allocInfo.offset < firstNewOffset)
3691 break;
3692 }
3693 #endif
3694
3695 // Delete buffers that are lost.
3696 for(size_t i = bufInfo.size(); i--; )
3697 {
3698 vmaGetAllocationInfo(g_hAllocator, bufInfo[i].Allocation, &allocInfo);
3699 if(allocInfo.deviceMemory == VK_NULL_HANDLE)
3700 {
3701 vmaDestroyBuffer(g_hAllocator, bufInfo[i].Buffer, bufInfo[i].Allocation);
3702 bufInfo.erase(bufInfo.begin() + i);
3703 }
3704 }
3705
3706 // Test vmaMakePoolAllocationsLost
3707 {
3708 vmaSetCurrentFrameIndex(g_hAllocator, ++g_FrameIndex);
3709
3710 size_t lostAllocCount = 0;
3711 vmaMakePoolAllocationsLost(g_hAllocator, pool, &lostAllocCount);
3712 TEST(lostAllocCount > 0);
3713
3714 size_t realLostAllocCount = 0;
3715 for(size_t i = 0; i < bufInfo.size(); ++i)
3716 {
3717 vmaGetAllocationInfo(g_hAllocator, bufInfo[i].Allocation, &allocInfo);
3718 if(allocInfo.deviceMemory == VK_NULL_HANDLE)
3719 ++realLostAllocCount;
3720 }
3721 TEST(realLostAllocCount == lostAllocCount);
3722 }
3723
3724 // Destroy all the buffers in forward order.
3725 for(size_t i = 0; i < bufInfo.size(); ++i)
3726 vmaDestroyBuffer(g_hAllocator, bufInfo[i].Buffer, bufInfo[i].Allocation);
3727 bufInfo.clear();
3728 }
3729
3730 vmaDestroyPool(g_hAllocator, pool);
3731 }
3732
TestLinearAllocatorMultiBlock()3733 static void TestLinearAllocatorMultiBlock()
3734 {
3735 wprintf(L"Test linear allocator multi block\n");
3736
3737 RandomNumberGenerator rand{345673};
3738
3739 VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
3740 sampleBufCreateInfo.size = 1024 * 1024;
3741 sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
3742
3743 VmaAllocationCreateInfo sampleAllocCreateInfo = {};
3744 sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
3745
3746 VmaPoolCreateInfo poolCreateInfo = {};
3747 poolCreateInfo.flags = VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT;
3748 VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
3749 TEST(res == VK_SUCCESS);
3750
3751 VmaPool pool = nullptr;
3752 res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
3753 TEST(res == VK_SUCCESS);
3754
3755 VkBufferCreateInfo bufCreateInfo = sampleBufCreateInfo;
3756
3757 VmaAllocationCreateInfo allocCreateInfo = {};
3758 allocCreateInfo.pool = pool;
3759
3760 std::vector<BufferInfo> bufInfo;
3761 VmaAllocationInfo allocInfo;
3762
3763 // Test one-time free.
3764 {
3765 // Allocate buffers until we move to a second block.
3766 VkDeviceMemory lastMem = VK_NULL_HANDLE;
3767 for(uint32_t i = 0; ; ++i)
3768 {
3769 BufferInfo newBufInfo;
3770 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3771 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3772 TEST(res == VK_SUCCESS);
3773 bufInfo.push_back(newBufInfo);
3774 if(lastMem && allocInfo.deviceMemory != lastMem)
3775 {
3776 break;
3777 }
3778 lastMem = allocInfo.deviceMemory;
3779 }
3780
3781 TEST(bufInfo.size() > 2);
3782
3783 // Make sure that pool has now two blocks.
3784 VmaPoolStats poolStats = {};
3785 vmaGetPoolStats(g_hAllocator, pool, &poolStats);
3786 TEST(poolStats.blockCount == 2);
3787
3788 // Destroy all the buffers in random order.
3789 while(!bufInfo.empty())
3790 {
3791 const size_t indexToDestroy = rand.Generate() % bufInfo.size();
3792 const BufferInfo& currBufInfo = bufInfo[indexToDestroy];
3793 vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3794 bufInfo.erase(bufInfo.begin() + indexToDestroy);
3795 }
3796
3797 // Make sure that pool has now at most one block.
3798 vmaGetPoolStats(g_hAllocator, pool, &poolStats);
3799 TEST(poolStats.blockCount <= 1);
3800 }
3801
3802 // Test stack.
3803 {
3804 // Allocate buffers until we move to a second block.
3805 VkDeviceMemory lastMem = VK_NULL_HANDLE;
3806 for(uint32_t i = 0; ; ++i)
3807 {
3808 BufferInfo newBufInfo;
3809 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3810 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3811 TEST(res == VK_SUCCESS);
3812 bufInfo.push_back(newBufInfo);
3813 if(lastMem && allocInfo.deviceMemory != lastMem)
3814 {
3815 break;
3816 }
3817 lastMem = allocInfo.deviceMemory;
3818 }
3819
3820 TEST(bufInfo.size() > 2);
3821
3822 // Add few more buffers.
3823 for(uint32_t i = 0; i < 5; ++i)
3824 {
3825 BufferInfo newBufInfo;
3826 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3827 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3828 TEST(res == VK_SUCCESS);
3829 bufInfo.push_back(newBufInfo);
3830 }
3831
3832 // Make sure that pool has now two blocks.
3833 VmaPoolStats poolStats = {};
3834 vmaGetPoolStats(g_hAllocator, pool, &poolStats);
3835 TEST(poolStats.blockCount == 2);
3836
3837 // Delete half of buffers, LIFO.
3838 for(size_t i = 0, countToDelete = bufInfo.size() / 2; i < countToDelete; ++i)
3839 {
3840 const BufferInfo& currBufInfo = bufInfo.back();
3841 vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3842 bufInfo.pop_back();
3843 }
3844
3845 // Add one more buffer.
3846 BufferInfo newBufInfo;
3847 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3848 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3849 TEST(res == VK_SUCCESS);
3850 bufInfo.push_back(newBufInfo);
3851
3852 // Make sure that pool has now one block.
3853 vmaGetPoolStats(g_hAllocator, pool, &poolStats);
3854 TEST(poolStats.blockCount == 1);
3855
3856 // Delete all the remaining buffers, LIFO.
3857 while(!bufInfo.empty())
3858 {
3859 const BufferInfo& currBufInfo = bufInfo.back();
3860 vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3861 bufInfo.pop_back();
3862 }
3863 }
3864
3865 vmaDestroyPool(g_hAllocator, pool);
3866 }
3867
ManuallyTestLinearAllocator()3868 static void ManuallyTestLinearAllocator()
3869 {
3870 VmaStats origStats;
3871 vmaCalculateStats(g_hAllocator, &origStats);
3872
3873 wprintf(L"Manually test linear allocator\n");
3874
3875 RandomNumberGenerator rand{645332};
3876
3877 VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
3878 sampleBufCreateInfo.size = 1024; // Whatever.
3879 sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
3880
3881 VmaAllocationCreateInfo sampleAllocCreateInfo = {};
3882 sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
3883
3884 VmaPoolCreateInfo poolCreateInfo = {};
3885 VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
3886 TEST(res == VK_SUCCESS);
3887
3888 poolCreateInfo.blockSize = 10 * 1024;
3889 poolCreateInfo.flags = VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT;
3890 poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1;
3891
3892 VmaPool pool = nullptr;
3893 res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
3894 TEST(res == VK_SUCCESS);
3895
3896 VkBufferCreateInfo bufCreateInfo = sampleBufCreateInfo;
3897
3898 VmaAllocationCreateInfo allocCreateInfo = {};
3899 allocCreateInfo.pool = pool;
3900
3901 std::vector<BufferInfo> bufInfo;
3902 VmaAllocationInfo allocInfo;
3903 BufferInfo newBufInfo;
3904
3905 // Test double stack.
3906 {
3907 /*
3908 Lower: Buffer 32 B, Buffer 1024 B, Buffer 32 B
3909 Upper: Buffer 16 B, Buffer 1024 B, Buffer 128 B
3910
3911 Totally:
3912 1 block allocated
3913 10240 Vulkan bytes
3914 6 new allocations
3915 2256 bytes in allocations
3916 */
3917
3918 bufCreateInfo.size = 32;
3919 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3920 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3921 TEST(res == VK_SUCCESS);
3922 bufInfo.push_back(newBufInfo);
3923
3924 bufCreateInfo.size = 1024;
3925 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3926 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3927 TEST(res == VK_SUCCESS);
3928 bufInfo.push_back(newBufInfo);
3929
3930 bufCreateInfo.size = 32;
3931 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3932 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3933 TEST(res == VK_SUCCESS);
3934 bufInfo.push_back(newBufInfo);
3935
3936 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_UPPER_ADDRESS_BIT;
3937
3938 bufCreateInfo.size = 128;
3939 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3940 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3941 TEST(res == VK_SUCCESS);
3942 bufInfo.push_back(newBufInfo);
3943
3944 bufCreateInfo.size = 1024;
3945 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3946 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3947 TEST(res == VK_SUCCESS);
3948 bufInfo.push_back(newBufInfo);
3949
3950 bufCreateInfo.size = 16;
3951 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
3952 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
3953 TEST(res == VK_SUCCESS);
3954 bufInfo.push_back(newBufInfo);
3955
3956 VmaStats currStats;
3957 vmaCalculateStats(g_hAllocator, &currStats);
3958 VmaPoolStats poolStats;
3959 vmaGetPoolStats(g_hAllocator, pool, &poolStats);
3960
3961 char* statsStr = nullptr;
3962 vmaBuildStatsString(g_hAllocator, &statsStr, VK_TRUE);
3963
3964 // PUT BREAKPOINT HERE TO CHECK.
3965 // Inspect: currStats versus origStats, poolStats, statsStr.
3966 int I = 0;
3967
3968 vmaFreeStatsString(g_hAllocator, statsStr);
3969
3970 // Destroy the buffers in reverse order.
3971 while(!bufInfo.empty())
3972 {
3973 const BufferInfo& currBufInfo = bufInfo.back();
3974 vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
3975 bufInfo.pop_back();
3976 }
3977 }
3978
3979 vmaDestroyPool(g_hAllocator, pool);
3980 }
3981
BenchmarkAlgorithmsCase(FILE * file,uint32_t algorithm,bool empty,VmaAllocationCreateFlags allocStrategy,FREE_ORDER freeOrder)3982 static void BenchmarkAlgorithmsCase(FILE* file,
3983 uint32_t algorithm,
3984 bool empty,
3985 VmaAllocationCreateFlags allocStrategy,
3986 FREE_ORDER freeOrder)
3987 {
3988 RandomNumberGenerator rand{16223};
3989
3990 const VkDeviceSize bufSizeMin = 32;
3991 const VkDeviceSize bufSizeMax = 1024;
3992 const size_t maxBufCapacity = 10000;
3993 const uint32_t iterationCount = 10;
3994
3995 VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
3996 sampleBufCreateInfo.size = bufSizeMax;
3997 sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
3998
3999 VmaAllocationCreateInfo sampleAllocCreateInfo = {};
4000 sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
4001
4002 VmaPoolCreateInfo poolCreateInfo = {};
4003 VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
4004 TEST(res == VK_SUCCESS);
4005
4006 poolCreateInfo.blockSize = bufSizeMax * maxBufCapacity;
4007 poolCreateInfo.flags |= algorithm;
4008 poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1;
4009
4010 VmaPool pool = nullptr;
4011 res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
4012 TEST(res == VK_SUCCESS);
4013
4014 // Buffer created just to get memory requirements. Never bound to any memory.
4015 VkBuffer dummyBuffer = VK_NULL_HANDLE;
4016 res = vkCreateBuffer(g_hDevice, &sampleBufCreateInfo, g_Allocs, &dummyBuffer);
4017 TEST(res == VK_SUCCESS && dummyBuffer);
4018
4019 VkMemoryRequirements memReq = {};
4020 vkGetBufferMemoryRequirements(g_hDevice, dummyBuffer, &memReq);
4021
4022 vkDestroyBuffer(g_hDevice, dummyBuffer, g_Allocs);
4023
4024 VmaAllocationCreateInfo allocCreateInfo = {};
4025 allocCreateInfo.pool = pool;
4026 allocCreateInfo.flags = allocStrategy;
4027
4028 VmaAllocation alloc;
4029 std::vector<VmaAllocation> baseAllocations;
4030
4031 if(!empty)
4032 {
4033 // Make allocations up to 1/3 of pool size.
4034 VkDeviceSize totalSize = 0;
4035 while(totalSize < poolCreateInfo.blockSize / 3)
4036 {
4037 // This test intentionally allows sizes that are aligned to 4 or 16 bytes.
4038 // This is theoretically allowed and already uncovered one bug.
4039 memReq.size = bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin);
4040 res = vmaAllocateMemory(g_hAllocator, &memReq, &allocCreateInfo, &alloc, nullptr);
4041 TEST(res == VK_SUCCESS);
4042 baseAllocations.push_back(alloc);
4043 totalSize += memReq.size;
4044 }
4045
4046 // Delete half of them, choose randomly.
4047 size_t allocsToDelete = baseAllocations.size() / 2;
4048 for(size_t i = 0; i < allocsToDelete; ++i)
4049 {
4050 const size_t index = (size_t)rand.Generate() % baseAllocations.size();
4051 vmaFreeMemory(g_hAllocator, baseAllocations[index]);
4052 baseAllocations.erase(baseAllocations.begin() + index);
4053 }
4054 }
4055
4056 // BENCHMARK
4057 const size_t allocCount = maxBufCapacity / 3;
4058 std::vector<VmaAllocation> testAllocations;
4059 testAllocations.reserve(allocCount);
4060 duration allocTotalDuration = duration::zero();
4061 duration freeTotalDuration = duration::zero();
4062 for(uint32_t iterationIndex = 0; iterationIndex < iterationCount; ++iterationIndex)
4063 {
4064 // Allocations
4065 time_point allocTimeBeg = std::chrono::high_resolution_clock::now();
4066 for(size_t i = 0; i < allocCount; ++i)
4067 {
4068 memReq.size = bufSizeMin + rand.Generate() % (bufSizeMax - bufSizeMin);
4069 res = vmaAllocateMemory(g_hAllocator, &memReq, &allocCreateInfo, &alloc, nullptr);
4070 TEST(res == VK_SUCCESS);
4071 testAllocations.push_back(alloc);
4072 }
4073 allocTotalDuration += std::chrono::high_resolution_clock::now() - allocTimeBeg;
4074
4075 // Deallocations
4076 switch(freeOrder)
4077 {
4078 case FREE_ORDER::FORWARD:
4079 // Leave testAllocations unchanged.
4080 break;
4081 case FREE_ORDER::BACKWARD:
4082 std::reverse(testAllocations.begin(), testAllocations.end());
4083 break;
4084 case FREE_ORDER::RANDOM:
4085 std::shuffle(testAllocations.begin(), testAllocations.end(), MyUniformRandomNumberGenerator(rand));
4086 break;
4087 default: assert(0);
4088 }
4089
4090 time_point freeTimeBeg = std::chrono::high_resolution_clock::now();
4091 for(size_t i = 0; i < allocCount; ++i)
4092 vmaFreeMemory(g_hAllocator, testAllocations[i]);
4093 freeTotalDuration += std::chrono::high_resolution_clock::now() - freeTimeBeg;
4094
4095 testAllocations.clear();
4096 }
4097
4098 // Delete baseAllocations
4099 while(!baseAllocations.empty())
4100 {
4101 vmaFreeMemory(g_hAllocator, baseAllocations.back());
4102 baseAllocations.pop_back();
4103 }
4104
4105 vmaDestroyPool(g_hAllocator, pool);
4106
4107 const float allocTotalSeconds = ToFloatSeconds(allocTotalDuration);
4108 const float freeTotalSeconds = ToFloatSeconds(freeTotalDuration);
4109
4110 printf(" Algorithm=%s %s Allocation=%s FreeOrder=%s: allocations %g s, free %g s\n",
4111 AlgorithmToStr(algorithm),
4112 empty ? "Empty" : "Not empty",
4113 GetAllocationStrategyName(allocStrategy),
4114 FREE_ORDER_NAMES[(size_t)freeOrder],
4115 allocTotalSeconds,
4116 freeTotalSeconds);
4117
4118 if(file)
4119 {
4120 std::string currTime;
4121 CurrentTimeToStr(currTime);
4122
4123 fprintf(file, "%s,%s,%s,%u,%s,%s,%g,%g\n",
4124 CODE_DESCRIPTION, currTime.c_str(),
4125 AlgorithmToStr(algorithm),
4126 empty ? 1 : 0,
4127 GetAllocationStrategyName(allocStrategy),
4128 FREE_ORDER_NAMES[(uint32_t)freeOrder],
4129 allocTotalSeconds,
4130 freeTotalSeconds);
4131 }
4132 }
4133
TestBufferDeviceAddress()4134 static void TestBufferDeviceAddress()
4135 {
4136 wprintf(L"Test buffer device address\n");
4137
4138 assert(VK_KHR_buffer_device_address_enabled);
4139
4140 VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
4141 bufCreateInfo.size = 0x10000;
4142 bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
4143 VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; // !!!
4144
4145 VmaAllocationCreateInfo allocCreateInfo = {};
4146 allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
4147
4148 for(uint32_t testIndex = 0; testIndex < 2; ++testIndex)
4149 {
4150 // 1st is placed, 2nd is dedicated.
4151 if(testIndex == 1)
4152 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
4153
4154 BufferInfo bufInfo = {};
4155 VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
4156 &bufInfo.Buffer, &bufInfo.Allocation, nullptr);
4157 TEST(res == VK_SUCCESS);
4158
4159 VkBufferDeviceAddressInfoEXT bufferDeviceAddressInfo = { VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_EXT };
4160 bufferDeviceAddressInfo.buffer = bufInfo.Buffer;
4161 TEST(g_vkGetBufferDeviceAddressKHR != nullptr);
4162 VkDeviceAddress addr = g_vkGetBufferDeviceAddressKHR(g_hDevice, &bufferDeviceAddressInfo);
4163 TEST(addr != 0);
4164
4165 vmaDestroyBuffer(g_hAllocator, bufInfo.Buffer, bufInfo.Allocation);
4166 }
4167 }
4168
TestMemoryPriority()4169 static void TestMemoryPriority()
4170 {
4171 wprintf(L"Test memory priority\n");
4172
4173 assert(VK_EXT_memory_priority_enabled);
4174
4175 VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
4176 bufCreateInfo.size = 0x10000;
4177 bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
4178
4179 VmaAllocationCreateInfo allocCreateInfo = {};
4180 allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
4181 allocCreateInfo.priority = 1.f;
4182
4183 for(uint32_t testIndex = 0; testIndex < 2; ++testIndex)
4184 {
4185 // 1st is placed, 2nd is dedicated.
4186 if(testIndex == 1)
4187 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
4188
4189 BufferInfo bufInfo = {};
4190 VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
4191 &bufInfo.Buffer, &bufInfo.Allocation, nullptr);
4192 TEST(res == VK_SUCCESS);
4193
4194 // There is nothing we can do to validate the priority.
4195
4196 vmaDestroyBuffer(g_hAllocator, bufInfo.Buffer, bufInfo.Allocation);
4197 }
4198 }
4199
BenchmarkAlgorithms(FILE * file)4200 static void BenchmarkAlgorithms(FILE* file)
4201 {
4202 wprintf(L"Benchmark algorithms\n");
4203
4204 if(file)
4205 {
4206 fprintf(file,
4207 "Code,Time,"
4208 "Algorithm,Empty,Allocation strategy,Free order,"
4209 "Allocation time (s),Deallocation time (s)\n");
4210 }
4211
4212 uint32_t freeOrderCount = 1;
4213 if(ConfigType >= CONFIG_TYPE::CONFIG_TYPE_LARGE)
4214 freeOrderCount = 3;
4215 else if(ConfigType >= CONFIG_TYPE::CONFIG_TYPE_SMALL)
4216 freeOrderCount = 2;
4217
4218 const uint32_t emptyCount = ConfigType >= CONFIG_TYPE::CONFIG_TYPE_SMALL ? 2 : 1;
4219 const uint32_t allocStrategyCount = GetAllocationStrategyCount();
4220
4221 for(uint32_t freeOrderIndex = 0; freeOrderIndex < freeOrderCount; ++freeOrderIndex)
4222 {
4223 FREE_ORDER freeOrder = FREE_ORDER::COUNT;
4224 switch(freeOrderIndex)
4225 {
4226 case 0: freeOrder = FREE_ORDER::BACKWARD; break;
4227 case 1: freeOrder = FREE_ORDER::FORWARD; break;
4228 case 2: freeOrder = FREE_ORDER::RANDOM; break;
4229 default: assert(0);
4230 }
4231
4232 for(uint32_t emptyIndex = 0; emptyIndex < emptyCount; ++emptyIndex)
4233 {
4234 for(uint32_t algorithmIndex = 0; algorithmIndex < 3; ++algorithmIndex)
4235 {
4236 uint32_t algorithm = 0;
4237 switch(algorithmIndex)
4238 {
4239 case 0:
4240 break;
4241 case 1:
4242 algorithm = VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT;
4243 break;
4244 case 2:
4245 algorithm = VMA_POOL_CREATE_LINEAR_ALGORITHM_BIT;
4246 break;
4247 default:
4248 assert(0);
4249 }
4250
4251 uint32_t currAllocStrategyCount = algorithm != 0 ? 1 : allocStrategyCount;
4252 for(uint32_t allocStrategyIndex = 0; allocStrategyIndex < currAllocStrategyCount; ++allocStrategyIndex)
4253 {
4254 VmaAllocatorCreateFlags strategy = 0;
4255 if(currAllocStrategyCount > 1)
4256 {
4257 switch(allocStrategyIndex)
4258 {
4259 case 0: strategy = VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT; break;
4260 case 1: strategy = VMA_ALLOCATION_CREATE_STRATEGY_WORST_FIT_BIT; break;
4261 case 2: strategy = VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT; break;
4262 default: assert(0);
4263 }
4264 }
4265
4266 BenchmarkAlgorithmsCase(
4267 file,
4268 algorithm,
4269 (emptyIndex == 0), // empty
4270 strategy,
4271 freeOrder); // freeOrder
4272 }
4273 }
4274 }
4275 }
4276 }
4277
TestPool_SameSize()4278 static void TestPool_SameSize()
4279 {
4280 const VkDeviceSize BUF_SIZE = 1024 * 1024;
4281 const size_t BUF_COUNT = 100;
4282 VkResult res;
4283
4284 RandomNumberGenerator rand{123};
4285
4286 VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
4287 bufferInfo.size = BUF_SIZE;
4288 bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
4289
4290 uint32_t memoryTypeBits = UINT32_MAX;
4291 {
4292 VkBuffer dummyBuffer;
4293 res = vkCreateBuffer(g_hDevice, &bufferInfo, g_Allocs, &dummyBuffer);
4294 TEST(res == VK_SUCCESS);
4295
4296 VkMemoryRequirements memReq;
4297 vkGetBufferMemoryRequirements(g_hDevice, dummyBuffer, &memReq);
4298 memoryTypeBits = memReq.memoryTypeBits;
4299
4300 vkDestroyBuffer(g_hDevice, dummyBuffer, g_Allocs);
4301 }
4302
4303 VmaAllocationCreateInfo poolAllocInfo = {};
4304 poolAllocInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
4305 uint32_t memTypeIndex;
4306 res = vmaFindMemoryTypeIndex(
4307 g_hAllocator,
4308 memoryTypeBits,
4309 &poolAllocInfo,
4310 &memTypeIndex);
4311
4312 VmaPoolCreateInfo poolCreateInfo = {};
4313 poolCreateInfo.memoryTypeIndex = memTypeIndex;
4314 poolCreateInfo.blockSize = BUF_SIZE * BUF_COUNT / 4;
4315 poolCreateInfo.minBlockCount = 1;
4316 poolCreateInfo.maxBlockCount = 4;
4317 poolCreateInfo.frameInUseCount = 0;
4318
4319 VmaPool pool;
4320 res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
4321 TEST(res == VK_SUCCESS);
4322
4323 // Test pool name
4324 {
4325 static const char* const POOL_NAME = "Pool name";
4326 vmaSetPoolName(g_hAllocator, pool, POOL_NAME);
4327
4328 const char* fetchedPoolName = nullptr;
4329 vmaGetPoolName(g_hAllocator, pool, &fetchedPoolName);
4330 TEST(strcmp(fetchedPoolName, POOL_NAME) == 0);
4331
4332 vmaSetPoolName(g_hAllocator, pool, nullptr);
4333 }
4334
4335 vmaSetCurrentFrameIndex(g_hAllocator, 1);
4336
4337 VmaAllocationCreateInfo allocInfo = {};
4338 allocInfo.pool = pool;
4339 allocInfo.flags = VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT |
4340 VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT;
4341
4342 struct BufItem
4343 {
4344 VkBuffer Buf;
4345 VmaAllocation Alloc;
4346 };
4347 std::vector<BufItem> items;
4348
4349 // Fill entire pool.
4350 for(size_t i = 0; i < BUF_COUNT; ++i)
4351 {
4352 BufItem item;
4353 res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocInfo, &item.Buf, &item.Alloc, nullptr);
4354 TEST(res == VK_SUCCESS);
4355 items.push_back(item);
4356 }
4357
4358 // Make sure that another allocation would fail.
4359 {
4360 BufItem item;
4361 res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocInfo, &item.Buf, &item.Alloc, nullptr);
4362 TEST(res == VK_ERROR_OUT_OF_DEVICE_MEMORY);
4363 }
4364
4365 // Validate that no buffer is lost. Also check that they are not mapped.
4366 for(size_t i = 0; i < items.size(); ++i)
4367 {
4368 VmaAllocationInfo allocInfo;
4369 vmaGetAllocationInfo(g_hAllocator, items[i].Alloc, &allocInfo);
4370 TEST(allocInfo.deviceMemory != VK_NULL_HANDLE);
4371 TEST(allocInfo.pMappedData == nullptr);
4372 }
4373
4374 // Free some percent of random items.
4375 {
4376 const size_t PERCENT_TO_FREE = 10;
4377 size_t itemsToFree = items.size() * PERCENT_TO_FREE / 100;
4378 for(size_t i = 0; i < itemsToFree; ++i)
4379 {
4380 size_t index = (size_t)rand.Generate() % items.size();
4381 vmaDestroyBuffer(g_hAllocator, items[index].Buf, items[index].Alloc);
4382 items.erase(items.begin() + index);
4383 }
4384 }
4385
4386 // Randomly allocate and free items.
4387 {
4388 const size_t OPERATION_COUNT = BUF_COUNT;
4389 for(size_t i = 0; i < OPERATION_COUNT; ++i)
4390 {
4391 bool allocate = rand.Generate() % 2 != 0;
4392 if(allocate)
4393 {
4394 if(items.size() < BUF_COUNT)
4395 {
4396 BufItem item;
4397 res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocInfo, &item.Buf, &item.Alloc, nullptr);
4398 TEST(res == VK_SUCCESS);
4399 items.push_back(item);
4400 }
4401 }
4402 else // Free
4403 {
4404 if(!items.empty())
4405 {
4406 size_t index = (size_t)rand.Generate() % items.size();
4407 vmaDestroyBuffer(g_hAllocator, items[index].Buf, items[index].Alloc);
4408 items.erase(items.begin() + index);
4409 }
4410 }
4411 }
4412 }
4413
4414 // Allocate up to maximum.
4415 while(items.size() < BUF_COUNT)
4416 {
4417 BufItem item;
4418 res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocInfo, &item.Buf, &item.Alloc, nullptr);
4419 TEST(res == VK_SUCCESS);
4420 items.push_back(item);
4421 }
4422
4423 // Validate that no buffer is lost.
4424 for(size_t i = 0; i < items.size(); ++i)
4425 {
4426 VmaAllocationInfo allocInfo;
4427 vmaGetAllocationInfo(g_hAllocator, items[i].Alloc, &allocInfo);
4428 TEST(allocInfo.deviceMemory != VK_NULL_HANDLE);
4429 }
4430
4431 // Next frame.
4432 vmaSetCurrentFrameIndex(g_hAllocator, 2);
4433
4434 // Allocate another BUF_COUNT buffers.
4435 for(size_t i = 0; i < BUF_COUNT; ++i)
4436 {
4437 BufItem item;
4438 res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocInfo, &item.Buf, &item.Alloc, nullptr);
4439 TEST(res == VK_SUCCESS);
4440 items.push_back(item);
4441 }
4442
4443 // Make sure the first BUF_COUNT is lost. Delete them.
4444 for(size_t i = 0; i < BUF_COUNT; ++i)
4445 {
4446 VmaAllocationInfo allocInfo;
4447 vmaGetAllocationInfo(g_hAllocator, items[i].Alloc, &allocInfo);
4448 TEST(allocInfo.deviceMemory == VK_NULL_HANDLE);
4449 vmaDestroyBuffer(g_hAllocator, items[i].Buf, items[i].Alloc);
4450 }
4451 items.erase(items.begin(), items.begin() + BUF_COUNT);
4452
4453 // Validate that no buffer is lost.
4454 for(size_t i = 0; i < items.size(); ++i)
4455 {
4456 VmaAllocationInfo allocInfo;
4457 vmaGetAllocationInfo(g_hAllocator, items[i].Alloc, &allocInfo);
4458 TEST(allocInfo.deviceMemory != VK_NULL_HANDLE);
4459 }
4460
4461 // Free one item.
4462 vmaDestroyBuffer(g_hAllocator, items.back().Buf, items.back().Alloc);
4463 items.pop_back();
4464
4465 // Validate statistics.
4466 {
4467 VmaPoolStats poolStats = {};
4468 vmaGetPoolStats(g_hAllocator, pool, &poolStats);
4469 TEST(poolStats.allocationCount == items.size());
4470 TEST(poolStats.size = BUF_COUNT * BUF_SIZE);
4471 TEST(poolStats.unusedRangeCount == 1);
4472 TEST(poolStats.unusedRangeSizeMax == BUF_SIZE);
4473 TEST(poolStats.unusedSize == BUF_SIZE);
4474 }
4475
4476 // Free all remaining items.
4477 for(size_t i = items.size(); i--; )
4478 vmaDestroyBuffer(g_hAllocator, items[i].Buf, items[i].Alloc);
4479 items.clear();
4480
4481 // Allocate maximum items again.
4482 for(size_t i = 0; i < BUF_COUNT; ++i)
4483 {
4484 BufItem item;
4485 res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocInfo, &item.Buf, &item.Alloc, nullptr);
4486 TEST(res == VK_SUCCESS);
4487 items.push_back(item);
4488 }
4489
4490 // Delete every other item.
4491 for(size_t i = 0; i < BUF_COUNT / 2; ++i)
4492 {
4493 vmaDestroyBuffer(g_hAllocator, items[i].Buf, items[i].Alloc);
4494 items.erase(items.begin() + i);
4495 }
4496
4497 // Defragment!
4498 {
4499 std::vector<VmaAllocation> allocationsToDefragment(items.size());
4500 for(size_t i = 0; i < items.size(); ++i)
4501 allocationsToDefragment[i] = items[i].Alloc;
4502
4503 VmaDefragmentationStats defragmentationStats;
4504 res = vmaDefragment(g_hAllocator, allocationsToDefragment.data(), items.size(), nullptr, nullptr, &defragmentationStats);
4505 TEST(res == VK_SUCCESS);
4506 TEST(defragmentationStats.deviceMemoryBlocksFreed == 2);
4507 }
4508
4509 // Free all remaining items.
4510 for(size_t i = items.size(); i--; )
4511 vmaDestroyBuffer(g_hAllocator, items[i].Buf, items[i].Alloc);
4512 items.clear();
4513
4514 ////////////////////////////////////////////////////////////////////////////////
4515 // Test for vmaMakePoolAllocationsLost
4516
4517 // Allocate 4 buffers on frame 10.
4518 vmaSetCurrentFrameIndex(g_hAllocator, 10);
4519 for(size_t i = 0; i < 4; ++i)
4520 {
4521 BufItem item;
4522 res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocInfo, &item.Buf, &item.Alloc, nullptr);
4523 TEST(res == VK_SUCCESS);
4524 items.push_back(item);
4525 }
4526
4527 // Touch first 2 of them on frame 11.
4528 vmaSetCurrentFrameIndex(g_hAllocator, 11);
4529 for(size_t i = 0; i < 2; ++i)
4530 {
4531 VmaAllocationInfo allocInfo;
4532 vmaGetAllocationInfo(g_hAllocator, items[i].Alloc, &allocInfo);
4533 }
4534
4535 // vmaMakePoolAllocationsLost. Only remaining 2 should be lost.
4536 size_t lostCount = 0xDEADC0DE;
4537 vmaMakePoolAllocationsLost(g_hAllocator, pool, &lostCount);
4538 TEST(lostCount == 2);
4539
4540 // Make another call. Now 0 should be lost.
4541 vmaMakePoolAllocationsLost(g_hAllocator, pool, &lostCount);
4542 TEST(lostCount == 0);
4543
4544 // Make another call, with null count. Should not crash.
4545 vmaMakePoolAllocationsLost(g_hAllocator, pool, nullptr);
4546
4547 // END: Free all remaining items.
4548 for(size_t i = items.size(); i--; )
4549 vmaDestroyBuffer(g_hAllocator, items[i].Buf, items[i].Alloc);
4550
4551 items.clear();
4552
4553 ////////////////////////////////////////////////////////////////////////////////
4554 // Test for allocation too large for pool
4555
4556 {
4557 VmaAllocationCreateInfo allocCreateInfo = {};
4558 allocCreateInfo.pool = pool;
4559
4560 VkMemoryRequirements memReq;
4561 memReq.memoryTypeBits = UINT32_MAX;
4562 memReq.alignment = 1;
4563 memReq.size = poolCreateInfo.blockSize + 4;
4564
4565 VmaAllocation alloc = nullptr;
4566 res = vmaAllocateMemory(g_hAllocator, &memReq, &allocCreateInfo, &alloc, nullptr);
4567 TEST(res == VK_ERROR_OUT_OF_DEVICE_MEMORY && alloc == nullptr);
4568 }
4569
4570 vmaDestroyPool(g_hAllocator, pool);
4571 }
4572
// Returns true if all `size` bytes starting at pMemory equal `pattern`.
// Used to verify VMA_DEBUG_INITIALIZE_ALLOCATIONS fill values (0xDC on create, 0xEF on destroy).
static bool ValidatePattern(const void* pMemory, size_t size, uint8_t pattern)
{
    const uint8_t* p = (const uint8_t*)pMemory;
    const uint8_t* const end = p + size;
    while(p != end)
    {
        if(*p++ != pattern)
            return false;
    }
    return true;
}
4585
// Verifies that allocations are filled with the debug patterns
// (0xDC after allocation, 0xEF after free) when VMA_DEBUG_INITIALIZE_ALLOCATIONS
// is enabled. Tests both a persistently mapped and a manually mapped buffer.
static void TestAllocationsInitialization()
{
    VkResult res;

    const size_t BUF_SIZE = 1024;

    // Create pool.

    VkBufferCreateInfo bufInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
    bufInfo.size = BUF_SIZE;
    bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;

    // Dummy CPU-only allocation parameters, only used to find a host-visible memory type.
    VmaAllocationCreateInfo dummyBufAllocCreateInfo = {};
    dummyBufAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;

    VmaPoolCreateInfo poolCreateInfo = {};
    poolCreateInfo.blockSize = BUF_SIZE * 10;
    poolCreateInfo.minBlockCount = 1; // To keep memory alive while pool exists.
    poolCreateInfo.maxBlockCount = 1;
    res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &bufInfo, &dummyBufAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
    TEST(res == VK_SUCCESS);

    VmaAllocationCreateInfo bufAllocCreateInfo = {};
    res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &bufAllocCreateInfo.pool);
    TEST(res == VK_SUCCESS);

    // Create one persistently mapped buffer to keep memory of this block mapped,
    // so that pointer to mapped data will remain (more or less...) valid even
    // after destruction of other allocations.
    // This is what makes it possible to read the "freed" pattern below after
    // vmaDestroyBuffer, without the block being unmapped or released.

    bufAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
    VkBuffer firstBuf;
    VmaAllocation firstAlloc;
    res = vmaCreateBuffer(g_hAllocator, &bufInfo, &bufAllocCreateInfo, &firstBuf, &firstAlloc, nullptr);
    TEST(res == VK_SUCCESS);

    // Test buffers.

    // Iteration 0: persistently mapped buffer. Iteration 1: mapped manually.
    for(uint32_t i = 0; i < 2; ++i)
    {
        const bool persistentlyMapped = i == 0;
        bufAllocCreateInfo.flags = persistentlyMapped ? VMA_ALLOCATION_CREATE_MAPPED_BIT : 0;
        VkBuffer buf;
        VmaAllocation alloc;
        VmaAllocationInfo allocInfo;
        res = vmaCreateBuffer(g_hAllocator, &bufInfo, &bufAllocCreateInfo, &buf, &alloc, &allocInfo);
        TEST(res == VK_SUCCESS);

        void* pMappedData;
        if(!persistentlyMapped)
        {
            res = vmaMapMemory(g_hAllocator, alloc, &pMappedData);
            TEST(res == VK_SUCCESS);
        }
        else
        {
            pMappedData = allocInfo.pMappedData;
        }

        // Validate initialized content: new allocations are filled with 0xDC.
        bool valid = ValidatePattern(pMappedData, BUF_SIZE, 0xDC);
        TEST(valid);

        if(!persistentlyMapped)
        {
            vmaUnmapMemory(g_hAllocator, alloc);
        }

        vmaDestroyBuffer(g_hAllocator, buf, alloc);

        // Validate freed content: destroyed allocations are overwritten with 0xEF.
        // Reading pMappedData here is only safe because firstBuf keeps the block mapped.
        valid = ValidatePattern(pMappedData, BUF_SIZE, 0xEF);
        TEST(valid);
    }

    vmaDestroyBuffer(g_hAllocator, firstBuf, firstAlloc);
    vmaDestroyPool(g_hAllocator, bufAllocCreateInfo.pool);
}
4664
// Multi-threaded benchmark of allocations in a custom VmaPool using the
// "lost allocations" feature (CAN_BECOME_LOST / CAN_MAKE_OTHER_LOST).
// The main thread drives frames; each worker thread owns a set of Items
// (buffers and/or images as configured), randomly moves them between the
// "used" and "unused" sets each frame, touches used ones (recreating those
// that became lost), and records per-thread timing/failure statistics that
// are merged into outResult at the end.
// Win32-only: frame synchronization uses CreateEvent/SetEvent/WaitFor*.
static void TestPool_Benchmark(
    PoolTestResult& outResult,
    const PoolTestConfig& config)
{
    TEST(config.ThreadCount > 0);

    RandomNumberGenerator mainRand{config.RandSeed};

    // Sum of probability weights over all configured size buckets; used below
    // to pick a random AllocationSize proportionally to its Probability.
    uint32_t allocationSizeProbabilitySum = std::accumulate(
        config.AllocationSizes.begin(),
        config.AllocationSizes.end(),
        0u,
        [](uint32_t sum, const AllocationSize& allocSize) {
            return sum + allocSize.Probability;
        });

    // Template create-infos, used only to query memory requirements below.
    VkBufferCreateInfo bufferTemplateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
    bufferTemplateInfo.size = 256; // Whatever.
    bufferTemplateInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;

    VkImageCreateInfo imageTemplateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
    imageTemplateInfo.imageType = VK_IMAGE_TYPE_2D;
    imageTemplateInfo.extent.width = 256; // Whatever.
    imageTemplateInfo.extent.height = 256; // Whatever.
    imageTemplateInfo.extent.depth = 1;
    imageTemplateInfo.mipLevels = 1;
    imageTemplateInfo.arrayLayers = 1;
    imageTemplateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
    imageTemplateInfo.tiling = VK_IMAGE_TILING_OPTIMAL; // LINEAR if CPU memory.
    imageTemplateInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
    imageTemplateInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; // TRANSFER_SRC if CPU memory.
    imageTemplateInfo.samples = VK_SAMPLE_COUNT_1_BIT;

    // Discover which memory types a typical buffer can live in.
    uint32_t bufferMemoryTypeBits = UINT32_MAX;
    {
        VkBuffer dummyBuffer;
        VkResult res = vkCreateBuffer(g_hDevice, &bufferTemplateInfo, g_Allocs, &dummyBuffer);
        TEST(res == VK_SUCCESS);

        VkMemoryRequirements memReq;
        vkGetBufferMemoryRequirements(g_hDevice, dummyBuffer, &memReq);
        bufferMemoryTypeBits = memReq.memoryTypeBits;

        vkDestroyBuffer(g_hDevice, dummyBuffer, g_Allocs);
    }

    // Same for a typical optimal-tiling image.
    uint32_t imageMemoryTypeBits = UINT32_MAX;
    {
        VkImage dummyImage;
        VkResult res = vkCreateImage(g_hDevice, &imageTemplateInfo, g_Allocs, &dummyImage);
        TEST(res == VK_SUCCESS);

        VkMemoryRequirements memReq;
        vkGetImageMemoryRequirements(g_hDevice, dummyImage, &memReq);
        imageMemoryTypeBits = memReq.memoryTypeBits;

        vkDestroyImage(g_hDevice, dummyImage, g_Allocs);
    }

    // The pool must be created in a memory type usable by every resource kind
    // the config requests. If buffers and images share no memory type on this
    // GPU, the test cannot run and bails out with a warning.
    uint32_t memoryTypeBits = 0;
    if(config.UsesBuffers() && config.UsesImages())
    {
        memoryTypeBits = bufferMemoryTypeBits & imageMemoryTypeBits;
        if(memoryTypeBits == 0)
        {
            PrintWarning(L"Cannot test buffers + images in the same memory pool on this GPU.");
            return;
        }
    }
    else if(config.UsesBuffers())
        memoryTypeBits = bufferMemoryTypeBits;
    else if(config.UsesImages())
        memoryTypeBits = imageMemoryTypeBits;
    else
        TEST(0);

    // Single fixed-size block; frameInUseCount = 1 enables the lost-allocation mechanism.
    VmaPoolCreateInfo poolCreateInfo = {};
    poolCreateInfo.minBlockCount = 1;
    poolCreateInfo.maxBlockCount = 1;
    poolCreateInfo.blockSize = config.PoolSize;
    poolCreateInfo.frameInUseCount = 1;

    const VkPhysicalDeviceMemoryProperties* memProps = nullptr;
    vmaGetMemoryProperties(g_hAllocator, &memProps);

    VmaPool pool = VK_NULL_HANDLE;
    VkResult res;
    // Loop over memory types because we sometimes allocate a big block here,
    // while the most eligible DEVICE_LOCAL heap may be only 256 MB on some GPUs.
    while(memoryTypeBits)
    {
        VmaAllocationCreateInfo dummyAllocCreateInfo = {};
        dummyAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
        vmaFindMemoryTypeIndex(g_hAllocator, memoryTypeBits, &dummyAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);

        const uint32_t heapIndex = memProps->memoryTypes[poolCreateInfo.memoryTypeIndex].heapIndex;
        // Protection against validation layer error when trying to allocate a block larger than entire heap size,
        // which may be only 256 MB on some platforms.
        if(poolCreateInfo.blockSize * poolCreateInfo.minBlockCount < memProps->memoryHeaps[heapIndex].size)
        {
            res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
            if(res == VK_SUCCESS)
                break;
        }
        // This type didn't work - exclude it and try the next eligible one.
        memoryTypeBits &= ~(1u << poolCreateInfo.memoryTypeIndex);
    }
    TEST(pool);

    // Start time measurement - after creating pool and initializing data structures.
    time_point timeBeg = std::chrono::high_resolution_clock::now();

    ////////////////////////////////////////////////////////////////////////////////
    // ThreadProc
    // Body executed by each worker thread. Waits for frameStartEvent each frame,
    // does its allocation work, then signals frameEndEvent back to the main thread.
    auto ThreadProc = [&config, allocationSizeProbabilitySum, pool](
        PoolTestThreadResult* outThreadResult,
        uint32_t randSeed,
        HANDLE frameStartEvent,
        HANDLE frameEndEvent) -> void
    {
        RandomNumberGenerator threadRand{randSeed};
        VkResult res = VK_SUCCESS;

        VkBufferCreateInfo bufferInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
        bufferInfo.size = 256; // Whatever.
        bufferInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;

        VkImageCreateInfo imageInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
        imageInfo.imageType = VK_IMAGE_TYPE_2D;
        imageInfo.extent.width = 256; // Whatever.
        imageInfo.extent.height = 256; // Whatever.
        imageInfo.extent.depth = 1;
        imageInfo.mipLevels = 1;
        imageInfo.arrayLayers = 1;
        imageInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
        imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL; // LINEAR if CPU memory.
        imageInfo.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
        imageInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT; // TRANSFER_SRC if CPU memory.
        imageInfo.samples = VK_SAMPLE_COUNT_1_BIT;

        // Initialize statistics so min/max track correctly from the first sample.
        outThreadResult->AllocationTimeMin = duration::max();
        outThreadResult->AllocationTimeSum = duration::zero();
        outThreadResult->AllocationTimeMax = duration::min();
        outThreadResult->DeallocationTimeMin = duration::max();
        outThreadResult->DeallocationTimeSum = duration::zero();
        outThreadResult->DeallocationTimeMax = duration::min();
        outThreadResult->AllocationCount = 0;
        outThreadResult->DeallocationCount = 0;
        outThreadResult->LostAllocationCount = 0;
        outThreadResult->LostAllocationTotalSize = 0;
        outThreadResult->FailedAllocationCount = 0;
        outThreadResult->FailedAllocationTotalSize = 0;

        // Move-only RAII wrapper for one benchmark resource: either a buffer
        // (BufferSize > 0) or an image (ImageSize non-zero), never both.
        struct Item
        {
            VkDeviceSize BufferSize = 0;
            VkExtent2D ImageSize = { 0, 0 };
            VkBuffer Buf = VK_NULL_HANDLE;
            VkImage Image = VK_NULL_HANDLE;
            VmaAllocation Alloc = VK_NULL_HANDLE;

            Item() { }
            Item(Item&& src) :
                BufferSize(src.BufferSize), ImageSize(src.ImageSize), Buf(src.Buf), Image(src.Image), Alloc(src.Alloc)
            {
                // Null out the source so its destructor doesn't double-free.
                src.BufferSize = 0;
                src.ImageSize = {0, 0};
                src.Buf = VK_NULL_HANDLE;
                src.Image = VK_NULL_HANDLE;
                src.Alloc = VK_NULL_HANDLE;
            }
            Item(const Item& src) = delete;
            ~Item()
            {
                DestroyResources();
            }
            Item& operator=(Item&& src)
            {
                if(&src != this)
                {
                    DestroyResources();
                    BufferSize = src.BufferSize; ImageSize = src.ImageSize;
                    Buf = src.Buf; Image = src.Image; Alloc = src.Alloc;
                    src.BufferSize = 0;
                    src.ImageSize = {0, 0};
                    src.Buf = VK_NULL_HANDLE;
                    src.Image = VK_NULL_HANDLE;
                    src.Alloc = VK_NULL_HANDLE;
                }
                return *this;
            }
            Item& operator=(const Item& src) = delete;
            // Destroys whichever resource is held. Safe to call when nothing is
            // allocated: the else branch passes VK_NULL_HANDLE handles to vmaDestroyImage.
            void DestroyResources()
            {
                if(Buf)
                {
                    assert(Image == VK_NULL_HANDLE);
                    vmaDestroyBuffer(g_hAllocator, Buf, Alloc);
                    Buf = VK_NULL_HANDLE;
                }
                else
                {
                    vmaDestroyImage(g_hAllocator, Image, Alloc);
                    Image = VK_NULL_HANDLE;
                }
                Alloc = VK_NULL_HANDLE;
            }
            // Approximate payload size: buffer bytes + 4 bytes per RGBA8 texel.
            VkDeviceSize CalcSizeBytes() const
            {
                return BufferSize +
                    4ull * ImageSize.width * ImageSize.height;
            }
        };
        std::vector<Item> unusedItems, usedItems;

        const size_t threadTotalItemCount = config.TotalItemCount / config.ThreadCount;

        // Create all items - all unused, not yet allocated.
        // Each item only gets its target size chosen here; GPU resources are created lazily.
        for(size_t i = 0; i < threadTotalItemCount; ++i)
        {
            Item item = {};

            // Weighted random choice of an AllocationSize bucket.
            uint32_t allocSizeIndex = 0;
            uint32_t r = threadRand.Generate() % allocationSizeProbabilitySum;
            while(r >= config.AllocationSizes[allocSizeIndex].Probability)
                r -= config.AllocationSizes[allocSizeIndex++].Probability;

            const AllocationSize& allocSize = config.AllocationSizes[allocSizeIndex];
            if(allocSize.BufferSizeMax > 0)
            {
                // Buffer bucket: pick a size in [Min, Max), rounded down to multiple of 16.
                TEST(allocSize.BufferSizeMin > 0);
                TEST(allocSize.ImageSizeMin == 0 && allocSize.ImageSizeMax == 0);
                if(allocSize.BufferSizeMax == allocSize.BufferSizeMin)
                    item.BufferSize = allocSize.BufferSizeMin;
                else
                {
                    item.BufferSize = allocSize.BufferSizeMin + threadRand.Generate() % (allocSize.BufferSizeMax - allocSize.BufferSizeMin);
                    item.BufferSize = item.BufferSize / 16 * 16;
                }
            }
            else
            {
                // Image bucket: width and height drawn independently from [Min, Max).
                TEST(allocSize.ImageSizeMin > 0 && allocSize.ImageSizeMax > 0);
                if(allocSize.ImageSizeMax == allocSize.ImageSizeMin)
                    item.ImageSize.width = item.ImageSize.height = allocSize.ImageSizeMax;
                else
                {
                    item.ImageSize.width = allocSize.ImageSizeMin + threadRand.Generate() % (allocSize.ImageSizeMax - allocSize.ImageSizeMin);
                    item.ImageSize.height = allocSize.ImageSizeMin + threadRand.Generate() % (allocSize.ImageSizeMax - allocSize.ImageSizeMin);
                }
            }

            unusedItems.push_back(std::move(item));
        }

        // Creates the buffer or image for `item`, timing the call. Allocations are
        // created with lost-allocation flags so they compete for pool space.
        auto Allocate = [&](Item& item) -> VkResult
        {
            assert(item.Buf == VK_NULL_HANDLE && item.Image == VK_NULL_HANDLE && item.Alloc == VK_NULL_HANDLE);

            VmaAllocationCreateInfo allocCreateInfo = {};
            allocCreateInfo.pool = pool;
            allocCreateInfo.flags = VMA_ALLOCATION_CREATE_CAN_BECOME_LOST_BIT |
                VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT;

            if(item.BufferSize)
            {
                bufferInfo.size = item.BufferSize;
                VkResult res = VK_SUCCESS;
                {
                    // RAII timer adds elapsed time to outThreadResult's allocation stats.
                    PoolAllocationTimeRegisterObj timeRegisterObj(*outThreadResult);
                    res = vmaCreateBuffer(g_hAllocator, &bufferInfo, &allocCreateInfo, &item.Buf, &item.Alloc, nullptr);
                }
                if(res == VK_SUCCESS)
                    SetDebugUtilsObjectName(VK_OBJECT_TYPE_BUFFER, (uint64_t)item.Buf, "TestPool_Benchmark_Buffer");
                return res;
            }
            else
            {
                TEST(item.ImageSize.width && item.ImageSize.height);

                imageInfo.extent.width = item.ImageSize.width;
                imageInfo.extent.height = item.ImageSize.height;
                VkResult res = VK_SUCCESS;
                {
                    PoolAllocationTimeRegisterObj timeRegisterObj(*outThreadResult);
                    res = vmaCreateImage(g_hAllocator, &imageInfo, &allocCreateInfo, &item.Image, &item.Alloc, nullptr);
                }
                if(res == VK_SUCCESS)
                    SetDebugUtilsObjectName(VK_OBJECT_TYPE_IMAGE, (uint64_t)item.Image, "TestPool_Benchmark_Image");
                return res;
            }
        };

        ////////////////////////////////////////////////////////////////////////////////
        // Frames
        for(uint32_t frameIndex = 0; frameIndex < config.FrameCount; ++frameIndex)
        {
            // Block until the main thread starts this frame.
            WaitForSingleObject(frameStartEvent, INFINITE);

            // Always make some percent of used bufs unused, to choose different used ones.
            const size_t bufsToMakeUnused = usedItems.size() * config.ItemsToMakeUnusedPercent / 100;
            for(size_t i = 0; i < bufsToMakeUnused; ++i)
            {
                size_t index = threadRand.Generate() % usedItems.size();
                auto it = usedItems.begin() + index;
                Item item = std::move(*it);
                usedItems.erase(it);
                unusedItems.push_back(std::move(item));
            }

            // Determine which bufs we want to use in this frame.
            // NOTE(review): modulo by (UsedItemCountMax - UsedItemCountMin) divides by
            // zero if the config sets them equal - presumably configs guarantee Max > Min; confirm.
            const size_t usedBufCount = (threadRand.Generate() % (config.UsedItemCountMax - config.UsedItemCountMin) + config.UsedItemCountMin)
                / config.ThreadCount;
            // Target count must fit within this thread's total item set.
            TEST(usedBufCount < usedItems.size() + unusedItems.size());
            // Move some used to unused.
            while(usedBufCount < usedItems.size())
            {
                size_t index = threadRand.Generate() % usedItems.size();
                auto it = usedItems.begin() + index;
                Item item = std::move(*it);
                usedItems.erase(it);
                unusedItems.push_back(std::move(item));
            }
            // Move some unused to used.
            while(usedBufCount > usedItems.size())
            {
                size_t index = threadRand.Generate() % unusedItems.size();
                auto it = unusedItems.begin() + index;
                Item item = std::move(*it);
                unusedItems.erase(it);
                usedItems.push_back(std::move(item));
            }

            // Per-frame counters, only used for the (commented-out) debug printf below.
            uint32_t touchExistingCount = 0;
            uint32_t touchLostCount = 0;
            uint32_t createSucceededCount = 0;
            uint32_t createFailedCount = 0;

            // Touch all used bufs. If not created or lost, allocate.
            for(size_t i = 0; i < usedItems.size(); ++i)
            {
                Item& item = usedItems[i];
                // Not yet created.
                if(item.Alloc == VK_NULL_HANDLE)
                {
                    res = Allocate(item);
                    ++outThreadResult->AllocationCount;
                    if(res != VK_SUCCESS)
                    {
                        assert(item.Alloc == VK_NULL_HANDLE && item.Buf == VK_NULL_HANDLE && item.Image == VK_NULL_HANDLE);
                        ++outThreadResult->FailedAllocationCount;
                        outThreadResult->FailedAllocationTotalSize += item.CalcSizeBytes();
                        ++createFailedCount;
                    }
                    else
                        ++createSucceededCount;
                }
                else
                {
                    // Touch: querying allocation info marks it as used in this frame.
                    VmaAllocationInfo allocInfo;
                    vmaGetAllocationInfo(g_hAllocator, item.Alloc, &allocInfo);
                    // Lost: null deviceMemory means the allocation was reclaimed.
                    if(allocInfo.deviceMemory == VK_NULL_HANDLE)
                    {
                        ++touchLostCount;

                        // Destroy.
                        {
                            PoolDeallocationTimeRegisterObj timeRegisterObj(*outThreadResult);
                            item.DestroyResources();
                            ++outThreadResult->DeallocationCount;
                        }

                        ++outThreadResult->LostAllocationCount;
                        outThreadResult->LostAllocationTotalSize += item.CalcSizeBytes();

                        // Recreate.
                        res = Allocate(item);
                        ++outThreadResult->AllocationCount;
                        // Creation failed.
                        if(res != VK_SUCCESS)
                        {
                            TEST(item.Alloc == VK_NULL_HANDLE && item.Buf == VK_NULL_HANDLE && item.Image == VK_NULL_HANDLE);
                            ++outThreadResult->FailedAllocationCount;
                            outThreadResult->FailedAllocationTotalSize += item.CalcSizeBytes();
                            ++createFailedCount;
                        }
                        else
                            ++createSucceededCount;
                    }
                    else
                        ++touchExistingCount;
                }
            }

            /*
            printf("Thread %u frame %u: Touch existing %u lost %u, create succeeded %u failed %u\n",
                randSeed, frameIndex,
                touchExistingCount, touchLostCount,
                createSucceededCount, createFailedCount);
            */

            // Tell the main thread this frame's work is done.
            SetEvent(frameEndEvent);
        }

        // Free all remaining items.
        for(size_t i = usedItems.size(); i--; )
        {
            PoolDeallocationTimeRegisterObj timeRegisterObj(*outThreadResult);
            usedItems[i].DestroyResources();
            ++outThreadResult->DeallocationCount;
        }
        for(size_t i = unusedItems.size(); i--; )
        {
            // (sic: "timeRegisterOb" is a misspelled local name but the RAII timer still works.)
            PoolDeallocationTimeRegisterObj timeRegisterOb(*outThreadResult);
            unusedItems[i].DestroyResources();
            ++outThreadResult->DeallocationCount;
        }
    };

    // Launch threads.
    uint32_t threadRandSeed = mainRand.Generate();
    std::vector<HANDLE> frameStartEvents{config.ThreadCount};
    std::vector<HANDLE> frameEndEvents{config.ThreadCount};
    std::vector<std::thread> bkgThreads;
    std::vector<PoolTestThreadResult> threadResults{config.ThreadCount};
    for(uint32_t threadIndex = 0; threadIndex < config.ThreadCount; ++threadIndex)
    {
        // Auto-reset events (bManualReset = FALSE), one pair per thread.
        frameStartEvents[threadIndex] = CreateEvent(NULL, FALSE, FALSE, NULL);
        frameEndEvents[threadIndex] = CreateEvent(NULL, FALSE, FALSE, NULL);
        bkgThreads.emplace_back(std::bind(
            ThreadProc,
            &threadResults[threadIndex],
            threadRandSeed + threadIndex,
            frameStartEvents[threadIndex],
            frameEndEvents[threadIndex]));
    }

    // Execute frames.
    // Each frame: bump the allocator's frame index (drives lost-allocation aging),
    // release all workers, then wait until every worker signals completion.
    TEST(config.ThreadCount <= MAXIMUM_WAIT_OBJECTS);
    for(uint32_t frameIndex = 0; frameIndex < config.FrameCount; ++frameIndex)
    {
        vmaSetCurrentFrameIndex(g_hAllocator, frameIndex);
        for(size_t threadIndex = 0; threadIndex < config.ThreadCount; ++threadIndex)
            SetEvent(frameStartEvents[threadIndex]);
        WaitForMultipleObjects(config.ThreadCount, &frameEndEvents[0], TRUE, INFINITE);
    }

    // Wait for threads finished
    for(size_t i = 0; i < bkgThreads.size(); ++i)
    {
        bkgThreads[i].join();
        CloseHandle(frameEndEvents[i]);
        CloseHandle(frameStartEvents[i]);
    }
    bkgThreads.clear();

    // Finish time measurement - before destroying pool.
    outResult.TotalTime = std::chrono::high_resolution_clock::now() - timeBeg;

    vmaDestroyPool(g_hAllocator, pool);

    // Merge per-thread statistics into the overall result.
    // Avg fields accumulate sums here and are divided by counts at the end.
    outResult.AllocationTimeMin = duration::max();
    outResult.AllocationTimeAvg = duration::zero();
    outResult.AllocationTimeMax = duration::min();
    outResult.DeallocationTimeMin = duration::max();
    outResult.DeallocationTimeAvg = duration::zero();
    outResult.DeallocationTimeMax = duration::min();
    outResult.LostAllocationCount = 0;
    outResult.LostAllocationTotalSize = 0;
    outResult.FailedAllocationCount = 0;
    outResult.FailedAllocationTotalSize = 0;
    size_t allocationCount = 0;
    size_t deallocationCount = 0;
    for(size_t threadIndex = 0; threadIndex < config.ThreadCount; ++threadIndex)
    {
        const PoolTestThreadResult& threadResult = threadResults[threadIndex];
        outResult.AllocationTimeMin = std::min(outResult.AllocationTimeMin, threadResult.AllocationTimeMin);
        outResult.AllocationTimeMax = std::max(outResult.AllocationTimeMax, threadResult.AllocationTimeMax);
        outResult.AllocationTimeAvg += threadResult.AllocationTimeSum;
        outResult.DeallocationTimeMin = std::min(outResult.DeallocationTimeMin, threadResult.DeallocationTimeMin);
        outResult.DeallocationTimeMax = std::max(outResult.DeallocationTimeMax, threadResult.DeallocationTimeMax);
        outResult.DeallocationTimeAvg += threadResult.DeallocationTimeSum;
        allocationCount += threadResult.AllocationCount;
        deallocationCount += threadResult.DeallocationCount;
        outResult.FailedAllocationCount += threadResult.FailedAllocationCount;
        outResult.FailedAllocationTotalSize += threadResult.FailedAllocationTotalSize;
        outResult.LostAllocationCount += threadResult.LostAllocationCount;
        outResult.LostAllocationTotalSize += threadResult.LostAllocationTotalSize;
    }
    // Convert accumulated sums into averages (guard against division by zero).
    if(allocationCount)
        outResult.AllocationTimeAvg /= allocationCount;
    if(deallocationCount)
        outResult.DeallocationTimeAvg /= deallocationCount;
}
5160
// Returns true if the byte ranges [ptr1, ptr1+size1) and [ptr2, ptr2+size2)
// overlap. Regions starting at the same address are always considered
// overlapping, even when both sizes are zero.
static inline bool MemoryRegionsOverlap(char* ptr1, size_t size1, char* ptr2, size_t size2)
{
    if(ptr1 == ptr2)
        return true;
    // Order the regions, then check whether the lower one reaches past the higher one's start.
    char* const lowBeg = ptr1 < ptr2 ? ptr1 : ptr2;
    const size_t lowSize = ptr1 < ptr2 ? size1 : size2;
    char* const highBeg = ptr1 < ptr2 ? ptr2 : ptr1;
    return lowBeg + lowSize > highBeg;
}
5170
TestMemoryUsage()5171 static void TestMemoryUsage()
5172 {
5173 wprintf(L"Testing memory usage:\n");
5174
5175 static const VmaMemoryUsage lastUsage = VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED;
5176 for(uint32_t usage = 0; usage <= lastUsage; ++usage)
5177 {
5178 switch(usage)
5179 {
5180 case VMA_MEMORY_USAGE_UNKNOWN: printf(" VMA_MEMORY_USAGE_UNKNOWN:\n"); break;
5181 case VMA_MEMORY_USAGE_GPU_ONLY: printf(" VMA_MEMORY_USAGE_GPU_ONLY:\n"); break;
5182 case VMA_MEMORY_USAGE_CPU_ONLY: printf(" VMA_MEMORY_USAGE_CPU_ONLY:\n"); break;
5183 case VMA_MEMORY_USAGE_CPU_TO_GPU: printf(" VMA_MEMORY_USAGE_CPU_TO_GPU:\n"); break;
5184 case VMA_MEMORY_USAGE_GPU_TO_CPU: printf(" VMA_MEMORY_USAGE_GPU_TO_CPU:\n"); break;
5185 case VMA_MEMORY_USAGE_CPU_COPY: printf(" VMA_MEMORY_USAGE_CPU_COPY:\n"); break;
5186 case VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED: printf(" VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED:\n"); break;
5187 default: assert(0);
5188 }
5189
5190 auto printResult = [](const char* testName, VkResult res, uint32_t memoryTypeBits, uint32_t memoryTypeIndex)
5191 {
5192 if(res == VK_SUCCESS)
5193 printf(" %s: memoryTypeBits=0x%X, memoryTypeIndex=%u\n", testName, memoryTypeBits, memoryTypeIndex);
5194 else
5195 printf(" %s: memoryTypeBits=0x%X, FAILED with res=%d\n", testName, memoryTypeBits, (int32_t)res);
5196 };
5197
5198 // 1: Buffer for copy
5199 {
5200 VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
5201 bufCreateInfo.size = 65536;
5202 bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
5203
5204 VkBuffer buf = VK_NULL_HANDLE;
5205 VkResult res = vkCreateBuffer(g_hDevice, &bufCreateInfo, g_Allocs, &buf);
5206 TEST(res == VK_SUCCESS && buf != VK_NULL_HANDLE);
5207
5208 VkMemoryRequirements memReq = {};
5209 vkGetBufferMemoryRequirements(g_hDevice, buf, &memReq);
5210
5211 VmaAllocationCreateInfo allocCreateInfo = {};
5212 allocCreateInfo.usage = (VmaMemoryUsage)usage;
5213 VmaAllocation alloc = VK_NULL_HANDLE;
5214 VmaAllocationInfo allocInfo = {};
5215 res = vmaAllocateMemoryForBuffer(g_hAllocator, buf, &allocCreateInfo, &alloc, &allocInfo);
5216 if(res == VK_SUCCESS)
5217 {
5218 TEST((memReq.memoryTypeBits & (1u << allocInfo.memoryType)) != 0);
5219 res = vkBindBufferMemory(g_hDevice, buf, allocInfo.deviceMemory, allocInfo.offset);
5220 TEST(res == VK_SUCCESS);
5221 }
5222 printResult("Buffer TRANSFER_DST + TRANSFER_SRC", res, memReq.memoryTypeBits, allocInfo.memoryType);
5223 vmaDestroyBuffer(g_hAllocator, buf, alloc);
5224 }
5225
5226 // 2: Vertex buffer
5227 {
5228 VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
5229 bufCreateInfo.size = 65536;
5230 bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
5231
5232 VkBuffer buf = VK_NULL_HANDLE;
5233 VkResult res = vkCreateBuffer(g_hDevice, &bufCreateInfo, g_Allocs, &buf);
5234 TEST(res == VK_SUCCESS && buf != VK_NULL_HANDLE);
5235
5236 VkMemoryRequirements memReq = {};
5237 vkGetBufferMemoryRequirements(g_hDevice, buf, &memReq);
5238
5239 VmaAllocationCreateInfo allocCreateInfo = {};
5240 allocCreateInfo.usage = (VmaMemoryUsage)usage;
5241 VmaAllocation alloc = VK_NULL_HANDLE;
5242 VmaAllocationInfo allocInfo = {};
5243 res = vmaAllocateMemoryForBuffer(g_hAllocator, buf, &allocCreateInfo, &alloc, &allocInfo);
5244 if(res == VK_SUCCESS)
5245 {
5246 TEST((memReq.memoryTypeBits & (1u << allocInfo.memoryType)) != 0);
5247 res = vkBindBufferMemory(g_hDevice, buf, allocInfo.deviceMemory, allocInfo.offset);
5248 TEST(res == VK_SUCCESS);
5249 }
5250 printResult("Buffer TRANSFER_DST + VERTEX_BUFFER", res, memReq.memoryTypeBits, allocInfo.memoryType);
5251 vmaDestroyBuffer(g_hAllocator, buf, alloc);
5252 }
5253
5254 // 3: Image for copy, OPTIMAL
5255 {
5256 VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
5257 imgCreateInfo.imageType = VK_IMAGE_TYPE_2D;
5258 imgCreateInfo.extent.width = 256;
5259 imgCreateInfo.extent.height = 256;
5260 imgCreateInfo.extent.depth = 1;
5261 imgCreateInfo.mipLevels = 1;
5262 imgCreateInfo.arrayLayers = 1;
5263 imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
5264 imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
5265 imgCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
5266 imgCreateInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
5267 imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
5268
5269 VkImage img = VK_NULL_HANDLE;
5270 VkResult res = vkCreateImage(g_hDevice, &imgCreateInfo, g_Allocs, &img);
5271 TEST(res == VK_SUCCESS && img != VK_NULL_HANDLE);
5272
5273 VkMemoryRequirements memReq = {};
5274 vkGetImageMemoryRequirements(g_hDevice, img, &memReq);
5275
5276 VmaAllocationCreateInfo allocCreateInfo = {};
5277 allocCreateInfo.usage = (VmaMemoryUsage)usage;
5278 VmaAllocation alloc = VK_NULL_HANDLE;
5279 VmaAllocationInfo allocInfo = {};
5280 res = vmaAllocateMemoryForImage(g_hAllocator, img, &allocCreateInfo, &alloc, &allocInfo);
5281 if(res == VK_SUCCESS)
5282 {
5283 TEST((memReq.memoryTypeBits & (1u << allocInfo.memoryType)) != 0);
5284 res = vkBindImageMemory(g_hDevice, img, allocInfo.deviceMemory, allocInfo.offset);
5285 TEST(res == VK_SUCCESS);
5286 }
5287 printResult("Image OPTIMAL TRANSFER_DST + TRANSFER_SRC", res, memReq.memoryTypeBits, allocInfo.memoryType);
5288
5289 vmaDestroyImage(g_hAllocator, img, alloc);
5290 }
5291
5292 // 4: Image SAMPLED, OPTIMAL
5293 {
5294 VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
5295 imgCreateInfo.imageType = VK_IMAGE_TYPE_2D;
5296 imgCreateInfo.extent.width = 256;
5297 imgCreateInfo.extent.height = 256;
5298 imgCreateInfo.extent.depth = 1;
5299 imgCreateInfo.mipLevels = 1;
5300 imgCreateInfo.arrayLayers = 1;
5301 imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
5302 imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
5303 imgCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
5304 imgCreateInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
5305 imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
5306
5307 VkImage img = VK_NULL_HANDLE;
5308 VkResult res = vkCreateImage(g_hDevice, &imgCreateInfo, g_Allocs, &img);
5309 TEST(res == VK_SUCCESS && img != VK_NULL_HANDLE);
5310
5311 VkMemoryRequirements memReq = {};
5312 vkGetImageMemoryRequirements(g_hDevice, img, &memReq);
5313
5314 VmaAllocationCreateInfo allocCreateInfo = {};
5315 allocCreateInfo.usage = (VmaMemoryUsage)usage;
5316 VmaAllocation alloc = VK_NULL_HANDLE;
5317 VmaAllocationInfo allocInfo = {};
5318 res = vmaAllocateMemoryForImage(g_hAllocator, img, &allocCreateInfo, &alloc, &allocInfo);
5319 if(res == VK_SUCCESS)
5320 {
5321 TEST((memReq.memoryTypeBits & (1u << allocInfo.memoryType)) != 0);
5322 res = vkBindImageMemory(g_hDevice, img, allocInfo.deviceMemory, allocInfo.offset);
5323 TEST(res == VK_SUCCESS);
5324 }
5325 printResult("Image OPTIMAL TRANSFER_DST + SAMPLED", res, memReq.memoryTypeBits, allocInfo.memoryType);
5326 vmaDestroyImage(g_hAllocator, img, alloc);
5327 }
5328
5329 // 5: Image COLOR_ATTACHMENT, OPTIMAL
5330 {
5331 VkImageCreateInfo imgCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
5332 imgCreateInfo.imageType = VK_IMAGE_TYPE_2D;
5333 imgCreateInfo.extent.width = 256;
5334 imgCreateInfo.extent.height = 256;
5335 imgCreateInfo.extent.depth = 1;
5336 imgCreateInfo.mipLevels = 1;
5337 imgCreateInfo.arrayLayers = 1;
5338 imgCreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
5339 imgCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
5340 imgCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
5341 imgCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
5342 imgCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
5343
5344 VkImage img = VK_NULL_HANDLE;
5345 VkResult res = vkCreateImage(g_hDevice, &imgCreateInfo, g_Allocs, &img);
5346 TEST(res == VK_SUCCESS && img != VK_NULL_HANDLE);
5347
5348 VkMemoryRequirements memReq = {};
5349 vkGetImageMemoryRequirements(g_hDevice, img, &memReq);
5350
5351 VmaAllocationCreateInfo allocCreateInfo = {};
5352 allocCreateInfo.usage = (VmaMemoryUsage)usage;
5353 VmaAllocation alloc = VK_NULL_HANDLE;
5354 VmaAllocationInfo allocInfo = {};
5355 res = vmaAllocateMemoryForImage(g_hAllocator, img, &allocCreateInfo, &alloc, &allocInfo);
5356 if(res == VK_SUCCESS)
5357 {
5358 TEST((memReq.memoryTypeBits & (1u << allocInfo.memoryType)) != 0);
5359 res = vkBindImageMemory(g_hDevice, img, allocInfo.deviceMemory, allocInfo.offset);
5360 TEST(res == VK_SUCCESS);
5361 }
5362 printResult("Image OPTIMAL SAMPLED + COLOR_ATTACHMENT", res, memReq.memoryTypeBits, allocInfo.memoryType);
5363 vmaDestroyImage(g_hAllocator, img, alloc);
5364 }
5365 }
5366 }
5367
FindDeviceCoherentMemoryTypeBits()5368 static uint32_t FindDeviceCoherentMemoryTypeBits()
5369 {
5370 VkPhysicalDeviceMemoryProperties memProps;
5371 vkGetPhysicalDeviceMemoryProperties(g_hPhysicalDevice, &memProps);
5372
5373 uint32_t memTypeBits = 0;
5374 for(uint32_t i = 0; i < memProps.memoryTypeCount; ++i)
5375 {
5376 if(memProps.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD)
5377 memTypeBits |= 1u << i;
5378 }
5379 return memTypeBits;
5380 }
5381
// Tests memory types with VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD
// (extension VK_AMD_device_coherent_memory). Verifies that:
// 1. A dedicated buffer can be allocated in such a memory type.
// 2. A custom pool can be created on such a memory type.
// 3. With a local allocator created WITHOUT
//    VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT, both buffer creation
//    and memory type lookup with the same requirements fail.
// Skips silently if the extension is disabled or no such memory type exists.
static void TestDeviceCoherentMemory()
{
    if(!VK_AMD_device_coherent_memory_enabled)
        return;

    uint32_t deviceCoherentMemoryTypeBits = FindDeviceCoherentMemoryTypeBits();
    // Extension is enabled, feature is enabled, and the device still doesn't support any such memory type?
    // OK then, so it's just fake!
    if(deviceCoherentMemoryTypeBits == 0)
        return;

    wprintf(L"Testing device coherent memory...\n");

    // 1. Try to allocate buffer from a memory type that is DEVICE_COHERENT.

    VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
    bufCreateInfo.size = 0x10000;
    bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;

    // DEDICATED_MEMORY so the allocation gets its own VkDeviceMemory block.
    VmaAllocationCreateInfo allocCreateInfo = {};
    allocCreateInfo.flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
    allocCreateInfo.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD;

    AllocInfo alloc = {};
    VmaAllocationInfo allocInfo = {};
    VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &alloc.m_Buffer, &alloc.m_Allocation, &allocInfo);

    // Make sure it succeeded and was really created in such memory type.
    TEST(res == VK_SUCCESS);
    TEST((1u << allocInfo.memoryType) & deviceCoherentMemoryTypeBits);

    alloc.Destroy();

    // 2. Try to create a pool in such memory type.
    {
        VmaPoolCreateInfo poolCreateInfo = {};

        // UINT32_MAX = any memory type allowed; requiredFlags narrows the choice.
        res = vmaFindMemoryTypeIndex(g_hAllocator, UINT32_MAX, &allocCreateInfo, &poolCreateInfo.memoryTypeIndex);
        TEST(res == VK_SUCCESS);
        TEST((1u << poolCreateInfo.memoryTypeIndex) & deviceCoherentMemoryTypeBits);

        VmaPool pool = VK_NULL_HANDLE;
        res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
        TEST(res == VK_SUCCESS);

        vmaDestroyPool(g_hAllocator, pool);
    }

    // 3. Try the same with a local allocator created without VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT.

    VmaAllocatorCreateInfo allocatorCreateInfo = {};
    SetAllocatorCreateInfo(allocatorCreateInfo);
    allocatorCreateInfo.flags &= ~VMA_ALLOCATOR_CREATE_AMD_DEVICE_COHERENT_MEMORY_BIT;

    VmaAllocator localAllocator = VK_NULL_HANDLE;
    res = vmaCreateAllocator(&allocatorCreateInfo, &localAllocator);
    TEST(res == VK_SUCCESS && localAllocator);

    res = vmaCreateBuffer(localAllocator, &bufCreateInfo, &allocCreateInfo, &alloc.m_Buffer, &alloc.m_Allocation, &allocInfo);

    // Make sure it failed.
    TEST(res != VK_SUCCESS && !alloc.m_Buffer && !alloc.m_Allocation);

    // 4. Try to find memory type.
    {
        uint32_t memTypeIndex = UINT_MAX;
        res = vmaFindMemoryTypeIndex(localAllocator, UINT32_MAX, &allocCreateInfo, &memTypeIndex);
        TEST(res != VK_SUCCESS);
    }

    vmaDestroyAllocator(localAllocator);
}
5454
TestBudget()5455 static void TestBudget()
5456 {
5457 wprintf(L"Testing budget...\n");
5458
5459 static const VkDeviceSize BUF_SIZE = 10ull * 1024 * 1024;
5460 static const uint32_t BUF_COUNT = 4;
5461
5462 const VkPhysicalDeviceMemoryProperties* memProps = {};
5463 vmaGetMemoryProperties(g_hAllocator, &memProps);
5464
5465 for(uint32_t testIndex = 0; testIndex < 2; ++testIndex)
5466 {
5467 vmaSetCurrentFrameIndex(g_hAllocator, ++g_FrameIndex);
5468
5469 VmaBudget budgetBeg[VK_MAX_MEMORY_HEAPS] = {};
5470 vmaGetBudget(g_hAllocator, budgetBeg);
5471
5472 for(uint32_t i = 0; i < memProps->memoryHeapCount; ++i)
5473 {
5474 TEST(budgetBeg[i].budget > 0);
5475 TEST(budgetBeg[i].budget <= memProps->memoryHeaps[i].size);
5476 TEST(budgetBeg[i].allocationBytes <= budgetBeg[i].blockBytes);
5477 }
5478
5479 VkBufferCreateInfo bufInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
5480 bufInfo.size = BUF_SIZE;
5481 bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
5482
5483 VmaAllocationCreateInfo allocCreateInfo = {};
5484 allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
5485 if(testIndex == 0)
5486 {
5487 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
5488 }
5489
5490 // CREATE BUFFERS
5491 uint32_t heapIndex = 0;
5492 BufferInfo bufInfos[BUF_COUNT] = {};
5493 for(uint32_t bufIndex = 0; bufIndex < BUF_COUNT; ++bufIndex)
5494 {
5495 VmaAllocationInfo allocInfo;
5496 VkResult res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo,
5497 &bufInfos[bufIndex].Buffer, &bufInfos[bufIndex].Allocation, &allocInfo);
5498 TEST(res == VK_SUCCESS);
5499 if(bufIndex == 0)
5500 {
5501 heapIndex = MemoryTypeToHeap(allocInfo.memoryType);
5502 }
5503 else
5504 {
5505 // All buffers need to fall into the same heap.
5506 TEST(MemoryTypeToHeap(allocInfo.memoryType) == heapIndex);
5507 }
5508 }
5509
5510 VmaBudget budgetWithBufs[VK_MAX_MEMORY_HEAPS] = {};
5511 vmaGetBudget(g_hAllocator, budgetWithBufs);
5512
5513 // DESTROY BUFFERS
5514 for(size_t bufIndex = BUF_COUNT; bufIndex--; )
5515 {
5516 vmaDestroyBuffer(g_hAllocator, bufInfos[bufIndex].Buffer, bufInfos[bufIndex].Allocation);
5517 }
5518
5519 VmaBudget budgetEnd[VK_MAX_MEMORY_HEAPS] = {};
5520 vmaGetBudget(g_hAllocator, budgetEnd);
5521
5522 // CHECK
5523 for(uint32_t i = 0; i < memProps->memoryHeapCount; ++i)
5524 {
5525 TEST(budgetEnd[i].allocationBytes <= budgetEnd[i].blockBytes);
5526 if(i == heapIndex)
5527 {
5528 TEST(budgetEnd[i].allocationBytes == budgetBeg[i].allocationBytes);
5529 TEST(budgetWithBufs[i].allocationBytes == budgetBeg[i].allocationBytes + BUF_SIZE * BUF_COUNT);
5530 TEST(budgetWithBufs[i].blockBytes >= budgetEnd[i].blockBytes);
5531 }
5532 else
5533 {
5534 TEST(budgetEnd[i].allocationBytes == budgetEnd[i].allocationBytes &&
5535 budgetEnd[i].allocationBytes == budgetWithBufs[i].allocationBytes);
5536 TEST(budgetEnd[i].blockBytes == budgetEnd[i].blockBytes &&
5537 budgetEnd[i].blockBytes == budgetWithBufs[i].blockBytes);
5538 }
5539 }
5540 }
5541 }
5542
TestAliasing()5543 static void TestAliasing()
5544 {
5545 wprintf(L"Testing aliasing...\n");
5546
5547 /*
5548 This is just a simple test, more like a code sample to demonstrate it's possible.
5549 */
5550
5551 // A 512x512 texture to be sampled.
5552 VkImageCreateInfo img1CreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
5553 img1CreateInfo.imageType = VK_IMAGE_TYPE_2D;
5554 img1CreateInfo.extent.width = 512;
5555 img1CreateInfo.extent.height = 512;
5556 img1CreateInfo.extent.depth = 1;
5557 img1CreateInfo.mipLevels = 10;
5558 img1CreateInfo.arrayLayers = 1;
5559 img1CreateInfo.format = VK_FORMAT_R8G8B8A8_SRGB;
5560 img1CreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
5561 img1CreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
5562 img1CreateInfo.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
5563 img1CreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
5564
5565 // A full screen texture to be used as color attachment.
5566 VkImageCreateInfo img2CreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
5567 img2CreateInfo.imageType = VK_IMAGE_TYPE_2D;
5568 img2CreateInfo.extent.width = 1920;
5569 img2CreateInfo.extent.height = 1080;
5570 img2CreateInfo.extent.depth = 1;
5571 img2CreateInfo.mipLevels = 1;
5572 img2CreateInfo.arrayLayers = 1;
5573 img2CreateInfo.format = VK_FORMAT_R8G8B8A8_UNORM;
5574 img2CreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
5575 img2CreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
5576 img2CreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
5577 img2CreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
5578
5579 VkImage img1 = VK_NULL_HANDLE;
5580 ERR_GUARD_VULKAN(vkCreateImage(g_hDevice, &img1CreateInfo, g_Allocs, &img1));
5581 VkImage img2 = VK_NULL_HANDLE;
5582 ERR_GUARD_VULKAN(vkCreateImage(g_hDevice, &img2CreateInfo, g_Allocs, &img2));
5583
5584 VkMemoryRequirements img1MemReq = {};
5585 vkGetImageMemoryRequirements(g_hDevice, img1, &img1MemReq);
5586 VkMemoryRequirements img2MemReq = {};
5587 vkGetImageMemoryRequirements(g_hDevice, img2, &img2MemReq);
5588
5589 VkMemoryRequirements finalMemReq = {};
5590 finalMemReq.size = std::max(img1MemReq.size, img2MemReq.size);
5591 finalMemReq.alignment = std::max(img1MemReq.alignment, img2MemReq.alignment);
5592 finalMemReq.memoryTypeBits = img1MemReq.memoryTypeBits & img2MemReq.memoryTypeBits;
5593 if(finalMemReq.memoryTypeBits != 0)
5594 {
5595 wprintf(L" size: max(%llu, %llu) = %llu\n",
5596 img1MemReq.size, img2MemReq.size, finalMemReq.size);
5597 wprintf(L" alignment: max(%llu, %llu) = %llu\n",
5598 img1MemReq.alignment, img2MemReq.alignment, finalMemReq.alignment);
5599 wprintf(L" memoryTypeBits: %u & %u = %u\n",
5600 img1MemReq.memoryTypeBits, img2MemReq.memoryTypeBits, finalMemReq.memoryTypeBits);
5601
5602 VmaAllocationCreateInfo allocCreateInfo = {};
5603 allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
5604
5605 VmaAllocation alloc = VK_NULL_HANDLE;
5606 ERR_GUARD_VULKAN(vmaAllocateMemory(g_hAllocator, &finalMemReq, &allocCreateInfo, &alloc, nullptr));
5607
5608 ERR_GUARD_VULKAN(vmaBindImageMemory(g_hAllocator, alloc, img1));
5609 ERR_GUARD_VULKAN(vmaBindImageMemory(g_hAllocator, alloc, img2));
5610
5611 // You can use img1, img2 here, but not at the same time!
5612
5613 vmaFreeMemory(g_hAllocator, alloc);
5614 }
5615 else
5616 {
5617 wprintf(L" Textures cannot alias!\n");
5618 }
5619
5620 vkDestroyImage(g_hDevice, img2, g_Allocs);
5621 vkDestroyImage(g_hDevice, img1, g_Allocs);
5622 }
5623
// Tests vmaMapMemory / vmaUnmapMemory reference counting and the
// VMA_ALLOCATION_CREATE_MAPPED_BIT flag, in three allocation variants:
// default memory blocks, a custom pool, and dedicated allocations.
// Also verifies that two simultaneously mapped buffers don't overlap in memory.
static void TestMapping()
{
    wprintf(L"Testing mapping...\n");

    VkResult res;
    uint32_t memTypeIndex = UINT32_MAX;

    enum TEST
    {
        TEST_NORMAL,
        TEST_POOL,
        TEST_DEDICATED,
        TEST_COUNT
    };
    for(uint32_t testIndex = 0; testIndex < TEST_COUNT; ++testIndex)
    {
        VmaPool pool = nullptr;
        if(testIndex == TEST_POOL)
        {
            // memTypeIndex was remembered during the preceding TEST_NORMAL pass.
            TEST(memTypeIndex != UINT32_MAX);
            VmaPoolCreateInfo poolInfo = {};
            poolInfo.memoryTypeIndex = memTypeIndex;
            res = vmaCreatePool(g_hAllocator, &poolInfo, &pool);
            TEST(res == VK_SUCCESS);
        }

        VkBufferCreateInfo bufInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
        bufInfo.size = 0x10000;
        bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;

        // CPU_ONLY guarantees a HOST_VISIBLE memory type, so mapping must succeed.
        VmaAllocationCreateInfo allocCreateInfo = {};
        allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
        allocCreateInfo.pool = pool;
        if(testIndex == TEST_DEDICATED)
            allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;

        VmaAllocationInfo allocInfo;

        // Mapped manually

        // Create 2 buffers. (Slot [2] is filled later with the persistently mapped one.)
        BufferInfo bufferInfos[3];
        for(size_t i = 0; i < 2; ++i)
        {
            res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo,
                &bufferInfos[i].Buffer, &bufferInfos[i].Allocation, &allocInfo);
            TEST(res == VK_SUCCESS);
            TEST(allocInfo.pMappedData == nullptr);
            memTypeIndex = allocInfo.memoryType;
        }

        // Map buffer 0.
        char* data00 = nullptr;
        res = vmaMapMemory(g_hAllocator, bufferInfos[0].Allocation, (void**)&data00);
        TEST(res == VK_SUCCESS && data00 != nullptr);
        // Touch first and last byte to catch out-of-range mappings.
        data00[0xFFFF] = data00[0];

        // Map buffer 0 second time. Nested mapping must return the same pointer.
        char* data01 = nullptr;
        res = vmaMapMemory(g_hAllocator, bufferInfos[0].Allocation, (void**)&data01);
        TEST(res == VK_SUCCESS && data01 == data00);

        // Map buffer 1.
        char* data1 = nullptr;
        res = vmaMapMemory(g_hAllocator, bufferInfos[1].Allocation, (void**)&data1);
        TEST(res == VK_SUCCESS && data1 != nullptr);
        TEST(!MemoryRegionsOverlap(data00, (size_t)bufInfo.size, data1, (size_t)bufInfo.size));
        data1[0xFFFF] = data1[0];

        // Unmap buffer 0 two times. Refcount drops to zero -> really unmapped.
        vmaUnmapMemory(g_hAllocator, bufferInfos[0].Allocation);
        vmaUnmapMemory(g_hAllocator, bufferInfos[0].Allocation);
        vmaGetAllocationInfo(g_hAllocator, bufferInfos[0].Allocation, &allocInfo);
        TEST(allocInfo.pMappedData == nullptr);

        // Unmap buffer 1.
        vmaUnmapMemory(g_hAllocator, bufferInfos[1].Allocation);
        vmaGetAllocationInfo(g_hAllocator, bufferInfos[1].Allocation, &allocInfo);
        TEST(allocInfo.pMappedData == nullptr);

        // Create 3rd buffer - persistently mapped.
        allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT;
        res = vmaCreateBuffer(g_hAllocator, &bufInfo, &allocCreateInfo,
            &bufferInfos[2].Buffer, &bufferInfos[2].Allocation, &allocInfo);
        TEST(res == VK_SUCCESS && allocInfo.pMappedData != nullptr);

        // Map buffer 2. Manual map on a persistently mapped allocation returns
        // the same pointer.
        char* data2 = nullptr;
        res = vmaMapMemory(g_hAllocator, bufferInfos[2].Allocation, (void**)&data2);
        TEST(res == VK_SUCCESS && data2 == allocInfo.pMappedData);
        data2[0xFFFF] = data2[0];

        // Unmap buffer 2. It stays mapped because of the persistent mapping.
        vmaUnmapMemory(g_hAllocator, bufferInfos[2].Allocation);
        vmaGetAllocationInfo(g_hAllocator, bufferInfos[2].Allocation, &allocInfo);
        TEST(allocInfo.pMappedData == data2);

        // Destroy all buffers.
        for(size_t i = 3; i--; )
            vmaDestroyBuffer(g_hAllocator, bufferInfos[i].Buffer, bufferInfos[i].Allocation);

        vmaDestroyPool(g_hAllocator, pool);
    }
}
5728
5729 // Test CREATE_MAPPED with required DEVICE_LOCAL. There was a bug with it.
TestDeviceLocalMapped()5730 static void TestDeviceLocalMapped()
5731 {
5732 VkResult res;
5733
5734 for(uint32_t testIndex = 0; testIndex < 3; ++testIndex)
5735 {
5736 VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
5737 bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
5738 bufCreateInfo.size = 4096;
5739
5740 VmaPool pool = VK_NULL_HANDLE;
5741 VmaAllocationCreateInfo allocCreateInfo = {};
5742 allocCreateInfo.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
5743 allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
5744 if(testIndex == 2)
5745 {
5746 VmaPoolCreateInfo poolCreateInfo = {};
5747 res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &poolCreateInfo.memoryTypeIndex);
5748 TEST(res == VK_SUCCESS);
5749 res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
5750 TEST(res == VK_SUCCESS);
5751 allocCreateInfo.pool = pool;
5752 }
5753 else if(testIndex == 1)
5754 {
5755 allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_CAN_MAKE_OTHER_LOST_BIT;
5756 }
5757
5758 VkBuffer buf = VK_NULL_HANDLE;
5759 VmaAllocation alloc = VK_NULL_HANDLE;
5760 VmaAllocationInfo allocInfo = {};
5761 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo, &buf, &alloc, &allocInfo);
5762 TEST(res == VK_SUCCESS && alloc);
5763
5764 VkMemoryPropertyFlags memTypeFlags = 0;
5765 vmaGetMemoryTypeProperties(g_hAllocator, allocInfo.memoryType, &memTypeFlags);
5766 const bool shouldBeMapped = (memTypeFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0;
5767 TEST((allocInfo.pMappedData != nullptr) == shouldBeMapped);
5768
5769 vmaDestroyBuffer(g_hAllocator, buf, alloc);
5770 vmaDestroyPool(g_hAllocator, pool);
5771 }
5772 }
5773
// Stress-tests vmaMapMemory / vmaUnmapMemory from 16 concurrent threads.
// Each thread creates its share of 1024 buffers, randomly choosing one of
// several mapping patterns per buffer (never mapped, briefly mapped, mapped
// until destruction, nested mapping, persistently mapped), then verifies the
// mapping state and destroys its buffers. Repeated for default blocks, a
// custom pool, and dedicated allocations.
// Note: uses Win32 Sleep(); this file is compiled only under _WIN32.
static void TestMappingMultithreaded()
{
    wprintf(L"Testing mapping multithreaded...\n");

    static const uint32_t threadCount = 16;
    static const uint32_t bufferCount = 1024;
    static const uint32_t threadBufferCount = bufferCount / threadCount;

    VkResult res;
    // Shared across threads: first thread to allocate publishes the memory type
    // so the TEST_POOL iteration can create a pool on it.
    volatile uint32_t memTypeIndex = UINT32_MAX;

    enum TEST
    {
        TEST_NORMAL,
        TEST_POOL,
        TEST_DEDICATED,
        TEST_COUNT
    };
    for(uint32_t testIndex = 0; testIndex < TEST_COUNT; ++testIndex)
    {
        VmaPool pool = nullptr;
        if(testIndex == TEST_POOL)
        {
            TEST(memTypeIndex != UINT32_MAX);
            VmaPoolCreateInfo poolInfo = {};
            poolInfo.memoryTypeIndex = memTypeIndex;
            res = vmaCreatePool(g_hAllocator, &poolInfo, &pool);
            TEST(res == VK_SUCCESS);
        }

        VkBufferCreateInfo bufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
        bufCreateInfo.size = 0x10000;
        bufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;

        VmaAllocationCreateInfo allocCreateInfo = {};
        allocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
        allocCreateInfo.pool = pool;
        if(testIndex == TEST_DEDICATED)
            allocCreateInfo.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;

        std::thread threads[threadCount];
        for(uint32_t threadIndex = 0; threadIndex < threadCount; ++threadIndex)
        {
            // Capture everything by copy except memTypeIndex, which is shared.
            threads[threadIndex] = std::thread([=, &memTypeIndex](){
                // ======== THREAD FUNCTION ========

                RandomNumberGenerator rand{threadIndex};

                enum class MODE
                {
                    // Don't map this buffer at all.
                    DONT_MAP,
                    // Map and quickly unmap.
                    MAP_FOR_MOMENT,
                    // Map and unmap before destruction.
                    MAP_FOR_LONGER,
                    // Map two times. Quickly unmap, second unmap before destruction.
                    MAP_TWO_TIMES,
                    // Create this buffer as persistently mapped.
                    PERSISTENTLY_MAPPED,
                    COUNT
                };
                std::vector<BufferInfo> bufInfos{threadBufferCount};
                std::vector<MODE> bufModes{threadBufferCount};

                for(uint32_t bufferIndex = 0; bufferIndex < threadBufferCount; ++bufferIndex)
                {
                    BufferInfo& bufInfo = bufInfos[bufferIndex];
                    const MODE mode = (MODE)(rand.Generate() % (uint32_t)MODE::COUNT);
                    bufModes[bufferIndex] = mode;

                    VmaAllocationCreateInfo localAllocCreateInfo = allocCreateInfo;
                    if(mode == MODE::PERSISTENTLY_MAPPED)
                        localAllocCreateInfo.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT;

                    VmaAllocationInfo allocInfo;
                    VkResult res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &localAllocCreateInfo,
                        &bufInfo.Buffer, &bufInfo.Allocation, &allocInfo);
                    TEST(res == VK_SUCCESS);

                    // Benign race: all threads allocate with identical parameters,
                    // so whichever thread publishes first, the value is usable.
                    if(memTypeIndex == UINT32_MAX)
                        memTypeIndex = allocInfo.memoryType;

                    char* data = nullptr;

                    if(mode == MODE::PERSISTENTLY_MAPPED)
                    {
                        data = (char*)allocInfo.pMappedData;
                        TEST(data != nullptr);
                    }
                    else if(mode == MODE::MAP_FOR_MOMENT || mode == MODE::MAP_FOR_LONGER ||
                        mode == MODE::MAP_TWO_TIMES)
                    {
                        TEST(data == nullptr);
                        res = vmaMapMemory(g_hAllocator, bufInfo.Allocation, (void**)&data);
                        TEST(res == VK_SUCCESS && data != nullptr);

                        if(mode == MODE::MAP_TWO_TIMES)
                        {
                            // Nested map must return the same pointer.
                            char* data2 = nullptr;
                            res = vmaMapMemory(g_hAllocator, bufInfo.Allocation, (void**)&data2);
                            TEST(res == VK_SUCCESS && data2 == data);
                        }
                    }
                    else if(mode == MODE::DONT_MAP)
                    {
                        TEST(allocInfo.pMappedData == nullptr);
                    }
                    else
                        TEST(0);

                    // Test if reading and writing from the beginning and end of mapped memory doesn't crash.
                    if(data)
                        data[0xFFFF] = data[0];

                    if(mode == MODE::MAP_FOR_MOMENT || mode == MODE::MAP_TWO_TIMES)
                    {
                        vmaUnmapMemory(g_hAllocator, bufInfo.Allocation);

                        VmaAllocationInfo allocInfo;
                        vmaGetAllocationInfo(g_hAllocator, bufInfo.Allocation, &allocInfo);
                        if(mode == MODE::MAP_FOR_MOMENT)
                            TEST(allocInfo.pMappedData == nullptr);
                        else
                            TEST(allocInfo.pMappedData == data);
                    }

                    // Randomly yield or sleep to shuffle thread interleaving.
                    switch(rand.Generate() % 3)
                    {
                    case 0: Sleep(0); break; // Yield.
                    case 1: Sleep(10); break; // 10 ms
                    // default: No sleep.
                    }

                    // Test if reading and writing from the beginning and end of mapped memory doesn't crash.
                    if(data)
                        data[0xFFFF] = data[0];
                }

                // Tear down in reverse order; unmap the buffers that are still mapped.
                for(size_t bufferIndex = threadBufferCount; bufferIndex--; )
                {
                    if(bufModes[bufferIndex] == MODE::MAP_FOR_LONGER ||
                        bufModes[bufferIndex] == MODE::MAP_TWO_TIMES)
                    {
                        vmaUnmapMemory(g_hAllocator, bufInfos[bufferIndex].Allocation);

                        VmaAllocationInfo allocInfo;
                        vmaGetAllocationInfo(g_hAllocator, bufInfos[bufferIndex].Allocation, &allocInfo);
                        TEST(allocInfo.pMappedData == nullptr);
                    }

                    vmaDestroyBuffer(g_hAllocator, bufInfos[bufferIndex].Buffer, bufInfos[bufferIndex].Allocation);
                }
            });
        }

        for(uint32_t threadIndex = 0; threadIndex < threadCount; ++threadIndex)
            threads[threadIndex].join();

        vmaDestroyPool(g_hAllocator, pool);
    }
}
5936
// Writes the CSV column header row for main test results to the given file.
// Column order must stay in sync with WriteMainTestResult().
static void WriteMainTestResultHeader(FILE* file)
{
    // The string contains no format specifiers, so plain fputs suffices.
    fputs(
        "Code,Time,"
        "Threads,Buffers and images,Sizes,Operations,Allocation strategy,Free order,"
        "Total Time (us),"
        "Allocation Time Min (us),"
        "Allocation Time Avg (us),"
        "Allocation Time Max (us),"
        "Deallocation Time Min (us),"
        "Deallocation Time Avg (us),"
        "Deallocation Time Max (us),"
        "Total Memory Allocated (B),"
        "Free Range Size Avg (B),"
        "Free Range Size Max (B)\n",
        file);
}
5953
// Appends one CSV row with the results of a main test run to `file`,
// matching the columns written by WriteMainTestResultHeader().
// Durations are converted to microseconds before printing.
// Note: the `config` parameter is currently unused in the body.
// Note: %I64u is the MSVC-specific format for 64-bit unsigned values;
// this file is compiled only under _WIN32 (see top of file).
static void WriteMainTestResult(
    FILE* file,
    const char* codeDescription,
    const char* testDescription,
    const Config& config, const Result& result)
{
    float totalTimeSeconds = ToFloatSeconds(result.TotalTime);
    float allocationTimeMinSeconds = ToFloatSeconds(result.AllocationTimeMin);
    float allocationTimeAvgSeconds = ToFloatSeconds(result.AllocationTimeAvg);
    float allocationTimeMaxSeconds = ToFloatSeconds(result.AllocationTimeMax);
    float deallocationTimeMinSeconds = ToFloatSeconds(result.DeallocationTimeMin);
    float deallocationTimeAvgSeconds = ToFloatSeconds(result.DeallocationTimeAvg);
    float deallocationTimeMaxSeconds = ToFloatSeconds(result.DeallocationTimeMax);

    std::string currTime;
    CurrentTimeToStr(currTime);

    fprintf(file,
        "%s,%s,%s,"
        "%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%I64u,%I64u,%I64u\n",
        codeDescription,
        currTime.c_str(),
        testDescription,
        totalTimeSeconds * 1e6f,
        allocationTimeMinSeconds * 1e6f,
        allocationTimeAvgSeconds * 1e6f,
        allocationTimeMaxSeconds * 1e6f,
        deallocationTimeMinSeconds * 1e6f,
        deallocationTimeAvgSeconds * 1e6f,
        deallocationTimeMaxSeconds * 1e6f,
        result.TotalMemoryAllocated,
        result.FreeRangeSizeAvg,
        result.FreeRangeSizeMax);
}
5988
// Writes the CSV column header row for pool test results to the given file.
// Column order must stay in sync with WritePoolTestResult().
static void WritePoolTestResultHeader(FILE* file)
{
    // The string contains no format specifiers, so plain fputs suffices.
    fputs(
        "Code,Test,Time,"
        "Config,"
        "Total Time (us),"
        "Allocation Time Min (us),"
        "Allocation Time Avg (us),"
        "Allocation Time Max (us),"
        "Deallocation Time Min (us),"
        "Deallocation Time Avg (us),"
        "Deallocation Time Max (us),"
        "Lost Allocation Count,"
        "Lost Allocation Total Size (B),"
        "Failed Allocation Count,"
        "Failed Allocation Total Size (B)\n",
        file);
}
6006
// Appends one CSV row with the configuration and results of a pool test run
// to `file`, matching the columns written by WritePoolTestResultHeader().
// Durations are converted to microseconds before printing.
// Note: %I64u is the MSVC-specific format for 64-bit unsigned values;
// this file is compiled only under _WIN32 (see top of file).
static void WritePoolTestResult(
    FILE* file,
    const char* codeDescription,
    const char* testDescription,
    const PoolTestConfig& config,
    const PoolTestResult& result)
{
    float totalTimeSeconds = ToFloatSeconds(result.TotalTime);
    float allocationTimeMinSeconds = ToFloatSeconds(result.AllocationTimeMin);
    float allocationTimeAvgSeconds = ToFloatSeconds(result.AllocationTimeAvg);
    float allocationTimeMaxSeconds = ToFloatSeconds(result.AllocationTimeMax);
    float deallocationTimeMinSeconds = ToFloatSeconds(result.DeallocationTimeMin);
    float deallocationTimeAvgSeconds = ToFloatSeconds(result.DeallocationTimeAvg);
    float deallocationTimeMaxSeconds = ToFloatSeconds(result.DeallocationTimeMax);

    std::string currTime;
    CurrentTimeToStr(currTime);

    fprintf(file,
        "%s,%s,%s,"
        "ThreadCount=%u PoolSize=%llu FrameCount=%u TotalItemCount=%u UsedItemCount=%u...%u ItemsToMakeUnusedPercent=%u,"
        "%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%.2f,%I64u,%I64u,%I64u,%I64u\n",
        // General
        codeDescription,
        testDescription,
        currTime.c_str(),
        // Config
        config.ThreadCount,
        (unsigned long long)config.PoolSize,
        config.FrameCount,
        config.TotalItemCount,
        config.UsedItemCountMin,
        config.UsedItemCountMax,
        config.ItemsToMakeUnusedPercent,
        // Results
        totalTimeSeconds * 1e6f,
        allocationTimeMinSeconds * 1e6f,
        allocationTimeAvgSeconds * 1e6f,
        allocationTimeMaxSeconds * 1e6f,
        deallocationTimeMinSeconds * 1e6f,
        deallocationTimeAvgSeconds * 1e6f,
        deallocationTimeMaxSeconds * 1e6f,
        result.LostAllocationCount,
        result.LostAllocationTotalSize,
        result.FailedAllocationCount,
        result.FailedAllocationTotalSize);
}
6054
// Runs MainTest() once with a hard-coded, hand-tuned configuration — intended
// for ad-hoc experiments — and appends the result as one CSV row to `file`.
static void PerformCustomMainTest(FILE* file)
{
    Config config{};
    config.RandSeed = 65735476;
    //config.MaxBytesToAllocate = 4ull * 1024 * 1024; // 4 MB
    config.MaxBytesToAllocate = 4ull * 1024 * 1024 * 1024; // 4 GB
    config.MemUsageProbability[0] = 1; // VMA_MEMORY_USAGE_GPU_ONLY
    config.FreeOrder = FREE_ORDER::FORWARD;
    config.ThreadCount = 16;
    config.ThreadsUsingCommonAllocationsProbabilityPercent = 50;
    config.AllocationStrategy = 0;

    // Buffers
    //config.AllocationSizes.push_back({4, 16, 1024});
    config.AllocationSizes.push_back({4, 0x10000, 0xA00000}); // 64 KB ... 10 MB

    // Images
    //config.AllocationSizes.push_back({4, 0, 0, 4, 32});
    //config.AllocationSizes.push_back({4, 0, 0, 256, 2048});

    // Allocate 5% up front, then perform 1024 random alloc/free operations.
    config.BeginBytesToAllocate = config.MaxBytesToAllocate * 5 / 100;
    config.AdditionalOperationCount = 1024;

    Result result{};
    VkResult res = MainTest(result, config);
    TEST(res == VK_SUCCESS);
    WriteMainTestResult(file, "Foo", "CustomTest", config, result);
}
6083
// Runs TestPool_Benchmark() once with a hard-coded configuration mixing
// buffer and image allocations — intended for ad-hoc experiments — and
// appends the result as one CSV row to `file`.
static void PerformCustomPoolTest(FILE* file)
{
    PoolTestConfig config;
    config.PoolSize = 100 * 1024 * 1024; // Placeholder; recomputed from average resource size below.
    config.RandSeed = 2345764;
    config.ThreadCount = 1;
    config.FrameCount = 200;
    config.ItemsToMakeUnusedPercent = 2;

    // Buffers: 1 KB ... 1 MB.
    AllocationSize allocSize = {};
    allocSize.BufferSizeMin = 1024;
    allocSize.BufferSizeMax = 1024 * 1024;
    allocSize.Probability = 1;
    config.AllocationSizes.push_back(allocSize);

    // Images: 128 ... 1024 pixels per side (buffer sizes zeroed out).
    allocSize.BufferSizeMin = 0;
    allocSize.BufferSizeMax = 0;
    allocSize.ImageSizeMin = 128;
    allocSize.ImageSizeMax = 1024;
    allocSize.Probability = 1;
    config.AllocationSizes.push_back(allocSize);

    // Size the pool for ~200 average resources; keep 128...160 items in use
    // out of a total working set 10x larger.
    config.PoolSize = config.CalcAvgResourceSize() * 200;
    config.UsedItemCountMax = 160;
    config.TotalItemCount = config.UsedItemCountMax * 10;
    config.UsedItemCountMin = config.UsedItemCountMax * 80 / 100;

    PoolTestResult result = {};
    TestPool_Benchmark(result, config);

    WritePoolTestResult(file, "Code desc", "Test desc", config, result);
}
6116
PerformMainTests(FILE * file)6117 static void PerformMainTests(FILE* file)
6118 {
6119 wprintf(L"MAIN TESTS:\n");
6120
6121 uint32_t repeatCount = 1;
6122 if(ConfigType >= CONFIG_TYPE_MAXIMUM) repeatCount = 3;
6123
6124 Config config{};
6125 config.RandSeed = 65735476;
6126 config.MemUsageProbability[0] = 1; // VMA_MEMORY_USAGE_GPU_ONLY
6127 config.FreeOrder = FREE_ORDER::FORWARD;
6128
6129 size_t threadCountCount = 1;
6130 switch(ConfigType)
6131 {
6132 case CONFIG_TYPE_MINIMUM: threadCountCount = 1; break;
6133 case CONFIG_TYPE_SMALL: threadCountCount = 2; break;
6134 case CONFIG_TYPE_AVERAGE: threadCountCount = 3; break;
6135 case CONFIG_TYPE_LARGE: threadCountCount = 5; break;
6136 case CONFIG_TYPE_MAXIMUM: threadCountCount = 7; break;
6137 default: assert(0);
6138 }
6139
6140 const size_t strategyCount = GetAllocationStrategyCount();
6141
6142 for(size_t threadCountIndex = 0; threadCountIndex < threadCountCount; ++threadCountIndex)
6143 {
6144 std::string desc1;
6145
6146 switch(threadCountIndex)
6147 {
6148 case 0:
6149 desc1 += "1_thread";
6150 config.ThreadCount = 1;
6151 config.ThreadsUsingCommonAllocationsProbabilityPercent = 0;
6152 break;
6153 case 1:
6154 desc1 += "16_threads+0%_common";
6155 config.ThreadCount = 16;
6156 config.ThreadsUsingCommonAllocationsProbabilityPercent = 0;
6157 break;
6158 case 2:
6159 desc1 += "16_threads+50%_common";
6160 config.ThreadCount = 16;
6161 config.ThreadsUsingCommonAllocationsProbabilityPercent = 50;
6162 break;
6163 case 3:
6164 desc1 += "16_threads+100%_common";
6165 config.ThreadCount = 16;
6166 config.ThreadsUsingCommonAllocationsProbabilityPercent = 100;
6167 break;
6168 case 4:
6169 desc1 += "2_threads+0%_common";
6170 config.ThreadCount = 2;
6171 config.ThreadsUsingCommonAllocationsProbabilityPercent = 0;
6172 break;
6173 case 5:
6174 desc1 += "2_threads+50%_common";
6175 config.ThreadCount = 2;
6176 config.ThreadsUsingCommonAllocationsProbabilityPercent = 50;
6177 break;
6178 case 6:
6179 desc1 += "2_threads+100%_common";
6180 config.ThreadCount = 2;
6181 config.ThreadsUsingCommonAllocationsProbabilityPercent = 100;
6182 break;
6183 default:
6184 assert(0);
6185 }
6186
6187 // 0 = buffers, 1 = images, 2 = buffers and images
6188 size_t buffersVsImagesCount = 2;
6189 if(ConfigType >= CONFIG_TYPE_LARGE) ++buffersVsImagesCount;
6190 for(size_t buffersVsImagesIndex = 0; buffersVsImagesIndex < buffersVsImagesCount; ++buffersVsImagesIndex)
6191 {
6192 std::string desc2 = desc1;
6193 switch(buffersVsImagesIndex)
6194 {
6195 case 0: desc2 += ",Buffers"; break;
6196 case 1: desc2 += ",Images"; break;
6197 case 2: desc2 += ",Buffers+Images"; break;
6198 default: assert(0);
6199 }
6200
6201 // 0 = small, 1 = large, 2 = small and large
6202 size_t smallVsLargeCount = 2;
6203 if(ConfigType >= CONFIG_TYPE_LARGE) ++smallVsLargeCount;
6204 for(size_t smallVsLargeIndex = 0; smallVsLargeIndex < smallVsLargeCount; ++smallVsLargeIndex)
6205 {
6206 std::string desc3 = desc2;
6207 switch(smallVsLargeIndex)
6208 {
6209 case 0: desc3 += ",Small"; break;
6210 case 1: desc3 += ",Large"; break;
6211 case 2: desc3 += ",Small+Large"; break;
6212 default: assert(0);
6213 }
6214
6215 if(smallVsLargeIndex == 1 || smallVsLargeIndex == 2)
6216 config.MaxBytesToAllocate = 4ull * 1024 * 1024 * 1024; // 4 GB
6217 else
6218 config.MaxBytesToAllocate = 4ull * 1024 * 1024;
6219
6220 // 0 = varying sizes min...max, 1 = set of constant sizes
6221 size_t constantSizesCount = 1;
6222 if(ConfigType >= CONFIG_TYPE_SMALL) ++constantSizesCount;
6223 for(size_t constantSizesIndex = 0; constantSizesIndex < constantSizesCount; ++constantSizesIndex)
6224 {
6225 std::string desc4 = desc3;
6226 switch(constantSizesIndex)
6227 {
6228 case 0: desc4 += " Varying_sizes"; break;
6229 case 1: desc4 += " Constant_sizes"; break;
6230 default: assert(0);
6231 }
6232
6233 config.AllocationSizes.clear();
6234 // Buffers present
6235 if(buffersVsImagesIndex == 0 || buffersVsImagesIndex == 2)
6236 {
6237 // Small
6238 if(smallVsLargeIndex == 0 || smallVsLargeIndex == 2)
6239 {
6240 // Varying size
6241 if(constantSizesIndex == 0)
6242 config.AllocationSizes.push_back({4, 16, 1024});
6243 // Constant sizes
6244 else
6245 {
6246 config.AllocationSizes.push_back({1, 16, 16});
6247 config.AllocationSizes.push_back({1, 64, 64});
6248 config.AllocationSizes.push_back({1, 256, 256});
6249 config.AllocationSizes.push_back({1, 1024, 1024});
6250 }
6251 }
6252 // Large
6253 if(smallVsLargeIndex == 1 || smallVsLargeIndex == 2)
6254 {
6255 // Varying size
6256 if(constantSizesIndex == 0)
6257 config.AllocationSizes.push_back({4, 0x10000, 0xA00000}); // 64 KB ... 10 MB
6258 // Constant sizes
6259 else
6260 {
6261 config.AllocationSizes.push_back({1, 0x10000, 0x10000});
6262 config.AllocationSizes.push_back({1, 0x80000, 0x80000});
6263 config.AllocationSizes.push_back({1, 0x200000, 0x200000});
6264 config.AllocationSizes.push_back({1, 0xA00000, 0xA00000});
6265 }
6266 }
6267 }
6268 // Images present
6269 if(buffersVsImagesIndex == 1 || buffersVsImagesIndex == 2)
6270 {
6271 // Small
6272 if(smallVsLargeIndex == 0 || smallVsLargeIndex == 2)
6273 {
6274 // Varying size
6275 if(constantSizesIndex == 0)
6276 config.AllocationSizes.push_back({4, 0, 0, 4, 32});
6277 // Constant sizes
6278 else
6279 {
6280 config.AllocationSizes.push_back({1, 0, 0, 4, 4});
6281 config.AllocationSizes.push_back({1, 0, 0, 8, 8});
6282 config.AllocationSizes.push_back({1, 0, 0, 16, 16});
6283 config.AllocationSizes.push_back({1, 0, 0, 32, 32});
6284 }
6285 }
6286 // Large
6287 if(smallVsLargeIndex == 1 || smallVsLargeIndex == 2)
6288 {
6289 // Varying size
6290 if(constantSizesIndex == 0)
6291 config.AllocationSizes.push_back({4, 0, 0, 256, 2048});
6292 // Constant sizes
6293 else
6294 {
6295 config.AllocationSizes.push_back({1, 0, 0, 256, 256});
6296 config.AllocationSizes.push_back({1, 0, 0, 512, 512});
6297 config.AllocationSizes.push_back({1, 0, 0, 1024, 1024});
6298 config.AllocationSizes.push_back({1, 0, 0, 2048, 2048});
6299 }
6300 }
6301 }
6302
6303 // 0 = 100%, additional_operations = 0, 1 = 50%, 2 = 5%, 3 = 95% additional_operations = a lot
6304 size_t beginBytesToAllocateCount = 1;
6305 if(ConfigType >= CONFIG_TYPE_SMALL) ++beginBytesToAllocateCount;
6306 if(ConfigType >= CONFIG_TYPE_AVERAGE) ++beginBytesToAllocateCount;
6307 if(ConfigType >= CONFIG_TYPE_LARGE) ++beginBytesToAllocateCount;
6308 for(size_t beginBytesToAllocateIndex = 0; beginBytesToAllocateIndex < beginBytesToAllocateCount; ++beginBytesToAllocateIndex)
6309 {
6310 std::string desc5 = desc4;
6311
6312 switch(beginBytesToAllocateIndex)
6313 {
6314 case 0:
6315 desc5 += ",Allocate_100%";
6316 config.BeginBytesToAllocate = config.MaxBytesToAllocate;
6317 config.AdditionalOperationCount = 0;
6318 break;
6319 case 1:
6320 desc5 += ",Allocate_50%+Operations";
6321 config.BeginBytesToAllocate = config.MaxBytesToAllocate * 50 / 100;
6322 config.AdditionalOperationCount = 1024;
6323 break;
6324 case 2:
6325 desc5 += ",Allocate_5%+Operations";
6326 config.BeginBytesToAllocate = config.MaxBytesToAllocate * 5 / 100;
6327 config.AdditionalOperationCount = 1024;
6328 break;
6329 case 3:
6330 desc5 += ",Allocate_95%+Operations";
6331 config.BeginBytesToAllocate = config.MaxBytesToAllocate * 95 / 100;
6332 config.AdditionalOperationCount = 1024;
6333 break;
6334 default:
6335 assert(0);
6336 }
6337
6338 for(size_t strategyIndex = 0; strategyIndex < strategyCount; ++strategyIndex)
6339 {
6340 std::string desc6 = desc5;
6341 switch(strategyIndex)
6342 {
6343 case 0:
6344 desc6 += ",BestFit";
6345 config.AllocationStrategy = VMA_ALLOCATION_CREATE_STRATEGY_BEST_FIT_BIT;
6346 break;
6347 case 1:
6348 desc6 += ",WorstFit";
6349 config.AllocationStrategy = VMA_ALLOCATION_CREATE_STRATEGY_WORST_FIT_BIT;
6350 break;
6351 case 2:
6352 desc6 += ",FirstFit";
6353 config.AllocationStrategy = VMA_ALLOCATION_CREATE_STRATEGY_FIRST_FIT_BIT;
6354 break;
6355 default:
6356 assert(0);
6357 }
6358
6359 desc6 += ',';
6360 desc6 += FREE_ORDER_NAMES[(uint32_t)config.FreeOrder];
6361
6362 const char* testDescription = desc6.c_str();
6363
6364 for(size_t repeat = 0; repeat < repeatCount; ++repeat)
6365 {
6366 printf("%s #%u\n", testDescription, (uint32_t)repeat);
6367
6368 Result result{};
6369 VkResult res = MainTest(result, config);
6370 TEST(res == VK_SUCCESS);
6371 if(file)
6372 {
6373 WriteMainTestResult(file, CODE_DESCRIPTION, testDescription, config, result);
6374 }
6375 }
6376 }
6377 }
6378 }
6379 }
6380 }
6381 }
6382 }
6383
PerformPoolTests(FILE * file)6384 static void PerformPoolTests(FILE* file)
6385 {
6386 wprintf(L"POOL TESTS:\n");
6387
6388 const size_t AVG_RESOURCES_PER_POOL = 300;
6389
6390 uint32_t repeatCount = 1;
6391 if(ConfigType >= CONFIG_TYPE_MAXIMUM) repeatCount = 3;
6392
6393 PoolTestConfig config{};
6394 config.RandSeed = 2346343;
6395 config.FrameCount = 200;
6396 config.ItemsToMakeUnusedPercent = 2;
6397
6398 size_t threadCountCount = 1;
6399 switch(ConfigType)
6400 {
6401 case CONFIG_TYPE_MINIMUM: threadCountCount = 1; break;
6402 case CONFIG_TYPE_SMALL: threadCountCount = 2; break;
6403 case CONFIG_TYPE_AVERAGE: threadCountCount = 2; break;
6404 case CONFIG_TYPE_LARGE: threadCountCount = 3; break;
6405 case CONFIG_TYPE_MAXIMUM: threadCountCount = 3; break;
6406 default: assert(0);
6407 }
6408 for(size_t threadCountIndex = 0; threadCountIndex < threadCountCount; ++threadCountIndex)
6409 {
6410 std::string desc1;
6411
6412 switch(threadCountIndex)
6413 {
6414 case 0:
6415 desc1 += "1_thread";
6416 config.ThreadCount = 1;
6417 break;
6418 case 1:
6419 desc1 += "16_threads";
6420 config.ThreadCount = 16;
6421 break;
6422 case 2:
6423 desc1 += "2_threads";
6424 config.ThreadCount = 2;
6425 break;
6426 default:
6427 assert(0);
6428 }
6429
6430 // 0 = buffers, 1 = images, 2 = buffers and images
6431 size_t buffersVsImagesCount = 2;
6432 if(ConfigType >= CONFIG_TYPE_LARGE) ++buffersVsImagesCount;
6433 for(size_t buffersVsImagesIndex = 0; buffersVsImagesIndex < buffersVsImagesCount; ++buffersVsImagesIndex)
6434 {
6435 std::string desc2 = desc1;
6436 switch(buffersVsImagesIndex)
6437 {
6438 case 0: desc2 += " Buffers"; break;
6439 case 1: desc2 += " Images"; break;
6440 case 2: desc2 += " Buffers+Images"; break;
6441 default: assert(0);
6442 }
6443
6444 // 0 = small, 1 = large, 2 = small and large
6445 size_t smallVsLargeCount = 2;
6446 if(ConfigType >= CONFIG_TYPE_LARGE) ++smallVsLargeCount;
6447 for(size_t smallVsLargeIndex = 0; smallVsLargeIndex < smallVsLargeCount; ++smallVsLargeIndex)
6448 {
6449 std::string desc3 = desc2;
6450 switch(smallVsLargeIndex)
6451 {
6452 case 0: desc3 += " Small"; break;
6453 case 1: desc3 += " Large"; break;
6454 case 2: desc3 += " Small+Large"; break;
6455 default: assert(0);
6456 }
6457
6458 if(smallVsLargeIndex == 1 || smallVsLargeIndex == 2)
6459 config.PoolSize = 6ull * 1024 * 1024 * 1024; // 6 GB
6460 else
6461 config.PoolSize = 4ull * 1024 * 1024;
6462
6463 // 0 = varying sizes min...max, 1 = set of constant sizes
6464 size_t constantSizesCount = 1;
6465 if(ConfigType >= CONFIG_TYPE_SMALL) ++constantSizesCount;
6466 for(size_t constantSizesIndex = 0; constantSizesIndex < constantSizesCount; ++constantSizesIndex)
6467 {
6468 std::string desc4 = desc3;
6469 switch(constantSizesIndex)
6470 {
6471 case 0: desc4 += " Varying_sizes"; break;
6472 case 1: desc4 += " Constant_sizes"; break;
6473 default: assert(0);
6474 }
6475
6476 config.AllocationSizes.clear();
6477 // Buffers present
6478 if(buffersVsImagesIndex == 0 || buffersVsImagesIndex == 2)
6479 {
6480 // Small
6481 if(smallVsLargeIndex == 0 || smallVsLargeIndex == 2)
6482 {
6483 // Varying size
6484 if(constantSizesIndex == 0)
6485 config.AllocationSizes.push_back({4, 16, 1024});
6486 // Constant sizes
6487 else
6488 {
6489 config.AllocationSizes.push_back({1, 16, 16});
6490 config.AllocationSizes.push_back({1, 64, 64});
6491 config.AllocationSizes.push_back({1, 256, 256});
6492 config.AllocationSizes.push_back({1, 1024, 1024});
6493 }
6494 }
6495 // Large
6496 if(smallVsLargeIndex == 1 || smallVsLargeIndex == 2)
6497 {
6498 // Varying size
6499 if(constantSizesIndex == 0)
6500 config.AllocationSizes.push_back({4, 0x10000, 0xA00000}); // 64 KB ... 10 MB
6501 // Constant sizes
6502 else
6503 {
6504 config.AllocationSizes.push_back({1, 0x10000, 0x10000});
6505 config.AllocationSizes.push_back({1, 0x80000, 0x80000});
6506 config.AllocationSizes.push_back({1, 0x200000, 0x200000});
6507 config.AllocationSizes.push_back({1, 0xA00000, 0xA00000});
6508 }
6509 }
6510 }
6511 // Images present
6512 if(buffersVsImagesIndex == 1 || buffersVsImagesIndex == 2)
6513 {
6514 // Small
6515 if(smallVsLargeIndex == 0 || smallVsLargeIndex == 2)
6516 {
6517 // Varying size
6518 if(constantSizesIndex == 0)
6519 config.AllocationSizes.push_back({4, 0, 0, 4, 32});
6520 // Constant sizes
6521 else
6522 {
6523 config.AllocationSizes.push_back({1, 0, 0, 4, 4});
6524 config.AllocationSizes.push_back({1, 0, 0, 8, 8});
6525 config.AllocationSizes.push_back({1, 0, 0, 16, 16});
6526 config.AllocationSizes.push_back({1, 0, 0, 32, 32});
6527 }
6528 }
6529 // Large
6530 if(smallVsLargeIndex == 1 || smallVsLargeIndex == 2)
6531 {
6532 // Varying size
6533 if(constantSizesIndex == 0)
6534 config.AllocationSizes.push_back({4, 0, 0, 256, 2048});
6535 // Constant sizes
6536 else
6537 {
6538 config.AllocationSizes.push_back({1, 0, 0, 256, 256});
6539 config.AllocationSizes.push_back({1, 0, 0, 512, 512});
6540 config.AllocationSizes.push_back({1, 0, 0, 1024, 1024});
6541 config.AllocationSizes.push_back({1, 0, 0, 2048, 2048});
6542 }
6543 }
6544 }
6545
6546 const VkDeviceSize avgResourceSize = config.CalcAvgResourceSize();
6547 config.PoolSize = avgResourceSize * AVG_RESOURCES_PER_POOL;
6548
6549 // 0 = 66%, 1 = 133%, 2 = 100%, 3 = 33%, 4 = 166%
6550 size_t subscriptionModeCount;
6551 switch(ConfigType)
6552 {
6553 case CONFIG_TYPE_MINIMUM: subscriptionModeCount = 2; break;
6554 case CONFIG_TYPE_SMALL: subscriptionModeCount = 2; break;
6555 case CONFIG_TYPE_AVERAGE: subscriptionModeCount = 3; break;
6556 case CONFIG_TYPE_LARGE: subscriptionModeCount = 5; break;
6557 case CONFIG_TYPE_MAXIMUM: subscriptionModeCount = 5; break;
6558 default: assert(0);
6559 }
6560 for(size_t subscriptionModeIndex = 0; subscriptionModeIndex < subscriptionModeCount; ++subscriptionModeIndex)
6561 {
6562 std::string desc5 = desc4;
6563
6564 switch(subscriptionModeIndex)
6565 {
6566 case 0:
6567 desc5 += " Subscription_66%";
6568 config.UsedItemCountMax = AVG_RESOURCES_PER_POOL * 66 / 100;
6569 break;
6570 case 1:
6571 desc5 += " Subscription_133%";
6572 config.UsedItemCountMax = AVG_RESOURCES_PER_POOL * 133 / 100;
6573 break;
6574 case 2:
6575 desc5 += " Subscription_100%";
6576 config.UsedItemCountMax = AVG_RESOURCES_PER_POOL;
6577 break;
6578 case 3:
6579 desc5 += " Subscription_33%";
6580 config.UsedItemCountMax = AVG_RESOURCES_PER_POOL * 33 / 100;
6581 break;
6582 case 4:
6583 desc5 += " Subscription_166%";
6584 config.UsedItemCountMax = AVG_RESOURCES_PER_POOL * 166 / 100;
6585 break;
6586 default:
6587 assert(0);
6588 }
6589
6590 config.TotalItemCount = config.UsedItemCountMax * 5;
6591 config.UsedItemCountMin = config.UsedItemCountMax * 80 / 100;
6592
6593 const char* testDescription = desc5.c_str();
6594
6595 for(size_t repeat = 0; repeat < repeatCount; ++repeat)
6596 {
6597 printf("%s #%u\n", testDescription, (uint32_t)repeat);
6598
6599 PoolTestResult result{};
6600 TestPool_Benchmark(result, config);
6601 WritePoolTestResult(file, CODE_DESCRIPTION, testDescription, config, result);
6602 }
6603 }
6604 }
6605 }
6606 }
6607 }
6608 }
6609
BasicTestBuddyAllocator()6610 static void BasicTestBuddyAllocator()
6611 {
6612 wprintf(L"Basic test buddy allocator\n");
6613
6614 RandomNumberGenerator rand{76543};
6615
6616 VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
6617 sampleBufCreateInfo.size = 1024; // Whatever.
6618 sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
6619
6620 VmaAllocationCreateInfo sampleAllocCreateInfo = {};
6621 sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
6622
6623 VmaPoolCreateInfo poolCreateInfo = {};
6624 VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
6625 TEST(res == VK_SUCCESS);
6626
6627 // Deliberately adding 1023 to test usable size smaller than memory block size.
6628 poolCreateInfo.blockSize = 1024 * 1024 + 1023;
6629 poolCreateInfo.flags = VMA_POOL_CREATE_BUDDY_ALGORITHM_BIT;
6630 //poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1;
6631
6632 VmaPool pool = nullptr;
6633 res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
6634 TEST(res == VK_SUCCESS);
6635
6636 VkBufferCreateInfo bufCreateInfo = sampleBufCreateInfo;
6637
6638 VmaAllocationCreateInfo allocCreateInfo = {};
6639 allocCreateInfo.pool = pool;
6640
6641 std::vector<BufferInfo> bufInfo;
6642 BufferInfo newBufInfo;
6643 VmaAllocationInfo allocInfo;
6644
6645 bufCreateInfo.size = 1024 * 256;
6646 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
6647 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
6648 TEST(res == VK_SUCCESS);
6649 bufInfo.push_back(newBufInfo);
6650
6651 bufCreateInfo.size = 1024 * 512;
6652 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
6653 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
6654 TEST(res == VK_SUCCESS);
6655 bufInfo.push_back(newBufInfo);
6656
6657 bufCreateInfo.size = 1024 * 128;
6658 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
6659 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
6660 TEST(res == VK_SUCCESS);
6661 bufInfo.push_back(newBufInfo);
6662
6663 // Test very small allocation, smaller than minimum node size.
6664 bufCreateInfo.size = 1;
6665 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
6666 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
6667 TEST(res == VK_SUCCESS);
6668 bufInfo.push_back(newBufInfo);
6669
6670 // Test some small allocation with alignment requirement.
6671 {
6672 VkMemoryRequirements memReq;
6673 memReq.alignment = 256;
6674 memReq.memoryTypeBits = UINT32_MAX;
6675 memReq.size = 32;
6676
6677 newBufInfo.Buffer = VK_NULL_HANDLE;
6678 res = vmaAllocateMemory(g_hAllocator, &memReq, &allocCreateInfo,
6679 &newBufInfo.Allocation, &allocInfo);
6680 TEST(res == VK_SUCCESS);
6681 TEST(allocInfo.offset % memReq.alignment == 0);
6682 bufInfo.push_back(newBufInfo);
6683 }
6684
6685 //SaveAllocatorStatsToFile(L"TEST.json");
6686
6687 VmaPoolStats stats = {};
6688 vmaGetPoolStats(g_hAllocator, pool, &stats);
6689 int DBG = 0; // Set breakpoint here to inspect `stats`.
6690
6691 // Allocate enough new buffers to surely fall into second block.
6692 for(uint32_t i = 0; i < 32; ++i)
6693 {
6694 bufCreateInfo.size = 1024 * (rand.Generate() % 32 + 1);
6695 res = vmaCreateBuffer(g_hAllocator, &bufCreateInfo, &allocCreateInfo,
6696 &newBufInfo.Buffer, &newBufInfo.Allocation, &allocInfo);
6697 TEST(res == VK_SUCCESS);
6698 bufInfo.push_back(newBufInfo);
6699 }
6700
6701 SaveAllocatorStatsToFile(L"BuddyTest01.json");
6702
6703 // Destroy the buffers in random order.
6704 while(!bufInfo.empty())
6705 {
6706 const size_t indexToDestroy = rand.Generate() % bufInfo.size();
6707 const BufferInfo& currBufInfo = bufInfo[indexToDestroy];
6708 vmaDestroyBuffer(g_hAllocator, currBufInfo.Buffer, currBufInfo.Allocation);
6709 bufInfo.erase(bufInfo.begin() + indexToDestroy);
6710 }
6711
6712 vmaDestroyPool(g_hAllocator, pool);
6713 }
6714
BasicTestAllocatePages()6715 static void BasicTestAllocatePages()
6716 {
6717 wprintf(L"Basic test allocate pages\n");
6718
6719 RandomNumberGenerator rand{765461};
6720
6721 VkBufferCreateInfo sampleBufCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
6722 sampleBufCreateInfo.size = 1024; // Whatever.
6723 sampleBufCreateInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
6724
6725 VmaAllocationCreateInfo sampleAllocCreateInfo = {};
6726 sampleAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
6727
6728 VmaPoolCreateInfo poolCreateInfo = {};
6729 VkResult res = vmaFindMemoryTypeIndexForBufferInfo(g_hAllocator, &sampleBufCreateInfo, &sampleAllocCreateInfo, &poolCreateInfo.memoryTypeIndex);
6730 TEST(res == VK_SUCCESS);
6731
6732 // 1 block of 1 MB.
6733 poolCreateInfo.blockSize = 1024 * 1024;
6734 poolCreateInfo.minBlockCount = poolCreateInfo.maxBlockCount = 1;
6735
6736 // Create pool.
6737 VmaPool pool = nullptr;
6738 res = vmaCreatePool(g_hAllocator, &poolCreateInfo, &pool);
6739 TEST(res == VK_SUCCESS);
6740
6741 // Make 100 allocations of 4 KB - they should fit into the pool.
6742 VkMemoryRequirements memReq;
6743 memReq.memoryTypeBits = UINT32_MAX;
6744 memReq.alignment = 4 * 1024;
6745 memReq.size = 4 * 1024;
6746
6747 VmaAllocationCreateInfo allocCreateInfo = {};
6748 allocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
6749 allocCreateInfo.pool = pool;
6750
6751 constexpr uint32_t allocCount = 100;
6752
6753 std::vector<VmaAllocation> alloc{allocCount};
6754 std::vector<VmaAllocationInfo> allocInfo{allocCount};
6755 res = vmaAllocateMemoryPages(g_hAllocator, &memReq, &allocCreateInfo, allocCount, alloc.data(), allocInfo.data());
6756 TEST(res == VK_SUCCESS);
6757 for(uint32_t i = 0; i < allocCount; ++i)
6758 {
6759 TEST(alloc[i] != VK_NULL_HANDLE &&
6760 allocInfo[i].pMappedData != nullptr &&
6761 allocInfo[i].deviceMemory == allocInfo[0].deviceMemory &&
6762 allocInfo[i].memoryType == allocInfo[0].memoryType);
6763 }
6764
6765 // Free the allocations.
6766 vmaFreeMemoryPages(g_hAllocator, allocCount, alloc.data());
6767 std::fill(alloc.begin(), alloc.end(), nullptr);
6768 std::fill(allocInfo.begin(), allocInfo.end(), VmaAllocationInfo{});
6769
6770 // Try to make 100 allocations of 100 KB. This call should fail due to not enough memory.
6771 // Also test optional allocationInfo = null.
6772 memReq.size = 100 * 1024;
6773 res = vmaAllocateMemoryPages(g_hAllocator, &memReq, &allocCreateInfo, allocCount, alloc.data(), nullptr);
6774 TEST(res != VK_SUCCESS);
6775 TEST(std::find_if(alloc.begin(), alloc.end(), [](VmaAllocation alloc){ return alloc != VK_NULL_HANDLE; }) == alloc.end());
6776
6777 // Make 100 allocations of 4 KB, but with required alignment of 128 KB. This should also fail.
6778 memReq.size = 4 * 1024;
6779 memReq.alignment = 128 * 1024;
6780 res = vmaAllocateMemoryPages(g_hAllocator, &memReq, &allocCreateInfo, allocCount, alloc.data(), allocInfo.data());
6781 TEST(res != VK_SUCCESS);
6782
6783 // Make 100 dedicated allocations of 4 KB.
6784 memReq.alignment = 4 * 1024;
6785 memReq.size = 4 * 1024;
6786
6787 VmaAllocationCreateInfo dedicatedAllocCreateInfo = {};
6788 dedicatedAllocCreateInfo.usage = VMA_MEMORY_USAGE_CPU_ONLY;
6789 dedicatedAllocCreateInfo.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT | VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
6790 res = vmaAllocateMemoryPages(g_hAllocator, &memReq, &dedicatedAllocCreateInfo, allocCount, alloc.data(), allocInfo.data());
6791 TEST(res == VK_SUCCESS);
6792 for(uint32_t i = 0; i < allocCount; ++i)
6793 {
6794 TEST(alloc[i] != VK_NULL_HANDLE &&
6795 allocInfo[i].pMappedData != nullptr &&
6796 allocInfo[i].memoryType == allocInfo[0].memoryType &&
6797 allocInfo[i].offset == 0);
6798 if(i > 0)
6799 {
6800 TEST(allocInfo[i].deviceMemory != allocInfo[0].deviceMemory);
6801 }
6802 }
6803
6804 // Free the allocations.
6805 vmaFreeMemoryPages(g_hAllocator, allocCount, alloc.data());
6806 std::fill(alloc.begin(), alloc.end(), nullptr);
6807 std::fill(allocInfo.begin(), allocInfo.end(), VmaAllocationInfo{});
6808
6809 vmaDestroyPool(g_hAllocator, pool);
6810 }
6811
6812 // Test the testing environment.
TestGpuData()6813 static void TestGpuData()
6814 {
6815 RandomNumberGenerator rand = { 53434 };
6816
6817 std::vector<AllocInfo> allocInfo;
6818
6819 for(size_t i = 0; i < 100; ++i)
6820 {
6821 AllocInfo info = {};
6822
6823 info.m_BufferInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
6824 info.m_BufferInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT |
6825 VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
6826 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
6827 info.m_BufferInfo.size = 1024 * 1024 * (rand.Generate() % 9 + 1);
6828
6829 VmaAllocationCreateInfo allocCreateInfo = {};
6830 allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;
6831
6832 VkResult res = vmaCreateBuffer(g_hAllocator, &info.m_BufferInfo, &allocCreateInfo, &info.m_Buffer, &info.m_Allocation, nullptr);
6833 TEST(res == VK_SUCCESS);
6834
6835 info.m_StartValue = rand.Generate();
6836
6837 allocInfo.push_back(std::move(info));
6838 }
6839
6840 UploadGpuData(allocInfo.data(), allocInfo.size());
6841
6842 ValidateGpuData(allocInfo.data(), allocInfo.size());
6843
6844 DestroyAllAllocations(allocInfo);
6845 }
6846
Test()6847 void Test()
6848 {
6849 wprintf(L"TESTING:\n");
6850
6851 if(false)
6852 {
6853 ////////////////////////////////////////////////////////////////////////////////
6854 // Temporarily insert custom tests here:
6855 TestVirtualBlocks();
6856 TestVirtualBlocksAlgorithms();
6857 return;
6858 }
6859
6860 // # Simple tests
6861
6862 TestBasics();
6863 TestVirtualBlocks();
6864 TestVirtualBlocksAlgorithms();
6865 TestAllocationVersusResourceSize();
6866 //TestGpuData(); // Not calling this because it's just testing the testing environment.
6867 #if VMA_DEBUG_MARGIN
6868 TestDebugMargin();
6869 #else
6870 TestPool_SameSize();
6871 TestPool_MinBlockCount();
6872 TestPool_MinAllocationAlignment();
6873 TestHeapSizeLimit();
6874 #endif
6875 #if VMA_DEBUG_INITIALIZE_ALLOCATIONS
6876 TestAllocationsInitialization();
6877 #endif
6878 TestMemoryUsage();
6879 TestDeviceCoherentMemory();
6880 TestBudget();
6881 TestAliasing();
6882 TestMapping();
6883 TestDeviceLocalMapped();
6884 TestMappingMultithreaded();
6885 TestLinearAllocator();
6886 ManuallyTestLinearAllocator();
6887 TestLinearAllocatorMultiBlock();
6888
6889 BasicTestBuddyAllocator();
6890 BasicTestAllocatePages();
6891
6892 if(VK_KHR_buffer_device_address_enabled)
6893 TestBufferDeviceAddress();
6894 if(VK_EXT_memory_priority_enabled)
6895 TestMemoryPriority();
6896
6897 {
6898 FILE* file;
6899 fopen_s(&file, "Algorithms.csv", "w");
6900 assert(file != NULL);
6901 BenchmarkAlgorithms(file);
6902 fclose(file);
6903 }
6904
6905 TestDefragmentationSimple();
6906 TestDefragmentationFull();
6907 TestDefragmentationWholePool();
6908 TestDefragmentationGpu();
6909 TestDefragmentationIncrementalBasic();
6910 TestDefragmentationIncrementalComplex();
6911
6912 // # Detailed tests
6913 FILE* file;
6914 fopen_s(&file, "Results.csv", "w");
6915 assert(file != NULL);
6916
6917 WriteMainTestResultHeader(file);
6918 PerformMainTests(file);
6919 //PerformCustomMainTest(file);
6920
6921 WritePoolTestResultHeader(file);
6922 PerformPoolTests(file);
6923 //PerformCustomPoolTest(file);
6924
6925 fclose(file);
6926
6927 wprintf(L"Done, all PASSED.\n");
6928 }
6929
6930 #endif // #ifdef _WIN32
6931