• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2018 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "src/gpu/vk/vulkanmemoryallocator/VulkanAMDMemoryAllocator.h"
9 
10 #include "include/gpu/vk/VulkanBackendContext.h"
11 #include "include/gpu/vk/VulkanExtensions.h"
12 #include "include/private/base/SkAssert.h"
13 #include "include/private/base/SkTo.h"
14 #include "src/core/SkTraceEvent.h"
15 #include "src/gpu/GpuTypesPriv.h"
16 #include "src/gpu/vk/VulkanInterface.h"
17 #include "src/gpu/vk/VulkanUtilsPriv.h"
18 #include "src/gpu/vk/vulkanmemoryallocator/VulkanMemoryAllocatorPriv.h"
19 #include "src/base/SkUtils.h"
20 
21 #include <algorithm>
22 #include <cstring>
23 
24 namespace skgpu {
25 
Make(VkInstance instance,VkPhysicalDevice physicalDevice,VkDevice device,uint32_t physicalDeviceVersion,const VulkanExtensions * extensions,const VulkanInterface * interface,ThreadSafe threadSafe,std::optional<VkDeviceSize> blockSize,bool cacheFlag,size_t maxBlockCount)26 sk_sp<VulkanMemoryAllocator> VulkanAMDMemoryAllocator::Make(VkInstance instance,
27                                                             VkPhysicalDevice physicalDevice,
28                                                             VkDevice device,
29                                                             uint32_t physicalDeviceVersion,
30                                                             const VulkanExtensions* extensions,
31                                                             const VulkanInterface* interface,
32                                                             ThreadSafe threadSafe,
33                                                             std::optional<VkDeviceSize> blockSize,
34                                                             bool cacheFlag,
35                                                             size_t maxBlockCount) {
36 #define SKGPU_COPY_FUNCTION(NAME) functions.vk##NAME = interface->fFunctions.f##NAME
37 #define SKGPU_COPY_FUNCTION_KHR(NAME) functions.vk##NAME##KHR = interface->fFunctions.f##NAME
38 
39     VmaVulkanFunctions functions;
40     // We should be setting all the required functions (at least through vulkan 1.1), but this is
41     // just extra belt and suspenders to make sure there isn't unitialized values here.
42     std::memset(&functions, 0, sizeof(VmaVulkanFunctions));
43 
44     // We don't use dynamic function getting in the allocator so we set the getProc functions to
45     // null.
46     functions.vkGetInstanceProcAddr = nullptr;
47     functions.vkGetDeviceProcAddr = nullptr;
48     SKGPU_COPY_FUNCTION(GetPhysicalDeviceProperties);
49     SKGPU_COPY_FUNCTION(GetPhysicalDeviceMemoryProperties);
50     SKGPU_COPY_FUNCTION(AllocateMemory);
51     SKGPU_COPY_FUNCTION(FreeMemory);
52     SKGPU_COPY_FUNCTION(MapMemory);
53     SKGPU_COPY_FUNCTION(UnmapMemory);
54     SKGPU_COPY_FUNCTION(FlushMappedMemoryRanges);
55     SKGPU_COPY_FUNCTION(InvalidateMappedMemoryRanges);
56     SKGPU_COPY_FUNCTION(BindBufferMemory);
57     SKGPU_COPY_FUNCTION(BindImageMemory);
58     SKGPU_COPY_FUNCTION(GetBufferMemoryRequirements);
59     SKGPU_COPY_FUNCTION(GetImageMemoryRequirements);
60     SKGPU_COPY_FUNCTION(CreateBuffer);
61     SKGPU_COPY_FUNCTION(DestroyBuffer);
62     SKGPU_COPY_FUNCTION(CreateImage);
63     SKGPU_COPY_FUNCTION(DestroyImage);
64     SKGPU_COPY_FUNCTION(CmdCopyBuffer);
65     SKGPU_COPY_FUNCTION_KHR(GetBufferMemoryRequirements2);
66     SKGPU_COPY_FUNCTION_KHR(GetImageMemoryRequirements2);
67     SKGPU_COPY_FUNCTION_KHR(BindBufferMemory2);
68     SKGPU_COPY_FUNCTION_KHR(BindImageMemory2);
69     SKGPU_COPY_FUNCTION_KHR(GetPhysicalDeviceMemoryProperties2);
70 
71     VmaAllocatorCreateInfo info;
72     info.flags = 0;
73     if (threadSafe == ThreadSafe::kNo) {
74         info.flags |= VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT;
75     }
76     if (physicalDeviceVersion >= VK_MAKE_VERSION(1, 1, 0) ||
77         (extensions->hasExtension(VK_KHR_DEDICATED_ALLOCATION_EXTENSION_NAME, 1) &&
78          extensions->hasExtension(VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME, 1))) {
79         info.flags |= VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT;
80     }
81 
82     info.physicalDevice = physicalDevice;
83     info.device = device;
84     // 4MB was picked for the size here by looking at memory usage of Android apps and runs of DM.
85     // It seems to be a good compromise of not wasting unused allocated space and not making too
86     // many small allocations. The AMD allocator will start making blocks at 1/8 the max size and
87     // builds up block size as needed before capping at the max set here.
88     if (cacheFlag) {
89         info.preferredLargeHeapBlockSize = SkGetVmaBlockSizeMB() * 1024 * 1024; // 1024 = 1K
90     } else {
91         info.preferredLargeHeapBlockSize = blockSize.value_or(4 * 1024 * 1024);
92     }
93     info.maxBlockCount = maxBlockCount;
94     info.pAllocationCallbacks = nullptr;
95     info.pDeviceMemoryCallbacks = nullptr;
96     info.pHeapSizeLimit = nullptr;
97     info.pVulkanFunctions = &functions;
98     info.instance = instance;
99     // TODO: Update our interface and headers to support vulkan 1.3 and add in the new required
100     // functions for 1.3 that the allocator needs. Until then we just clamp the version to 1.1.
101     info.vulkanApiVersion = std::min(physicalDeviceVersion, VK_MAKE_VERSION(1, 1, 0));
102     info.pTypeExternalMemoryHandleTypes = nullptr;
103 
104     VmaAllocator allocator;
105     vmaCreateAllocator(&info, &allocator);
106 
107     return sk_sp<VulkanAMDMemoryAllocator>(new VulkanAMDMemoryAllocator(allocator));
108 }
109 
// Takes ownership of `allocator`; it is destroyed in the destructor via vmaDestroyAllocator.
VulkanAMDMemoryAllocator::VulkanAMDMemoryAllocator(VmaAllocator allocator)
        : fAllocator(allocator) {}
112 
// Tears down the owned VMA allocator and clears the handle so any accidental
// use-after-destroy is detectable as VK_NULL_HANDLE rather than a dangling pointer.
VulkanAMDMemoryAllocator::~VulkanAMDMemoryAllocator() {
    vmaDestroyAllocator(fAllocator);
    fAllocator = VK_NULL_HANDLE;
}
117 
allocateImageMemory(VkImage image,uint32_t allocationPropertyFlags,skgpu::VulkanBackendMemory * backendMemory)118 VkResult VulkanAMDMemoryAllocator::allocateImageMemory(VkImage image,
119                                                        uint32_t allocationPropertyFlags,
120                                                        skgpu::VulkanBackendMemory* backendMemory) {
121     TRACE_EVENT0_ALWAYS("skia.gpu", TRACE_FUNC);
122     VmaAllocationCreateInfo info;
123     info.flags = 0;
124     info.usage = VMA_MEMORY_USAGE_UNKNOWN;
125     info.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
126     info.preferredFlags = 0;
127     info.memoryTypeBits = 0;
128     info.pool = VK_NULL_HANDLE;
129     info.pUserData = nullptr;
130 
131     if (kDedicatedAllocation_AllocationPropertyFlag & allocationPropertyFlags) {
132         info.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
133     }
134     if (kLazyAllocation_AllocationPropertyFlag & allocationPropertyFlags) {
135         info.requiredFlags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT;
136     }
137     if (kProtected_AllocationPropertyFlag & allocationPropertyFlags) {
138         info.requiredFlags |= VK_MEMORY_PROPERTY_PROTECTED_BIT;
139     }
140 
141     VmaAllocation allocation;
142     VkResult result = vmaAllocateMemoryForImage(fAllocator, image, &info, &allocation, nullptr);
143     if (VK_SUCCESS == result) {
144         *backendMemory = (VulkanBackendMemory)allocation;
145     }
146     return result;
147 }
148 
allocateBufferMemory(VkBuffer buffer,BufferUsage usage,uint32_t allocationPropertyFlags,skgpu::VulkanBackendMemory * backendMemory)149 VkResult VulkanAMDMemoryAllocator::allocateBufferMemory(VkBuffer buffer,
150                                                         BufferUsage usage,
151                                                         uint32_t allocationPropertyFlags,
152                                                         skgpu::VulkanBackendMemory* backendMemory) {
153     TRACE_EVENT0("skia.gpu", TRACE_FUNC);
154     VmaAllocationCreateInfo info;
155     info.flags = 0;
156     info.usage = VMA_MEMORY_USAGE_UNKNOWN;
157     info.memoryTypeBits = 0;
158     info.pool = VK_NULL_HANDLE;
159     info.pUserData = nullptr;
160 
161     switch (usage) {
162         case BufferUsage::kGpuOnly:
163             info.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
164             info.preferredFlags = 0;
165             break;
166         case BufferUsage::kCpuWritesGpuReads:
167             // When doing cpu writes and gpu reads the general rule of thumb is to use coherent
168             // memory. Though this depends on the fact that we are not doing any cpu reads and the
169             // cpu writes are sequential. For sparse writes we'd want cpu cached memory, however we
170             // don't do these types of writes in Skia.
171             //
172             // TODO: In the future there may be times where specific types of memory could benefit
173             // from a coherent and cached memory. Typically these allow for the gpu to read cpu
174             // writes from the cache without needing to flush the writes throughout the cache. The
175             // reverse is not true and GPU writes tend to invalidate the cache regardless. Also
176             // these gpu cache read access are typically lower bandwidth than non-cached memory.
177             // For now Skia doesn't really have a need or want of this type of memory. But if we
178             // ever do we could pass in an AllocationPropertyFlag that requests the cached property.
179             info.requiredFlags =
180                     VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
181             info.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
182             break;
183         case BufferUsage::kTransfersFromCpuToGpu:
184             info.requiredFlags =
185                     VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
186             info.preferredFlags = 0;
187             break;
188         case BufferUsage::kTransfersFromGpuToCpu:
189             info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
190             info.preferredFlags = VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
191             break;
192     }
193 
194     if (kDedicatedAllocation_AllocationPropertyFlag & allocationPropertyFlags) {
195         info.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
196     }
197     if ((kLazyAllocation_AllocationPropertyFlag & allocationPropertyFlags) &&
198         BufferUsage::kGpuOnly == usage) {
199         info.preferredFlags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT;
200     }
201 
202     if (kPersistentlyMapped_AllocationPropertyFlag & allocationPropertyFlags) {
203         SkASSERT(BufferUsage::kGpuOnly != usage);
204         info.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT;
205     }
206 
207     if (kProtected_AllocationPropertyFlag & allocationPropertyFlags) {
208         info.requiredFlags |= VK_MEMORY_PROPERTY_PROTECTED_BIT;
209     }
210 
211     VmaAllocation allocation;
212     VkResult result = vmaAllocateMemoryForBuffer(fAllocator, buffer, &info, &allocation, nullptr);
213     if (VK_SUCCESS == result) {
214         *backendMemory = (VulkanBackendMemory)allocation;
215     }
216 
217     return result;
218 }
219 
freeMemory(const VulkanBackendMemory & memoryHandle)220 void VulkanAMDMemoryAllocator::freeMemory(const VulkanBackendMemory& memoryHandle) {
221     TRACE_EVENT0("skia.gpu", TRACE_FUNC);
222     const VmaAllocation allocation = (VmaAllocation)memoryHandle;
223     vmaFreeMemory(fAllocator, allocation);
224 }
225 
getAllocInfo(const VulkanBackendMemory & memoryHandle,VulkanAlloc * alloc) const226 void VulkanAMDMemoryAllocator::getAllocInfo(const VulkanBackendMemory& memoryHandle,
227                                             VulkanAlloc* alloc) const {
228     const VmaAllocation allocation = (VmaAllocation)memoryHandle;
229     VmaAllocationInfo vmaInfo;
230     vmaGetAllocationInfo(fAllocator, allocation, &vmaInfo);
231 
232     VkMemoryPropertyFlags memFlags;
233     vmaGetMemoryTypeProperties(fAllocator, vmaInfo.memoryType, &memFlags);
234 
235     uint32_t flags = 0;
236     if (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT & memFlags) {
237         flags |= VulkanAlloc::kMappable_Flag;
238     }
239     if (!SkToBool(VK_MEMORY_PROPERTY_HOST_COHERENT_BIT & memFlags)) {
240         flags |= VulkanAlloc::kNoncoherent_Flag;
241     }
242     if (VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT & memFlags) {
243         flags |= VulkanAlloc::kLazilyAllocated_Flag;
244     }
245 
246     alloc->fMemory        = vmaInfo.deviceMemory;
247     alloc->fOffset        = vmaInfo.offset;
248     alloc->fSize          = vmaInfo.size;
249     alloc->fFlags         = flags;
250     alloc->fBackendMemory = memoryHandle;
251     alloc->fAllocator     = (VulkanMemoryAllocator *)this;
252 }
253 
mapMemory(const VulkanBackendMemory & memoryHandle,void ** data)254 VkResult VulkanAMDMemoryAllocator::mapMemory(const VulkanBackendMemory& memoryHandle, void** data) {
255     TRACE_EVENT0("skia.gpu", TRACE_FUNC);
256     const VmaAllocation allocation = (VmaAllocation)memoryHandle;
257     return vmaMapMemory(fAllocator, allocation, data);
258 }
259 
unmapMemory(const VulkanBackendMemory & memoryHandle)260 void VulkanAMDMemoryAllocator::unmapMemory(const VulkanBackendMemory& memoryHandle) {
261     TRACE_EVENT0("skia.gpu", TRACE_FUNC);
262     const VmaAllocation allocation = (VmaAllocation)memoryHandle;
263     vmaUnmapMemory(fAllocator, allocation);
264 }
265 
flushMemory(const VulkanBackendMemory & memoryHandle,VkDeviceSize offset,VkDeviceSize size)266 VkResult VulkanAMDMemoryAllocator::flushMemory(const VulkanBackendMemory& memoryHandle,
267                                                VkDeviceSize offset,
268                                                VkDeviceSize size) {
269     TRACE_EVENT0("skia.gpu", TRACE_FUNC);
270     const VmaAllocation allocation = (VmaAllocation)memoryHandle;
271     return vmaFlushAllocation(fAllocator, allocation, offset, size);
272 }
273 
invalidateMemory(const VulkanBackendMemory & memoryHandle,VkDeviceSize offset,VkDeviceSize size)274 VkResult VulkanAMDMemoryAllocator::invalidateMemory(const VulkanBackendMemory& memoryHandle,
275                                                     VkDeviceSize offset,
276                                                     VkDeviceSize size) {
277     TRACE_EVENT0("skia.gpu", TRACE_FUNC);
278     const VmaAllocation allocation = (VmaAllocation)memoryHandle;
279     return vmaInvalidateAllocation(fAllocator, allocation, offset, size);
280 }
281 
totalAllocatedAndUsedMemory() const282 std::pair<uint64_t, uint64_t> VulkanAMDMemoryAllocator::totalAllocatedAndUsedMemory() const {
283     VmaTotalStatistics stats;
284     vmaCalculateStatistics(fAllocator, &stats);
285     return {stats.total.statistics.blockBytes, stats.total.statistics.allocationBytes};
286 }
287 
dumpVmaStats(SkString * out,const char * sep) const288 void VulkanAMDMemoryAllocator::dumpVmaStats(SkString *out, const char *sep) const
289 {
290     constexpr int MB = 1024 * 1024;
291     if (out == nullptr || sep == nullptr) {
292         return;
293     }
294     bool flag = SkGetMemoryOptimizedFlag();
295     out->appendf("vma_flag: %d %s", flag, sep);
296     if (!flag) {
297         return;
298     }
299     VmaTotalStatistics stats;
300     vmaCalculateStatistics(fAllocator, &stats);
301     uint64_t used = stats.total.statistics.allocationBytes;
302     uint64_t total = stats.total.statistics.blockBytes;
303     uint64_t free = total - used;
304     auto maxBlockCount = SkGetVmaBlockCountMax();
305     out->appendf("vma_free: %llu (%d MB)%s", free, free / MB, sep);
306     out->appendf("vma_used: %llu (%d MB)%s", used, used / MB, sep);
307     out->appendf("vma_total: %llu (%d MB)%s", total, total / MB, sep);
308     out->appendf("vma_cacheBlockSize: %d MB%s", SkGetVmaBlockSizeMB(), sep);
309     out->appendf("vma_cacheBlockCount: %llu / %llu%s",
310         stats.total.statistics.blockCount <= maxBlockCount ? stats.total.statistics.blockCount : maxBlockCount,
311         maxBlockCount, sep);
312     out->appendf("vma_dedicatedBlockCount: %llu%s",
313         stats.total.statistics.blockCount <= maxBlockCount ? 0 : stats.total.statistics.blockCount - maxBlockCount,
314         sep);
315     out->appendf("vma_allocationCount: %u%s", stats.total.statistics.allocationCount, sep);
316     out->appendf("vma_unusedRangeCount: %u%s", stats.total.unusedRangeCount, sep);
317     out->appendf("vma_allocationSize: %llu / %llu%s",
318         stats.total.allocationSizeMin, stats.total.allocationSizeMax, sep);
319     out->appendf("vma_unusedRangeSize: %llu / %llu%s",
320         stats.total.unusedRangeSizeMin, stats.total.unusedRangeSizeMax, sep);
321 }
322 
vmaDefragment()323 void VulkanAMDMemoryAllocator::vmaDefragment()
324 {
325     bool flag = SkGetVmaDefragmentOn();
326     if (!flag) {
327         return;
328     }
329     bool debugFlag = SkGetVmaDebugFlag();
330     if (!debugFlag) {
331         vmaFreeEmptyBlock(fAllocator);
332         return;
333     }
334 
335     // dfx
336     SkString debugInfo;
337     dumpVmaStats(&debugInfo);
338     SkDebugf("GrVkAMDMemoryAllocator::vmaDefragment() before: %s",
339         debugInfo.c_str());
340 #ifdef SKIA_OHOS_FOR_OHOS_TRACE
341     HITRACE_OHOS_NAME_FMT_ALWAYS("GrVkAMDMemoryAllocator::vmaDefragment() before: %s", debugInfo.c_str());
342 #endif
343 
344     {
345         vmaFreeEmptyBlock(fAllocator);
346     }
347 
348     // dfx
349     debugInfo = "";
350     dumpVmaStats(&debugInfo);
351     SkDebugf("GrVkAMDMemoryAllocator::vmaDefragment() after: %s",
352         debugInfo.c_str());
353 #ifdef SKIA_OHOS_FOR_OHOS_TRACE
354     HITRACE_OHOS_NAME_FMT_ALWAYS("GrVkAMDMemoryAllocator::vmaDefragment() after: %s", debugInfo.c_str());
355 #endif
356 }
357 
358 namespace VulkanMemoryAllocators {
Make(const skgpu::VulkanBackendContext & backendContext,ThreadSafe threadSafe,std::optional<VkDeviceSize> blockSize,size_t maxBlockCount)359 sk_sp<VulkanMemoryAllocator> Make(const skgpu::VulkanBackendContext& backendContext,
360                                   ThreadSafe threadSafe,
361                                   std::optional<VkDeviceSize> blockSize,
362                                   size_t maxBlockCount) {
363     SkASSERT(backendContext.fInstance != VK_NULL_HANDLE);
364     SkASSERT(backendContext.fPhysicalDevice != VK_NULL_HANDLE);
365     SkASSERT(backendContext.fDevice != VK_NULL_HANDLE);
366     SkASSERT(backendContext.fQueue != VK_NULL_HANDLE);
367     SkASSERT(backendContext.fGetProc);
368 
369     skgpu::VulkanExtensions ext;
370     const skgpu::VulkanExtensions* extensions = &ext;
371     if (backendContext.fVkExtensions) {
372         extensions = backendContext.fVkExtensions;
373     }
374 
375     // It is a bit superfluous to create a VulkanInterface here just to create a memory allocator
376     // given that Ganesh and Graphite will create their own. However, there's not a clean way to
377     // have the interface created here persist for potential re-use without refactoring
378     // VulkanMemoryAllocator to hold onto its interface as opposed to "borrowing" it.
379     // Such a refactor could get messy without much actual benefit since interface creation is
380     // not too expensive and this cost is only paid once during initialization.
381     uint32_t physDevVersion = 0;
382     sk_sp<const skgpu::VulkanInterface> interface =
383             skgpu::MakeInterface(backendContext, extensions, &physDevVersion, nullptr);
384     if (!interface) {
385         return nullptr;
386     }
387 
388     return VulkanAMDMemoryAllocator::Make(backendContext.fInstance,
389                                           backendContext.fPhysicalDevice,
390                                           backendContext.fDevice,
391                                           physDevVersion,
392                                           extensions,
393                                           interface.get(),
394                                           threadSafe,
395                                           blockSize,
396                                           maxBlockCount);
397 }
398 
399 }  // namespace VulkanMemoryAllocators
400 }  // namespace skgpu
401