/*
 * Copyright 2018 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/gpu/vk/vulkanmemoryallocator/VulkanAMDMemoryAllocator.h"

#include "include/gpu/vk/VulkanBackendContext.h"
#include "include/gpu/vk/VulkanExtensions.h"
#include "include/private/base/SkAssert.h"
#include "include/private/base/SkTo.h"
#include "src/base/SkUtils.h"
#include "src/core/SkTraceEvent.h"
#include "src/gpu/GpuTypesPriv.h"
#include "src/gpu/vk/VulkanInterface.h"
#include "src/gpu/vk/VulkanUtilsPriv.h"
#include "src/gpu/vk/vulkanmemoryallocator/VulkanMemoryAllocatorPriv.h"

#include <algorithm>
#include <cstring>

namespace skgpu {

sk_sp<VulkanMemoryAllocator> VulkanAMDMemoryAllocator::Make(VkInstance instance,
                                                            VkPhysicalDevice physicalDevice,
                                                            VkDevice device,
                                                            uint32_t physicalDeviceVersion,
                                                            const VulkanExtensions* extensions,
                                                            const VulkanInterface* interface,
                                                            ThreadSafe threadSafe,
                                                            std::optional<VkDeviceSize> blockSize,
                                                            bool cacheFlag,
                                                            size_t maxBlockCount) {
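// The helper macros below copy the Vulkan entry points that Skia has already resolved on its
// VulkanInterface into the table of function pointers that VMA will call.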
#define SKGPU_COPY_FUNCTION(NAME) functions.vk##NAME = interface->fFunctions.f##NAME
#define SKGPU_COPY_FUNCTION_KHR(NAME) functions.vk##NAME##KHR = interface->fFunctions.f##NAME

    VmaVulkanFunctions functions;
    // We should be setting all the required functions (at least through Vulkan 1.1), but this is
    // just extra belt and suspenders to make sure there aren't uninitialized values here.
    std::memset(&functions, 0, sizeof(VmaVulkanFunctions));

    // We don't use dynamic function getting in the allocator so we set the getProc functions to
    // null.
    functions.vkGetInstanceProcAddr = nullptr;
    functions.vkGetDeviceProcAddr = nullptr;
    SKGPU_COPY_FUNCTION(GetPhysicalDeviceProperties);
    SKGPU_COPY_FUNCTION(GetPhysicalDeviceMemoryProperties);
    SKGPU_COPY_FUNCTION(AllocateMemory);
    SKGPU_COPY_FUNCTION(FreeMemory);
    SKGPU_COPY_FUNCTION(MapMemory);
    SKGPU_COPY_FUNCTION(UnmapMemory);
    SKGPU_COPY_FUNCTION(FlushMappedMemoryRanges);
    SKGPU_COPY_FUNCTION(InvalidateMappedMemoryRanges);
    SKGPU_COPY_FUNCTION(BindBufferMemory);
    SKGPU_COPY_FUNCTION(BindImageMemory);
    SKGPU_COPY_FUNCTION(GetBufferMemoryRequirements);
    SKGPU_COPY_FUNCTION(GetImageMemoryRequirements);
    SKGPU_COPY_FUNCTION(CreateBuffer);
    SKGPU_COPY_FUNCTION(DestroyBuffer);
    SKGPU_COPY_FUNCTION(CreateImage);
    SKGPU_COPY_FUNCTION(DestroyImage);
    SKGPU_COPY_FUNCTION(CmdCopyBuffer);
    SKGPU_COPY_FUNCTION_KHR(GetBufferMemoryRequirements2);
    SKGPU_COPY_FUNCTION_KHR(GetImageMemoryRequirements2);
    SKGPU_COPY_FUNCTION_KHR(BindBufferMemory2);
    SKGPU_COPY_FUNCTION_KHR(BindImageMemory2);
    SKGPU_COPY_FUNCTION_KHR(GetPhysicalDeviceMemoryProperties2);

    VmaAllocatorCreateInfo info;
    info.flags = 0;
    if (threadSafe == ThreadSafe::kNo) {
        info.flags |= VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT;
    }
    if (physicalDeviceVersion >= VK_MAKE_VERSION(1, 1, 0) ||
        (extensions->hasExtension(VK_KHR_DEDICATED_ALLOCATION_EXTENSION_NAME, 1) &&
         extensions->hasExtension(VK_KHR_GET_MEMORY_REQUIREMENTS_2_EXTENSION_NAME, 1))) {
        info.flags |= VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT;
    }

    info.physicalDevice = physicalDevice;
    info.device = device;
    // 4MB was picked for the size here by looking at memory usage of Android apps and runs of DM.
    // It seems to be a good compromise between not wasting unused allocated space and not making
    // too many small allocations. The AMD allocator will start making blocks at 1/8 the max size
    // and build up the block size as needed before capping at the max set here.
    if (cacheFlag) {
        info.preferredLargeHeapBlockSize = SkGetVmaBlockSizeMB() * 1024 * 1024;  // MB -> bytes
    } else {
        info.preferredLargeHeapBlockSize = blockSize.value_or(4 * 1024 * 1024);
    }
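    // For example, with the default 4MB cap the allocator would begin with blocks around 512KB
    // (1/8 of the cap) and grow them toward 4MB as demand increases; the exact growth steps are
    // left to the allocator.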
    info.maxBlockCount = maxBlockCount;
    info.pAllocationCallbacks = nullptr;
    info.pDeviceMemoryCallbacks = nullptr;
    info.pHeapSizeLimit = nullptr;
    info.pVulkanFunctions = &functions;
    info.instance = instance;
    // TODO: Update our interface and headers to support Vulkan 1.3 and add in the new required
    // functions for 1.3 that the allocator needs. Until then we just clamp the version to 1.1.
    info.vulkanApiVersion = std::min(physicalDeviceVersion, VK_MAKE_VERSION(1, 1, 0));
    info.pTypeExternalMemoryHandleTypes = nullptr;

    VmaAllocator allocator;
    vmaCreateAllocator(&info, &allocator);

    return sk_sp<VulkanAMDMemoryAllocator>(new VulkanAMDMemoryAllocator(allocator));
}

VulkanAMDMemoryAllocator::VulkanAMDMemoryAllocator(VmaAllocator allocator)
        : fAllocator(allocator) {}

VulkanAMDMemoryAllocator::~VulkanAMDMemoryAllocator() {
    vmaDestroyAllocator(fAllocator);
    fAllocator = VK_NULL_HANDLE;
}

VkResult VulkanAMDMemoryAllocator::allocateImageMemory(VkImage image,
                                                       uint32_t allocationPropertyFlags,
                                                       skgpu::VulkanBackendMemory* backendMemory) {
    TRACE_EVENT0_ALWAYS("skia.gpu", TRACE_FUNC);
    VmaAllocationCreateInfo info;
    info.flags = 0;
    info.usage = VMA_MEMORY_USAGE_UNKNOWN;
    info.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
    info.preferredFlags = 0;
    info.memoryTypeBits = 0;
    info.pool = VK_NULL_HANDLE;
    info.pUserData = nullptr;

    if (kDedicatedAllocation_AllocationPropertyFlag & allocationPropertyFlags) {
        info.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
    }
    if (kLazyAllocation_AllocationPropertyFlag & allocationPropertyFlags) {
        info.requiredFlags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT;
    }
    if (kProtected_AllocationPropertyFlag & allocationPropertyFlags) {
        info.requiredFlags |= VK_MEMORY_PROPERTY_PROTECTED_BIT;
    }

    VmaAllocation allocation;
    VkResult result = vmaAllocateMemoryForImage(fAllocator, image, &info, &allocation, nullptr);
    if (VK_SUCCESS == result) {
        *backendMemory = (VulkanBackendMemory)allocation;
    }
    return result;
}

VkResult VulkanAMDMemoryAllocator::allocateBufferMemory(VkBuffer buffer,
                                                        BufferUsage usage,
                                                        uint32_t allocationPropertyFlags,
                                                        skgpu::VulkanBackendMemory* backendMemory) {
    TRACE_EVENT0("skia.gpu", TRACE_FUNC);
    VmaAllocationCreateInfo info;
    info.flags = 0;
    info.usage = VMA_MEMORY_USAGE_UNKNOWN;
    info.memoryTypeBits = 0;
    info.pool = VK_NULL_HANDLE;
    info.pUserData = nullptr;

    switch (usage) {
        case BufferUsage::kGpuOnly:
            info.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
            info.preferredFlags = 0;
            break;
        case BufferUsage::kCpuWritesGpuReads:
            // When doing cpu writes and gpu reads the general rule of thumb is to use coherent
            // memory. Though this depends on the fact that we are not doing any cpu reads and the
            // cpu writes are sequential. For sparse writes we'd want cpu cached memory, however we
            // don't do these types of writes in Skia.
            //
            // TODO: In the future there may be times where specific types of memory could benefit
            // from being both coherent and cached. Typically such memory allows the gpu to read
            // cpu writes from the cache without needing to flush the writes out of the cache. The
            // reverse is not true and gpu writes tend to invalidate the cache regardless. Also
            // these cached gpu reads are typically lower bandwidth than reads from non-cached
            // memory. For now Skia doesn't really need or want this type of memory. But if we
            // ever do we could pass in an AllocationPropertyFlag that requests the cached property.
            info.requiredFlags =
                    VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
            info.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
            break;
        case BufferUsage::kTransfersFromCpuToGpu:
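            // Staging/upload buffers: the memory must be host visible so the cpu can map and
            // write it, and coherent memory makes those sequential cpu writes visible to the gpu
            // without explicit flushes.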
            info.requiredFlags =
                    VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
            info.preferredFlags = 0;
            break;
        case BufferUsage::kTransfersFromGpuToCpu:
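            // Readback buffers: host visible is required so the cpu can map the results, and
            // cached memory is preferred so repeated cpu reads are fast. If the memory ends up
            // non-coherent, callers must invalidate before reading (see invalidateMemory below).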
            info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
            info.preferredFlags = VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
            break;
    }

    if (kDedicatedAllocation_AllocationPropertyFlag & allocationPropertyFlags) {
        info.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
    }
    if ((kLazyAllocation_AllocationPropertyFlag & allocationPropertyFlags) &&
        BufferUsage::kGpuOnly == usage) {
        info.preferredFlags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT;
    }

    if (kPersistentlyMapped_AllocationPropertyFlag & allocationPropertyFlags) {
        SkASSERT(BufferUsage::kGpuOnly != usage);
        info.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT;
    }

    if (kProtected_AllocationPropertyFlag & allocationPropertyFlags) {
        info.requiredFlags |= VK_MEMORY_PROPERTY_PROTECTED_BIT;
    }

    VmaAllocation allocation;
    VkResult result = vmaAllocateMemoryForBuffer(fAllocator, buffer, &info, &allocation, nullptr);
    if (VK_SUCCESS == result) {
        *backendMemory = (VulkanBackendMemory)allocation;
    }

    return result;
}

void VulkanAMDMemoryAllocator::freeMemory(const VulkanBackendMemory& memoryHandle) {
    TRACE_EVENT0("skia.gpu", TRACE_FUNC);
    const VmaAllocation allocation = (VmaAllocation)memoryHandle;
    vmaFreeMemory(fAllocator, allocation);
}

void VulkanAMDMemoryAllocator::getAllocInfo(const VulkanBackendMemory& memoryHandle,
                                            VulkanAlloc* alloc) const {
    const VmaAllocation allocation = (VmaAllocation)memoryHandle;
    VmaAllocationInfo vmaInfo;
    vmaGetAllocationInfo(fAllocator, allocation, &vmaInfo);

    VkMemoryPropertyFlags memFlags;
    vmaGetMemoryTypeProperties(fAllocator, vmaInfo.memoryType, &memFlags);

    uint32_t flags = 0;
    if (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT & memFlags) {
        flags |= VulkanAlloc::kMappable_Flag;
    }
    if (!SkToBool(VK_MEMORY_PROPERTY_HOST_COHERENT_BIT & memFlags)) {
        flags |= VulkanAlloc::kNoncoherent_Flag;
    }
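    // kNoncoherent_Flag tells callers that mapped ranges need explicit flushMemory() /
    // invalidateMemory() calls (vmaFlushAllocation / vmaInvalidateAllocation below) to keep the
    // cpu and gpu views of the allocation in sync.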
    if (VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT & memFlags) {
        flags |= VulkanAlloc::kLazilyAllocated_Flag;
    }

    alloc->fMemory = vmaInfo.deviceMemory;
    alloc->fOffset = vmaInfo.offset;
    alloc->fSize = vmaInfo.size;
    alloc->fFlags = flags;
    alloc->fBackendMemory = memoryHandle;
    alloc->fAllocator = (VulkanMemoryAllocator*)this;
}

VkResult VulkanAMDMemoryAllocator::mapMemory(const VulkanBackendMemory& memoryHandle, void** data) {
    TRACE_EVENT0("skia.gpu", TRACE_FUNC);
    const VmaAllocation allocation = (VmaAllocation)memoryHandle;
    return vmaMapMemory(fAllocator, allocation, data);
}

void VulkanAMDMemoryAllocator::unmapMemory(const VulkanBackendMemory& memoryHandle) {
    TRACE_EVENT0("skia.gpu", TRACE_FUNC);
    const VmaAllocation allocation = (VmaAllocation)memoryHandle;
    vmaUnmapMemory(fAllocator, allocation);
}


VkResult VulkanAMDMemoryAllocator::flushMemory(const VulkanBackendMemory& memoryHandle,
                                               VkDeviceSize offset,
                                               VkDeviceSize size) {
    TRACE_EVENT0("skia.gpu", TRACE_FUNC);
    const VmaAllocation allocation = (VmaAllocation)memoryHandle;
    return vmaFlushAllocation(fAllocator, allocation, offset, size);
}

VkResult VulkanAMDMemoryAllocator::invalidateMemory(const VulkanBackendMemory& memoryHandle,
                                                    VkDeviceSize offset,
                                                    VkDeviceSize size) {
    TRACE_EVENT0("skia.gpu", TRACE_FUNC);
    const VmaAllocation allocation = (VmaAllocation)memoryHandle;
    return vmaInvalidateAllocation(fAllocator, allocation, offset, size);
}

std::pair<uint64_t, uint64_t> VulkanAMDMemoryAllocator::totalAllocatedAndUsedMemory() const {
    VmaTotalStatistics stats;
    vmaCalculateStatistics(fAllocator, &stats);
    return {stats.total.statistics.blockBytes, stats.total.statistics.allocationBytes};
}

void VulkanAMDMemoryAllocator::dumpVmaStats(SkString* out, const char* sep) const {
    constexpr int MB = 1024 * 1024;
    if (out == nullptr || sep == nullptr) {
        return;
    }
    bool flag = SkGetMemoryOptimizedFlag();
    out->appendf("vma_flag: %d %s", flag, sep);
    if (!flag) {
        return;
    }
    VmaTotalStatistics stats;
    vmaCalculateStatistics(fAllocator, &stats);
    uint64_t used = stats.total.statistics.allocationBytes;
    uint64_t total = stats.total.statistics.blockBytes;
    uint64_t free = total - used;
    uint64_t blockCount = stats.total.statistics.blockCount;
    uint64_t maxBlockCount = SkGetVmaBlockCountMax();
    // Cast 64-bit values to unsigned long long so they always match the %llu format specifier.
    out->appendf("vma_free: %llu (%llu MB)%s", (unsigned long long)free,
                 (unsigned long long)(free / MB), sep);
    out->appendf("vma_used: %llu (%llu MB)%s", (unsigned long long)used,
                 (unsigned long long)(used / MB), sep);
    out->appendf("vma_total: %llu (%llu MB)%s", (unsigned long long)total,
                 (unsigned long long)(total / MB), sep);
    out->appendf("vma_cacheBlockSize: %d MB%s", SkGetVmaBlockSizeMB(), sep);
    out->appendf("vma_cacheBlockCount: %llu / %llu%s",
                 (unsigned long long)std::min(blockCount, maxBlockCount),
                 (unsigned long long)maxBlockCount, sep);
    out->appendf("vma_dedicatedBlockCount: %llu%s",
                 (unsigned long long)(blockCount <= maxBlockCount ? 0 : blockCount - maxBlockCount),
                 sep);
    out->appendf("vma_allocationCount: %u%s", stats.total.statistics.allocationCount, sep);
    out->appendf("vma_unusedRangeCount: %u%s", stats.total.unusedRangeCount, sep);
    out->appendf("vma_allocationSize: %llu / %llu%s",
                 (unsigned long long)stats.total.allocationSizeMin,
                 (unsigned long long)stats.total.allocationSizeMax, sep);
    out->appendf("vma_unusedRangeSize: %llu / %llu%s",
                 (unsigned long long)stats.total.unusedRangeSizeMin,
                 (unsigned long long)stats.total.unusedRangeSizeMax, sep);
}

void VulkanAMDMemoryAllocator::vmaDefragment() {
    bool flag = SkGetVmaDefragmentOn();
    if (!flag) {
        return;
    }
    bool debugFlag = SkGetVmaDebugFlag();
    if (!debugFlag) {
        vmaFreeEmptyBlock(fAllocator);
        return;
    }

    // dfx: dump the stats before and after so the effect of freeing empty blocks is visible.
    SkString debugInfo;
    dumpVmaStats(&debugInfo);
    SkDebugf("VulkanAMDMemoryAllocator::vmaDefragment() before: %s", debugInfo.c_str());
#ifdef SKIA_OHOS_FOR_OHOS_TRACE
    HITRACE_OHOS_NAME_FMT_ALWAYS("VulkanAMDMemoryAllocator::vmaDefragment() before: %s", debugInfo.c_str());
#endif

    vmaFreeEmptyBlock(fAllocator);

    debugInfo = "";
    dumpVmaStats(&debugInfo);
    SkDebugf("VulkanAMDMemoryAllocator::vmaDefragment() after: %s", debugInfo.c_str());
#ifdef SKIA_OHOS_FOR_OHOS_TRACE
    HITRACE_OHOS_NAME_FMT_ALWAYS("VulkanAMDMemoryAllocator::vmaDefragment() after: %s", debugInfo.c_str());
#endif
}


namespace VulkanMemoryAllocators {
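// Illustrative usage (a sketch; callers supply their own populated VulkanBackendContext):
//     sk_sp<VulkanMemoryAllocator> allocator = VulkanMemoryAllocators::Make(
//             backendContext, ThreadSafe::kNo, /*blockSize=*/std::nullopt,
//             /*maxBlockCount=*/SkGetVmaBlockCountMax());
// A null return means a VulkanInterface could not be created from the given context.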
sk_sp<VulkanMemoryAllocator> Make(const skgpu::VulkanBackendContext& backendContext,
                                  ThreadSafe threadSafe,
                                  std::optional<VkDeviceSize> blockSize,
                                  size_t maxBlockCount) {
    SkASSERT(backendContext.fInstance != VK_NULL_HANDLE);
    SkASSERT(backendContext.fPhysicalDevice != VK_NULL_HANDLE);
    SkASSERT(backendContext.fDevice != VK_NULL_HANDLE);
    SkASSERT(backendContext.fQueue != VK_NULL_HANDLE);
    SkASSERT(backendContext.fGetProc);

    skgpu::VulkanExtensions ext;
    const skgpu::VulkanExtensions* extensions = &ext;
    if (backendContext.fVkExtensions) {
        extensions = backendContext.fVkExtensions;
    }

    // It is a bit superfluous to create a VulkanInterface here just to create a memory allocator
    // given that Ganesh and Graphite will create their own. However, there's not a clean way to
    // have the interface created here persist for potential re-use without refactoring
    // VulkanMemoryAllocator to hold onto its interface as opposed to "borrowing" it.
    // Such a refactor could get messy without much actual benefit since interface creation is
    // not too expensive and this cost is only paid once during initialization.
    uint32_t physDevVersion = 0;
    sk_sp<const skgpu::VulkanInterface> interface =
            skgpu::MakeInterface(backendContext, extensions, &physDevVersion, nullptr);
    if (!interface) {
        return nullptr;
    }

    return VulkanAMDMemoryAllocator::Make(backendContext.fInstance,
                                          backendContext.fPhysicalDevice,
                                          backendContext.fDevice,
                                          physDevVersion,
                                          extensions,
                                          interface.get(),
                                          threadSafe,
                                          blockSize,
                                          maxBlockCount);
}

}  // namespace VulkanMemoryAllocators
}  // namespace skgpu