/*
 * Copyright 2018 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/gpu/vk/GrVkAMDMemoryAllocator.h"

#include "src/core/SkTraceEvent.h"
#include "src/gpu/vk/GrVkInterface.h"
#include "src/gpu/vk/GrVkMemory.h"
#include "src/gpu/vk/GrVkUtil.h"

GrVkAMDMemoryAllocator::GrVkAMDMemoryAllocator(VkPhysicalDevice physicalDevice,
                                               VkDevice device,
                                               sk_sp<const GrVkInterface> interface)
        : fAllocator(VK_NULL_HANDLE)
        , fInterface(std::move(interface))
        , fDevice(device) {
#define GR_COPY_FUNCTION(NAME) functions.vk##NAME = fInterface->fFunctions.f##NAME
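// For reference, each GR_COPY_FUNCTION(X) invocation below expands to
//     functions.vkX = fInterface->fFunctions.fX;
// e.g. GR_COPY_FUNCTION(AllocateMemory) copies fAllocateMemory from the
// GrVkInterface function table into the VmaVulkanFunctions slot vkAllocateMemory.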

    VmaVulkanFunctions functions;
    GR_COPY_FUNCTION(GetPhysicalDeviceProperties);
    GR_COPY_FUNCTION(GetPhysicalDeviceMemoryProperties);
    GR_COPY_FUNCTION(AllocateMemory);
    GR_COPY_FUNCTION(FreeMemory);
    GR_COPY_FUNCTION(MapMemory);
    GR_COPY_FUNCTION(UnmapMemory);
    GR_COPY_FUNCTION(BindBufferMemory);
    GR_COPY_FUNCTION(BindImageMemory);
    GR_COPY_FUNCTION(GetBufferMemoryRequirements);
    GR_COPY_FUNCTION(GetImageMemoryRequirements);
    GR_COPY_FUNCTION(CreateBuffer);
    GR_COPY_FUNCTION(DestroyBuffer);
    GR_COPY_FUNCTION(CreateImage);
    GR_COPY_FUNCTION(DestroyImage);

    // Skia currently doesn't support VK_KHR_dedicated_allocation.
    functions.vkGetBufferMemoryRequirements2KHR = nullptr;
    functions.vkGetImageMemoryRequirements2KHR = nullptr;

    VmaAllocatorCreateInfo info;
    info.flags = 0;
    info.physicalDevice = physicalDevice;
    info.device = device;
    // 4MB was picked for the size here by looking at memory usage of Android apps and runs of DM.
    // It seems to be a good compromise of not wasting unused allocated space and not making too
    // many small allocations. The AMD allocator will start making blocks at 1/8 the max size and
    // build up the block size as needed before capping at the max set here.
    info.preferredLargeHeapBlockSize = 4*1024*1024;
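    // For illustration: with a 4MB cap and a 1/8 starting size, block sizes
    // would grow 512KB -> 1MB -> 2MB -> 4MB (assuming each new block doubles,
    // which is how the VMA library builds up toward the preferred size).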
    info.pAllocationCallbacks = nullptr;
    info.pDeviceMemoryCallbacks = nullptr;
    info.frameInUseCount = 0;
    info.pHeapSizeLimit = nullptr;
    info.pVulkanFunctions = &functions;

    vmaCreateAllocator(&info, &fAllocator);
}

GrVkAMDMemoryAllocator::~GrVkAMDMemoryAllocator() {
    vmaDestroyAllocator(fAllocator);
    fAllocator = VK_NULL_HANDLE;
}
65 
allocateMemoryForImage(VkImage image,AllocationPropertyFlags flags,GrVkBackendMemory * backendMemory)66 bool GrVkAMDMemoryAllocator::allocateMemoryForImage(VkImage image, AllocationPropertyFlags flags,
67                                                     GrVkBackendMemory* backendMemory) {
68     TRACE_EVENT0("skia.gpu", TRACE_FUNC);
69     VmaAllocationCreateInfo info;
70     info.flags = 0;
71     info.usage = VMA_MEMORY_USAGE_UNKNOWN;
72     info.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
73     info.preferredFlags = 0;
74     info.memoryTypeBits = 0;
75     info.pool = VK_NULL_HANDLE;
76     info.pUserData = nullptr;
77 
    if (AllocationPropertyFlags::kDedicatedAllocation & flags) {
        info.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
    }

    if (AllocationPropertyFlags::kLazyAllocation & flags) {
        info.preferredFlags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT;
    }

    if (AllocationPropertyFlags::kProtected & flags) {
        info.requiredFlags |= VK_MEMORY_PROPERTY_PROTECTED_BIT;
    }

    VmaAllocation allocation;
    VkResult result = vmaAllocateMemoryForImage(fAllocator, image, &info, &allocation, nullptr);
    if (VK_SUCCESS != result) {
        return false;
    }
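    // The VmaAllocation handle is handed back to the caller as an opaque,
    // pointer-sized GrVkBackendMemory value; the other entry points below cast
    // it back to a VmaAllocation.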
    *backendMemory = (GrVkBackendMemory)allocation;
    return true;
}

bool GrVkAMDMemoryAllocator::allocateMemoryForBuffer(VkBuffer buffer, BufferUsage usage,
                                                     AllocationPropertyFlags flags,
                                                     GrVkBackendMemory* backendMemory) {
    TRACE_EVENT0("skia.gpu", TRACE_FUNC);
    VmaAllocationCreateInfo info;
    info.flags = 0;
    info.usage = VMA_MEMORY_USAGE_UNKNOWN;
    info.memoryTypeBits = 0;
    info.pool = VK_NULL_HANDLE;
    info.pUserData = nullptr;

    switch (usage) {
        case BufferUsage::kGpuOnly:
            info.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
            info.preferredFlags = 0;
            break;
        case BufferUsage::kCpuOnly:
            info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
            info.preferredFlags = VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
            break;
        case BufferUsage::kCpuWritesGpuReads:
            // On the first attempt, require memory that is also host cached.
            info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                 VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
            info.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
            break;
        case BufferUsage::kGpuWritesCpuReads:
            info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
            info.preferredFlags = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                                  VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
            break;
    }

    if (AllocationPropertyFlags::kDedicatedAllocation & flags) {
        info.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
    }

    if ((AllocationPropertyFlags::kLazyAllocation & flags) && BufferUsage::kGpuOnly == usage) {
        info.preferredFlags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT;
    }

    if (AllocationPropertyFlags::kPersistentlyMapped & flags) {
        SkASSERT(BufferUsage::kGpuOnly != usage);
        info.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT;
    }

    VmaAllocation allocation;
    VkResult result = vmaAllocateMemoryForBuffer(fAllocator, buffer, &info, &allocation, nullptr);
    if (VK_SUCCESS != result) {
        if (usage == BufferUsage::kCpuWritesGpuReads) {
            // Try again, this time dropping the requirement that the memory be host cached.
            info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
            result = vmaAllocateMemoryForBuffer(fAllocator, buffer, &info, &allocation, nullptr);
        }
    }
    if (VK_SUCCESS != result) {
        return false;
    }

    *backendMemory = (GrVkBackendMemory)allocation;
    return true;
}

void GrVkAMDMemoryAllocator::freeMemory(const GrVkBackendMemory& memoryHandle) {
    TRACE_EVENT0("skia.gpu", TRACE_FUNC);
    const VmaAllocation allocation = (const VmaAllocation)memoryHandle;
    vmaFreeMemory(fAllocator, allocation);
}

void GrVkAMDMemoryAllocator::getAllocInfo(const GrVkBackendMemory& memoryHandle,
                                          GrVkAlloc* alloc) const {
    const VmaAllocation allocation = (const VmaAllocation)memoryHandle;
    VmaAllocationInfo vmaInfo;
    vmaGetAllocationInfo(fAllocator, allocation, &vmaInfo);

    VkMemoryPropertyFlags memFlags;
    vmaGetMemoryTypeProperties(fAllocator, vmaInfo.memoryType, &memFlags);

    uint32_t flags = 0;
    if (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT & memFlags) {
        flags |= GrVkAlloc::kMappable_Flag;
    }
    if (!SkToBool(VK_MEMORY_PROPERTY_HOST_COHERENT_BIT & memFlags)) {
        flags |= GrVkAlloc::kNoncoherent_Flag;
    }

    alloc->fMemory        = vmaInfo.deviceMemory;
    alloc->fOffset        = vmaInfo.offset;
    alloc->fSize          = vmaInfo.size;
    alloc->fFlags         = flags;
    alloc->fBackendMemory = memoryHandle;

    // TODO: Remove this hack once the AMD allocator is able to handle the alignment of noncoherent
    // memory itself.
    if (!SkToBool(VK_MEMORY_PROPERTY_HOST_COHERENT_BIT & memFlags)) {
        // This is a hack to say that the allocation size is actually larger than it is. This is to
        // make sure when we are flushing and invalidating noncoherent memory we have a size that is
        // aligned to the nonCoherentAtomSize. This is safe for three reasons. First, the total size
        // of the VkDeviceMemory we allocate will always be a multiple of the max possible alignment
        // (currently 256). Second, all suballocations are aligned at offsets that are multiples of
        // 256. And finally, the allocator we are using always maps the entire VkDeviceMemory, so
        // the range we'll be flushing/invalidating will be mapped. So our new fake allocation size
        // will always fit into the VkDeviceMemory, will never push it into another suballocation,
        // and will always be mapped when map is called.
        const VkPhysicalDeviceProperties* devProps;
        vmaGetPhysicalDeviceProperties(fAllocator, &devProps);
        VkDeviceSize alignment = devProps->limits.nonCoherentAtomSize;

        alloc->fSize = (alloc->fSize + alignment - 1) & ~(alignment - 1);
    }
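        // Worked example: with fSize == 1000 and nonCoherentAtomSize == 64, the
        // expression above computes (1000 + 63) & ~63 == 1024, i.e. the size is
        // rounded up to the next multiple of 64. The bit trick assumes the
        // alignment is a power of two.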
    }
}

void* GrVkAMDMemoryAllocator::mapMemory(const GrVkBackendMemory& memoryHandle) {
    TRACE_EVENT0("skia.gpu", TRACE_FUNC);
    const VmaAllocation allocation = (const VmaAllocation)memoryHandle;
    void* mapPtr;
    vmaMapMemory(fAllocator, allocation, &mapPtr);
    return mapPtr;
}

void GrVkAMDMemoryAllocator::unmapMemory(const GrVkBackendMemory& memoryHandle) {
    TRACE_EVENT0("skia.gpu", TRACE_FUNC);
    const VmaAllocation allocation = (const VmaAllocation)memoryHandle;
    vmaUnmapMemory(fAllocator, allocation);
}

void GrVkAMDMemoryAllocator::flushMappedMemory(const GrVkBackendMemory& memoryHandle,
                                               VkDeviceSize offset, VkDeviceSize size) {
    TRACE_EVENT0("skia.gpu", TRACE_FUNC);
    GrVkAlloc info;
    this->getAllocInfo(memoryHandle, &info);

    if (GrVkAlloc::kNoncoherent_Flag & info.fFlags) {
        // We need the nonCoherentAtomSize to align the flushed range of non-coherent memory.
        const VkPhysicalDeviceProperties* physDevProps;
        vmaGetPhysicalDeviceProperties(fAllocator, &physDevProps);
        VkDeviceSize alignment = physDevProps->limits.nonCoherentAtomSize;

        VkMappedMemoryRange mappedMemoryRange;
        GrVkMemory::GetNonCoherentMappedMemoryRange(info, offset, size, alignment,
                                                    &mappedMemoryRange);
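        // Sketch of the intended alignment (the exact math lives in
        // GrVkMemory::GetNonCoherentMappedMemoryRange): assuming it rounds the
        // start down and the end up to the atom size, a request of offset 100,
        // size 8 with a 64-byte atom would widen to start 64, size 64, so the
        // flushed range still covers the caller's bytes [100, 108).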
        GR_VK_CALL(fInterface, FlushMappedMemoryRanges(fDevice, 1, &mappedMemoryRange));
    }
}

void GrVkAMDMemoryAllocator::invalidateMappedMemory(const GrVkBackendMemory& memoryHandle,
                                                    VkDeviceSize offset, VkDeviceSize size) {
    TRACE_EVENT0("skia.gpu", TRACE_FUNC);
    GrVkAlloc info;
    this->getAllocInfo(memoryHandle, &info);

    if (GrVkAlloc::kNoncoherent_Flag & info.fFlags) {
        // We need the nonCoherentAtomSize to align the invalidated range of non-coherent memory.
        const VkPhysicalDeviceProperties* physDevProps;
        vmaGetPhysicalDeviceProperties(fAllocator, &physDevProps);
        VkDeviceSize alignment = physDevProps->limits.nonCoherentAtomSize;

        VkMappedMemoryRange mappedMemoryRange;
        GrVkMemory::GetNonCoherentMappedMemoryRange(info, offset, size, alignment,
                                                    &mappedMemoryRange);
        GR_VK_CALL(fInterface, InvalidateMappedMemoryRanges(fDevice, 1, &mappedMemoryRange));
    }
}

uint64_t GrVkAMDMemoryAllocator::totalUsedMemory() const {
    VmaStats stats;
    vmaCalculateStats(fAllocator, &stats);
    return stats.total.usedBytes;
}

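// Note on the stat below: usedBytes counts bytes inside live suballocations,
// while unusedBytes is the free space still held inside VMA's VkDeviceMemory
// blocks, so their sum reflects all device memory the allocator currently owns.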
uint64_t GrVkAMDMemoryAllocator::totalAllocatedMemory() const {
    VmaStats stats;
    vmaCalculateStats(fAllocator, &stats);
    return stats.total.usedBytes + stats.total.unusedBytes;
}