/*
 * Copyright 2018 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/gpu/vk/GrVkAMDMemoryAllocator.h"

#include "src/core/SkTraceEvent.h"
#include "src/gpu/vk/GrVkInterface.h"
#include "src/gpu/vk/GrVkMemory.h"
#include "src/gpu/vk/GrVkUtil.h"

GrVkAMDMemoryAllocator::GrVkAMDMemoryAllocator(VkPhysicalDevice physicalDevice,
                                               VkDevice device,
                                               sk_sp<const GrVkInterface> interface)
        : fAllocator(VK_NULL_HANDLE)
        , fInterface(std::move(interface))
        , fDevice(device) {
#define GR_COPY_FUNCTION(NAME) functions.vk##NAME = fInterface->fFunctions.f##NAME
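    // For example, GR_COPY_FUNCTION(MapMemory) expands to
    //     functions.vkMapMemory = fInterface->fFunctions.fMapMemory;
    // wiring each Vulkan entry point that VMA needs to Skia's GrVkInterface table.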

    VmaVulkanFunctions functions;
    GR_COPY_FUNCTION(GetPhysicalDeviceProperties);
    GR_COPY_FUNCTION(GetPhysicalDeviceMemoryProperties);
    GR_COPY_FUNCTION(AllocateMemory);
    GR_COPY_FUNCTION(FreeMemory);
    GR_COPY_FUNCTION(MapMemory);
    GR_COPY_FUNCTION(UnmapMemory);
    GR_COPY_FUNCTION(BindBufferMemory);
    GR_COPY_FUNCTION(BindImageMemory);
    GR_COPY_FUNCTION(GetBufferMemoryRequirements);
    GR_COPY_FUNCTION(GetImageMemoryRequirements);
    GR_COPY_FUNCTION(CreateBuffer);
    GR_COPY_FUNCTION(DestroyBuffer);
    GR_COPY_FUNCTION(CreateImage);
    GR_COPY_FUNCTION(DestroyImage);

    // Skia currently doesn't support VK_KHR_dedicated_allocation.
    functions.vkGetBufferMemoryRequirements2KHR = nullptr;
    functions.vkGetImageMemoryRequirements2KHR = nullptr;

    VmaAllocatorCreateInfo info;
    info.flags = 0;
    info.physicalDevice = physicalDevice;
    info.device = device;
    // 4MB was picked for the size here by looking at memory usage of Android apps and runs of DM.
    // It seems to be a good compromise of not wasting unused allocated space and not making too
    // many small allocations. The AMD allocator starts making blocks at 1/8 the max size and
    // grows the block size as needed before capping at the max set here.
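    // For example, with the 4MB cap below, new blocks would grow 512KB -> 1MB -> 2MB -> 4MB as
    // demand increases (the exact growth schedule is the allocator's, not ours).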
    info.preferredLargeHeapBlockSize = 4*1024*1024;
    info.pAllocationCallbacks = nullptr;
    info.pDeviceMemoryCallbacks = nullptr;
    info.frameInUseCount = 0;
    info.pHeapSizeLimit = nullptr;
    info.pVulkanFunctions = &functions;

    vmaCreateAllocator(&info, &fAllocator);
}

GrVkAMDMemoryAllocator::~GrVkAMDMemoryAllocator() {
    vmaDestroyAllocator(fAllocator);
    fAllocator = VK_NULL_HANDLE;
}

bool GrVkAMDMemoryAllocator::allocateMemoryForImage(VkImage image, AllocationPropertyFlags flags,
                                                    GrVkBackendMemory* backendMemory) {
    TRACE_EVENT0("skia.gpu", TRACE_FUNC);
    VmaAllocationCreateInfo info;
    info.flags = 0;
    info.usage = VMA_MEMORY_USAGE_UNKNOWN;
    info.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
    info.preferredFlags = 0;
    info.memoryTypeBits = 0;
    info.pool = VK_NULL_HANDLE;
    info.pUserData = nullptr;

    if (AllocationPropertyFlags::kDedicatedAllocation & flags) {
        info.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
    }

    if (AllocationPropertyFlags::kLazyAllocation & flags) {
        info.preferredFlags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT;
    }

    if (AllocationPropertyFlags::kProtected & flags) {
        info.requiredFlags |= VK_MEMORY_PROPERTY_PROTECTED_BIT;
    }

    VmaAllocation allocation;
    VkResult result = vmaAllocateMemoryForImage(fAllocator, image, &info, &allocation, nullptr);
    if (VK_SUCCESS != result) {
        return false;
    }
    *backendMemory = (GrVkBackendMemory)allocation;
    return true;
}

bool GrVkAMDMemoryAllocator::allocateMemoryForBuffer(VkBuffer buffer, BufferUsage usage,
                                                     AllocationPropertyFlags flags,
                                                     GrVkBackendMemory* backendMemory) {
    TRACE_EVENT0("skia.gpu", TRACE_FUNC);
    VmaAllocationCreateInfo info;
    info.flags = 0;
    info.usage = VMA_MEMORY_USAGE_UNKNOWN;
    info.memoryTypeBits = 0;
    info.pool = VK_NULL_HANDLE;
    info.pUserData = nullptr;

    switch (usage) {
        case BufferUsage::kGpuOnly:
            info.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
            info.preferredFlags = 0;
            break;
        case BufferUsage::kCpuOnly:
            info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
            info.preferredFlags = VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
            break;
        case BufferUsage::kCpuWritesGpuReads:
            // On the first attempt we also require the memory to be cached.
            info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                 VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
            info.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
            break;
        case BufferUsage::kGpuWritesCpuReads:
            info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
            info.preferredFlags = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                                 VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
            break;
    }

    if (AllocationPropertyFlags::kDedicatedAllocation & flags) {
        info.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
    }

    if ((AllocationPropertyFlags::kLazyAllocation & flags) && BufferUsage::kGpuOnly == usage) {
        info.preferredFlags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT;
    }

    if (AllocationPropertyFlags::kPersistentlyMapped & flags) {
        SkASSERT(BufferUsage::kGpuOnly != usage);
        info.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT;
    }

    VmaAllocation allocation;
    VkResult result = vmaAllocateMemoryForBuffer(fAllocator, buffer, &info, &allocation, nullptr);
    if (VK_SUCCESS != result) {
        if (usage == BufferUsage::kCpuWritesGpuReads) {
            // Try again, this time dropping the requirement that the memory be cached.
            info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
            result = vmaAllocateMemoryForBuffer(fAllocator, buffer, &info, &allocation, nullptr);
        }
    }
    if (VK_SUCCESS != result) {
        return false;
    }

    *backendMemory = (GrVkBackendMemory)allocation;
    return true;
}

void GrVkAMDMemoryAllocator::freeMemory(const GrVkBackendMemory& memoryHandle) {
    TRACE_EVENT0("skia.gpu", TRACE_FUNC);
    const VmaAllocation allocation = (const VmaAllocation)memoryHandle;
    vmaFreeMemory(fAllocator, allocation);
}

void GrVkAMDMemoryAllocator::getAllocInfo(const GrVkBackendMemory& memoryHandle,
                                          GrVkAlloc* alloc) const {
    const VmaAllocation allocation = (const VmaAllocation)memoryHandle;
    VmaAllocationInfo vmaInfo;
    vmaGetAllocationInfo(fAllocator, allocation, &vmaInfo);

    VkMemoryPropertyFlags memFlags;
    vmaGetMemoryTypeProperties(fAllocator, vmaInfo.memoryType, &memFlags);

    uint32_t flags = 0;
    if (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT & memFlags) {
        flags |= GrVkAlloc::kMappable_Flag;
    }
    if (!SkToBool(VK_MEMORY_PROPERTY_HOST_COHERENT_BIT & memFlags)) {
        flags |= GrVkAlloc::kNoncoherent_Flag;
    }

    alloc->fMemory = vmaInfo.deviceMemory;
    alloc->fOffset = vmaInfo.offset;
    alloc->fSize = vmaInfo.size;
    alloc->fFlags = flags;
    alloc->fBackendMemory = memoryHandle;

    // TODO: Remove this hack once the AMD allocator is able to handle the alignment of
    // noncoherent memory itself.
    if (!SkToBool(VK_MEMORY_PROPERTY_HOST_COHERENT_BIT & memFlags)) {
        // This is a hack that reports the allocation size as larger than it actually is. It
        // ensures that when we flush and invalidate noncoherent memory we use a size that is
        // aligned to the nonCoherentAtomSize. This is safe for three reasons. First, the total
        // size of the VkDeviceMemory we allocate is always a multiple of the maximum possible
        // alignment (currently 256). Second, all suballocations are aligned with an offset of
        // 256. And finally, the allocator we are using always maps the entire VkDeviceMemory, so
        // the range we'll be flushing/invalidating will be mapped. So our new fake allocation
        // size will always fit into the VkDeviceMemory, will never push into another
        // suballocation, and will always be mapped when map is called.
        const VkPhysicalDeviceProperties* devProps;
        vmaGetPhysicalDeviceProperties(fAllocator, &devProps);
        VkDeviceSize alignment = devProps->limits.nonCoherentAtomSize;

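        // Round fSize up to the next multiple of alignment. For example, with
        // nonCoherentAtomSize = 64 and fSize = 1000: (1000 + 63) & ~63 = 1024.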
        alloc->fSize = (alloc->fSize + alignment - 1) & ~(alignment - 1);
    }
}

void* GrVkAMDMemoryAllocator::mapMemory(const GrVkBackendMemory& memoryHandle) {
    TRACE_EVENT0("skia.gpu", TRACE_FUNC);
    const VmaAllocation allocation = (const VmaAllocation)memoryHandle;
    // Initialize so that we return nullptr rather than garbage if vmaMapMemory fails.
    void* mapPtr = nullptr;
    vmaMapMemory(fAllocator, allocation, &mapPtr);
    return mapPtr;
}

void GrVkAMDMemoryAllocator::unmapMemory(const GrVkBackendMemory& memoryHandle) {
    TRACE_EVENT0("skia.gpu", TRACE_FUNC);
    const VmaAllocation allocation = (const VmaAllocation)memoryHandle;
    vmaUnmapMemory(fAllocator, allocation);
}

void GrVkAMDMemoryAllocator::flushMappedMemory(const GrVkBackendMemory& memoryHandle,
                                               VkDeviceSize offset, VkDeviceSize size) {
    TRACE_EVENT0("skia.gpu", TRACE_FUNC);
    GrVkAlloc info;
    this->getAllocInfo(memoryHandle, &info);

    if (GrVkAlloc::kNoncoherent_Flag & info.fFlags) {
        // We need the nonCoherentAtomSize for the non-coherent flush/invalidate alignment.
        const VkPhysicalDeviceProperties* physDevProps;
        vmaGetPhysicalDeviceProperties(fAllocator, &physDevProps);
        VkDeviceSize alignment = physDevProps->limits.nonCoherentAtomSize;

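        // GetNonCoherentMappedMemoryRange fills out a VkMappedMemoryRange whose offset and size
        // are aligned to nonCoherentAtomSize, as vkFlushMappedMemoryRanges requires.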
        VkMappedMemoryRange mappedMemoryRange;
        GrVkMemory::GetNonCoherentMappedMemoryRange(info, offset, size, alignment,
                                                    &mappedMemoryRange);
        GR_VK_CALL(fInterface, FlushMappedMemoryRanges(fDevice, 1, &mappedMemoryRange));
    }
}

void GrVkAMDMemoryAllocator::invalidateMappedMemory(const GrVkBackendMemory& memoryHandle,
                                                    VkDeviceSize offset, VkDeviceSize size) {
    TRACE_EVENT0("skia.gpu", TRACE_FUNC);
    GrVkAlloc info;
    this->getAllocInfo(memoryHandle, &info);

    if (GrVkAlloc::kNoncoherent_Flag & info.fFlags) {
        // We need the nonCoherentAtomSize for the non-coherent flush/invalidate alignment.
        const VkPhysicalDeviceProperties* physDevProps;
        vmaGetPhysicalDeviceProperties(fAllocator, &physDevProps);
        VkDeviceSize alignment = physDevProps->limits.nonCoherentAtomSize;

        VkMappedMemoryRange mappedMemoryRange;
        GrVkMemory::GetNonCoherentMappedMemoryRange(info, offset, size, alignment,
                                                    &mappedMemoryRange);
        GR_VK_CALL(fInterface, InvalidateMappedMemoryRanges(fDevice, 1, &mappedMemoryRange));
    }
}

uint64_t GrVkAMDMemoryAllocator::totalUsedMemory() const {
    VmaStats stats;
    vmaCalculateStats(fAllocator, &stats);
    return stats.total.usedBytes;
}

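// "Allocated" counts both the bytes in use and the unused space still held inside the
// VkDeviceMemory blocks VMA has created, so this is always >= totalUsedMemory().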
uint64_t GrVkAMDMemoryAllocator::totalAllocatedMemory() const {
    VmaStats stats;
    vmaCalculateStats(fAllocator, &stats);
    return stats.total.usedBytes + stats.total.unusedBytes;
}