/*
 * Copyright 2015 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "GrVkMemory.h"

#include "GrVkGpu.h"
#include "GrVkUtil.h"

#ifdef SK_DEBUG
// for simple tracking of how much we're using in each heap
// last counter is for non-subheap allocations
VkDeviceSize gHeapUsage[VK_MAX_MEMORY_HEAPS+1] = { 0 };
#endif

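// Scans the physical device's memory types for one that is allowed by typeBits (the
// requirements bitmask from vkGet*MemoryRequirements) and supports all of the requested
// property flags. On success returns both the memory type index and its owning heap index.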
static bool get_valid_memory_type_index(const VkPhysicalDeviceMemoryProperties& physDevMemProps,
                                        uint32_t typeBits,
                                        VkMemoryPropertyFlags requestedMemFlags,
                                        uint32_t* typeIndex,
                                        uint32_t* heapIndex) {
    for (uint32_t i = 0; i < physDevMemProps.memoryTypeCount; ++i) {
        if (typeBits & (1 << i)) {
            uint32_t supportedFlags = physDevMemProps.memoryTypes[i].propertyFlags &
                                      requestedMemFlags;
            if (supportedFlags == requestedMemFlags) {
                *typeIndex = i;
                *heapIndex = physDevMemProps.memoryTypes[i].heapIndex;
                return true;
            }
        }
    }
    return false;
}

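// Maps a GrVkBuffer usage type to the GrVkGpu heap that backs it; the static asserts below
// keep the table in sync with the GrVkBuffer::Type enum ordering.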
static GrVkGpu::Heap buffer_type_to_heap(GrVkBuffer::Type type) {
    const GrVkGpu::Heap kBufferToHeap[]{
        GrVkGpu::kVertexBuffer_Heap,
        GrVkGpu::kIndexBuffer_Heap,
        GrVkGpu::kUniformBuffer_Heap,
        GrVkGpu::kTexelBuffer_Heap,
        GrVkGpu::kCopyReadBuffer_Heap,
        GrVkGpu::kCopyWriteBuffer_Heap,
    };
    GR_STATIC_ASSERT(0 == GrVkBuffer::kVertex_Type);
    GR_STATIC_ASSERT(1 == GrVkBuffer::kIndex_Type);
    GR_STATIC_ASSERT(2 == GrVkBuffer::kUniform_Type);
    GR_STATIC_ASSERT(3 == GrVkBuffer::kTexel_Type);
    GR_STATIC_ASSERT(4 == GrVkBuffer::kCopyRead_Type);
    GR_STATIC_ASSERT(5 == GrVkBuffer::kCopyWrite_Type);

    return kBufferToHeap[type];
}

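// Allocation strategy for buffers: dynamic (CPU-updated) buffers prefer host-visible,
// host-cached memory and fall back to any host-visible type; static buffers use device-local
// memory. For non-coherent host memory the required alignment is raised to
// nonCoherentAtomSize so that later flush/invalidate ranges are legal. If the preferred heap
// can't satisfy the request, static buffers retry with no required property flags.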
bool GrVkMemory::AllocAndBindBufferMemory(const GrVkGpu* gpu,
                                          VkBuffer buffer,
                                          GrVkBuffer::Type type,
                                          bool dynamic,
                                          GrVkAlloc* alloc) {
    const GrVkInterface* iface = gpu->vkInterface();
    VkDevice device = gpu->device();

    VkMemoryRequirements memReqs;
    GR_VK_CALL(iface, GetBufferMemoryRequirements(device, buffer, &memReqs));

    uint32_t typeIndex = 0;
    uint32_t heapIndex = 0;
    const VkPhysicalDeviceMemoryProperties& phDevMemProps = gpu->physicalDeviceMemoryProperties();
    const VkPhysicalDeviceProperties& phDevProps = gpu->physicalDeviceProperties();
    if (dynamic) {
        // try to get cached and ideally non-coherent memory first
        if (!get_valid_memory_type_index(phDevMemProps,
                                         memReqs.memoryTypeBits,
                                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                         VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
                                         &typeIndex,
                                         &heapIndex)) {
            // some sort of host-visible memory type should always be available
            // for dynamic buffers
            SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps,
                                                         memReqs.memoryTypeBits,
                                                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
                                                         &typeIndex,
                                                         &heapIndex));
        }

        VkMemoryPropertyFlags mpf = phDevMemProps.memoryTypes[typeIndex].propertyFlags;
        alloc->fFlags = mpf & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT ? 0x0
                                                                   : GrVkAlloc::kNoncoherent_Flag;
        if (SkToBool(alloc->fFlags & GrVkAlloc::kNoncoherent_Flag)) {
            SkASSERT(SkIsPow2(memReqs.alignment));
            SkASSERT(SkIsPow2(phDevProps.limits.nonCoherentAtomSize));
            memReqs.alignment = SkTMax(memReqs.alignment, phDevProps.limits.nonCoherentAtomSize);
        }
    } else {
        // device-local memory should always be available for static buffers
        SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps,
                                                     memReqs.memoryTypeBits,
                                                     VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
                                                     &typeIndex,
                                                     &heapIndex));
        alloc->fFlags = 0x0;
    }

    GrVkHeap* heap = gpu->getHeap(buffer_type_to_heap(type));

    if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) {
        // if static, try to allocate from non-host-visible non-device-local memory instead
        if (dynamic ||
            !get_valid_memory_type_index(phDevMemProps, memReqs.memoryTypeBits,
                                         0, &typeIndex, &heapIndex) ||
            !heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) {
            SkDebugf("Failed to alloc buffer\n");
            return false;
        }
    }

    // Bind buffer
    VkResult err = GR_VK_CALL(iface, BindBufferMemory(device, buffer,
                                                      alloc->fMemory, alloc->fOffset));
    if (err) {
        SkASSERT_RELEASE(heap->free(*alloc));
        return false;
    }

    return true;
}

void GrVkMemory::FreeBufferMemory(const GrVkGpu* gpu, GrVkBuffer::Type type,
                                  const GrVkAlloc& alloc) {
    GrVkHeap* heap = gpu->getHeap(buffer_type_to_heap(type));
    SkASSERT_RELEASE(heap->free(alloc));
}

// for debugging
static uint64_t gTotalImageMemory = 0;
static uint64_t gTotalImageMemoryFullPage = 0;

const VkDeviceSize kMaxSmallImageSize = 16 * 1024;
const VkDeviceSize kMinVulkanPageSize = 16 * 1024;

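// Rounds size up to the next multiple of alignment; alignment must be a power of two.
// For example, align_size(5000, 4096) == 8192 and align_size(4096, 4096) == 4096.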
static VkDeviceSize align_size(VkDeviceSize size, VkDeviceSize alignment) {
    return (size + alignment - 1) & ~(alignment - 1);
}

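// Allocation strategy for images: linearly tiled images are CPU-accessible, so they prefer
// host-visible, host-cached memory (falling back to any host-visible type) and come from the
// linear-image heap. Optimally tiled images use device-local memory and are routed to either
// the small-image heap (<= 16KB) or the general optimal-image heap. As with buffers, a failed
// device-local suballocation retries with no required property flags.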
bool GrVkMemory::AllocAndBindImageMemory(const GrVkGpu* gpu,
                                         VkImage image,
                                         bool linearTiling,
                                         GrVkAlloc* alloc) {
    const GrVkInterface* iface = gpu->vkInterface();
    VkDevice device = gpu->device();

    VkMemoryRequirements memReqs;
    GR_VK_CALL(iface, GetImageMemoryRequirements(device, image, &memReqs));

    uint32_t typeIndex = 0;
    uint32_t heapIndex = 0;
    GrVkHeap* heap;
    const VkPhysicalDeviceMemoryProperties& phDevMemProps = gpu->physicalDeviceMemoryProperties();
    const VkPhysicalDeviceProperties& phDevProps = gpu->physicalDeviceProperties();
    if (linearTiling) {
        VkMemoryPropertyFlags desiredMemProps = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                                VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
        if (!get_valid_memory_type_index(phDevMemProps,
                                         memReqs.memoryTypeBits,
                                         desiredMemProps,
                                         &typeIndex,
                                         &heapIndex)) {
            // some sort of host-visible memory type should always be available
            SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps,
                                                         memReqs.memoryTypeBits,
                                                         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
                                                         &typeIndex,
                                                         &heapIndex));
        }
        heap = gpu->getHeap(GrVkGpu::kLinearImage_Heap);
        VkMemoryPropertyFlags mpf = phDevMemProps.memoryTypes[typeIndex].propertyFlags;
        alloc->fFlags = mpf & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT ? 0x0
                                                                   : GrVkAlloc::kNoncoherent_Flag;
        if (SkToBool(alloc->fFlags & GrVkAlloc::kNoncoherent_Flag)) {
            SkASSERT(SkIsPow2(memReqs.alignment));
            SkASSERT(SkIsPow2(phDevProps.limits.nonCoherentAtomSize));
            memReqs.alignment = SkTMax(memReqs.alignment, phDevProps.limits.nonCoherentAtomSize);
        }
    } else {
        // this memory type should always be available
        SkASSERT_RELEASE(get_valid_memory_type_index(phDevMemProps,
                                                     memReqs.memoryTypeBits,
                                                     VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
                                                     &typeIndex,
                                                     &heapIndex));
        if (memReqs.size <= kMaxSmallImageSize) {
            heap = gpu->getHeap(GrVkGpu::kSmallOptimalImage_Heap);
        } else {
            heap = gpu->getHeap(GrVkGpu::kOptimalImage_Heap);
        }
        alloc->fFlags = 0x0;
    }

    if (!heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) {
        // if optimal, try to allocate from non-host-visible non-device-local memory instead
        if (linearTiling ||
            !get_valid_memory_type_index(phDevMemProps, memReqs.memoryTypeBits,
                                         0, &typeIndex, &heapIndex) ||
            !heap->alloc(memReqs.size, memReqs.alignment, typeIndex, heapIndex, alloc)) {
            SkDebugf("Failed to alloc image\n");
            return false;
        }
    }

    // Bind image
    VkResult err = GR_VK_CALL(iface, BindImageMemory(device, image,
                                                     alloc->fMemory, alloc->fOffset));
    if (err) {
        SkASSERT_RELEASE(heap->free(*alloc));
        return false;
    }

    gTotalImageMemory += alloc->fSize;

    VkDeviceSize pageAlignedSize = align_size(alloc->fSize, kMinVulkanPageSize);
    gTotalImageMemoryFullPage += pageAlignedSize;

    return true;
}

void GrVkMemory::FreeImageMemory(const GrVkGpu* gpu, bool linearTiling,
                                 const GrVkAlloc& alloc) {
    GrVkHeap* heap;
    if (linearTiling) {
        heap = gpu->getHeap(GrVkGpu::kLinearImage_Heap);
    } else if (alloc.fSize <= kMaxSmallImageSize) {
        heap = gpu->getHeap(GrVkGpu::kSmallOptimalImage_Heap);
    } else {
        heap = gpu->getHeap(GrVkGpu::kOptimalImage_Heap);
    }
    if (!heap->free(alloc)) {
        // must be an adopted allocation
        GR_VK_CALL(gpu->vkInterface(), FreeMemory(gpu->device(), alloc.fMemory, nullptr));
    } else {
        gTotalImageMemory -= alloc.fSize;
        VkDeviceSize pageAlignedSize = align_size(alloc.fSize, kMinVulkanPageSize);
        gTotalImageMemoryFullPage -= pageAlignedSize;
    }
}

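// Maps an image layout to the set of pipeline stages that may access an image in that layout.
// VK_IMAGE_LAYOUT_UNDEFINED maps to TOP_OF_PIPE since no prior access needs to be waited on.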
VkPipelineStageFlags GrVkMemory::LayoutToPipelineStageFlags(const VkImageLayout layout) {
    if (VK_IMAGE_LAYOUT_GENERAL == layout) {
        return VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
    } else if (VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL == layout ||
               VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL == layout) {
        return VK_PIPELINE_STAGE_TRANSFER_BIT;
    } else if (VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL == layout ||
               VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL == layout ||
               VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL == layout ||
               VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL == layout) {
        return VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT;
    } else if (VK_IMAGE_LAYOUT_PREINITIALIZED == layout) {
        return VK_PIPELINE_STAGE_HOST_BIT;
    }

    SkASSERT(VK_IMAGE_LAYOUT_UNDEFINED == layout);
    return VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
}

VkAccessFlags GrVkMemory::LayoutToSrcAccessMask(const VkImageLayout layout) {
    // Currently we assume we will never be doing any explicit shader writes (this doesn't
    // include color attachment or depth/stencil writes). So we will ignore the
    // VK_MEMORY_OUTPUT_SHADER_WRITE_BIT.

    // We can only directly access the host memory if we are in preinitialized or general layout,
    // and the image is linear.
    // TODO: Add check for linear here so we are not always adding host to general, and we should
    // only be in preinitialized if we are linear
    VkAccessFlags flags = 0;
    if (VK_IMAGE_LAYOUT_GENERAL == layout) {
        flags = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
                VK_ACCESS_TRANSFER_WRITE_BIT |
                VK_ACCESS_TRANSFER_READ_BIT |
                VK_ACCESS_SHADER_READ_BIT |
                VK_ACCESS_HOST_WRITE_BIT | VK_ACCESS_HOST_READ_BIT;
    } else if (VK_IMAGE_LAYOUT_PREINITIALIZED == layout) {
        flags = VK_ACCESS_HOST_WRITE_BIT;
    } else if (VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL == layout) {
        flags = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
    } else if (VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL == layout) {
        flags = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
    } else if (VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL == layout) {
        flags = VK_ACCESS_TRANSFER_WRITE_BIT;
    } else if (VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL == layout) {
        flags = VK_ACCESS_TRANSFER_READ_BIT;
    } else if (VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL == layout) {
        flags = VK_ACCESS_SHADER_READ_BIT;
    }
    return flags;
}

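// For non-coherent host-visible memory, host writes must be flushed with
// vkFlushMappedMemoryRanges before the GPU can see them; coherent allocations need no flush.
// The offset and size passed in must be multiples of nonCoherentAtomSize (or the range must
// run to the end of the allocation), which the debug asserts below verify.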
void GrVkMemory::FlushMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc, VkDeviceSize offset,
                                  VkDeviceSize size) {
    if (alloc.fFlags & GrVkAlloc::kNoncoherent_Flag) {
#ifdef SK_DEBUG
        SkASSERT(offset >= alloc.fOffset);
        VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize;
        SkASSERT(0 == (offset & (alignment-1)));
        if (size != VK_WHOLE_SIZE) {
            SkASSERT(size > 0);
            SkASSERT(0 == (size & (alignment-1)) ||
                     (offset + size) == (alloc.fOffset + alloc.fSize));
            SkASSERT(offset + size <= alloc.fOffset + alloc.fSize);
        }
#endif

        VkMappedMemoryRange mappedMemoryRange;
        memset(&mappedMemoryRange, 0, sizeof(VkMappedMemoryRange));
        mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
        mappedMemoryRange.memory = alloc.fMemory;
        mappedMemoryRange.offset = offset;
        mappedMemoryRange.size = size;
        GR_VK_CALL(gpu->vkInterface(), FlushMappedMemoryRanges(gpu->device(),
                                                               1, &mappedMemoryRange));
    }
}

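// The mirror of FlushMappedAlloc: for non-coherent memory, vkInvalidateMappedMemoryRanges
// must be called before the host reads back data the GPU has written. The same
// nonCoherentAtomSize alignment rules apply to offset and size.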
void GrVkMemory::InvalidateMappedAlloc(const GrVkGpu* gpu, const GrVkAlloc& alloc,
                                       VkDeviceSize offset, VkDeviceSize size) {
    if (alloc.fFlags & GrVkAlloc::kNoncoherent_Flag) {
#ifdef SK_DEBUG
        SkASSERT(offset >= alloc.fOffset);
        VkDeviceSize alignment = gpu->physicalDeviceProperties().limits.nonCoherentAtomSize;
        SkASSERT(0 == (offset & (alignment-1)));
        if (size != VK_WHOLE_SIZE) {
            SkASSERT(size > 0);
            SkASSERT(0 == (size & (alignment-1)) ||
                     (offset + size) == (alloc.fOffset + alloc.fSize));
            SkASSERT(offset + size <= alloc.fOffset + alloc.fSize);
        }
#endif

        VkMappedMemoryRange mappedMemoryRange;
        memset(&mappedMemoryRange, 0, sizeof(VkMappedMemoryRange));
        mappedMemoryRange.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
        mappedMemoryRange.memory = alloc.fMemory;
        mappedMemoryRange.offset = offset;
        mappedMemoryRange.size = size;
        GR_VK_CALL(gpu->vkInterface(), InvalidateMappedMemoryRanges(gpu->device(),
                                                                    1, &mappedMemoryRange));
    }
}

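// Best-fit allocation out of the free list: walk every free block, pick the smallest block
// that can hold the aligned request, then either shrink that block in place or remove it if
// it is consumed exactly. fLargestBlockSize/fLargestBlockOffset are kept up to date so that
// GrVkHeap::subAlloc can quickly reject subheaps that cannot possibly fit a request.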
bool GrVkFreeListAlloc::alloc(VkDeviceSize requestedSize,
                              VkDeviceSize* allocOffset, VkDeviceSize* allocSize) {
    VkDeviceSize alignedSize = align_size(requestedSize, fAlignment);

    // find the smallest block big enough for our allocation
    FreeList::Iter iter = fFreeList.headIter();
    FreeList::Iter bestFitIter;
    VkDeviceSize bestFitSize = fSize + 1;
    VkDeviceSize secondLargestSize = 0;
    VkDeviceSize secondLargestOffset = 0;
    while (iter.get()) {
        Block* block = iter.get();
        // need to adjust size to match desired alignment
        SkASSERT(align_size(block->fOffset, fAlignment) - block->fOffset == 0);
        if (block->fSize >= alignedSize && block->fSize < bestFitSize) {
            bestFitIter = iter;
            bestFitSize = block->fSize;
        }
        if (secondLargestSize < block->fSize && block->fOffset != fLargestBlockOffset) {
            secondLargestSize = block->fSize;
            secondLargestOffset = block->fOffset;
        }
        iter.next();
    }
    SkASSERT(secondLargestSize <= fLargestBlockSize);

    Block* bestFit = bestFitIter.get();
    if (bestFit) {
        SkASSERT(align_size(bestFit->fOffset, fAlignment) == bestFit->fOffset);
        *allocOffset = bestFit->fOffset;
        *allocSize = alignedSize;
        // adjust or remove current block
        VkDeviceSize originalBestFitOffset = bestFit->fOffset;
        if (bestFit->fSize > alignedSize) {
            bestFit->fOffset += alignedSize;
            bestFit->fSize -= alignedSize;
            if (fLargestBlockOffset == originalBestFitOffset) {
                if (bestFit->fSize >= secondLargestSize) {
                    fLargestBlockSize = bestFit->fSize;
                    fLargestBlockOffset = bestFit->fOffset;
                } else {
                    fLargestBlockSize = secondLargestSize;
                    fLargestBlockOffset = secondLargestOffset;
                }
            }
#ifdef SK_DEBUG
            VkDeviceSize largestSize = 0;
            iter = fFreeList.headIter();
            while (iter.get()) {
                Block* block = iter.get();
                if (largestSize < block->fSize) {
                    largestSize = block->fSize;
                }
                iter.next();
            }
            SkASSERT(largestSize == fLargestBlockSize);
#endif
        } else {
            SkASSERT(bestFit->fSize == alignedSize);
            if (fLargestBlockOffset == originalBestFitOffset) {
                fLargestBlockSize = secondLargestSize;
                fLargestBlockOffset = secondLargestOffset;
            }
            fFreeList.remove(bestFit);
#ifdef SK_DEBUG
            VkDeviceSize largestSize = 0;
            iter = fFreeList.headIter();
            while (iter.get()) {
                Block* block = iter.get();
                if (largestSize < block->fSize) {
                    largestSize = block->fSize;
                }
                iter.next();
            }
            SkASSERT(largestSize == fLargestBlockSize);
#endif
        }
        fFreeSize -= alignedSize;
        SkASSERT(*allocSize > 0);

        return true;
    }

    SkDebugf("Can't allocate %llu bytes, %llu bytes available, largest free block %llu\n",
             (unsigned long long)alignedSize, (unsigned long long)fFreeSize,
             (unsigned long long)fLargestBlockSize);

    return false;
}

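// Returns a block to the free list, coalescing with the free neighbors on either side when
// the freed range abuts them, and updates the cached largest-block bookkeeping.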
void GrVkFreeListAlloc::free(VkDeviceSize allocOffset, VkDeviceSize allocSize) {
    // find the block right after this allocation
    FreeList::Iter iter = fFreeList.headIter();
    FreeList::Iter prev;
    while (iter.get() && iter.get()->fOffset < allocOffset) {
        prev = iter;
        iter.next();
    }
    // we have four cases:
    // we exactly follow the previous one
    Block* block;
    if (prev.get() && prev.get()->fOffset + prev.get()->fSize == allocOffset) {
        block = prev.get();
        block->fSize += allocSize;
        if (block->fOffset == fLargestBlockOffset) {
            fLargestBlockSize = block->fSize;
        }
        // and additionally we may exactly precede the next one
        if (iter.get() && iter.get()->fOffset == allocOffset + allocSize) {
            block->fSize += iter.get()->fSize;
            if (iter.get()->fOffset == fLargestBlockOffset) {
                fLargestBlockOffset = block->fOffset;
                fLargestBlockSize = block->fSize;
            }
            fFreeList.remove(iter.get());
        }
    // or we only exactly precede the next one
    } else if (iter.get() && iter.get()->fOffset == allocOffset + allocSize) {
        block = iter.get();
        block->fSize += allocSize;
        if (block->fOffset == fLargestBlockOffset) {
            fLargestBlockOffset = allocOffset;
            fLargestBlockSize = block->fSize;
        }
        block->fOffset = allocOffset;
    // or we fall somewhere in between, with gaps
    } else {
        block = fFreeList.addBefore(iter);
        block->fOffset = allocOffset;
        block->fSize = allocSize;
    }
    fFreeSize += allocSize;
    if (block->fSize > fLargestBlockSize) {
        fLargestBlockSize = block->fSize;
        fLargestBlockOffset = block->fOffset;
    }

#ifdef SK_DEBUG
    VkDeviceSize largestSize = 0;
    iter = fFreeList.headIter();
    while (iter.get()) {
        Block* block = iter.get();
        if (largestSize < block->fSize) {
            largestSize = block->fSize;
        }
        iter.next();
    }
    SkASSERT(fLargestBlockSize == largestSize);
#endif
}

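// A GrVkSubHeap wraps a single VkDeviceMemory allocation and hands out pieces of it via the
// free-list allocator it inherits from GrVkFreeListAlloc. If the underlying vkAllocateMemory
// call fails, the subheap is reset to size 0, which callers use to detect the failure.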
GrVkSubHeap::GrVkSubHeap(const GrVkGpu* gpu, uint32_t memoryTypeIndex, uint32_t heapIndex,
                         VkDeviceSize size, VkDeviceSize alignment)
    : INHERITED(size, alignment)
    , fGpu(gpu)
#ifdef SK_DEBUG
    , fHeapIndex(heapIndex)
#endif
    , fMemoryTypeIndex(memoryTypeIndex) {

    VkMemoryAllocateInfo allocInfo = {
        VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,      // sType
        nullptr,                                     // pNext
        size,                                        // allocationSize
        memoryTypeIndex,                             // memoryTypeIndex
    };

    VkResult err = GR_VK_CALL(gpu->vkInterface(), AllocateMemory(gpu->device(),
                                                                 &allocInfo,
                                                                 nullptr,
                                                                 &fAlloc));
    if (VK_SUCCESS != err) {
        this->reset();
    }
#ifdef SK_DEBUG
    else {
        gHeapUsage[heapIndex] += size;
    }
#endif
}

GrVkSubHeap::~GrVkSubHeap() {
    const GrVkInterface* iface = fGpu->vkInterface();
    GR_VK_CALL(iface, FreeMemory(fGpu->device(), fAlloc, nullptr));
#ifdef SK_DEBUG
    gHeapUsage[fHeapIndex] -= fSize;
#endif
}

bool GrVkSubHeap::alloc(VkDeviceSize size, GrVkAlloc* alloc) {
    alloc->fMemory = fAlloc;
    return INHERITED::alloc(size, &alloc->fOffset, &alloc->fSize);
}

void GrVkSubHeap::free(const GrVkAlloc& alloc) {
    SkASSERT(alloc.fMemory == fAlloc);

    INHERITED::free(alloc.fOffset, alloc.fSize);
}

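// Suballocation strategy: requests larger than the configured subheap size get their own
// dedicated VkDeviceMemory allocation (flagged with fUsesSystemHeap); otherwise we best-fit
// the request into an existing subheap with a matching memory type and alignment, creating a
// new subheap only when none can hold it.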
bool GrVkHeap::subAlloc(VkDeviceSize size, VkDeviceSize alignment,
                        uint32_t memoryTypeIndex, uint32_t heapIndex, GrVkAlloc* alloc) {
    VkDeviceSize alignedSize = align_size(size, alignment);

    // if requested is larger than our subheap allocation, just alloc directly
    if (alignedSize > fSubHeapSize) {
        VkMemoryAllocateInfo allocInfo = {
            VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,      // sType
            nullptr,                                     // pNext
            alignedSize,                                 // allocationSize
            memoryTypeIndex,                             // memoryTypeIndex
        };

        VkResult err = GR_VK_CALL(fGpu->vkInterface(), AllocateMemory(fGpu->device(),
                                                                      &allocInfo,
                                                                      nullptr,
                                                                      &alloc->fMemory));
        if (VK_SUCCESS != err) {
            return false;
        }
        alloc->fOffset = 0;
        alloc->fSize = alignedSize;
        alloc->fUsesSystemHeap = true;
#ifdef SK_DEBUG
        gHeapUsage[VK_MAX_MEMORY_HEAPS] += alignedSize;
#endif

        return true;
    }

    // first try to find a subheap that fits our allocation request
    int bestFitIndex = -1;
    VkDeviceSize bestFitSize = 0x7FFFFFFF;
    for (auto i = 0; i < fSubHeaps.count(); ++i) {
        if (fSubHeaps[i]->memoryTypeIndex() == memoryTypeIndex &&
            fSubHeaps[i]->alignment() == alignment) {
            VkDeviceSize heapSize = fSubHeaps[i]->largestBlockSize();
            if (heapSize >= alignedSize && heapSize < bestFitSize) {
                bestFitIndex = i;
                bestFitSize = heapSize;
            }
        }
    }

    if (bestFitIndex >= 0) {
        SkASSERT(fSubHeaps[bestFitIndex]->alignment() == alignment);
        if (fSubHeaps[bestFitIndex]->alloc(size, alloc)) {
            fUsedSize += alloc->fSize;
            return true;
        }
        return false;
    }

    // need to allocate a new subheap
    std::unique_ptr<GrVkSubHeap>& subHeap = fSubHeaps.push_back();
    subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, heapIndex, fSubHeapSize, alignment));
    // try to recover from failed allocation by only allocating what we need
    if (subHeap->size() == 0) {
        VkDeviceSize alignedSize = align_size(size, alignment);
        subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, heapIndex, alignedSize, alignment));
        if (subHeap->size() == 0) {
            return false;
        }
    }
    fAllocSize += fSubHeapSize;
    if (subHeap->alloc(size, alloc)) {
        fUsedSize += alloc->fSize;
        return true;
    }

    return false;
}

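// Single-allocation strategy: each allocation gets its own subheap sized exactly to the
// aligned request, so freeing the resource leaves the whole subheap unallocated and available
// for reuse by a later request with the same memory type and alignment.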
bool GrVkHeap::singleAlloc(VkDeviceSize size, VkDeviceSize alignment,
                           uint32_t memoryTypeIndex, uint32_t heapIndex, GrVkAlloc* alloc) {
    VkDeviceSize alignedSize = align_size(size, alignment);

    // first try to find an unallocated subheap that fits our allocation request
    int bestFitIndex = -1;
    VkDeviceSize bestFitSize = 0x7FFFFFFF;
    for (auto i = 0; i < fSubHeaps.count(); ++i) {
        if (fSubHeaps[i]->memoryTypeIndex() == memoryTypeIndex &&
            fSubHeaps[i]->alignment() == alignment &&
            fSubHeaps[i]->unallocated()) {
            VkDeviceSize heapSize = fSubHeaps[i]->size();
            if (heapSize >= alignedSize && heapSize < bestFitSize) {
                bestFitIndex = i;
                bestFitSize = heapSize;
            }
        }
    }

    if (bestFitIndex >= 0) {
        SkASSERT(fSubHeaps[bestFitIndex]->alignment() == alignment);
        if (fSubHeaps[bestFitIndex]->alloc(size, alloc)) {
            fUsedSize += alloc->fSize;
            return true;
        }
        return false;
    }

    // need to allocate a new subheap
    std::unique_ptr<GrVkSubHeap>& subHeap = fSubHeaps.push_back();
    subHeap.reset(new GrVkSubHeap(fGpu, memoryTypeIndex, heapIndex, alignedSize, alignment));
    fAllocSize += alignedSize;
    if (subHeap->alloc(size, alloc)) {
        fUsedSize += alloc->fSize;
        return true;
    }

    return false;
}

bool GrVkHeap::free(const GrVkAlloc& alloc) {
    // a system-heap allocation was made directly with vkAllocateMemory, not from a subheap
    if (alloc.fUsesSystemHeap) {
        const GrVkInterface* iface = fGpu->vkInterface();
        GR_VK_CALL(iface, FreeMemory(fGpu->device(), alloc.fMemory, nullptr));
        return true;
    }

    for (auto i = 0; i < fSubHeaps.count(); ++i) {
        if (fSubHeaps[i]->memory() == alloc.fMemory) {
            fSubHeaps[i]->free(alloc);
            fUsedSize -= alloc.fSize;
            return true;
        }
    }

    return false;
}