/*
 * Copyright 2018 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "GrVkAMDMemoryAllocator.h"

#include "GrVkInterface.h"
#include "GrVkMemory.h"
#include "GrVkUtil.h"

GrVkAMDMemoryAllocator::GrVkAMDMemoryAllocator(VkPhysicalDevice physicalDevice,
                                               VkDevice device,
                                               sk_sp<const GrVkInterface> interface)
        : fAllocator(VK_NULL_HANDLE)
        , fInterface(std::move(interface))
        , fDevice(device) {
#define GR_COPY_FUNCTION(NAME) functions.vk##NAME = fInterface->fFunctions.f##NAME

    VmaVulkanFunctions functions;
    GR_COPY_FUNCTION(GetPhysicalDeviceProperties);
    GR_COPY_FUNCTION(GetPhysicalDeviceMemoryProperties);
    GR_COPY_FUNCTION(AllocateMemory);
    GR_COPY_FUNCTION(FreeMemory);
    GR_COPY_FUNCTION(MapMemory);
    GR_COPY_FUNCTION(UnmapMemory);
    GR_COPY_FUNCTION(BindBufferMemory);
    GR_COPY_FUNCTION(BindImageMemory);
    GR_COPY_FUNCTION(GetBufferMemoryRequirements);
    GR_COPY_FUNCTION(GetImageMemoryRequirements);
    GR_COPY_FUNCTION(CreateBuffer);
    GR_COPY_FUNCTION(DestroyBuffer);
    GR_COPY_FUNCTION(CreateImage);
    GR_COPY_FUNCTION(DestroyImage);

    // Skia currently doesn't support VK_KHR_dedicated_allocation
    functions.vkGetBufferMemoryRequirements2KHR = nullptr;
    functions.vkGetImageMemoryRequirements2KHR = nullptr;

    VmaAllocatorCreateInfo info;
    info.flags = 0;
    info.physicalDevice = physicalDevice;
    info.device = device;
    // Manual testing of dm runs shows less memory usage on average when using 64 here instead of
    // the default 256. dm also seems to run faster with 64, so we don't appear to be trading
    // speed for memory.
    info.preferredLargeHeapBlockSize = 64*1024*1024;
    info.pAllocationCallbacks = nullptr;
    info.pDeviceMemoryCallbacks = nullptr;
    info.frameInUseCount = 0;
    info.pHeapSizeLimit = nullptr;
    info.pVulkanFunctions = &functions;

    vmaCreateAllocator(&info, &fAllocator);
}

GrVkAMDMemoryAllocator::~GrVkAMDMemoryAllocator() {
    vmaDestroyAllocator(fAllocator);
    fAllocator = VK_NULL_HANDLE;
}

bool GrVkAMDMemoryAllocator::allocateMemoryForImage(VkImage image, AllocationPropertyFlags flags,
                                                    GrVkBackendMemory* backendMemory) {
    VmaAllocationCreateInfo info;
    info.flags = 0;
    info.usage = VMA_MEMORY_USAGE_UNKNOWN;
    info.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
    info.preferredFlags = 0;
    info.memoryTypeBits = 0;
    info.pool = VK_NULL_HANDLE;
    info.pUserData = nullptr;

    if (AllocationPropertyFlags::kDedicatedAllocation & flags) {
        info.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
    }

    if (AllocationPropertyFlags::kLazyAllocation & flags) {
        info.preferredFlags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT;
    }

    VmaAllocation allocation;
    VkResult result = vmaAllocateMemoryForImage(fAllocator, image, &info, &allocation, nullptr);
    if (VK_SUCCESS != result) {
        return false;
    }
    *backendMemory = (GrVkBackendMemory)allocation;
    return true;
}
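// Illustrative sketch only, not part of this file: roughly how calling code could pair
// allocateMemoryForImage() with getAllocInfo() and a bind call. The names `allocator`,
// `iface`, and `device`, and the use of AllocationPropertyFlags::kNone, are assumptions
// for the example, not verified against the actual callers in GrVkMemory.
//
//     GrVkBackendMemory memory = 0;
//     if (!allocator->allocateMemoryForImage(image, AllocationPropertyFlags::kNone, &memory)) {
//         return false;
//     }
//     GrVkAlloc alloc;
//     allocator->getAllocInfo(memory, &alloc);
//     GR_VK_CALL(iface, BindImageMemory(device, image, alloc.fMemory, alloc.fOffset));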
bool GrVkAMDMemoryAllocator::allocateMemoryForBuffer(VkBuffer buffer, BufferUsage usage,
                                                     AllocationPropertyFlags flags,
                                                     GrVkBackendMemory* backendMemory) {
    VmaAllocationCreateInfo info;
    info.flags = 0;
    info.usage = VMA_MEMORY_USAGE_UNKNOWN;
    info.memoryTypeBits = 0;
    info.pool = VK_NULL_HANDLE;
    info.pUserData = nullptr;

    switch (usage) {
        case BufferUsage::kGpuOnly:
            info.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
            info.preferredFlags = 0;
            break;
        case BufferUsage::kCpuOnly:
            info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
            info.preferredFlags = VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
            break;
        case BufferUsage::kCpuWritesGpuReads:
            // First attempt to find memory that is also cached.
            info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                                 VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
            info.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
            break;
        case BufferUsage::kGpuWritesCpuReads:
            info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
            info.preferredFlags = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                                  VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
            break;
    }

    if (AllocationPropertyFlags::kDedicatedAllocation & flags) {
        info.flags |= VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT;
    }

    if ((AllocationPropertyFlags::kLazyAllocation & flags) && BufferUsage::kGpuOnly == usage) {
        info.preferredFlags |= VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT;
    }

    if (AllocationPropertyFlags::kPersistentlyMapped & flags) {
        SkASSERT(BufferUsage::kGpuOnly != usage);
        info.flags |= VMA_ALLOCATION_CREATE_MAPPED_BIT;
    }

    VmaAllocation allocation;
    VkResult result = vmaAllocateMemoryForBuffer(fAllocator, buffer, &info, &allocation, nullptr);
    if (VK_SUCCESS != result) {
        if (usage == BufferUsage::kCpuWritesGpuReads) {
            // Try again, this time dropping the requirement for cached memory.
            info.requiredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
            result = vmaAllocateMemoryForBuffer(fAllocator, buffer, &info, &allocation, nullptr);
        }
    }
    if (VK_SUCCESS != result) {
        return false;
    }

    *backendMemory = (GrVkBackendMemory)allocation;
    return true;
}

void GrVkAMDMemoryAllocator::freeMemory(const GrVkBackendMemory& memoryHandle) {
    const VmaAllocation allocation = (const VmaAllocation)memoryHandle;
    vmaFreeMemory(fAllocator, allocation);
}

void GrVkAMDMemoryAllocator::getAllocInfo(const GrVkBackendMemory& memoryHandle,
                                          GrVkAlloc* alloc) const {
    const VmaAllocation allocation = (const VmaAllocation)memoryHandle;
    VmaAllocationInfo vmaInfo;
    vmaGetAllocationInfo(fAllocator, allocation, &vmaInfo);

    VkMemoryPropertyFlags memFlags;
    vmaGetMemoryTypeProperties(fAllocator, vmaInfo.memoryType, &memFlags);

    uint32_t flags = 0;
    if (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT & memFlags) {
        flags |= GrVkAlloc::kMappable_Flag;
    }
    if (!SkToBool(VK_MEMORY_PROPERTY_HOST_COHERENT_BIT & memFlags)) {
        flags |= GrVkAlloc::kNoncoherent_Flag;
    }

    alloc->fMemory = vmaInfo.deviceMemory;
    alloc->fOffset = vmaInfo.offset;
    alloc->fSize = vmaInfo.size;
    alloc->fFlags = flags;
    alloc->fBackendMemory = memoryHandle;

    // TODO: Remove this hack once the AMD allocator is able to handle the alignment of
    // noncoherent memory itself.
    if (!SkToBool(VK_MEMORY_PROPERTY_HOST_COHERENT_BIT & memFlags)) {
        // This is a hack to say that the allocation size is actually larger than it is. This is to
        // make sure when we are flushing and invalidating noncoherent memory we have a size that
        // is aligned to the nonCoherentAtomSize. This is safe for three reasons. First, the total
        // size of the VkDeviceMemory we allocate will always be a multiple of the max possible
        // alignment (currently 256). Second, all suballocations are aligned to offsets that are
        // multiples of 256. And finally, the allocator we are using always maps the entire
        // VkDeviceMemory, so the range we'll be flushing/invalidating will be mapped. So our new
        // fake allocation size will always fit into the VkDeviceMemory, will never push it into
        // another suballocation, and will always be mapped when map is called.
        const VkPhysicalDeviceProperties* devProps;
        vmaGetPhysicalDeviceProperties(fAllocator, &devProps);
        VkDeviceSize alignment = devProps->limits.nonCoherentAtomSize;

        alloc->fSize = (alloc->fSize + alignment - 1) & ~(alignment - 1);
    }
}
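// Worked example of the round-up above (values assumed for illustration): if
// nonCoherentAtomSize were 64 and vmaInfo.size were 1000, the reported size becomes
// (1000 + 63) & ~63 == 1024, so any range derived from fSize stays atom-aligned.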
void* GrVkAMDMemoryAllocator::mapMemory(const GrVkBackendMemory& memoryHandle) {
    const VmaAllocation allocation = (const VmaAllocation)memoryHandle;
    void* mapPtr;
    vmaMapMemory(fAllocator, allocation, &mapPtr);
    return mapPtr;
}

void GrVkAMDMemoryAllocator::unmapMemory(const GrVkBackendMemory& memoryHandle) {
    const VmaAllocation allocation = (const VmaAllocation)memoryHandle;
    vmaUnmapMemory(fAllocator, allocation);
}

void GrVkAMDMemoryAllocator::flushMappedMemory(const GrVkBackendMemory& memoryHandle,
                                               VkDeviceSize offset, VkDeviceSize size) {
    GrVkAlloc info;
    this->getAllocInfo(memoryHandle, &info);

    if (GrVkAlloc::kNoncoherent_Flag & info.fFlags) {
        // Query the nonCoherentAtomSize so the flushed range can be aligned to it.
        const VkPhysicalDeviceProperties* physDevProps;
        vmaGetPhysicalDeviceProperties(fAllocator, &physDevProps);
        VkDeviceSize alignment = physDevProps->limits.nonCoherentAtomSize;

        VkMappedMemoryRange mappedMemoryRange;
        GrVkMemory::GetNonCoherentMappedMemoryRange(info, offset, size, alignment,
                                                    &mappedMemoryRange);
        GR_VK_CALL(fInterface, FlushMappedMemoryRanges(fDevice, 1, &mappedMemoryRange));
    }
}

void GrVkAMDMemoryAllocator::invalidateMappedMemory(const GrVkBackendMemory& memoryHandle,
                                                    VkDeviceSize offset, VkDeviceSize size) {
    GrVkAlloc info;
    this->getAllocInfo(memoryHandle, &info);

    if (GrVkAlloc::kNoncoherent_Flag & info.fFlags) {
        // Query the nonCoherentAtomSize so the invalidated range can be aligned to it.
        const VkPhysicalDeviceProperties* physDevProps;
        vmaGetPhysicalDeviceProperties(fAllocator, &physDevProps);
        VkDeviceSize alignment = physDevProps->limits.nonCoherentAtomSize;

        VkMappedMemoryRange mappedMemoryRange;
        GrVkMemory::GetNonCoherentMappedMemoryRange(info, offset, size, alignment,
                                                    &mappedMemoryRange);
        GR_VK_CALL(fInterface, InvalidateMappedMemoryRanges(fDevice, 1, &mappedMemoryRange));
    }
}

uint64_t GrVkAMDMemoryAllocator::totalUsedMemory() const {
    VmaStats stats;
    vmaCalculateStats(fAllocator, &stats);
    return stats.total.usedBytes;
}

uint64_t GrVkAMDMemoryAllocator::totalAllocatedMemory() const {
    VmaStats stats;
    vmaCalculateStats(fAllocator, &stats);
    return stats.total.usedBytes + stats.total.unusedBytes;
}
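// Illustrative sketch, assuming a caller that already holds an `allocator` and a
// CPU-visible `memory` handle: the map/write/flush/unmap cycle the entry points above
// support. For host-coherent allocations, flushMappedMemory() is a no-op because
// kNoncoherent_Flag is never set on them.
//
//     void* ptr = allocator->mapMemory(memory);
//     memcpy(ptr, srcData, srcSize);
//     allocator->flushMappedMemory(memory, 0, srcSize);
//     allocator->unmapMemory(memory);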