/*
 * Copyright (c) 2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "gpu_buffer_vk.h"

#include <cinttypes>
#include <cstdint>
#include <vulkan/vulkan_core.h>

#include <base/math/mathf.h>

#if (RENDER_PERF_ENABLED == 1)
#include <core/implementation_uids.h>
#include <core/perf/intf_performance_data_manager.h>
#endif

#include <render/namespace.h>

#include "device/device.h"
#include "device/gpu_resource_desc_flag_validation.h"
#include "util/log.h"
#include "vulkan/device_vk.h"
#include "vulkan/gpu_memory_allocator_vk.h"
#include "vulkan/validate_vk.h"
using namespace BASE_NS;

RENDER_BEGIN_NAMESPACE()
namespace {
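// Rounds byteSize up to the next multiple of alignment; the bitmask form assumes alignment is a power of two.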
constexpr uint32_t GetAlignedByteSize(const uint32_t byteSize, const uint32_t alignment)
{
    return (byteSize + alignment - 1) & (~(alignment - 1));
}

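// The stricter of the storage and uniform buffer offset alignments; buffer offsets must honor both.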
constexpr uint32_t GetMinBufferAlignment(const VkPhysicalDeviceLimits& limits)
{
    return Math::max(static_cast<uint32_t>(limits.minStorageBufferOffsetAlignment),
        static_cast<uint32_t>(limits.minUniformBufferOffsetAlignment));
}

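// Mapped ranges must respect both the minimum memory map alignment and the non-coherent atom size (used for flushes).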
constexpr uint32_t GetMemoryMapAlignment(const VkPhysicalDeviceLimits& limits)
{
    return Math::max(
        static_cast<uint32_t>(limits.minMemoryMapAlignment), static_cast<uint32_t>(limits.nonCoherentAtomSize));
}

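// Packs the VMA allocation info and memory property flags into the engine-facing platform memory struct.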
GpuResourceMemoryVk GetPlatMemory(const VmaAllocationInfo& allocationInfo, const VkMemoryPropertyFlags flags)
{
    return GpuResourceMemoryVk {
        allocationInfo.deviceMemory,
        allocationInfo.offset,
        allocationInfo.size,
        allocationInfo.pMappedData,
        allocationInfo.memoryType,
        flags,
    };
}

#if (RENDER_PERF_ENABLED == 1)
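// Reports the buffer (de)allocation to the "Memory" performance data manager; a negative alignedByteSize records a free.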
void RecordAllocation(
    PlatformGpuMemoryAllocator& gpuMemAllocator, const GpuBufferDesc& desc, const int64_t alignedByteSize)
{
    if (auto* inst = CORE_NS::GetInstance<CORE_NS::IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
        inst) {
        CORE_NS::IPerformanceDataManager* pdm = inst->Get("Memory");

        pdm->UpdateData("AllGpuBuffers", "GPU_BUFFER", alignedByteSize);
        const string poolDebugName = gpuMemAllocator.GetBufferPoolDebugName(desc);
        if (!poolDebugName.empty()) {
            pdm->UpdateData(poolDebugName, "GPU_BUFFER", alignedByteSize);
        }
    }
}
#endif
} // namespace

GpuBufferVk::GpuBufferVk(Device& device, const GpuBufferDesc& desc)
    : device_(device), desc_(desc),
      isPersistantlyMapped_(
          (desc.memoryPropertyFlags & MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
          (desc.memoryPropertyFlags & MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT)),
      isRingBuffer_(desc.engineCreationFlags & CORE_ENGINE_BUFFER_CREATION_DYNAMIC_RING_BUFFER),
      bufferingCount_(isRingBuffer_ ? device_.GetCommandBufferingCount() : 1u)
{
    CreateBufferImpl();
}

GpuBufferVk::GpuBufferVk(Device& device, const GpuAccelerationStructureDesc& desc)
    : device_(device), desc_(desc.bufferDesc), descAccel_(desc),
      isPersistantlyMapped_(
          (desc_.memoryPropertyFlags & MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
          (desc_.memoryPropertyFlags & MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT)),
      isRingBuffer_(desc_.engineCreationFlags & CORE_ENGINE_BUFFER_CREATION_DYNAMIC_RING_BUFFER),
      isAccelerationStructure_(true), bufferingCount_(isRingBuffer_ ? device_.GetCommandBufferingCount() : 1u)
{
    CreateBufferImpl();

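    // With ray tracing enabled, wrap the backing buffer in a VkAccelerationStructureKHR and query its
    // device address (only when the KHR extension entry points are available).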
#if (RENDER_VULKAN_RT_ENABLED == 1)
    PLUGIN_ASSERT(desc.bufferDesc.usageFlags & CORE_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT);
    platAccel_.buffer = plat_.buffer;
    platAccel_.byteSize = plat_.fullByteSize;

    constexpr VkFlags createFlags = 0;
    const VkAccelerationStructureTypeKHR accelerationStructureType =
        static_cast<VkAccelerationStructureTypeKHR>(descAccel_.accelerationStructureType);
    VkAccelerationStructureCreateInfoKHR createInfo {
        VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR, // sType
        nullptr,                                                  // pNext
        createFlags,                                              // createFlags
        plat_.buffer,                                             // buffer
        0,                                                        // offset
        (VkDeviceSize)platAccel_.byteSize,                        // size
        accelerationStructureType,                                // type
        0,                                                        // deviceAddress
    };

    const DeviceVk& deviceVk = (const DeviceVk&)device_;
    const DevicePlatformDataVk& devicePlat = (const DevicePlatformDataVk&)device_.GetPlatformData();
    const VkDevice vkDevice = devicePlat.device;

    const DeviceVk::ExtFunctions& extFunctions = deviceVk.GetExtFunctions();
    if (extFunctions.vkCreateAccelerationStructureKHR && extFunctions.vkGetAccelerationStructureDeviceAddressKHR) {
        VALIDATE_VK_RESULT(extFunctions.vkCreateAccelerationStructureKHR(vkDevice, // device
            &createInfo,                                                           // pCreateInfo
            nullptr,                                                               // pAllocator
            &platAccel_.accelerationStructure));                                   // pAccelerationStructure

        if (platAccel_.accelerationStructure) {
            const VkAccelerationStructureDeviceAddressInfoKHR addressInfo {
                VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR, // sType
                nullptr,                                                          // pNext
                platAccel_.accelerationStructure,                                 // accelerationStructure
            };
            platAccel_.deviceAddress = extFunctions.vkGetAccelerationStructureDeviceAddressKHR(vkDevice, // device
                &addressInfo);                                                                           // pInfo
        }
    }
#endif
}

GpuBufferVk::~GpuBufferVk()
{
    if (isMapped_) {
        Unmap();
    }

#if (RENDER_VULKAN_RT_ENABLED == 1)
    if (isAccelerationStructure_ && platAccel_.accelerationStructure) {
        const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
        const DeviceVk& deviceVk = (const DeviceVk&)device_;
        const DeviceVk::ExtFunctions& extFunctions = deviceVk.GetExtFunctions();
        if (extFunctions.vkDestroyAccelerationStructureKHR) {
            extFunctions.vkDestroyAccelerationStructureKHR(device, // device
                platAccel_.accelerationStructure,                  // accelerationStructure
                nullptr);                                          // pAllocator
        }
    }
#endif

    if (PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator(); gpuMemAllocator) {
        gpuMemAllocator->DestroyBuffer(plat_.buffer, mem_.allocation);
#if (RENDER_PERF_ENABLED == 1)
        RecordAllocation(*gpuMemAllocator, desc_, -static_cast<int64_t>(plat_.fullByteSize));
#endif
    }
#if (RENDER_DEBUG_GPU_RESOURCE_IDS == 1)
    PLUGIN_LOG_E("gpu buffer id <: 0x%" PRIxPTR, (uintptr_t)plat_.buffer);
#endif
}

void GpuBufferVk::CreateBufferImpl()
{
    PLUGIN_ASSERT_MSG(
        (isRingBuffer_ && isPersistantlyMapped_) || !isRingBuffer_, "dynamic ring buffer needs persistent mapping");

    auto memoryPropertyFlags = static_cast<VkMemoryPropertyFlags>(desc_.memoryPropertyFlags);
    const VkMemoryPropertyFlags requiredFlags =
        (memoryPropertyFlags & (~(VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
                                   CORE_MEMORY_PROPERTY_PROTECTED_BIT)));
    const VkMemoryPropertyFlags preferredFlags = memoryPropertyFlags;

    const auto& limits = static_cast<const DevicePlatformDataVk&>(device_.GetPlatformData())
                             .physicalDeviceProperties.physicalDeviceProperties.limits;
    // force min buffer alignment always
    const uint32_t minBufferAlignment = GetMinBufferAlignment(limits);
    const uint32_t minMapAlignment = (isRingBuffer_ || isPersistantlyMapped_) ? GetMemoryMapAlignment(limits) : 1u;
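    // A ring buffer reserves one aligned slice per buffered frame; Map() cycles currentByteOffset through the slices.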
    plat_.bindMemoryByteSize = GetAlignedByteSize(desc_.byteSize, Math::max(minBufferAlignment, minMapAlignment));
    plat_.fullByteSize = plat_.bindMemoryByteSize * bufferingCount_;
    plat_.currentByteOffset = 0;
    plat_.usage = static_cast<VkBufferUsageFlags>(desc_.usageFlags);

    AllocateMemory(requiredFlags, preferredFlags);

    if (PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator(); gpuMemAllocator) {
        const auto memFlags =
            (VkMemoryPropertyFlags)gpuMemAllocator->GetMemoryTypeProperties(mem_.allocationInfo.memoryType);
        isMappable_ = (memFlags & VkMemoryPropertyFlagBits::VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0;
#if (RENDER_PERF_ENABLED == 1)
        RecordAllocation(*gpuMemAllocator, desc_, plat_.fullByteSize);
#endif
    }

#if (RENDER_DEBUG_GPU_RESOURCE_IDS == 1)
    PLUGIN_LOG_E("gpu buffer id >: 0x%" PRIxPTR, (uintptr_t)plat_.buffer);
#endif
}

const GpuBufferDesc& GpuBufferVk::GetDesc() const
{
    return desc_;
}

const GpuBufferPlatformDataVk& GpuBufferVk::GetPlatformData() const
{
    return plat_;
}

const GpuAccelerationStructureDesc& GpuBufferVk::GetDescAccelerationStructure() const
{
    return descAccel_;
}

const GpuAccelerationStructurePlatformDataVk& GpuBufferVk::GetPlatformDataAccelerationStructure() const
{
    return platAccel_;
}

void* GpuBufferVk::Map()
{
    if (!isMappable_) {
        PLUGIN_LOG_E("trying to map non-mappable gpu buffer");
        return nullptr;
    }
    if (isMapped_) {
        PLUGIN_LOG_E("gpu buffer already mapped");
        Unmap();
    }
    isMapped_ = true;

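    // Ring buffers hand out the next per-frame slice on every Map().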
    if (isRingBuffer_) {
        plat_.currentByteOffset = (plat_.currentByteOffset + plat_.bindMemoryByteSize) % plat_.fullByteSize;
    }

    void* data { nullptr };
    if (isPersistantlyMapped_) {
        if (mem_.allocationInfo.pMappedData) {
            data = reinterpret_cast<uint8_t*>(mem_.allocationInfo.pMappedData) + plat_.currentByteOffset;
        }
    } else {
        PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator();
        if (gpuMemAllocator) {
            data = gpuMemAllocator->MapMemory(mem_.allocation);
        }
    }
    return data;
}

void* GpuBufferVk::MapMemory()
{
    if (!isMappable_) {
        PLUGIN_LOG_E("trying to map non-mappable gpu buffer");
        return nullptr;
    }
    if (isMapped_) {
        PLUGIN_LOG_E("gpu buffer already mapped");
        Unmap();
    }
    isMapped_ = true;

    void* data { nullptr };
    if (isPersistantlyMapped_) {
        data = mem_.allocationInfo.pMappedData;
    } else {
        PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator();
        if (gpuMemAllocator) {
            data = gpuMemAllocator->MapMemory(mem_.allocation);
        }
    }
    return data;
}

void GpuBufferVk::Unmap() const
{
    if (!isMappable_) {
        PLUGIN_LOG_E("trying to unmap non-mappable gpu buffer");
    }
    if (!isMapped_) {
        PLUGIN_LOG_E("gpu buffer not mapped");
    }
    isMapped_ = false;

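    // On-demand mapped memory is not guaranteed to be coherent, so flush the allocation before unmapping.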
    if (!isPersistantlyMapped_) {
        PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator();
        if (gpuMemAllocator) {
            gpuMemAllocator->FlushAllocation(mem_.allocation, 0, VK_WHOLE_SIZE);
            gpuMemAllocator->UnmapMemory(mem_.allocation);
        }
    }
}

void GpuBufferVk::AllocateMemory(const VkMemoryPropertyFlags requiredFlags, const VkMemoryPropertyFlags preferredFlags)
{
    constexpr VkBufferCreateFlags bufferCreateFlags { 0 };
    const VkBufferCreateInfo bufferCreateInfo {
        VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,     // sType
        nullptr,                                  // pNext
        bufferCreateFlags,                        // flags
        (VkDeviceSize)plat_.fullByteSize,         // size
        plat_.usage,                              // usage
        VkSharingMode::VK_SHARING_MODE_EXCLUSIVE, // sharingMode
        0,                                        // queueFamilyIndexCount
        nullptr,                                  // pQueueFamilyIndices
    };

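    // Persistently mapped buffers keep the VMA allocation mapped for their lifetime; with the newer VMA API,
    // host-visible buffers also get the sequential-write host-access hint.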
    VmaAllocationCreateFlags allocationCreateFlags { 0 };
    if (isPersistantlyMapped_) {
        allocationCreateFlags |= static_cast<VmaAllocationCreateFlags>(
            VmaAllocationCreateFlagBits::VMA_ALLOCATION_CREATE_MAPPED_BIT
#ifdef USE_NEW_VMA
            | VmaAllocationCreateFlagBits::VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT
#endif
        );
    }
    if (desc_.memoryPropertyFlags & CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
#ifdef USE_NEW_VMA
        allocationCreateFlags |= VmaAllocationCreateFlagBits::VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
#endif
    }

    PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator();
    PLUGIN_ASSERT(gpuMemAllocator);
    if (gpuMemAllocator) {
        // can be null handle -> default allocator
        const VmaPool customPool = gpuMemAllocator->GetBufferPool(desc_);
        const VmaAllocationCreateInfo allocationCreateInfo {
            allocationCreateFlags, // flags
#ifdef USE_NEW_VMA
            VmaMemoryUsage::VMA_MEMORY_USAGE_AUTO, // usage
#else
            VmaMemoryUsage::VMA_MEMORY_USAGE_UNKNOWN, // usage
#endif
            requiredFlags,  // requiredFlags
            preferredFlags, // preferredFlags
            0,              // memoryTypeBits
            customPool,     // pool
            nullptr,        // pUserData
#ifdef USE_NEW_VMA
            0.f, // priority
#endif
        };

        gpuMemAllocator->CreateBuffer(
            bufferCreateInfo, allocationCreateInfo, plat_.buffer, mem_.allocation, mem_.allocationInfo);
    }

    plat_.memory = GetPlatMemory(mem_.allocationInfo, preferredFlags);
}

RENDER_END_NAMESPACE()