/*
 * Copyright (c) 2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "gpu_buffer_vk.h"

#include <cinttypes>
#include <cstdint>
#include <vulkan/vulkan_core.h>

#include <base/math/mathf.h>

#if (RENDER_PERF_ENABLED == 1)
#include <core/implementation_uids.h>
#include <core/perf/intf_performance_data_manager.h>
#endif

#include <render/namespace.h>

#include "device/device.h"
#include "device/gpu_resource_desc_flag_validation.h"
#include "util/log.h"
#include "vulkan/device_vk.h"
#include "vulkan/gpu_memory_allocator_vk.h"
#include "vulkan/validate_vk.h"

using namespace BASE_NS;

RENDER_BEGIN_NAMESPACE()
namespace {
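// Rounds byteSize up to the next multiple of alignment; alignment is assumed to be a power of two.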
constexpr uint32_t GetAlignedByteSize(const uint32_t byteSize, const uint32_t alignment)
{
    return (byteSize + alignment - 1) & (~(alignment - 1));
}

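// Baseline buffer alignment: the larger of the device's storage and uniform buffer offset alignments.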
constexpr uint32_t GetMinBufferAlignment(const VkPhysicalDeviceLimits& limits)
{
    return Math::max(static_cast<uint32_t>(limits.minStorageBufferOffsetAlignment),
        static_cast<uint32_t>(limits.minUniformBufferOffsetAlignment));
}

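// Alignment used for host-mapped buffers: the larger of minMemoryMapAlignment and nonCoherentAtomSize.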
constexpr uint32_t GetMemoryMapAlignment(const VkPhysicalDeviceLimits& limits)
{
    return Math::max(
        static_cast<uint32_t>(limits.minMemoryMapAlignment), static_cast<uint32_t>(limits.nonCoherentAtomSize));
}

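// Packs the VMA allocation info and memory property flags into the platform memory struct.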
GpuResourceMemoryVk GetPlatMemory(const VmaAllocationInfo& allocationInfo, const VkMemoryPropertyFlags flags)
{
    return GpuResourceMemoryVk {
        allocationInfo.deviceMemory,
        allocationInfo.offset,
        allocationInfo.size,
        allocationInfo.pMappedData,
        allocationInfo.memoryType,
        flags,
    };
}

#if (RENDER_PERF_ENABLED == 1)
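// Reports buffer memory usage to the performance data manager; a negative alignedByteSize records a release.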
void RecordAllocation(
    PlatformGpuMemoryAllocator& gpuMemAllocator, const GpuBufferDesc& desc, const int64_t alignedByteSize)
{
    if (auto* inst = CORE_NS::GetInstance<CORE_NS::IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
        inst) {
        CORE_NS::IPerformanceDataManager* pdm = inst->Get("Memory");

        pdm->UpdateData("AllGpuBuffers", "GPU_BUFFER", alignedByteSize);
        const string poolDebugName = gpuMemAllocator.GetBufferPoolDebugName(desc);
        if (!poolDebugName.empty()) {
            pdm->UpdateData(poolDebugName, "GPU_BUFFER", alignedByteSize);
        }
    }
}
#endif
} // namespace

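// Regular buffer: persistent mapping requires both HOST_VISIBLE and HOST_COHERENT memory; a dynamic ring
// buffer allocates one slot per command buffering count.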
GpuBufferVk::GpuBufferVk(Device& device, const GpuBufferDesc& desc)
    : device_(device), desc_(desc),
      isPersistantlyMapped_(
          (desc.memoryPropertyFlags & MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
          (desc.memoryPropertyFlags & MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT)),
      isRingBuffer_(desc.engineCreationFlags & CORE_ENGINE_BUFFER_CREATION_DYNAMIC_RING_BUFFER),
      bufferingCount_(isRingBuffer_ ? device_.GetCommandBufferingCount() : 1u)
{
    CreateBufferImpl();
}

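// Acceleration structure variant: creates the backing buffer first and then, when ray tracing support is
// enabled, the VkAccelerationStructureKHR object and its device address on top of that buffer.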
GpuBufferVk::GpuBufferVk(Device& device, const GpuAccelerationStructureDesc& desc)
    : device_(device), desc_(desc.bufferDesc), descAccel_(desc),
      isPersistantlyMapped_(
          (desc_.memoryPropertyFlags & MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
          (desc_.memoryPropertyFlags & MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT)),
      isRingBuffer_(desc_.engineCreationFlags & CORE_ENGINE_BUFFER_CREATION_DYNAMIC_RING_BUFFER),
      isAccelerationStructure_(true), bufferingCount_(isRingBuffer_ ? device_.GetCommandBufferingCount() : 1u)
{
    CreateBufferImpl();

#if (RENDER_VULKAN_RT_ENABLED == 1)
    PLUGIN_ASSERT(desc.bufferDesc.usageFlags & CORE_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT);
    platAccel_.buffer = plat_.buffer;
    platAccel_.byteSize = plat_.fullByteSize;

    constexpr VkFlags createFlags = 0;
    const VkAccelerationStructureTypeKHR accelerationStructureType =
        static_cast<VkAccelerationStructureTypeKHR>(descAccel_.accelerationStructureType);
    VkAccelerationStructureCreateInfoKHR createInfo {
        VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR, // sType
        nullptr, // pNext
        createFlags, // createFlags
        plat_.buffer, // buffer
        0, // offset
        (VkDeviceSize)platAccel_.byteSize, // size
        accelerationStructureType, // type
        0, // deviceAddress
    };

    const DeviceVk& deviceVk = (const DeviceVk&)device_;
    const DevicePlatformDataVk& devicePlat = (const DevicePlatformDataVk&)device_.GetPlatformData();
    const VkDevice vkDevice = devicePlat.device;

    const DeviceVk::ExtFunctions& extFunctions = deviceVk.GetExtFunctions();
    if (extFunctions.vkCreateAccelerationStructureKHR && extFunctions.vkGetAccelerationStructureDeviceAddressKHR) {
        VALIDATE_VK_RESULT(extFunctions.vkCreateAccelerationStructureKHR(vkDevice, // device
            &createInfo, // pCreateInfo
            nullptr, // pAllocator
            &platAccel_.accelerationStructure)); // pAccelerationStructure

        if (platAccel_.accelerationStructure) {
            const VkAccelerationStructureDeviceAddressInfoKHR addressInfo {
                VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR, // sType
                nullptr, // pNext
                platAccel_.accelerationStructure, // accelerationStructure
            };
            platAccel_.deviceAddress = extFunctions.vkGetAccelerationStructureDeviceAddressKHR(vkDevice, // device
                &addressInfo); // pInfo
        }
    }
#endif
}

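// Unmaps if still mapped, destroys the optional acceleration structure, then releases the buffer and its memory.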
GpuBufferVk::~GpuBufferVk()
{
    if (isMapped_) {
        Unmap();
    }

#if (RENDER_VULKAN_RT_ENABLED == 1)
    if (isAccelerationStructure_ && platAccel_.accelerationStructure) {
        const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
        const DeviceVk& deviceVk = (const DeviceVk&)device_;
        const DeviceVk::ExtFunctions& extFunctions = deviceVk.GetExtFunctions();
        if (extFunctions.vkDestroyAccelerationStructureKHR) {
            extFunctions.vkDestroyAccelerationStructureKHR(device, // device
                platAccel_.accelerationStructure, // accelerationStructure
                nullptr); // pAllocator
        }
    }
#endif

    if (PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator(); gpuMemAllocator) {
        gpuMemAllocator->DestroyBuffer(plat_.buffer, mem_.allocation);
#if (RENDER_PERF_ENABLED == 1)
        RecordAllocation(*gpuMemAllocator, desc_, -static_cast<int64_t>(plat_.fullByteSize));
#endif
    }
#if (RENDER_DEBUG_GPU_RESOURCE_IDS == 1)
    PLUGIN_LOG_E("gpu buffer id <: 0x%" PRIxPTR, (uintptr_t)plat_.buffer);
#endif
}

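// Derives required/preferred memory property flags (required drops lazily allocated, host cached, and protected
// bits), computes the aligned per-slot and full allocation sizes, allocates the memory, and caches whether the
// selected memory type is host-mappable.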
void GpuBufferVk::CreateBufferImpl()
{
    PLUGIN_ASSERT_MSG(
        (isRingBuffer_ && isPersistantlyMapped_) || !isRingBuffer_, "dynamic ring buffer needs persistent mapping");

    auto memoryPropertyFlags = static_cast<VkMemoryPropertyFlags>(desc_.memoryPropertyFlags);
    const VkMemoryPropertyFlags requiredFlags =
        (memoryPropertyFlags & (~(VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
            CORE_MEMORY_PROPERTY_PROTECTED_BIT)));
    const VkMemoryPropertyFlags preferredFlags = memoryPropertyFlags;

    const auto& limits = static_cast<const DevicePlatformDataVk&>(device_.GetPlatformData())
                             .physicalDeviceProperties.physicalDeviceProperties.limits;
    // force min buffer alignment always
    const uint32_t minBufferAlignment = GetMinBufferAlignment(limits);
    const uint32_t minMapAlignment = (isRingBuffer_ || isPersistantlyMapped_) ? GetMemoryMapAlignment(limits) : 1u;
    plat_.bindMemoryByteSize = GetAlignedByteSize(desc_.byteSize, Math::max(minBufferAlignment, minMapAlignment));
    plat_.fullByteSize = plat_.bindMemoryByteSize * bufferingCount_;
    plat_.currentByteOffset = 0;
    plat_.usage = static_cast<VkBufferUsageFlags>(desc_.usageFlags);

    AllocateMemory(requiredFlags, preferredFlags);

    if (PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator(); gpuMemAllocator) {
        const auto memFlags =
            (VkMemoryPropertyFlags)gpuMemAllocator->GetMemoryTypeProperties(mem_.allocationInfo.memoryType);
        isMappable_ = (memFlags & VkMemoryPropertyFlagBits::VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0;
#if (RENDER_PERF_ENABLED == 1)
        RecordAllocation(*gpuMemAllocator, desc_, plat_.fullByteSize);
#endif
    }

#if (RENDER_DEBUG_GPU_RESOURCE_IDS == 1)
    PLUGIN_LOG_E("gpu buffer id >: 0x%" PRIxPTR, (uintptr_t)plat_.buffer);
#endif
}

const GpuBufferDesc& GpuBufferVk::GetDesc() const
{
    return desc_;
}

const GpuBufferPlatformDataVk& GpuBufferVk::GetPlatformData() const
{
    return plat_;
}

const GpuAccelerationStructureDesc& GpuBufferVk::GetDescAccelerationStructure() const
{
    return descAccel_;
}

const GpuAccelerationStructurePlatformDataVk& GpuBufferVk::GetPlatformDataAccelerationStructure() const
{
    return platAccel_;
}

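// Maps the buffer for writing. For ring buffers the current byte offset advances to the next buffering slot,
// so the returned pointer targets this frame's slot.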
void* GpuBufferVk::Map()
{
    if (!isMappable_) {
        PLUGIN_LOG_E("trying to map non-mappable gpu buffer");
        return nullptr;
    }
    if (isMapped_) {
        PLUGIN_LOG_E("gpu buffer already mapped");
        Unmap();
    }
    isMapped_ = true;

    if (isRingBuffer_) {
        plat_.currentByteOffset = (plat_.currentByteOffset + plat_.bindMemoryByteSize) % plat_.fullByteSize;
    }

    void* data { nullptr };
    if (isPersistantlyMapped_) {
        if (mem_.allocationInfo.pMappedData) {
            data = reinterpret_cast<uint8_t*>(mem_.allocationInfo.pMappedData) + plat_.currentByteOffset;
        }
    } else {
        PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator();
        if (gpuMemAllocator) {
            data = gpuMemAllocator->MapMemory(mem_.allocation);
        }
    }
    return data;
}

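// Maps the whole allocation from the start without advancing the ring buffer offset.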
void* GpuBufferVk::MapMemory()
{
    if (!isMappable_) {
        PLUGIN_LOG_E("trying to map non-mappable gpu buffer");
        return nullptr;
    }
    if (isMapped_) {
        PLUGIN_LOG_E("gpu buffer already mapped");
        Unmap();
    }
    isMapped_ = true;

    void* data { nullptr };
    if (isPersistantlyMapped_) {
        data = mem_.allocationInfo.pMappedData;
    } else {
        PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator();
        if (gpuMemAllocator) {
            data = gpuMemAllocator->MapMemory(mem_.allocation);
        }
    }
    return data;
}

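// Non-persistently mapped allocations are flushed before unmapping; persistently mapped (host-coherent) memory
// needs neither.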
void GpuBufferVk::Unmap() const
{
    if (!isMappable_) {
        PLUGIN_LOG_E("trying to unmap non-mappable gpu buffer");
    }
    if (!isMapped_) {
        PLUGIN_LOG_E("gpu buffer not mapped");
    }
    isMapped_ = false;

    if (!isPersistantlyMapped_) {
        PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator();
        if (gpuMemAllocator) {
            gpuMemAllocator->FlushAllocation(mem_.allocation, 0, VK_WHOLE_SIZE);
            gpuMemAllocator->UnmapMemory(mem_.allocation);
        }
    }
}

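// Creates the VkBuffer and its backing memory through VMA, optionally from a custom buffer pool; persistent
// mapping and host-visible usage add the corresponding VMA allocation flags.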
void GpuBufferVk::AllocateMemory(const VkMemoryPropertyFlags requiredFlags, const VkMemoryPropertyFlags preferredFlags)
{
    constexpr VkBufferCreateFlags bufferCreateFlags { 0 };
    const VkBufferCreateInfo bufferCreateInfo {
        VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // sType
        nullptr, // pNext
        bufferCreateFlags, // flags
        (VkDeviceSize)plat_.fullByteSize, // size
        plat_.usage, // usage
        VkSharingMode::VK_SHARING_MODE_EXCLUSIVE, // sharingMode
        0, // queueFamilyIndexCount
        nullptr, // pQueueFamilyIndices
    };

    VmaAllocationCreateFlags allocationCreateFlags { 0 };
    if (isPersistantlyMapped_) {
        allocationCreateFlags |= static_cast<VmaAllocationCreateFlags>(
            VmaAllocationCreateFlagBits::VMA_ALLOCATION_CREATE_MAPPED_BIT
#ifdef USE_NEW_VMA
            | VmaAllocationCreateFlagBits::VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT
#endif
        );
    }
    if (desc_.memoryPropertyFlags & CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
#ifdef USE_NEW_VMA
        allocationCreateFlags |= VmaAllocationCreateFlagBits::VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
#endif
    }

    PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator();
    PLUGIN_ASSERT(gpuMemAllocator);
    if (gpuMemAllocator) {
        // can be null handle -> default allocator
        const VmaPool customPool = gpuMemAllocator->GetBufferPool(desc_);
        const VmaAllocationCreateInfo allocationCreateInfo {
            allocationCreateFlags, // flags
#ifdef USE_NEW_VMA
            VmaMemoryUsage::VMA_MEMORY_USAGE_AUTO, // usage
#else
            VmaMemoryUsage::VMA_MEMORY_USAGE_UNKNOWN, // usage
#endif
            requiredFlags, // requiredFlags
            preferredFlags, // preferredFlags
            0, // memoryTypeBits
            customPool, // pool
            nullptr, // pUserData
#ifdef USE_NEW_VMA
            0.f, // priority
#endif
        };

        gpuMemAllocator->CreateBuffer(
            bufferCreateInfo, allocationCreateInfo, plat_.buffer, mem_.allocation, mem_.allocationInfo);
    }

    plat_.memory = GetPlatMemory(mem_.allocationInfo, preferredFlags);
}

RENDER_END_NAMESPACE()