/*
 * Copyright (c) 2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "gpu_buffer_vk.h"

#include <cinttypes>
#include <cstdint>
#include <vulkan/vulkan_core.h>

#include <base/math/mathf.h>

#if (RENDER_PERF_ENABLED == 1)
#include <core/implementation_uids.h>
#include <core/perf/intf_performance_data_manager.h>
#endif

#include <render/namespace.h>

#include "device/device.h"
#include "device/gpu_resource_desc_flag_validation.h"
#include "util/log.h"
#include "vulkan/device_vk.h"
#include "vulkan/gpu_memory_allocator_vk.h"
#include "vulkan/validate_vk.h"
using namespace BASE_NS;

RENDER_BEGIN_NAMESPACE()
namespace {
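// Rounds byteSize up to the next multiple of alignment; the bit trick assumes alignment is a power of two.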
constexpr uint32_t GetAlignedByteSize(const uint32_t byteSize, const uint32_t alignment)
{
    return (byteSize + alignment - 1) & (~(alignment - 1));
}

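// The same buffer may be bound as either a uniform or a storage buffer, so the stricter of the two offset alignments is used.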
constexpr uint32_t GetMinBufferAlignment(const VkPhysicalDeviceLimits& limits)
{
    return Math::max(static_cast<uint32_t>(limits.minStorageBufferOffsetAlignment),
        static_cast<uint32_t>(limits.minUniformBufferOffsetAlignment));
}

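// Mapped ranges must respect both the minimum map alignment and the non-coherent atom size used when flushing.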
constexpr uint32_t GetMemoryMapAlignment(const VkPhysicalDeviceLimits& limits)
{
    return Math::max(
        static_cast<uint32_t>(limits.minMemoryMapAlignment), static_cast<uint32_t>(limits.nonCoherentAtomSize));
}

GpuResourceMemoryVk GetPlatMemory(const VmaAllocationInfo& allocationInfo, const VkMemoryPropertyFlags flags)
{
    return GpuResourceMemoryVk {
        allocationInfo.deviceMemory,
        allocationInfo.offset,
        allocationInfo.size,
        allocationInfo.pMappedData,
        allocationInfo.memoryType,
        flags,
    };
}

#if (RENDER_VULKAN_RT_ENABLED == 1)
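// Returns the GPU device address of the buffer (used by ray tracing, e.g. as acceleration structure build input).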
inline uint64_t GetBufferDeviceAddress(const VkDevice device, const VkBuffer buffer)
{
    const VkBufferDeviceAddressInfo addressInfo {
        VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // sType
        nullptr, // pNext
        buffer, // buffer
    };
    return vkGetBufferDeviceAddress(device, &addressInfo);
}
#endif

#if (RENDER_PERF_ENABLED == 1)
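// Reports a buffer (de)allocation to the performance data manager; deallocations pass a negative byte size.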
void RecordAllocation(
    PlatformGpuMemoryAllocator& gpuMemAllocator, const GpuBufferDesc& desc, const int64_t alignedByteSize)
{
    if (auto* inst = CORE_NS::GetInstance<CORE_NS::IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
        inst) {
        CORE_NS::IPerformanceDataManager* pdm = inst->Get("Memory");

        pdm->UpdateData("AllGpuBuffers", "GPU_BUFFER", alignedByteSize,
            CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::BYTES);
        const string poolDebugName = gpuMemAllocator.GetBufferPoolDebugName(desc);
        if (!poolDebugName.empty()) {
            pdm->UpdateData(poolDebugName, "GPU_BUFFER", alignedByteSize,
                CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::BYTES);
        }
    }
}
#endif
} // namespace

GpuBufferVk::GpuBufferVk(Device& device, const GpuBufferDesc& desc)
    : device_(device), desc_(desc),
      isPersistantlyMapped_(
          (desc.memoryPropertyFlags & MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
          (desc.memoryPropertyFlags & MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT)),
      isRingBuffer_(desc.engineCreationFlags & CORE_ENGINE_BUFFER_CREATION_DYNAMIC_RING_BUFFER),
      bufferingCount_(isRingBuffer_ ? device_.GetCommandBufferingCount() : 1u)
{
    CreateBufferImpl();
}

GpuBufferVk::GpuBufferVk(Device& device, const GpuAccelerationStructureDesc& desc)
    : device_(device), desc_(desc.bufferDesc), descAccel_(desc),
      isPersistantlyMapped_(
          (desc_.memoryPropertyFlags & MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
          (desc_.memoryPropertyFlags & MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT)),
      isRingBuffer_(desc_.engineCreationFlags & CORE_ENGINE_BUFFER_CREATION_DYNAMIC_RING_BUFFER),
      isAccelerationStructure_(true), bufferingCount_(isRingBuffer_ ? device_.GetCommandBufferingCount() : 1u)
{
    CreateBufferImpl();

#if (RENDER_VULKAN_RT_ENABLED == 1)
    PLUGIN_ASSERT(desc.bufferDesc.usageFlags & CORE_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT);
    platAccel_.buffer = plat_.buffer;
    platAccel_.byteSize = plat_.fullByteSize;

    constexpr VkFlags createFlags = 0;
    const VkAccelerationStructureTypeKHR accelerationStructureType =
        static_cast<VkAccelerationStructureTypeKHR>(descAccel_.accelerationStructureType);
    VkAccelerationStructureCreateInfoKHR createInfo {
        VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR, // sType
        nullptr, // pNext
        createFlags, // createFlags
        plat_.buffer, // buffer
        0, // offset
        (VkDeviceSize)platAccel_.byteSize, // size
        accelerationStructureType, // type
        0, // deviceAddress
    };

    const DeviceVk& deviceVk = (const DeviceVk&)device_;
    const DevicePlatformDataVk& devicePlat = (const DevicePlatformDataVk&)device_.GetPlatformData();
    const VkDevice vkDevice = devicePlat.device;

    const DeviceVk::ExtFunctions& extFunctions = deviceVk.GetExtFunctions();
    if (extFunctions.vkCreateAccelerationStructureKHR && extFunctions.vkGetAccelerationStructureDeviceAddressKHR) {
        VALIDATE_VK_RESULT(extFunctions.vkCreateAccelerationStructureKHR(vkDevice, // device
            &createInfo, // pCreateInfo
            nullptr, // pAllocator
            &platAccel_.accelerationStructure)); // pAccelerationStructure

        if (platAccel_.accelerationStructure) {
            const VkAccelerationStructureDeviceAddressInfoKHR addressInfo {
                VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR, // sType
                nullptr, // pNext
                platAccel_.accelerationStructure, // accelerationStructure
            };
            platAccel_.deviceAddress = extFunctions.vkGetAccelerationStructureDeviceAddressKHR(vkDevice, // device
                &addressInfo); // pInfo
        }
    }
#endif
}

GpuBufferVk::GpuBufferVk(Device& device, const BackendSpecificBufferDesc& desc)
    : device_(device), desc_(GetBufferDescFromHwBufferDesc(static_cast<const BufferDescVk&>(desc).platformHwBuffer))
{
    plat_.platformHwBuffer = static_cast<const BufferDescVk&>(desc).platformHwBuffer;
    if (plat_.platformHwBuffer) {
        CreatePlatformHwBuffer();
    }
}

GpuBufferVk::~GpuBufferVk()
{
    if (isMapped_) {
        Unmap();
    }

#if (RENDER_VULKAN_RT_ENABLED == 1)
    if (isAccelerationStructure_ && platAccel_.accelerationStructure) {
        const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
        const DeviceVk& deviceVk = (const DeviceVk&)device_;
        const DeviceVk::ExtFunctions& extFunctions = deviceVk.GetExtFunctions();
        if (extFunctions.vkDestroyAccelerationStructureKHR) {
            extFunctions.vkDestroyAccelerationStructureKHR(device, // device
                platAccel_.accelerationStructure, // accelerationStructure
                nullptr); // pAllocator
        }
    }
#endif
    if (ownsResources_) {
        if (PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator(); gpuMemAllocator) {
            gpuMemAllocator->DestroyBuffer(plat_.buffer, mem_.allocation);
#if (RENDER_PERF_ENABLED == 1)
            RecordAllocation(*gpuMemAllocator, desc_, -static_cast<int64_t>(plat_.fullByteSize));
#endif
        }
    }
    if (plat_.platformHwBuffer) {
        DestroyPlatformHwBuffer();
    }

#if (RENDER_DEBUG_GPU_RESOURCE_IDS == 1)
    PLUGIN_LOG_E("gpu buffer id <: 0x%" PRIxPTR, (uintptr_t)plat_.buffer);
#endif
}
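// Creates the VkBuffer and its backing memory. For dynamic ring buffers the aligned byte size is replicated
// bufferingCount_ times inside a single allocation, and Map() cycles currentByteOffset through those slots.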
void GpuBufferVk::CreateBufferImpl()
{
    PLUGIN_ASSERT_MSG(
        (isRingBuffer_ && isPersistantlyMapped_) || !isRingBuffer_, "dynamic ring buffer needs persistent mapping");

    auto memoryPropertyFlags = static_cast<VkMemoryPropertyFlags>(desc_.memoryPropertyFlags);
    const VkMemoryPropertyFlags requiredFlags =
        (memoryPropertyFlags & (~(VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
                                    CORE_MEMORY_PROPERTY_PROTECTED_BIT)));
    const VkMemoryPropertyFlags preferredFlags = memoryPropertyFlags;

    const auto& limits = static_cast<const DevicePlatformDataVk&>(device_.GetPlatformData())
                             .physicalDeviceProperties.physicalDeviceProperties.limits;
    // force min buffer alignment always
    const uint32_t minBufferAlignment = GetMinBufferAlignment(limits);
    const uint32_t minMapAlignment = (isRingBuffer_ || isPersistantlyMapped_) ? GetMemoryMapAlignment(limits) : 1u;
    plat_.bindMemoryByteSize = GetAlignedByteSize(desc_.byteSize, Math::max(minBufferAlignment, minMapAlignment));
    plat_.fullByteSize = plat_.bindMemoryByteSize * bufferingCount_;
    plat_.currentByteOffset = 0;
    plat_.usage = static_cast<VkBufferUsageFlags>(desc_.usageFlags);

    AllocateMemory(requiredFlags, preferredFlags);

#if (RENDER_VULKAN_RT_ENABLED == 1)
    if (plat_.usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) {
        const DevicePlatformDataVk& devicePlat = (const DevicePlatformDataVk&)device_.GetPlatformData();
        plat_.deviceAddress = GetBufferDeviceAddress(devicePlat.device, plat_.buffer);
    }
#endif

    if (PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator(); gpuMemAllocator) {
        const auto memFlags =
            (VkMemoryPropertyFlags)gpuMemAllocator->GetMemoryTypeProperties(mem_.allocationInfo.memoryType);
        isMappable_ = (memFlags & VkMemoryPropertyFlagBits::VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0;
#if (RENDER_PERF_ENABLED == 1)
        RecordAllocation(*gpuMemAllocator, desc_, plat_.fullByteSize);
#endif
    }

#if (RENDER_DEBUG_GPU_RESOURCE_IDS == 1)
    PLUGIN_LOG_E("gpu buffer id >: 0x%" PRIxPTR, (uintptr_t)plat_.buffer);
#endif
}

const GpuBufferDesc& GpuBufferVk::GetDesc() const
{
    return desc_;
}

const GpuBufferPlatformDataVk& GpuBufferVk::GetPlatformData() const
{
    return plat_;
}

const GpuAccelerationStructureDesc& GpuBufferVk::GetDescAccelerationStructure() const
{
    return descAccel_;
}

const GpuAccelerationStructurePlatformDataVk& GpuBufferVk::GetPlatformDataAccelerationStructure() const
{
    return platAccel_;
}
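// Maps the buffer for CPU access. For ring buffers each Map() advances currentByteOffset to the next
// buffering slot and the returned pointer points at that slot.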
void* GpuBufferVk::Map()
{
    if (!isMappable_) {
        PLUGIN_LOG_E("trying to map non-mappable gpu buffer");
        return nullptr;
    }
    if (isMapped_) {
        PLUGIN_LOG_E("gpu buffer already mapped");
        Unmap();
    }
    isMapped_ = true;

    if (isRingBuffer_) {
        plat_.currentByteOffset = (plat_.currentByteOffset + plat_.bindMemoryByteSize) % plat_.fullByteSize;
    }

    void* data { nullptr };
    if (isPersistantlyMapped_) {
        if (mem_.allocationInfo.pMappedData) {
            data = reinterpret_cast<uint8_t*>(mem_.allocationInfo.pMappedData) + plat_.currentByteOffset;
        }
    } else {
        PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator();
        if (gpuMemAllocator) {
            data = gpuMemAllocator->MapMemory(mem_.allocation);
        }
    }
    return data;
}
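// Maps the start of the allocation without advancing the ring buffer offset.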
void* GpuBufferVk::MapMemory()
{
    if (!isMappable_) {
        PLUGIN_LOG_E("trying to map non-mappable gpu buffer");
        return nullptr;
    }
    if (isMapped_) {
        PLUGIN_LOG_E("gpu buffer already mapped");
        Unmap();
    }
    isMapped_ = true;

    void* data { nullptr };
    if (isPersistantlyMapped_) {
        data = mem_.allocationInfo.pMappedData;
    } else {
        PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator();
        if (gpuMemAllocator) {
            data = gpuMemAllocator->MapMemory(mem_.allocation);
        }
    }
    return data;
}
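// Non-persistently mapped memory may be non-coherent, so the mapped range is flushed before unmapping.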
void GpuBufferVk::Unmap() const
{
    if (!isMappable_) {
        PLUGIN_LOG_E("trying to unmap non-mappable gpu buffer");
    }
    if (!isMapped_) {
        PLUGIN_LOG_E("gpu buffer not mapped");
    }
    isMapped_ = false;

    if (!isPersistantlyMapped_) {
        PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator();
        if (gpuMemAllocator) {
            gpuMemAllocator->FlushAllocation(mem_.allocation, 0, VK_WHOLE_SIZE);
            gpuMemAllocator->UnmapMemory(mem_.allocation);
        }
    }
}
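// Creates the VkBuffer through the VMA-based allocator; a custom memory pool selected by the allocator is used
// when available, otherwise the null pool handle falls back to the default allocator.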
void GpuBufferVk::AllocateMemory(const VkMemoryPropertyFlags requiredFlags, const VkMemoryPropertyFlags preferredFlags)
{
    constexpr VkBufferCreateFlags bufferCreateFlags { 0 };
    const VkBufferCreateInfo bufferCreateInfo {
        VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // sType
        nullptr, // pNext
        bufferCreateFlags, // flags
        (VkDeviceSize)plat_.fullByteSize, // size
        plat_.usage, // usage
        VkSharingMode::VK_SHARING_MODE_EXCLUSIVE, // sharingMode
        0, // queueFamilyIndexCount
        nullptr, // pQueueFamilyIndices
    };

    VmaAllocationCreateFlags allocationCreateFlags { 0 };
    if (isPersistantlyMapped_) {
        allocationCreateFlags |= static_cast<VmaAllocationCreateFlags>(
            VmaAllocationCreateFlagBits::VMA_ALLOCATION_CREATE_MAPPED_BIT
#ifdef USE_NEW_VMA
            | VmaAllocationCreateFlagBits::VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT
#endif
        );
    }
    if (desc_.memoryPropertyFlags & CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
#ifdef USE_NEW_VMA
        allocationCreateFlags |= VmaAllocationCreateFlagBits::VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
#endif
    }

    PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator();
    PLUGIN_ASSERT(gpuMemAllocator);
    if (gpuMemAllocator) {
        // can be null handle -> default allocator
        const VmaPool customPool = gpuMemAllocator->GetBufferPool(desc_);
        const VmaAllocationCreateInfo allocationCreateInfo {
            allocationCreateFlags, // flags
#ifdef USE_NEW_VMA
            VmaMemoryUsage::VMA_MEMORY_USAGE_AUTO, // usage
#else
            VmaMemoryUsage::VMA_MEMORY_USAGE_UNKNOWN, // usage
#endif
            requiredFlags, // requiredFlags
            preferredFlags, // preferredFlags
            0, // memoryTypeBits
            customPool, // pool
            nullptr, // pUserData
#ifdef USE_NEW_VMA
            0.f, // priority
#endif
        };

        gpuMemAllocator->CreateBuffer(
            bufferCreateInfo, allocationCreateInfo, plat_.buffer, mem_.allocation, mem_.allocationInfo);
    }

    plat_.memory = GetPlatMemory(mem_.allocationInfo, preferredFlags);
}

RENDER_END_NAMESPACE()