/*
 * Copyright (c) 2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "gpu_buffer_vk.h"

#include <cinttypes>
#include <cstdint>
#include <vulkan/vulkan_core.h>

#include <base/math/mathf.h>

#if (RENDER_PERF_ENABLED == 1)
#include <core/implementation_uids.h>
#include <core/perf/intf_performance_data_manager.h>
#endif

#include <render/namespace.h>

#include "device/device.h"
#include "device/gpu_resource_desc_flag_validation.h"
#include "util/log.h"
#include "vulkan/device_vk.h"
#include "vulkan/gpu_memory_allocator_vk.h"
#include "vulkan/validate_vk.h"

using namespace BASE_NS;

RENDER_BEGIN_NAMESPACE()
namespace {
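// Rounds byteSize up to the next multiple of alignment. Assumes a power-of-two
// alignment, which the Vulkan limits used below guarantee.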
constexpr uint32_t GetAlignedByteSize(const uint32_t byteSize, const uint32_t alignment)
{
    return (byteSize + alignment - 1) & (~(alignment - 1));
}

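// The strictest offset alignment that satisfies both storage and uniform buffer binding.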
constexpr uint32_t GetMinBufferAlignment(const VkPhysicalDeviceLimits& limits)
{
    return Math::max(static_cast<uint32_t>(limits.minStorageBufferOffsetAlignment),
        static_cast<uint32_t>(limits.minUniformBufferOffsetAlignment));
}

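// Alignment for host-visible mappings: both the map alignment and the
// non-coherent atom size (used when flushing mapped ranges) must be honored.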
constexpr uint32_t GetMemoryMapAlignment(const VkPhysicalDeviceLimits& limits)
{
    return Math::max(
        static_cast<uint32_t>(limits.minMemoryMapAlignment), static_cast<uint32_t>(limits.nonCoherentAtomSize));
}

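// Packs the VMA allocation results into the engine's platform memory struct.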
GpuResourceMemoryVk GetPlatMemory(const VmaAllocationInfo& allocationInfo, const VkMemoryPropertyFlags flags)
{
    return GpuResourceMemoryVk {
        allocationInfo.deviceMemory,
        allocationInfo.offset,
        allocationInfo.size,
        allocationInfo.pMappedData,
        allocationInfo.memoryType,
        flags,
    };
}

#if (RENDER_VULKAN_RT_ENABLED == 1)
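// Queries the buffer's GPU virtual address for device-address (e.g. ray tracing) usage.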
inline uint64_t GetBufferDeviceAddress(const VkDevice device, const VkBuffer buffer)
{
    const VkBufferDeviceAddressInfo addressInfo {
        VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, // sType
        nullptr,                                      // pNext
        buffer,                                       // buffer
    };
    return vkGetBufferDeviceAddress(device, &addressInfo);
}
#endif

#if (RENDER_PERF_ENABLED == 1)
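// Reports buffer allocations to the performance data manager; deallocations are
// recorded by passing a negative alignedByteSize.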
void RecordAllocation(
    PlatformGpuMemoryAllocator& gpuMemAllocator, const GpuBufferDesc& desc, const int64_t alignedByteSize)
{
    if (auto* inst = CORE_NS::GetInstance<CORE_NS::IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
        inst) {
        CORE_NS::IPerformanceDataManager* pdm = inst->Get("Memory");

        pdm->UpdateData("AllGpuBuffers", "GPU_BUFFER", alignedByteSize,
            CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::BYTES);
        const string poolDebugName = gpuMemAllocator.GetBufferPoolDebugName(desc);
        if (!poolDebugName.empty()) {
            pdm->UpdateData(poolDebugName, "GPU_BUFFER", alignedByteSize,
                CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::BYTES);
        }
    }
}
#endif
} // namespace

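// Plain buffer creation. A dynamic ring buffer allocates one slice per
// buffering count, and Map() advances through the slices (see Map() below).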
GpuBufferVk::GpuBufferVk(Device& device, const GpuBufferDesc& desc)
    : device_(device), desc_(desc),
      isPersistantlyMapped_(
          (desc.memoryPropertyFlags & MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
          (desc.memoryPropertyFlags & MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT)),
      isRingBuffer_(desc.engineCreationFlags & CORE_ENGINE_BUFFER_CREATION_DYNAMIC_RING_BUFFER),
      bufferingCount_(isRingBuffer_ ? device_.GetCommandBufferingCount() : 1u)
{
    CreateBufferImpl();
}

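// Acceleration structure creation: the backing buffer is created first, then the
// acceleration structure handle and its device address are fetched when the
// required extension functions are available.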
GpuBufferVk::GpuBufferVk(Device& device, const GpuAccelerationStructureDesc& desc)
    : device_(device), desc_(desc.bufferDesc), descAccel_(desc),
      isPersistantlyMapped_(
          (desc_.memoryPropertyFlags & MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
          (desc_.memoryPropertyFlags & MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT)),
      isRingBuffer_(desc_.engineCreationFlags & CORE_ENGINE_BUFFER_CREATION_DYNAMIC_RING_BUFFER),
      isAccelerationStructure_(true), bufferingCount_(isRingBuffer_ ? device_.GetCommandBufferingCount() : 1u)
{
    CreateBufferImpl();

#if (RENDER_VULKAN_RT_ENABLED == 1)
    PLUGIN_ASSERT(desc.bufferDesc.usageFlags & CORE_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT);
    platAccel_.buffer = plat_.buffer;
    platAccel_.byteSize = plat_.fullByteSize;

    constexpr VkFlags createFlags = 0;
    const VkAccelerationStructureTypeKHR accelerationStructureType =
        static_cast<VkAccelerationStructureTypeKHR>(descAccel_.accelerationStructureType);
    VkAccelerationStructureCreateInfoKHR createInfo {
        VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR, // sType
        nullptr,                                                  // pNext
        createFlags,                                              // createFlags
        plat_.buffer,                                             // buffer
        0,                                                        // offset
        (VkDeviceSize)platAccel_.byteSize,                        // size
        accelerationStructureType,                                // type
        0,                                                        // deviceAddress
    };

    const DeviceVk& deviceVk = (const DeviceVk&)device_;
    const DevicePlatformDataVk& devicePlat = (const DevicePlatformDataVk&)device_.GetPlatformData();
    const VkDevice vkDevice = devicePlat.device;

    const DeviceVk::ExtFunctions& extFunctions = deviceVk.GetExtFunctions();
    if (extFunctions.vkCreateAccelerationStructureKHR && extFunctions.vkGetAccelerationStructureDeviceAddressKHR) {
        VALIDATE_VK_RESULT(extFunctions.vkCreateAccelerationStructureKHR(vkDevice, // device
            &createInfo,                                                           // pCreateInfo
            nullptr,                                                               // pAllocator
            &platAccel_.accelerationStructure));                                   // pAccelerationStructure

        if (platAccel_.accelerationStructure) {
            const VkAccelerationStructureDeviceAddressInfoKHR addressInfo {
                VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR, // sType
                nullptr,                                                          // pNext
                platAccel_.accelerationStructure,                                 // accelerationStructure
            };
            platAccel_.deviceAddress = extFunctions.vkGetAccelerationStructureDeviceAddressKHR(vkDevice, // device
                &addressInfo);                                                                           // pInfo
        }
    }
#endif
}

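// Wraps a backend-provided platform hardware buffer handle; buffer creation is
// delegated to CreatePlatformHwBuffer() instead of CreateBufferImpl().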
GpuBufferVk::GpuBufferVk(Device& device, const BackendSpecificBufferDesc& desc)
    : device_(device), desc_(GetBufferDescFromHwBufferDesc(static_cast<const BufferDescVk&>(desc).platformHwBuffer))
{
    plat_.platformHwBuffer = static_cast<const BufferDescVk&>(desc).platformHwBuffer;
    if (plat_.platformHwBuffer) {
        CreatePlatformHwBuffer();
    }
}

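// Unmaps a still-mapped buffer, destroys the optional acceleration structure,
// and releases the buffer and its memory when this instance owns them.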
GpuBufferVk::~GpuBufferVk()
{
    if (isMapped_) {
        Unmap();
    }

#if (RENDER_VULKAN_RT_ENABLED == 1)
    if (isAccelerationStructure_ && platAccel_.accelerationStructure) {
        const VkDevice device = ((const DevicePlatformDataVk&)device_.GetPlatformData()).device;
        const DeviceVk& deviceVk = (const DeviceVk&)device_;
        const DeviceVk::ExtFunctions& extFunctions = deviceVk.GetExtFunctions();
        if (extFunctions.vkDestroyAccelerationStructureKHR) {
            extFunctions.vkDestroyAccelerationStructureKHR(device, // device
                platAccel_.accelerationStructure,                  // accelerationStructure
                nullptr);                                          // pAllocator
        }
    }
#endif
    if (ownsResources_) {
        if (PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator(); gpuMemAllocator) {
            gpuMemAllocator->DestroyBuffer(plat_.buffer, mem_.allocation);
#if (RENDER_PERF_ENABLED == 1)
            RecordAllocation(*gpuMemAllocator, desc_, -static_cast<int64_t>(plat_.fullByteSize));
#endif
        }
    }
    if (plat_.platformHwBuffer) {
        DestroyPlatformHwBuffer();
    }

#if (RENDER_DEBUG_GPU_RESOURCE_IDS == 1)
    PLUGIN_LOG_E("gpu buffer id <: 0x%" PRIxPTR, (uintptr_t)plat_.buffer);
#endif
}

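// Creates the VkBuffer and allocates its memory. The per-slice bind size is
// aligned up-front so every ring buffer slice starts at a valid uniform/storage
// offset and can be flushed with valid non-coherent ranges.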
void GpuBufferVk::CreateBufferImpl()
{
    PLUGIN_ASSERT_MSG(
        (isRingBuffer_ && isPersistantlyMapped_) || !isRingBuffer_, "dynamic ring buffer needs persistent mapping");

    auto memoryPropertyFlags = static_cast<VkMemoryPropertyFlags>(desc_.memoryPropertyFlags);
    const VkMemoryPropertyFlags requiredFlags =
        (memoryPropertyFlags & (~(VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
                                   CORE_MEMORY_PROPERTY_PROTECTED_BIT)));
    const VkMemoryPropertyFlags preferredFlags = memoryPropertyFlags;

    const auto& limits = static_cast<const DevicePlatformDataVk&>(device_.GetPlatformData())
                             .physicalDeviceProperties.physicalDeviceProperties.limits;
    // force min buffer alignment always
    const uint32_t minBufferAlignment = GetMinBufferAlignment(limits);
    const uint32_t minMapAlignment = (isRingBuffer_ || isPersistantlyMapped_) ? GetMemoryMapAlignment(limits) : 1u;
    plat_.bindMemoryByteSize = GetAlignedByteSize(desc_.byteSize, Math::max(minBufferAlignment, minMapAlignment));
    plat_.fullByteSize = plat_.bindMemoryByteSize * bufferingCount_;
    plat_.currentByteOffset = 0;
    plat_.usage = static_cast<VkBufferUsageFlags>(desc_.usageFlags);

    AllocateMemory(requiredFlags, preferredFlags);

#if (RENDER_VULKAN_RT_ENABLED == 1)
    if (plat_.usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) {
        const DevicePlatformDataVk& devicePlat = (const DevicePlatformDataVk&)device_.GetPlatformData();
        plat_.deviceAddress = GetBufferDeviceAddress(devicePlat.device, plat_.buffer);
    }
#endif

    if (PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator(); gpuMemAllocator) {
        const auto memFlags =
            (VkMemoryPropertyFlags)gpuMemAllocator->GetMemoryTypeProperties(mem_.allocationInfo.memoryType);
        isMappable_ = (memFlags & VkMemoryPropertyFlagBits::VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0;
#if (RENDER_PERF_ENABLED == 1)
        RecordAllocation(*gpuMemAllocator, desc_, plat_.fullByteSize);
#endif
    }

#if (RENDER_DEBUG_GPU_RESOURCE_IDS == 1)
    PLUGIN_LOG_E("gpu buffer id >: 0x%" PRIxPTR, (uintptr_t)plat_.buffer);
#endif
}

const GpuBufferDesc& GpuBufferVk::GetDesc() const
{
    return desc_;
}

const GpuBufferPlatformDataVk& GpuBufferVk::GetPlatformData() const
{
    return plat_;
}

const GpuAccelerationStructureDesc& GpuBufferVk::GetDescAccelerationStructure() const
{
    return descAccel_;
}

const GpuAccelerationStructurePlatformDataVk& GpuBufferVk::GetPlatformDataAccelerationStructure() const
{
    return platAccel_;
}

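// Maps the current slice for writing; for ring buffers each Map() first
// advances the write offset to the next slice.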
void* GpuBufferVk::Map()
{
    if (!isMappable_) {
        PLUGIN_LOG_E("trying to map non-mappable gpu buffer");
        return nullptr;
    }
    if (isMapped_) {
        PLUGIN_LOG_E("gpu buffer already mapped");
        Unmap();
    }
    isMapped_ = true;

    if (isRingBuffer_) {
        plat_.currentByteOffset = (plat_.currentByteOffset + plat_.bindMemoryByteSize) % plat_.fullByteSize;
    }

    void* data { nullptr };
    if (isPersistantlyMapped_) {
        if (mem_.allocationInfo.pMappedData) {
            data = reinterpret_cast<uint8_t*>(mem_.allocationInfo.pMappedData) + plat_.currentByteOffset;
        }
    } else {
        PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator();
        if (gpuMemAllocator) {
            data = gpuMemAllocator->MapMemory(mem_.allocation);
        }
    }
    return data;
}

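// Maps the allocation from offset zero, without advancing the ring buffer offset.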
void* GpuBufferVk::MapMemory()
{
    if (!isMappable_) {
        PLUGIN_LOG_E("trying to map non-mappable gpu buffer");
        return nullptr;
    }
    if (isMapped_) {
        PLUGIN_LOG_E("gpu buffer already mapped");
        Unmap();
    }
    isMapped_ = true;

    void* data { nullptr };
    if (isPersistantlyMapped_) {
        data = mem_.allocationInfo.pMappedData;
    } else {
        PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator();
        if (gpuMemAllocator) {
            data = gpuMemAllocator->MapMemory(mem_.allocation);
        }
    }
    return data;
}

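// Flushes and unmaps non-persistently mapped memory; persistent mappings are
// left mapped for the lifetime of the allocation.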
void GpuBufferVk::Unmap() const
{
    if (!isMappable_) {
        PLUGIN_LOG_E("trying to unmap non-mappable gpu buffer");
    }
    if (!isMapped_) {
        PLUGIN_LOG_E("gpu buffer not mapped");
    }
    isMapped_ = false;

    if (!isPersistantlyMapped_) {
        PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator();
        if (gpuMemAllocator) {
            gpuMemAllocator->FlushAllocation(mem_.allocation, 0, VK_WHOLE_SIZE);
            gpuMemAllocator->UnmapMemory(mem_.allocation);
        }
    }
}

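// Creates the buffer through VMA, optionally from a custom memory pool;
// persistently mapped buffers request VMA_ALLOCATION_CREATE_MAPPED_BIT.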
void GpuBufferVk::AllocateMemory(const VkMemoryPropertyFlags requiredFlags, const VkMemoryPropertyFlags preferredFlags)
{
    constexpr VkBufferCreateFlags bufferCreateFlags { 0 };
    const VkBufferCreateInfo bufferCreateInfo {
        VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,     // sType
        nullptr,                                  // pNext
        bufferCreateFlags,                        // flags
        (VkDeviceSize)plat_.fullByteSize,         // size
        plat_.usage,                              // usage
        VkSharingMode::VK_SHARING_MODE_EXCLUSIVE, // sharingMode
        0,                                        // queueFamilyIndexCount
        nullptr,                                  // pQueueFamilyIndices
    };

    VmaAllocationCreateFlags allocationCreateFlags { 0 };
    if (isPersistantlyMapped_) {
        allocationCreateFlags |= static_cast<VmaAllocationCreateFlags>(
            VmaAllocationCreateFlagBits::VMA_ALLOCATION_CREATE_MAPPED_BIT
#ifdef USE_NEW_VMA
            | VmaAllocationCreateFlagBits::VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT
#endif
        );
    }
    if (desc_.memoryPropertyFlags & CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT) {
#ifdef USE_NEW_VMA
        allocationCreateFlags |= VmaAllocationCreateFlagBits::VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
#endif
    }

    PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator();
    PLUGIN_ASSERT(gpuMemAllocator);
    if (gpuMemAllocator) {
        // can be null handle -> default allocator
        const VmaPool customPool = gpuMemAllocator->GetBufferPool(desc_);
        const VmaAllocationCreateInfo allocationCreateInfo {
            allocationCreateFlags, // flags
#ifdef USE_NEW_VMA
            VmaMemoryUsage::VMA_MEMORY_USAGE_AUTO, // usage
#else
            VmaMemoryUsage::VMA_MEMORY_USAGE_UNKNOWN, // usage
#endif
            requiredFlags,  // requiredFlags
            preferredFlags, // preferredFlags
            0,              // memoryTypeBits
            customPool,     // pool
            nullptr,        // pUserData
#ifdef USE_NEW_VMA
            0.f, // priority
#endif
        };

        gpuMemAllocator->CreateBuffer(
            bufferCreateInfo, allocationCreateInfo, plat_.buffer, mem_.allocation, mem_.allocationInfo);
    }

    plat_.memory = GetPlatMemory(mem_.allocationInfo, preferredFlags);
}

RENDER_END_NAMESPACE()