/*
 * Copyright (C) 2023 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "gpu_buffer_vk.h"

#include <cinttypes>
#include <cstdint>
#include <cstring>
#include <vulkan/vulkan_core.h>

#include <base/math/mathf.h>

#if (RENDER_PERF_ENABLED == 1)
#include <core/implementation_uids.h>
#include <core/perf/intf_performance_data_manager.h>
#endif

#include <render/namespace.h>

#include "device/device.h"
#include "device/gpu_buffer.h"
#include "device/gpu_resource_desc_flag_validation.h"
#include "util/log.h"
#include "vulkan/device_vk.h"
#include "vulkan/gpu_memory_allocator_vk.h"
#include "vulkan/validate_vk.h"

using namespace BASE_NS;

RENDER_BEGIN_NAMESPACE()
namespace {
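// GetAlignedByteSize rounds byteSize up to the next multiple of alignment; the bit trick
// assumes alignment is a power of two, which holds for the Vulkan device limits used below.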
constexpr uint32_t GetAlignedByteSize(const uint32_t byteSize, const uint32_t alignment)
{
    return (byteSize + alignment - 1) & (~(alignment - 1));
}

constexpr uint32_t GetMinBufferAlignment(const VkPhysicalDeviceLimits& limits)
{
    return Math::max(static_cast<uint32_t>(limits.minStorageBufferOffsetAlignment),
        static_cast<uint32_t>(limits.minUniformBufferOffsetAlignment));
}

constexpr uint32_t GetMemoryMapAlignment(const VkPhysicalDeviceLimits& limits)
{
    return Math::max(
        static_cast<uint32_t>(limits.minMemoryMapAlignment), static_cast<uint32_t>(limits.nonCoherentAtomSize));
}

GpuResourceMemoryVk GetPlatMemory(const VmaAllocationInfo& allocationInfo, const VkMemoryPropertyFlags flags)
{
    return GpuResourceMemoryVk {
        allocationInfo.deviceMemory,
        allocationInfo.offset,
        allocationInfo.size,
        allocationInfo.pMappedData,
        allocationInfo.memoryType,
        flags,
    };
}

#if (RENDER_PERF_ENABLED == 1)
void RecordAllocation(
    PlatformGpuMemoryAllocator& gpuMemAllocator, const GpuBufferDesc& desc, const int64_t alignedByteSize)
{
    if (auto* inst = CORE_NS::GetInstance<CORE_NS::IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
        inst) {
        CORE_NS::IPerformanceDataManager* pdm = inst->Get("Memory");

        pdm->UpdateData("AllGpuBuffers", "GPU_BUFFER", alignedByteSize);
        const string poolDebugName = gpuMemAllocator.GetBufferPoolDebugName(desc);
        if (!poolDebugName.empty()) {
            pdm->UpdateData(poolDebugName, "GPU_BUFFER", alignedByteSize);
        }
    }
}
#endif
} // namespace

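// The buffer is persistently mapped when it is both host visible and host coherent.
// A dynamic ring buffer reserves one bindMemoryByteSize slice per command buffering count,
// so fullByteSize = bindMemoryByteSize * bufferingCount_.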
GpuBufferVk::GpuBufferVk(Device& device, const GpuBufferDesc& desc)
    : device_(device), desc_(desc),
      isPersistantlyMapped_(
          (desc.memoryPropertyFlags & MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
          (desc.memoryPropertyFlags & MemoryPropertyFlagBits::CORE_MEMORY_PROPERTY_HOST_COHERENT_BIT)),
      isRingBuffer_(desc.engineCreationFlags & CORE_ENGINE_BUFFER_CREATION_DYNAMIC_RING_BUFFER),
      bufferingCount_(isRingBuffer_ ? device_.GetCommandBufferingCount() : 1u)
{
    PLUGIN_ASSERT_MSG(
        (isRingBuffer_ && isPersistantlyMapped_) || !isRingBuffer_, "dynamic ring buffer needs persistent mapping");

    VkMemoryPropertyFlags memoryPropertyFlags = static_cast<VkMemoryPropertyFlags>(desc_.memoryPropertyFlags);
    const VkMemoryPropertyFlags requiredFlags =
        (memoryPropertyFlags & (~(VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
                                   CORE_MEMORY_PROPERTY_PROTECTED_BIT)));
    const VkMemoryPropertyFlags preferredFlags = memoryPropertyFlags;

    const auto& limits = static_cast<const DevicePlatformDataVk&>(device.GetPlatformData())
                             .physicalDeviceProperties.physicalDeviceProperties.limits;
    // force min buffer alignment always
    const uint32_t minBufferAlignment = GetMinBufferAlignment(limits);
    const uint32_t minMapAlignment = (isRingBuffer_ || isPersistantlyMapped_) ? GetMemoryMapAlignment(limits) : 1u;
    plat_.bindMemoryByteSize = GetAlignedByteSize(desc_.byteSize, Math::max(minBufferAlignment, minMapAlignment));
    plat_.fullByteSize = plat_.bindMemoryByteSize * bufferingCount_;
    plat_.currentByteOffset = 0;
    plat_.usage = static_cast<VkBufferUsageFlags>(desc_.usageFlags);

    AllocateMemory(requiredFlags, preferredFlags);

    if (PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator(); gpuMemAllocator) {
        const VkMemoryPropertyFlags memFlags =
            (VkMemoryPropertyFlags)gpuMemAllocator->GetMemoryTypeProperties(mem_.allocationInfo.memoryType);
        isMappable_ = (memFlags & VkMemoryPropertyFlagBits::VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) ? true : false;
#if (RENDER_PERF_ENABLED == 1)
        RecordAllocation(*gpuMemAllocator, desc, plat_.fullByteSize);
#endif
    }

#if (RENDER_DEBUG_GPU_RESOURCE_IDS == 1)
    PLUGIN_LOG_E("gpu buffer id >: 0x%" PRIxPTR, (uintptr_t)plat_.buffer);
#endif
}

GpuBufferVk::~GpuBufferVk()
{
    if (isMapped_) {
        Unmap();
    }

    if (PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator(); gpuMemAllocator) {
        gpuMemAllocator->DestroyBuffer(plat_.buffer, mem_.allocation);
#if (RENDER_PERF_ENABLED == 1)
        RecordAllocation(*gpuMemAllocator, desc_, -static_cast<int64_t>(plat_.fullByteSize));
#endif
    }
#if (RENDER_DEBUG_GPU_RESOURCE_IDS == 1)
    PLUGIN_LOG_E("gpu buffer id <: 0x%" PRIxPTR, (uintptr_t)plat_.buffer);
#endif
}

const GpuBufferDesc& GpuBufferVk::GetDesc() const
{
    return desc_;
}

const GpuBufferPlatformDataVk& GpuBufferVk::GetPlatformData() const
{
    return plat_;
}

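// Map() advances the ring buffer offset before returning a pointer, so for a dynamic ring
// buffer each call targets the next buffering slice within the same allocation.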
void* GpuBufferVk::Map()
{
    if (!isMappable_) {
        PLUGIN_LOG_E("trying to map non-mappable gpu buffer");
        return nullptr;
    }
    if (isMapped_) {
        PLUGIN_LOG_E("gpu buffer already mapped");
        Unmap();
    }
    isMapped_ = true;

    if (isRingBuffer_) {
        plat_.currentByteOffset = (plat_.currentByteOffset + plat_.bindMemoryByteSize) % plat_.fullByteSize;
    }

    void* data { nullptr };
    if (isPersistantlyMapped_) {
        data = reinterpret_cast<uint8_t*>(mem_.allocationInfo.pMappedData) + plat_.currentByteOffset;
    } else {
        PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator();
        if (gpuMemAllocator) {
            data = gpuMemAllocator->MapMemory(mem_.allocation);
        }
    }
    return data;
}

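// Unlike Map(), MapMemory() does not advance the ring buffer offset; it always returns
// the base address of the mapping.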
void* GpuBufferVk::MapMemory()
{
    if (!isMappable_) {
        PLUGIN_LOG_E("trying to map non-mappable gpu buffer");
        return nullptr;
    }
    if (isMapped_) {
        PLUGIN_LOG_E("gpu buffer already mapped");
        Unmap();
    }
    isMapped_ = true;

    void* data { nullptr };
    if (isPersistantlyMapped_) {
        data = mem_.allocationInfo.pMappedData;
    } else {
        PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator();
        if (gpuMemAllocator) {
            data = gpuMemAllocator->MapMemory(mem_.allocation);
        }
    }
    return data;
}

void GpuBufferVk::Unmap() const
{
    if (!isMappable_) {
        PLUGIN_LOG_E("trying to unmap non-mappable gpu buffer");
    }
    if (!isMapped_) {
        PLUGIN_LOG_E("gpu buffer not mapped");
    }
    isMapped_ = false;

    if (!isPersistantlyMapped_) {
        PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator();
        if (gpuMemAllocator) {
            gpuMemAllocator->FlushAllocation(mem_.allocation, 0, VK_WHOLE_SIZE);
            gpuMemAllocator->UnmapMemory(mem_.allocation);
        }
    }
}

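// Creates the VkBuffer and backs it with memory allocated through VMA; the allocation may be
// placed in a descriptor-specific VmaPool, with a null pool handle falling back to the default allocator.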
void GpuBufferVk::AllocateMemory(const VkMemoryPropertyFlags requiredFlags, const VkMemoryPropertyFlags preferredFlags)
{
    constexpr VkBufferCreateFlags bufferCreateFlags { 0 };
    const VkBufferCreateInfo bufferCreateInfo {
        VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,     // sType
        nullptr,                                  // pNext
        bufferCreateFlags,                        // flags
        (VkDeviceSize)plat_.fullByteSize,         // size
        plat_.usage,                              // usage
        VkSharingMode::VK_SHARING_MODE_EXCLUSIVE, // sharingMode
        0,                                        // queueFamilyIndexCount
        nullptr,                                  // pQueueFamilyIndices
    };

    VmaAllocationCreateFlags allocationCreateFlags { 0 };
    if (isPersistantlyMapped_) {
        allocationCreateFlags |=
            (VmaAllocationCreateFlags)(VmaAllocationCreateFlagBits::VMA_ALLOCATION_CREATE_MAPPED_BIT |
                                       VmaAllocationCreateFlagBits::
                                           VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT);
    }

    PlatformGpuMemoryAllocator* gpuMemAllocator = device_.GetPlatformGpuMemoryAllocator();
    PLUGIN_ASSERT(gpuMemAllocator);
    if (gpuMemAllocator) {
        // can be null handle -> default allocator
        const VmaPool customPool = gpuMemAllocator->GetBufferPool(desc_);
        const VmaAllocationCreateInfo allocationCreateInfo {
            allocationCreateFlags,                 // flags
            VmaMemoryUsage::VMA_MEMORY_USAGE_AUTO, // usage
            requiredFlags,                         // requiredFlags
            preferredFlags,                        // preferredFlags
            0,                                     // memoryTypeBits
            customPool,                            // pool
            nullptr,                               // pUserData
            0.f,                                   // priority
        };

        gpuMemAllocator->CreateBuffer(
            bufferCreateInfo, allocationCreateInfo, plat_.buffer, mem_.allocation, mem_.allocationInfo);
    }

    plat_.memory = GetPlatMemory(mem_.allocationInfo, preferredFlags);
}

RENDER_END_NAMESPACE()