/*
 * Copyright 2022 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/gpu/graphite/vk/VulkanBuffer.h"

#include "include/gpu/vk/VulkanMemoryAllocator.h"
#include "src/gpu/graphite/vk/VulkanCommandBuffer.h"
#include "src/gpu/graphite/vk/VulkanGraphiteUtilsPriv.h"
#include "src/gpu/vk/VulkanMemory.h"

namespace skgpu::graphite {

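// Example (illustrative values only): a caller might create a 256-byte host-visible uniform
// buffer roughly like
//     sk_sp<Buffer> buffer = VulkanBuffer::Make(sharedContext, /*size=*/256,
//                                               BufferType::kUniform,
//                                               AccessPattern::kHostVisible);
// Make() returns nullptr if the VkBuffer cannot be created, allocated, or bound.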
sk_sp<Buffer> VulkanBuffer::Make(const VulkanSharedContext* sharedContext,
                                 size_t size,
                                 BufferType type,
                                 AccessPattern accessPattern) {
    if (size <= 0) {
        return nullptr;
    }
    VkBuffer buffer;
    skgpu::VulkanAlloc alloc;

    // Mappable buffers are required for protected memory and for any buffer with a host-visible
    // access pattern. Otherwise, we only skip mappability on devices where GPU-only memory has
    // faster GPU reads than memory that is also mappable on the CPU.
    bool requiresMappable = sharedContext->isProtected() == Protected::kYes ||
                            accessPattern == AccessPattern::kHostVisible ||
                            !sharedContext->vulkanCaps().gpuOnlyBuffersMorePerformant();

    using BufferUsage = skgpu::VulkanMemoryAllocator::BufferUsage;

    // The default usage captures use cases besides transfer buffers. GPU-only buffers are
    // preferred unless mappability is required.
    BufferUsage allocUsage =
            requiresMappable ? BufferUsage::kCpuWritesGpuReads : BufferUsage::kGpuOnly;

    // Create the buffer object
    VkBufferCreateInfo bufInfo;
    memset(&bufInfo, 0, sizeof(VkBufferCreateInfo));
    bufInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    bufInfo.flags = 0;
    bufInfo.size = size;

    // To support SkMesh buffer updates we make Vertex and Index buffers capable of being transfer
    // dsts. To support rtAdjust uniform buffer updates, we make host-visible uniform buffers also
    // capable of being transfer dsts.
    switch (type) {
        case BufferType::kVertex:
            bufInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
            break;
        case BufferType::kIndex:
            bufInfo.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
            break;
        case BufferType::kStorage:
            bufInfo.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
            break;
        case BufferType::kIndirect:
            bufInfo.usage =
                    VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
            break;
        case BufferType::kVertexStorage:
            bufInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
            break;
        case BufferType::kIndexStorage:
            bufInfo.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
            break;
        case BufferType::kUniform:
            bufInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
            allocUsage = BufferUsage::kCpuWritesGpuReads;
            break;
        case BufferType::kXferCpuToGpu:
            bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
            allocUsage = BufferUsage::kTransfersFromCpuToGpu;
            break;
        case BufferType::kXferGpuToCpu:
            bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
            allocUsage = BufferUsage::kTransfersFromGpuToCpu;
            break;
    }

    // We may not always get a mappable buffer for non-dynamic-access buffers. Thus we set the
    // transfer-dst usage bit in case we need to do a copy to write data. It doesn't really hurt
    // to set this extra usage flag, though we could narrow the set of buffers it is applied to
    // beyond just "not dynamic".
    if (!requiresMappable || accessPattern == AccessPattern::kGpuOnly) {
        bufInfo.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
    }

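    // Exclusive sharing mode: the buffer is owned by one queue family at a time, so no queue
    // family indices need to be listed.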
    bufInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
    bufInfo.queueFamilyIndexCount = 0;
    bufInfo.pQueueFamilyIndices = nullptr;

    VkResult result;
    VULKAN_CALL_RESULT(sharedContext,
                       result,
                       CreateBuffer(sharedContext->device(),
                                    &bufInfo,
                                    nullptr, /*const VkAllocationCallbacks*/
                                    &buffer));
    if (result != VK_SUCCESS) {
        return nullptr;
    }

    auto allocator = sharedContext->memoryAllocator();
    bool shouldPersistentlyMapCpuToGpu =
            sharedContext->vulkanCaps().shouldPersistentlyMapCpuToGpuBuffers();
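    // Whether CPU-write/GPU-read allocations should stay persistently mapped for their lifetime
    // rather than being mapped and unmapped around each update (a platform-dependent caps choice).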
    // Allocate memory for the buffer; any VkResult other than VK_SUCCESS is treated as a failure.
    auto checkResult = [](VkResult result) {
        return result == VK_SUCCESS;
    };
    if (!skgpu::VulkanMemory::AllocBufferMemory(allocator,
                                                buffer,
                                                allocUsage,
                                                shouldPersistentlyMapCpuToGpu,
                                                checkResult,
                                                &alloc)) {
        VULKAN_CALL(sharedContext->interface(), DestroyBuffer(sharedContext->device(),
                                                              buffer,
                                                              /*const VkAllocationCallbacks*=*/nullptr));
        return nullptr;
    }

    // Bind buffer
    VULKAN_CALL_RESULT(
            sharedContext,
            result,
            BindBufferMemory(sharedContext->device(), buffer, alloc.fMemory, alloc.fOffset));
    if (result != VK_SUCCESS) {
        skgpu::VulkanMemory::FreeBufferMemory(allocator, alloc);
        VULKAN_CALL(sharedContext->interface(), DestroyBuffer(sharedContext->device(),
                                                              buffer,
                                                              /*const VkAllocationCallbacks*=*/nullptr));
        return nullptr;
    }

    return sk_sp<Buffer>(new VulkanBuffer(
            sharedContext, size, type, accessPattern, std::move(buffer), alloc, bufInfo.usage));
}

VulkanBuffer::VulkanBuffer(const VulkanSharedContext* sharedContext,
                           size_t size,
                           BufferType type,
                           AccessPattern accessPattern,
                           VkBuffer buffer,
                           const skgpu::VulkanAlloc& alloc,
                           const VkBufferUsageFlags usageFlags)
        : Buffer(sharedContext, size)
        , fBuffer(std::move(buffer))
        , fAlloc(alloc)
        , fBufferUsageFlags(usageFlags)
        // We assume a buffer is used for CPU reads only in the case of GPU->CPU transfer buffers.
        , fBufferUsedForCPURead(type == BufferType::kXferGpuToCpu) {}

void VulkanBuffer::freeGpuData() {
    if (fMapPtr) {
        this->internalUnmap(0, this->size());
        fMapPtr = nullptr;
    }

    const VulkanSharedContext* sharedContext =
            static_cast<const VulkanSharedContext*>(this->sharedContext());
    SkASSERT(fBuffer);
    SkASSERT(fAlloc.fMemory && fAlloc.fBackendMemory);
    VULKAN_CALL(sharedContext->interface(),
                DestroyBuffer(sharedContext->device(), fBuffer, nullptr));
    fBuffer = VK_NULL_HANDLE;

    skgpu::VulkanMemory::FreeBufferMemory(sharedContext->memoryAllocator(), fAlloc);
    fAlloc.fMemory = VK_NULL_HANDLE;
    fAlloc.fBackendMemory = 0;
}

void VulkanBuffer::internalMap(size_t readOffset, size_t readSize) {
    SkASSERT(!fMapPtr);
    if (this->isMappable()) {
        // Not every buffer will use command buffer usage refs. Instead, the command buffer just
        // holds normal refs. Systems higher up in Graphite should be making sure not to reuse a
        // buffer that currently has a ref held by something else. However, we do need to make
        // sure we aren't mapping a buffer whose only outstanding ref is a command buffer usage.
#ifdef SK_DEBUG
        SkASSERT(!this->debugHasCommandBufferRef());
#endif
        SkASSERT(fAlloc.fSize > 0);
        SkASSERT(fAlloc.fSize >= readOffset + readSize);

        const VulkanSharedContext* sharedContext = this->vulkanSharedContext();

        auto allocator = sharedContext->memoryAllocator();
        auto checkResult = [sharedContext](VkResult result) {
            VULKAN_LOG_IF_NOT_SUCCESS(sharedContext, result, "skgpu::VulkanMemory::MapAlloc");
            return sharedContext->checkVkResult(result);
        };
        fMapPtr = skgpu::VulkanMemory::MapAlloc(allocator, fAlloc, checkResult);
        if (fMapPtr && readSize != 0) {
            auto checkResult_invalidate = [sharedContext, readOffset, readSize](VkResult result) {
                VULKAN_LOG_IF_NOT_SUCCESS(sharedContext,
                                          result,
                                          "skgpu::VulkanMemory::InvalidateMappedAlloc "
                                          "(readOffset:%zu, readSize:%zu)",
                                          readOffset,
                                          readSize);
                return sharedContext->checkVkResult(result);
            };
            // "Invalidate" here means make device writes visible to the host. That is, it makes
            // sure any GPU writes are finished in the range we might read from.
            skgpu::VulkanMemory::InvalidateMappedAlloc(allocator,
                                                       fAlloc,
                                                       readOffset,
                                                       readSize,
                                                       checkResult_invalidate);
        }
    }
}

void VulkanBuffer::internalUnmap(size_t flushOffset, size_t flushSize) {
    SkASSERT(fMapPtr && this->isMappable());

    SkASSERT(fAlloc.fSize > 0);
    SkASSERT(fAlloc.fSize >= flushOffset + flushSize);

    const VulkanSharedContext* sharedContext = this->vulkanSharedContext();
    auto checkResult = [sharedContext, flushOffset, flushSize](VkResult result) {
        VULKAN_LOG_IF_NOT_SUCCESS(sharedContext,
                                  result,
                                  "skgpu::VulkanMemory::FlushMappedAlloc "
                                  "(flushOffset:%zu, flushSize:%zu)",
                                  flushOffset,
                                  flushSize);
        return sharedContext->checkVkResult(result);
    };

    auto allocator = sharedContext->memoryAllocator();
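    // "Flush" here is the mirror of the invalidate in internalMap: it makes host writes in the
    // given range visible to the device (required when the memory is not host-coherent) before
    // the allocation is unmapped.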
    skgpu::VulkanMemory::FlushMappedAlloc(allocator, fAlloc, flushOffset, flushSize, checkResult);
    skgpu::VulkanMemory::UnmapAlloc(allocator, fAlloc);
}

void VulkanBuffer::onMap() {
    SkASSERT(fBuffer);
    SkASSERT(!this->isMapped());

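    // Only GPU->CPU transfer buffers are read on the CPU, so only they need the mapped range
    // invalidated (the full buffer). All other buffers are mapped for writing only.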
    this->internalMap(0, fBufferUsedForCPURead ? this->size() : 0);
}

void VulkanBuffer::onUnmap() {
    SkASSERT(fBuffer);
    SkASSERT(this->isMapped());
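    // CPU-read buffers were only read from, so there is nothing to flush; buffers written on the
    // CPU flush their full size so the writes become visible to the GPU.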
    this->internalUnmap(0, fBufferUsedForCPURead ? 0 : this->size());
}

void VulkanBuffer::setBufferAccess(VulkanCommandBuffer* cmdBuffer,
                                   VkAccessFlags dstAccessMask,
                                   VkPipelineStageFlags dstStageMask) const {
    // TODO: fill out other cases where we need a barrier
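    // For now a barrier is only issued when transitioning to host reads, transfer writes, or
    // uniform reads; it makes the accesses recorded in fCurrentAccessMask available before the
    // new access begins.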
    if (dstAccessMask == VK_ACCESS_HOST_READ_BIT ||
        dstAccessMask == VK_ACCESS_TRANSFER_WRITE_BIT ||
        dstAccessMask == VK_ACCESS_UNIFORM_READ_BIT) {
        VkPipelineStageFlags srcStageMask =
                VulkanBuffer::AccessMaskToPipelineSrcStageFlags(fCurrentAccessMask);

        VkBufferMemoryBarrier bufferMemoryBarrier = {
                VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,  // sType
                nullptr,                                  // pNext
                fCurrentAccessMask,                       // srcAccessMask
                dstAccessMask,                            // dstAccessMask
                VK_QUEUE_FAMILY_IGNORED,                  // srcQueueFamilyIndex
                VK_QUEUE_FAMILY_IGNORED,                  // dstQueueFamilyIndex
                fBuffer,                                  // buffer
                0,                                        // offset
                this->size(),                             // size
        };
        cmdBuffer->addBufferMemoryBarrier(srcStageMask, dstStageMask, &bufferMemoryBarrier);
    }

    fCurrentAccessMask = dstAccessMask;
}

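// Maps the access types recorded in a previous access mask to the pipeline stages that could have
// performed those accesses; the result is used as the srcStageMask of the barrier above.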
VkPipelineStageFlags VulkanBuffer::AccessMaskToPipelineSrcStageFlags(const VkAccessFlags srcMask) {
    if (srcMask == 0) {
        return VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
    }
    VkPipelineStageFlags flags = 0;

    if (srcMask & VK_ACCESS_TRANSFER_WRITE_BIT || srcMask & VK_ACCESS_TRANSFER_READ_BIT) {
        flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
    }
    if (srcMask & VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT ||
        srcMask & VK_ACCESS_COLOR_ATTACHMENT_READ_BIT) {
        flags |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
    }
    if (srcMask & VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT ||
        srcMask & VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT) {
        flags |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
    }
    if (srcMask & VK_ACCESS_INPUT_ATTACHMENT_READ_BIT) {
        flags |= VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
    }
    if (srcMask & VK_ACCESS_SHADER_READ_BIT ||
        srcMask & VK_ACCESS_UNIFORM_READ_BIT) {
        // TODO(b/307577875): It is possible that uniforms could have simply been used in the
        // vertex shader and not the fragment shader, so using the fragment shader pipeline stage
        // bit indiscriminately is a bit overkill. This call should be modified to check & allow
        // for selecting VK_PIPELINE_STAGE_VERTEX_SHADER_BIT when appropriate.
        flags |= (VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
    }
    if (srcMask & VK_ACCESS_SHADER_WRITE_BIT) {
        flags |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
    }
    if (srcMask & VK_ACCESS_INDEX_READ_BIT ||
        srcMask & VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT) {
        flags |= VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
    }
    if (srcMask & VK_ACCESS_INDIRECT_COMMAND_READ_BIT) {
        flags |= VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
    }
    if (srcMask & VK_ACCESS_HOST_READ_BIT || srcMask & VK_ACCESS_HOST_WRITE_BIT) {
        flags |= VK_PIPELINE_STAGE_HOST_BIT;
    }

    return flags;
}

}  // namespace skgpu::graphite