/*
 * Copyright 2022 Google LLC
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/gpu/graphite/vk/VulkanBuffer.h"

#include "include/gpu/vk/VulkanMemoryAllocator.h"
#include "src/gpu/graphite/vk/VulkanCommandBuffer.h"
#include "src/gpu/graphite/vk/VulkanGraphiteUtils.h"
#include "src/gpu/vk/VulkanMemory.h"

namespace skgpu::graphite {

sk_sp<Buffer> VulkanBuffer::Make(const VulkanSharedContext* sharedContext,
                                 size_t size,
                                 BufferType type,
                                 AccessPattern accessPattern) {
    if (size <= 0) {
        return nullptr;
    }
    VkBuffer buffer;
    skgpu::VulkanAlloc alloc;

    // TODO (b/374749633): We can't use protected buffers in the vertex shader. The checks below
    // make sure we don't use it for vertex or index buffers. But we currently don't have a way to
    // check here if it is a uniform or storage buffer that is used in the vertex shader. If we hit
    // that issue and need those GpuOnly buffers, we'll need to pass in some information to the
    // factory to say what stage the buffer is for. Maybe expand AccessPattern to be
    // GpuOnly_NotVertex or some better name like that.
    bool isProtected = sharedContext->isProtected() == Protected::kYes &&
                       accessPattern == AccessPattern::kGpuOnly &&
                       type != BufferType::kVertex &&
                       type != BufferType::kIndex;

    // Protected memory _never_ uses mappable buffers.
    // Otherwise, the only time we don't require mappable buffers is when we're on a device
    // where gpu only memory has faster reads on the gpu than memory that is also mappable
    // on the cpu.
    bool requiresMappable = !isProtected &&
                            (accessPattern == AccessPattern::kHostVisible ||
                             !sharedContext->vulkanCaps().gpuOnlyBuffersMorePerformant());

    using BufferUsage = skgpu::VulkanMemoryAllocator::BufferUsage;

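    // Choose a memory-usage hint for the allocator based on the buffer type and mappability.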
    BufferUsage allocUsage;
    if (type == BufferType::kXferCpuToGpu) {
        allocUsage = BufferUsage::kTransfersFromCpuToGpu;
    } else if (type == BufferType::kXferGpuToCpu) {
        allocUsage = BufferUsage::kTransfersFromGpuToCpu;
    } else {
        // GPU-only buffers are preferred unless mappability is required.
        allocUsage = requiresMappable ? BufferUsage::kCpuWritesGpuReads : BufferUsage::kGpuOnly;
    }

    // Create the buffer object
    VkBufferCreateInfo bufInfo;
    memset(&bufInfo, 0, sizeof(VkBufferCreateInfo));
    bufInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    bufInfo.flags = isProtected ? VK_BUFFER_CREATE_PROTECTED_BIT : 0;
    bufInfo.size = size;

    // To support SkMesh buffer updates we make Vertex and Index buffers capable of being transfer
    // dsts. To support rtAdjust uniform buffer updates, we make host-visible uniform buffers also
    // capable of being transfer dsts.
    switch (type) {
        case BufferType::kVertex:
            bufInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
            break;
        case BufferType::kIndex:
            bufInfo.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
            break;
        case BufferType::kStorage:
            bufInfo.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
            break;
        case BufferType::kQuery:
            SK_ABORT("Query buffers not supported on Vulkan");
            break;
        case BufferType::kIndirect:
            bufInfo.usage =
                    VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
            break;
        case BufferType::kVertexStorage:
            bufInfo.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
            break;
        case BufferType::kIndexStorage:
            bufInfo.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
            break;
        case BufferType::kUniform:
            bufInfo.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
            break;
        case BufferType::kXferCpuToGpu:
            bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT;
            break;
        case BufferType::kXferGpuToCpu:
            bufInfo.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;
            break;
    }
    // We may not always get a mappable buffer for non-dynamic-access buffers, so we also set the
    // transfer dst usage bit in case we need to write data via a copy. It doesn't really hurt to
    // set this extra usage flag, but we could narrow the set of buffers it is applied to more
    // precisely than just "not dynamic".
    if (!requiresMappable || accessPattern == AccessPattern::kGpuOnly) {
        bufInfo.usage |= VK_BUFFER_USAGE_TRANSFER_DST_BIT;
    }

    bufInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
    bufInfo.queueFamilyIndexCount = 0;
    bufInfo.pQueueFamilyIndices = nullptr;

    VkResult result;
    VULKAN_CALL_RESULT(sharedContext,
                       result,
                       CreateBuffer(sharedContext->device(),
                                    &bufInfo,
                                    nullptr, /*const VkAllocationCallbacks*/
                                    &buffer));
    if (result != VK_SUCCESS) {
        return nullptr;
    }

    auto allocator = sharedContext->memoryAllocator();
    bool shouldPersistentlyMapCpuToGpu =
            sharedContext->vulkanCaps().shouldPersistentlyMapCpuToGpuBuffers();
    // Allocate the backing memory for the buffer.
    auto checkResult = [](VkResult result) {
        return result == VK_SUCCESS;
    };
    if (!skgpu::VulkanMemory::AllocBufferMemory(allocator,
                                                buffer,
                                                skgpu::Protected(isProtected),
                                                allocUsage,
                                                shouldPersistentlyMapCpuToGpu,
                                                checkResult,
                                                &alloc)) {
        VULKAN_CALL(sharedContext->interface(),
                    DestroyBuffer(sharedContext->device(),
                                  buffer,
                                  /*const VkAllocationCallbacks*=*/nullptr));
        return nullptr;
    }

    // Bind buffer
    VULKAN_CALL_RESULT(
            sharedContext,
            result,
            BindBufferMemory(sharedContext->device(), buffer, alloc.fMemory, alloc.fOffset));
    if (result != VK_SUCCESS) {
        skgpu::VulkanMemory::FreeBufferMemory(allocator, alloc);
        VULKAN_CALL(sharedContext->interface(),
                    DestroyBuffer(sharedContext->device(),
                                  buffer,
                                  /*const VkAllocationCallbacks*=*/nullptr));
        return nullptr;
    }

    return sk_sp<Buffer>(new VulkanBuffer(
            sharedContext, size, type, accessPattern, std::move(buffer), alloc, bufInfo.usage,
            Protected(isProtected)));
}
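
// A minimal usage sketch (an assumption for illustration, not a call site that exists in this
// file): a caller holding a valid VulkanSharedContext could create a host-visible uniform buffer
// roughly like
//     sk_sp<Buffer> uniforms = VulkanBuffer::Make(sharedContext,
//                                                 /*size=*/256,
//                                                 BufferType::kUniform,
//                                                 AccessPattern::kHostVisible);
// Make() returns nullptr on any failure: zero size, VkBuffer creation, memory allocation, or
// memory binding.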

VulkanBuffer::VulkanBuffer(const VulkanSharedContext* sharedContext,
                           size_t size,
                           BufferType type,
                           AccessPattern accessPattern,
                           VkBuffer buffer,
                           const skgpu::VulkanAlloc& alloc,
                           const VkBufferUsageFlags usageFlags,
                           Protected isProtected)
        : Buffer(sharedContext, size, isProtected)
        , fBuffer(std::move(buffer))
        , fAlloc(alloc)
        , fBufferUsageFlags(usageFlags)
        // We assume a buffer is used for CPU reads only in the case of GPU->CPU transfer buffers.
        , fBufferUsedForCPURead(type == BufferType::kXferGpuToCpu) {}

void VulkanBuffer::freeGpuData() {
    if (fMapPtr) {
        this->internalUnmap(0, this->size());
        fMapPtr = nullptr;
    }

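    // Destroy the VkBuffer handle first, then free the memory that was bound to it.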
    const VulkanSharedContext* sharedContext =
            static_cast<const VulkanSharedContext*>(this->sharedContext());
    SkASSERT(fBuffer);
    SkASSERT(fAlloc.fMemory && fAlloc.fBackendMemory);
    VULKAN_CALL(sharedContext->interface(),
                DestroyBuffer(sharedContext->device(), fBuffer, nullptr));
    fBuffer = VK_NULL_HANDLE;

    skgpu::VulkanMemory::FreeBufferMemory(sharedContext->memoryAllocator(), fAlloc);
    fAlloc.fMemory = VK_NULL_HANDLE;
    fAlloc.fBackendMemory = 0;
}

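// Maps the buffer into host-visible memory. For buffers that will be read on the CPU, the range
// [readOffset, readOffset + readSize) is invalidated so that device writes there are visible to
// the host.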
void VulkanBuffer::internalMap(size_t readOffset, size_t readSize) {
    SkASSERT(!fMapPtr);
    if (this->isMappable()) {
        SkASSERT(fAlloc.fSize > 0);
        SkASSERT(fAlloc.fSize >= readOffset + readSize);

        const VulkanSharedContext* sharedContext = this->vulkanSharedContext();

        auto allocator = sharedContext->memoryAllocator();
        auto checkResult = [sharedContext](VkResult result) {
            VULKAN_LOG_IF_NOT_SUCCESS(sharedContext, result, "skgpu::VulkanMemory::MapAlloc");
            return sharedContext->checkVkResult(result);
        };
        fMapPtr = skgpu::VulkanMemory::MapAlloc(allocator, fAlloc, checkResult);
        if (fMapPtr && readSize != 0) {
            auto checkResult_invalidate = [sharedContext, readOffset, readSize](VkResult result) {
                VULKAN_LOG_IF_NOT_SUCCESS(sharedContext,
                                          result,
                                          "skgpu::VulkanMemory::InvalidateMappedAlloc "
                                          "(readOffset:%zu, readSize:%zu)",
                                          readOffset,
                                          readSize);
                return sharedContext->checkVkResult(result);
            };
222 // "Invalidate" here means make device writes visible to the host. That is, it makes
223 // sure any GPU writes are finished in the range we might read from.
            skgpu::VulkanMemory::InvalidateMappedAlloc(allocator,
                                                       fAlloc,
                                                       readOffset,
                                                       readSize,
                                                       checkResult_invalidate);
        }
    }
}

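// Flushes any host writes in [flushOffset, flushOffset + flushSize) to the device and then unmaps
// the buffer.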
void VulkanBuffer::internalUnmap(size_t flushOffset, size_t flushSize) {
    SkASSERT(fMapPtr && this->isMappable());

    SkASSERT(fAlloc.fSize > 0);
    SkASSERT(fAlloc.fSize >= flushOffset + flushSize);

    const VulkanSharedContext* sharedContext = this->vulkanSharedContext();
    auto checkResult = [sharedContext, flushOffset, flushSize](VkResult result) {
        VULKAN_LOG_IF_NOT_SUCCESS(sharedContext,
                                  result,
                                  "skgpu::VulkanMemory::FlushMappedAlloc "
                                  "(flushOffset:%zu, flushSize:%zu)",
                                  flushOffset,
                                  flushSize);
        return sharedContext->checkVkResult(result);
    };

    auto allocator = sharedContext->memoryAllocator();
    skgpu::VulkanMemory::FlushMappedAlloc(allocator, fAlloc, flushOffset, flushSize, checkResult);
    skgpu::VulkanMemory::UnmapAlloc(allocator, fAlloc);
}

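// GPU->CPU read-back buffers invalidate their full contents on map so CPU reads see the GPU's
// writes; all other mappable buffers are mapped for writing only, so no read range is needed.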
void VulkanBuffer::onMap() {
    SkASSERT(fBuffer);
    SkASSERT(!this->isMapped());

    this->internalMap(0, fBufferUsedForCPURead ? this->size() : 0);
}

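// Read-back buffers have no host writes to flush on unmap; every other mappable buffer flushes
// its full size.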
void VulkanBuffer::onUnmap() {
    SkASSERT(fBuffer);
    SkASSERT(this->isMapped());
    this->internalUnmap(0, fBufferUsedForCPURead ? 0 : this->size());
}

namespace {

VkPipelineStageFlags access_to_pipeline_srcStageFlags(const VkAccessFlags srcAccess) {
    // For now this function assumes the access flags equal a specific bit and don't act like true
    // flags (i.e. set of bits). If we ever start having buffer usages that have multiple accesses
    // in one usage we'll need to update this.
    switch (srcAccess) {
        case 0:
            return VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
        case (VK_ACCESS_TRANSFER_WRITE_BIT): // fallthrough
        case (VK_ACCESS_TRANSFER_READ_BIT):
            return VK_PIPELINE_STAGE_TRANSFER_BIT;
        case (VK_ACCESS_UNIFORM_READ_BIT):
            // TODO(b/307577875): It is possible that uniforms were only used in the vertex shader
            // and not the fragment shader, so indiscriminately using the fragment shader pipeline
            // stage bit is overkill. This function should be updated to check for and allow
            // selecting VK_PIPELINE_STAGE_VERTEX_SHADER_BIT when appropriate.
            return (VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT);
        case (VK_ACCESS_SHADER_WRITE_BIT):
            return VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
        case (VK_ACCESS_INDEX_READ_BIT): // fallthrough
        case (VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT):
            return VK_PIPELINE_STAGE_VERTEX_INPUT_BIT;
        case (VK_ACCESS_INDIRECT_COMMAND_READ_BIT):
            return VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT;
        case (VK_ACCESS_HOST_READ_BIT): // fallthrough
        case (VK_ACCESS_HOST_WRITE_BIT):
            return VK_PIPELINE_STAGE_HOST_BIT;
        default:
            SkUNREACHABLE;
    }
}

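// Returns true if the given access flag only reads the buffer (or is the initial, never-used
// state); write accesses return false.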
bool access_is_read_only(VkAccessFlags access) {
    switch (access) {
        case 0: // initialization state
        case (VK_ACCESS_TRANSFER_READ_BIT):
        case (VK_ACCESS_UNIFORM_READ_BIT):
        case (VK_ACCESS_INDEX_READ_BIT):
        case (VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT):
        case (VK_ACCESS_INDIRECT_COMMAND_READ_BIT):
        case (VK_ACCESS_HOST_READ_BIT):
            return true;
        case (VK_ACCESS_TRANSFER_WRITE_BIT):
        case (VK_ACCESS_SHADER_WRITE_BIT):
        case (VK_ACCESS_HOST_WRITE_BIT):
            return false;
        default:
            SkUNREACHABLE;
    }
}

} // anonymous namespace

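// Records a pipeline barrier, if one is needed, transitioning this buffer from its last tracked
// access to dstAccess at dstStageMask, and then updates the tracked access state.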
void VulkanBuffer::setBufferAccess(VulkanCommandBuffer* cmdBuffer,
                                   VkAccessFlags dstAccess,
                                   VkPipelineStageFlags dstStageMask) const {
    SkASSERT(dstAccess == VK_ACCESS_HOST_READ_BIT ||
             dstAccess == VK_ACCESS_TRANSFER_WRITE_BIT ||
             dstAccess == VK_ACCESS_TRANSFER_READ_BIT ||
             dstAccess == VK_ACCESS_UNIFORM_READ_BIT ||
             dstAccess == VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT ||
             dstAccess == VK_ACCESS_INDEX_READ_BIT);

    VkPipelineStageFlags srcStageMask = access_to_pipeline_srcStageFlags(fCurrentAccess);
    SkASSERT(srcStageMask);

    bool needsBarrier = true;

    // We don't need a barrier if we're going from one read access to another read access.
    if (access_is_read_only(fCurrentAccess) && access_is_read_only(dstAccess)) {
        // Currently all reads should be the same type of access. If we ever allow and need
        // different read usages for a buffer, then we'll need to update the logic in this file to
        // store all the read accesses in a mask. Additionally, we'll need to keep track of what
        // the last write was, since we will need to add a barrier for each new read access even
        // if we already added a barrier for a previous read. For example, take the sequence
        // Write_1, Read_Access1, Read_Access2. We will first add a barrier going from Write_1 to
        // Read_Access1. But with the current logic, when we add Read_Access2 it will think it is
        // going from a read to a read, so no barrier would be added. However, we do need another
        // barrier from Write_1 to Read_Access2 so that the write's changes become visible to it.
        SkASSERT(fCurrentAccess == dstAccess || fCurrentAccess == 0);
        needsBarrier = false;
    }

    // When the buffer was last used on the host, we don't need to add any barrier, as writes on
    // the CPU host are implicitly synchronized when new commands are submitted.
    if (srcStageMask == VK_PIPELINE_STAGE_HOST_BIT) {
        needsBarrier = false;
    }

    if (needsBarrier) {
        VkBufferMemoryBarrier bufferMemoryBarrier = {
                VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,  // sType
                nullptr,                                  // pNext
                fCurrentAccess,                           // srcAccessMask
                dstAccess,                                // dstAccessMask
                VK_QUEUE_FAMILY_IGNORED,                  // srcQueueFamilyIndex
                VK_QUEUE_FAMILY_IGNORED,                  // dstQueueFamilyIndex
                fBuffer,                                  // buffer
                0,                                        // offset
                this->size(),                             // size
        };
        cmdBuffer->addBufferMemoryBarrier(srcStageMask, dstStageMask, &bufferMemoryBarrier);
    }

    fCurrentAccess = dstAccess;
}

} // namespace skgpu::graphite