//
// Copyright 2016 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// BufferVk.cpp:
//    Implements the class methods for BufferVk.
//

#include "libANGLE/renderer/vulkan/BufferVk.h"

#include "common/FixedVector.h"
#include "common/debug.h"
#include "common/mathutil.h"
#include "common/utilities.h"
#include "libANGLE/Context.h"
#include "libANGLE/renderer/vulkan/ContextVk.h"
#include "libANGLE/renderer/vulkan/RendererVk.h"
#include "libANGLE/trace.h"

namespace rx
{
VkBufferUsageFlags GetDefaultBufferUsageFlags(RendererVk *renderer)
{
    // We could potentially use multiple backing buffers for different usages.
    // For now keep a single buffer with all relevant usage flags.
    VkBufferUsageFlags defaultBufferUsageFlags =
        VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
        VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
        VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
        VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
        VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
    if (renderer->getFeatures().supportsTransformFeedbackExtension.enabled)
    {
        defaultBufferUsageFlags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT |
                                   VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT;
    }
    return defaultBufferUsageFlags;
}

namespace
{
// Vertex attribute buffers are used as storage buffers for conversion in compute, where access to
// the buffer is made in 4-byte chunks.  Assume the size of the buffer is 4k+n, where n is in
// [1, 3].  On some hardware, reading 4 bytes from address 4k returns 0, making it impossible to
// read the last n bytes.  By rounding buffer sizes up to a multiple of 4, the problem is
// alleviated.
constexpr size_t kBufferSizeGranularity = 4;
static_assert(gl::isPow2(kBufferSizeGranularity), "use as alignment, must be power of two");
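// For example, acquireBufferHelper below rounds a 4095-byte request up to 4096 bytes via
// roundUpPow2(size, kBufferSizeGranularity), so the final 3 bytes remain readable as part of a
// complete 4-byte chunk.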

// Start with a fairly small buffer size. We can increase this dynamically as we convert more data.
constexpr size_t kConvertedArrayBufferInitialSize = 1024 * 8;

// Buffers that have a static usage pattern will be allocated in device-local memory to speed up
// access to and from the GPU.  Buffers with a dynamic usage pattern, or that are frequently
// mapped, will instead request host-cached memory to speed up access from the CPU.
ANGLE_INLINE VkMemoryPropertyFlags GetPreferredMemoryType(gl::BufferBinding target,
                                                          gl::BufferUsage usage)
{
    constexpr VkMemoryPropertyFlags kDeviceLocalFlags =
        (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
         VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
    constexpr VkMemoryPropertyFlags kHostCachedFlags =
        (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
         VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
    constexpr VkMemoryPropertyFlags kHostUncachedFlags =
        (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);

    if (target == gl::BufferBinding::PixelUnpack)
    {
        return kHostCachedFlags;
    }

    switch (usage)
    {
        case gl::BufferUsage::StaticCopy:
        case gl::BufferUsage::StaticDraw:
        case gl::BufferUsage::StaticRead:
            // For static usage, request device-local memory.
            return kDeviceLocalFlags;
        case gl::BufferUsage::DynamicDraw:
        case gl::BufferUsage::StreamDraw:
            // For non-static usage where the CPU performs write-only access, request host-uncached
            // memory.
            return kHostUncachedFlags;
        case gl::BufferUsage::DynamicCopy:
        case gl::BufferUsage::DynamicRead:
        case gl::BufferUsage::StreamCopy:
        case gl::BufferUsage::StreamRead:
            // For all other types of usage, request host-cached memory.
            return kHostCachedFlags;
        default:
            UNREACHABLE();
            return kHostCachedFlags;
    }
}

ANGLE_INLINE VkMemoryPropertyFlags GetStorageMemoryType(GLbitfield storageFlags,
                                                        bool externalBuffer)
{
    constexpr VkMemoryPropertyFlags kDeviceLocalHostVisibleFlags =
        (VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
    constexpr VkMemoryPropertyFlags kDeviceLocalHostCoherentFlags =
        (VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);

    const bool isCoherentMap   = (storageFlags & GL_MAP_COHERENT_BIT_EXT) != 0;
    const bool isPersistentMap = (storageFlags & GL_MAP_PERSISTENT_BIT_EXT) != 0;

    if (isCoherentMap || isPersistentMap || externalBuffer)
    {
        // We currently allocate coherent memory for persistently mapped buffers.
        // GL_EXT_buffer_storage allows non-coherent memory, but currently the implementation of
        // |glMemoryBarrier(CLIENT_MAPPED_BUFFER_BARRIER_BIT_EXT)| relies on the mapping being
        // coherent.
        //
        // If persistently mapped buffers ever use non-coherent memory, then said |glMemoryBarrier|
        // call must result in |vkInvalidateMappedMemoryRanges| for all persistently mapped buffers.
        return kDeviceLocalHostCoherentFlags;
    }

    return kDeviceLocalHostVisibleFlags;
}

ANGLE_INLINE bool ShouldAllocateNewMemoryForUpdate(ContextVk *contextVk,
                                                   size_t subDataSize,
                                                   size_t bufferSize)
{
    // A sub data update with size > 50% of buffer size meets the threshold
    // to acquire a new BufferHelper from the pool.
    return contextVk->getRenderer()->getFeatures().preferCPUForBufferSubData.enabled ||
           subDataSize > (bufferSize / 2);
}
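// For example, a 3 KB update to a 4 KB buffer exceeds the 50% threshold above, while a 1 KB update
// does not (unless the preferCPUForBufferSubData feature is enabled).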

ANGLE_INLINE bool ShouldUseCPUToCopyData(ContextVk *contextVk, size_t copySize, size_t bufferSize)
{
    RendererVk *renderer = contextVk->getRenderer();
    // For some GPUs (e.g. ARM), we always prefer using the CPU to do the copy instead of the GPU,
    // to avoid pipeline bubbles.  If the GPU is currently busy and the copy size is below a
    // certain threshold, we choose the CPU over the GPU to achieve better parallelism.
    return renderer->getFeatures().preferCPUForBufferSubData.enabled ||
           (renderer->isCommandQueueBusy() &&
            copySize < renderer->getMaxCopyBytesUsingCPUWhenPreservingBufferData());
}

ANGLE_INLINE bool IsUsageDynamic(gl::BufferUsage usage)
{
    return (usage == gl::BufferUsage::DynamicDraw || usage == gl::BufferUsage::DynamicCopy ||
            usage == gl::BufferUsage::DynamicRead);
}

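// Determines which memory type index the allocator would pick for a buffer of the given size and
// memory properties, by describing a representative VkBufferCreateInfo with the default usage
// flags.  Host visibility is treated as required; all other requested properties are preferred.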
angle::Result GetMemoryTypeIndex(ContextVk *contextVk,
                                 VkDeviceSize size,
                                 VkMemoryPropertyFlags memoryPropertyFlags,
                                 uint32_t *memoryTypeIndexOut)
{
    RendererVk *renderer           = contextVk->getRenderer();
    const vk::Allocator &allocator = renderer->getAllocator();

    bool persistentlyMapped = renderer->getFeatures().persistentlyMappedBuffers.enabled;
    VkBufferUsageFlags defaultBufferUsageFlags = GetDefaultBufferUsageFlags(renderer);

    VkBufferCreateInfo createInfo    = {};
    createInfo.sType                 = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    createInfo.flags                 = 0;
    createInfo.size                  = size;
    createInfo.usage                 = defaultBufferUsageFlags;
    createInfo.sharingMode           = VK_SHARING_MODE_EXCLUSIVE;
    createInfo.queueFamilyIndexCount = 0;
    createInfo.pQueueFamilyIndices   = nullptr;

    // Host visible is required; all other bits are preferred (i.e., optional).
    VkMemoryPropertyFlags requiredFlags =
        (memoryPropertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
    VkMemoryPropertyFlags preferredFlags =
        (memoryPropertyFlags & (~VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));

    // Check that the allocation is not too large.
    uint32_t memoryTypeIndex = 0;
    ANGLE_VK_TRY(contextVk, allocator.findMemoryTypeIndexForBufferInfo(
                                createInfo, requiredFlags, preferredFlags, persistentlyMapped,
                                &memoryTypeIndex));
    *memoryTypeIndexOut = memoryTypeIndex;

    return angle::Result::Continue;
}
}  // namespace

// ConversionBuffer implementation.
ConversionBuffer::ConversionBuffer(RendererVk *renderer,
                                   VkBufferUsageFlags usageFlags,
                                   size_t initialSize,
                                   size_t alignment,
                                   bool hostVisible)
    : dirty(true)
{
    data = std::make_unique<vk::BufferHelper>();
}

ConversionBuffer::~ConversionBuffer()
{
    ASSERT(!data || !data->valid());
}

ConversionBuffer::ConversionBuffer(ConversionBuffer &&other) = default;

// BufferVk::VertexConversionBuffer implementation.
BufferVk::VertexConversionBuffer::VertexConversionBuffer(RendererVk *renderer,
                                                         angle::FormatID formatIDIn,
                                                         GLuint strideIn,
                                                         size_t offsetIn,
                                                         bool hostVisible)
    : ConversionBuffer(renderer,
                       vk::kVertexBufferUsageFlags,
                       kConvertedArrayBufferInitialSize,
                       vk::kVertexBufferAlignment,
                       hostVisible),
      formatID(formatIDIn),
      stride(strideIn),
      offset(offsetIn)
{}

BufferVk::VertexConversionBuffer::VertexConversionBuffer(VertexConversionBuffer &&other) = default;

BufferVk::VertexConversionBuffer::~VertexConversionBuffer() = default;

// BufferVk implementation.
BufferVk::BufferVk(const gl::BufferState &state)
    : BufferImpl(state),
      mClientBuffer(nullptr),
      mMemoryTypeIndex(0),
      mMemoryPropertyFlags(0),
      mIsStagingBufferMapped(false),
      mHasValidData(false),
      mHasBeenReferencedByGPU(false)
{}

BufferVk::~BufferVk() {}

void BufferVk::destroy(const gl::Context *context)
{
    ContextVk *contextVk = vk::GetImpl(context);

    release(contextVk);
}

void BufferVk::release(ContextVk *contextVk)
{
    RendererVk *renderer = contextVk->getRenderer();
    if (mBuffer.valid())
    {
        mBuffer.release(renderer);
    }
    if (mStagingBuffer.valid())
    {
        mStagingBuffer.release(renderer);
    }

    for (ConversionBuffer &buffer : mVertexConversionBuffers)
    {
        buffer.data->release(renderer);
    }
    mVertexConversionBuffers.clear();
}

angle::Result BufferVk::setExternalBufferData(const gl::Context *context,
                                              gl::BufferBinding target,
                                              GLeglClientBufferEXT clientBuffer,
                                              size_t size,
                                              VkMemoryPropertyFlags memoryPropertyFlags)
{
    ContextVk *contextVk = vk::GetImpl(context);

    // Release and re-create the memory and buffer.
    release(contextVk);

    VkBufferCreateInfo createInfo    = {};
    createInfo.sType                 = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    createInfo.flags                 = 0;
    createInfo.size                  = size;
    createInfo.usage                 = GetDefaultBufferUsageFlags(contextVk->getRenderer());
    createInfo.sharingMode           = VK_SHARING_MODE_EXCLUSIVE;
    createInfo.queueFamilyIndexCount = 0;
    createInfo.pQueueFamilyIndices   = nullptr;

    return mBuffer.initExternal(contextVk, memoryPropertyFlags, createInfo, clientBuffer);
}

angle::Result BufferVk::setDataWithUsageFlags(const gl::Context *context,
                                              gl::BufferBinding target,
                                              GLeglClientBufferEXT clientBuffer,
                                              const void *data,
                                              size_t size,
                                              gl::BufferUsage usage,
                                              GLbitfield flags)
{
    VkMemoryPropertyFlags memoryPropertyFlags = 0;
    bool persistentMapRequired                = false;
    const bool isExternalBuffer               = clientBuffer != nullptr;

    switch (usage)
    {
        case gl::BufferUsage::InvalidEnum:
        {
            // glBufferStorage API call
            memoryPropertyFlags   = GetStorageMemoryType(flags, isExternalBuffer);
            persistentMapRequired = (flags & GL_MAP_PERSISTENT_BIT_EXT) != 0;
            break;
        }
        default:
        {
            // glBufferData API call
            memoryPropertyFlags = GetPreferredMemoryType(target, usage);
            break;
        }
    }

    if (isExternalBuffer)
    {
        ANGLE_TRY(setExternalBufferData(context, target, clientBuffer, size, memoryPropertyFlags));
        if (!mBuffer.isHostVisible())
        {
            // If the external buffer's memory does not have the host-visible property, we cannot
            // support a persistent map request.
            ANGLE_VK_CHECK(vk::GetImpl(context), !persistentMapRequired,
                           VK_ERROR_MEMORY_MAP_FAILED);
        }

        mClientBuffer = clientBuffer;

        return angle::Result::Continue;
    }
    return setDataWithMemoryType(context, target, data, size, memoryPropertyFlags,
                                 persistentMapRequired, usage);
}

angle::Result BufferVk::setData(const gl::Context *context,
                                gl::BufferBinding target,
                                const void *data,
                                size_t size,
                                gl::BufferUsage usage)
{
    // Assume host visible/coherent memory available.
    VkMemoryPropertyFlags memoryPropertyFlags = GetPreferredMemoryType(target, usage);
    return setDataWithMemoryType(context, target, data, size, memoryPropertyFlags, false, usage);
}

angle::Result BufferVk::setDataWithMemoryType(const gl::Context *context,
                                              gl::BufferBinding target,
                                              const void *data,
                                              size_t size,
                                              VkMemoryPropertyFlags memoryPropertyFlags,
                                              bool persistentMapRequired,
                                              gl::BufferUsage usage)
{
    ContextVk *contextVk = vk::GetImpl(context);

    // Reset the flag since the buffer contents are being reinitialized. If the caller passed in
    // data to fill the buffer, the flag will be updated when the data is copied to the buffer.
    mHasValidData = false;

    if (size == 0)
    {
        // Nothing to do.
        return angle::Result::Continue;
    }

    const bool bufferSizeChanged              = size != static_cast<size_t>(mState.getSize());
    const bool inUseAndRespecifiedWithoutData = (data == nullptr && isCurrentlyInUse(contextVk));

    // The entire buffer is being respecified, possibly with null data.
    // Release and init a new mBuffer with requested size.
    if (bufferSizeChanged || inUseAndRespecifiedWithoutData)
    {
        // Release and re-create the memory and buffer.
        release(contextVk);

        mMemoryPropertyFlags = memoryPropertyFlags;
        ANGLE_TRY(GetMemoryTypeIndex(contextVk, size, memoryPropertyFlags, &mMemoryTypeIndex));

        ANGLE_TRY(acquireBufferHelper(contextVk, size, BufferUpdateType::StorageRedefined));
    }

    if (data)
    {
        // Treat full-buffer updates as SubData calls.
        BufferUpdateType updateType = bufferSizeChanged ? BufferUpdateType::StorageRedefined
                                                        : BufferUpdateType::ContentsUpdate;

        ANGLE_TRY(setDataImpl(contextVk, static_cast<const uint8_t *>(data), size, 0, updateType));
    }

    return angle::Result::Continue;
}

angle::Result BufferVk::setSubData(const gl::Context *context,
                                   gl::BufferBinding target,
                                   const void *data,
                                   size_t size,
                                   size_t offset)
{
    ASSERT(mBuffer.valid());

    ContextVk *contextVk = vk::GetImpl(context);
    ANGLE_TRY(setDataImpl(contextVk, static_cast<const uint8_t *>(data), size, offset,
                          BufferUpdateType::ContentsUpdate));

    return angle::Result::Continue;
}

angle::Result BufferVk::copySubData(const gl::Context *context,
                                    BufferImpl *source,
                                    GLintptr sourceOffset,
                                    GLintptr destOffset,
                                    GLsizeiptr size)
{
    ASSERT(mBuffer.valid());

    ContextVk *contextVk           = vk::GetImpl(context);
    BufferVk *sourceVk             = GetAs<BufferVk>(source);
    vk::BufferHelper &sourceBuffer = sourceVk->getBuffer();
    ASSERT(sourceBuffer.valid());
    VkDeviceSize sourceBufferOffset = sourceBuffer.getOffset();

    // Check for self-dependency.
    vk::CommandBufferAccess access;
    if (sourceBuffer.getBufferSerial() == mBuffer.getBufferSerial())
    {
        access.onBufferSelfCopy(&mBuffer);
    }
    else
    {
        access.onBufferTransferRead(&sourceBuffer);
        access.onBufferTransferWrite(&mBuffer);
    }

    vk::OutsideRenderPassCommandBuffer *commandBuffer;
    ANGLE_TRY(contextVk->getOutsideRenderPassCommandBuffer(access, &commandBuffer));

    // Enqueue a copy command on the GPU.
    const VkBufferCopy copyRegion = {static_cast<VkDeviceSize>(sourceOffset) + sourceBufferOffset,
                                     static_cast<VkDeviceSize>(destOffset) + mBuffer.getOffset(),
                                     static_cast<VkDeviceSize>(size)};

    commandBuffer->copyBuffer(sourceBuffer.getBuffer(), mBuffer.getBuffer(), 1, &copyRegion);
    mHasBeenReferencedByGPU = true;

    // The new destination buffer data may require a conversion for the next draw, so mark it
    // dirty.
    onDataChanged();

    return angle::Result::Continue;
}

angle::Result BufferVk::allocStagingBuffer(ContextVk *contextVk,
                                           vk::MemoryCoherency coherency,
                                           VkDeviceSize size,
                                           uint8_t **mapPtr)
{
    ASSERT(!mIsStagingBufferMapped);

    if (mStagingBuffer.valid())
    {
        if (size <= mStagingBuffer.getSize() &&
            (coherency == vk::MemoryCoherency::Coherent) == mStagingBuffer.isCoherent() &&
            !mStagingBuffer.isCurrentlyInUse(contextVk->getLastCompletedQueueSerial()))
        {
            // If the size is big enough and it is idle, then just reuse the existing staging
            // buffer.
            *mapPtr                = mStagingBuffer.getMappedMemory();
            mIsStagingBufferMapped = true;
            return angle::Result::Continue;
        }
        mStagingBuffer.release(contextVk->getRenderer());
    }

    ANGLE_TRY(
        mStagingBuffer.allocateForCopyBuffer(contextVk, static_cast<size_t>(size), coherency));
    *mapPtr                = mStagingBuffer.getMappedMemory();
    mIsStagingBufferMapped = true;

    return angle::Result::Continue;
}

angle::Result BufferVk::flushStagingBuffer(ContextVk *contextVk,
                                           VkDeviceSize offset,
                                           VkDeviceSize size)
{
    RendererVk *renderer = contextVk->getRenderer();

    ASSERT(mIsStagingBufferMapped);
    ASSERT(mStagingBuffer.valid());

    if (!mStagingBuffer.isCoherent())
    {
        ANGLE_TRY(mStagingBuffer.flush(renderer));
    }

    // Enqueue a copy command on the GPU.
    VkBufferCopy copyRegion = {mStagingBuffer.getOffset(), mBuffer.getOffset() + offset, size};
    ANGLE_TRY(mBuffer.copyFromBuffer(contextVk, &mStagingBuffer, 1, &copyRegion));

    mHasBeenReferencedByGPU = true;

    return angle::Result::Continue;
}

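// For buffers that are not host visible, mapping is emulated: the requested range is copied into a
// host-visible, coherent staging buffer, and the staging buffer's mapped pointer is returned to
// the caller.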
angle::Result BufferVk::handleDeviceLocalBufferMap(ContextVk *contextVk,
                                                   VkDeviceSize offset,
                                                   VkDeviceSize size,
                                                   uint8_t **mapPtr)
{
    ANGLE_TRY(allocStagingBuffer(contextVk, vk::MemoryCoherency::Coherent, size, mapPtr));

    // Copy data from the device-local buffer to the host-visible staging buffer.
    VkBufferCopy copyRegion = {mBuffer.getOffset() + offset, mStagingBuffer.getOffset(), size};
    ANGLE_TRY(mStagingBuffer.copyFromBuffer(contextVk, &mBuffer, 1, &copyRegion));
    ANGLE_TRY(mStagingBuffer.waitForIdle(contextVk, "GPU stall due to mapping device local buffer",
                                         RenderPassClosureReason::DeviceLocalBufferMap));
    // Because the staging buffer is coherent, there is no need to call invalidate here.

    return angle::Result::Continue;
}

angle::Result BufferVk::map(const gl::Context *context, GLenum access, void **mapPtr)
{
    ASSERT(mBuffer.valid());
    ASSERT(access == GL_WRITE_ONLY_OES);

    return mapImpl(vk::GetImpl(context), GL_MAP_WRITE_BIT, mapPtr);
}

angle::Result BufferVk::mapRange(const gl::Context *context,
                                 size_t offset,
                                 size_t length,
                                 GLbitfield access,
                                 void **mapPtr)
{
    ANGLE_TRACE_EVENT0("gpu.angle", "BufferVk::mapRange");
    return mapRangeImpl(vk::GetImpl(context), offset, length, access, mapPtr);
}

angle::Result BufferVk::mapImpl(ContextVk *contextVk, GLbitfield access, void **mapPtr)
{
    return mapRangeImpl(contextVk, 0, static_cast<VkDeviceSize>(mState.getSize()), access, mapPtr);
}

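// "Ghosting" a mapped buffer: instead of waiting for the GPU, which is still reading the current
// buffer, acquire a fresh BufferHelper, copy the old contents into it on the CPU, and return a
// pointer into the new buffer so the caller's partial writes land on valid data.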
angle::Result BufferVk::ghostMappedBuffer(ContextVk *contextVk,
                                          VkDeviceSize offset,
                                          VkDeviceSize length,
                                          GLbitfield access,
                                          void **mapPtr)
{
    // We shouldn't get here if this is external memory.
    ASSERT(!isExternalBuffer());

    ++contextVk->getPerfCounters().buffersGhosted;

    // If we are creating a new buffer because the GPU is using the old one as read-only, then we
    // also need to copy the contents of the previous buffer into the new buffer, in case the
    // caller only updates a portion of the new buffer.
    vk::BufferHelper src = std::move(mBuffer);

    ANGLE_TRY(acquireBufferHelper(contextVk, static_cast<size_t>(mState.getSize()),
                                  BufferUpdateType::ContentsUpdate));

    // Before returning the new buffer, map the previous buffer and copy its entire
    // contents into the new buffer.
    uint8_t *srcMapPtr = nullptr;
    uint8_t *dstMapPtr = nullptr;
    ANGLE_TRY(src.map(contextVk, &srcMapPtr));
    ANGLE_TRY(mBuffer.map(contextVk, &dstMapPtr));

    ASSERT(src.isCoherent());
    ASSERT(mBuffer.isCoherent());

    // No need to copy over [offset, offset + length); just copy the regions around it.
    if ((access & GL_MAP_INVALIDATE_RANGE_BIT) != 0)
    {
        if (offset != 0)
        {
            memcpy(dstMapPtr, srcMapPtr, static_cast<size_t>(offset));
        }
        size_t totalSize      = static_cast<size_t>(mState.getSize());
        size_t remainingStart = static_cast<size_t>(offset + length);
        size_t remainingSize  = totalSize - remainingStart;
        if (remainingSize != 0)
        {
            memcpy(dstMapPtr + remainingStart, srcMapPtr + remainingStart, remainingSize);
        }
    }
    else
    {
        memcpy(dstMapPtr, srcMapPtr, static_cast<size_t>(mState.getSize()));
    }

    src.release(contextVk->getRenderer());

    // Return the already mapped pointer with the offset adjustment to avoid the call to unmap().
    *mapPtr = dstMapPtr + offset;

    return angle::Result::Continue;
}

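// Maps [offset, offset + length) of the buffer for client access.  Depending on the access bits
// and on whether the buffer is host visible and/or currently in use by the GPU, this maps the
// buffer directly, stages through a temporary buffer, ghosts the buffer, or waits for the GPU.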
angle::Result BufferVk::mapRangeImpl(ContextVk *contextVk,
                                     VkDeviceSize offset,
                                     VkDeviceSize length,
                                     GLbitfield access,
                                     void **mapPtr)
{
    uint8_t **mapPtrBytes = reinterpret_cast<uint8_t **>(mapPtr);

    ASSERT(mBuffer.valid());

    bool hostVisible = mBuffer.isHostVisible();

    // MAP_UNSYNCHRONIZED_BIT, so immediately map.
    if ((access & GL_MAP_UNSYNCHRONIZED_BIT) != 0)
    {
        if (hostVisible)
        {
            return mBuffer.mapWithOffset(contextVk, mapPtrBytes, static_cast<size_t>(offset));
        }
        return handleDeviceLocalBufferMap(contextVk, offset, length, mapPtrBytes);
    }

    // Read case
    if ((access & GL_MAP_WRITE_BIT) == 0)
    {
        // If the app is not going to write, all we need is to ensure the GPU's writes have
        // finished.  Concurrent reads from the CPU and GPU are allowed.
        if (mBuffer.isCurrentlyInUseForWrite(contextVk->getLastCompletedQueueSerial()))
        {
            // If there are pending commands for the resource, flush them.
            if (mBuffer.usedInRecordedCommands())
            {
                ANGLE_TRY(
                    contextVk->flushImpl(nullptr, RenderPassClosureReason::BufferWriteThenMap));
            }
            ANGLE_TRY(mBuffer.finishGPUWriteCommands(contextVk));
        }
        if (hostVisible)
        {
            return mBuffer.mapWithOffset(contextVk, mapPtrBytes, static_cast<size_t>(offset));
        }
        return handleDeviceLocalBufferMap(contextVk, offset, length, mapPtrBytes);
    }

    // Write case
    if (!hostVisible)
    {
        return handleDeviceLocalBufferMap(contextVk, offset, length, mapPtrBytes);
    }

    // Write case, buffer not in use.
    if (isExternalBuffer() || !isCurrentlyInUse(contextVk))
    {
        return mBuffer.mapWithOffset(contextVk, mapPtrBytes, static_cast<size_t>(offset));
    }

    // Write case, buffer in use.
    //
    // Here, we try to map the buffer, but it's busy. Instead of waiting for the GPU to
    // finish, we just allocate a new buffer if:
    // 1.) Caller has told us it doesn't care about previous contents, or
    // 2.) The GPU won't write to the buffer.

    bool rangeInvalidate = (access & GL_MAP_INVALIDATE_RANGE_BIT) != 0;
    bool entireBufferInvalidated =
        ((access & GL_MAP_INVALIDATE_BUFFER_BIT) != 0) ||
        (rangeInvalidate && offset == 0 && static_cast<VkDeviceSize>(mState.getSize()) == length);

    if (entireBufferInvalidated)
    {
        ANGLE_TRY(acquireBufferHelper(contextVk, static_cast<size_t>(mState.getSize()),
                                      BufferUpdateType::ContentsUpdate));
        return mBuffer.mapWithOffset(contextVk, mapPtrBytes, static_cast<size_t>(offset));
    }

    bool smallMapRange = (length < static_cast<VkDeviceSize>(mState.getSize()) / 2);

    if (smallMapRange && rangeInvalidate)
    {
        ANGLE_TRY(allocStagingBuffer(contextVk, vk::MemoryCoherency::NonCoherent,
                                     static_cast<size_t>(length), mapPtrBytes));
        return angle::Result::Continue;
    }

    if (!mBuffer.isCurrentlyInUseForWrite(contextVk->getLastCompletedQueueSerial()))
    {
        // This will keep the new buffer mapped and update mapPtr, so return immediately.
        return ghostMappedBuffer(contextVk, offset, length, access, mapPtr);
    }

    // Write case (worst case, buffer in use for write)
    ANGLE_TRY(mBuffer.waitForIdle(contextVk, "GPU stall due to mapping buffer in use by the GPU",
                                  RenderPassClosureReason::BufferInUseWhenSynchronizedMap));
    return mBuffer.mapWithOffset(contextVk, mapPtrBytes, static_cast<size_t>(offset));
}

angle::Result BufferVk::unmap(const gl::Context *context, GLboolean *result)
{
    ANGLE_TRY(unmapImpl(vk::GetImpl(context)));

    // This should be false if the contents have been corrupted through external means.  Vulkan
    // doesn't provide such information.
    *result = true;

    return angle::Result::Continue;
}

angle::Result BufferVk::unmapImpl(ContextVk *contextVk)
{
    ASSERT(mBuffer.valid());

    bool writeOperation = ((mState.getAccessFlags() & GL_MAP_WRITE_BIT) != 0);

    if (mIsStagingBufferMapped)
    {
        ASSERT(mStagingBuffer.valid());
        // The buffer is device-local, or this map used the small-range staging optimization.
        if (writeOperation)
        {
            ANGLE_TRY(flushStagingBuffer(contextVk, mState.getMapOffset(), mState.getMapLength()));
        }

        mIsStagingBufferMapped = false;
    }
    else
    {
        ASSERT(mBuffer.isHostVisible());
        mBuffer.unmap(contextVk->getRenderer());
    }

    if (writeOperation)
    {
        dataUpdated();
    }

    return angle::Result::Continue;
}

angle::Result BufferVk::getSubData(const gl::Context *context,
                                   GLintptr offset,
                                   GLsizeiptr size,
                                   void *outData)
{
    ASSERT(offset + size <= getSize());
    ASSERT(mBuffer.valid());
    ContextVk *contextVk = vk::GetImpl(context);
    void *mapPtr;
    ANGLE_TRY(mapRangeImpl(contextVk, offset, size, GL_MAP_READ_BIT, &mapPtr));
    memcpy(outData, mapPtr, size);
    return unmapImpl(contextVk);
}

angle::Result BufferVk::getIndexRange(const gl::Context *context,
                                      gl::DrawElementsType type,
                                      size_t offset,
                                      size_t count,
                                      bool primitiveRestartEnabled,
                                      gl::IndexRange *outRange)
{
    ContextVk *contextVk = vk::GetImpl(context);
    RendererVk *renderer = contextVk->getRenderer();

    // This is a workaround for the mock ICD not implementing buffer memory state.
    // Could be removed if https://github.com/KhronosGroup/Vulkan-Tools/issues/84 is fixed.
    if (renderer->isMockICDEnabled())
    {
        outRange->start = 0;
        outRange->end   = 0;
        return angle::Result::Continue;
    }

    ANGLE_TRACE_EVENT0("gpu.angle", "BufferVk::getIndexRange");

    void *mapPtr;
    ANGLE_TRY(mapRangeImpl(contextVk, offset, getSize(), GL_MAP_READ_BIT, &mapPtr));
    *outRange = gl::ComputeIndexRange(type, mapPtr, count, primitiveRestartEnabled);
    ANGLE_TRY(unmapImpl(contextVk));

    return angle::Result::Continue;
}

angle::Result BufferVk::updateBuffer(ContextVk *contextVk,
                                     const uint8_t *data,
                                     size_t size,
                                     size_t offset)
{
    if (mBuffer.isHostVisible())
    {
        ANGLE_TRY(directUpdate(contextVk, data, size, offset));
    }
    else
    {
        ANGLE_TRY(stagedUpdate(contextVk, data, size, offset));
    }
    return angle::Result::Continue;
}
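
// Writes the data directly into the buffer's host-visible memory through a CPU mapping.  If the
// usage is dynamic, the mapping is intentionally left in place for subsequent client updates.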
angle::Result BufferVk::directUpdate(ContextVk *contextVk,
                                     const uint8_t *data,
                                     size_t size,
                                     size_t offset)
{
    uint8_t *mapPointer = nullptr;

    ANGLE_TRY(mBuffer.mapWithOffset(contextVk, &mapPointer, offset));
    ASSERT(mapPointer);

    memcpy(mapPointer, data, size);

    // If the buffer has dynamic usage then the intent is frequent client-side updates to the
    // buffer.  Don't unmap the buffer on the CPU; we will take care of unmapping when releasing
    // the buffer to either the renderer or mBufferFreeList.
    if (!IsUsageDynamic(mState.getUsage()))
    {
        mBuffer.unmap(contextVk->getRenderer());
    }
    ASSERT(mBuffer.isCoherent());

    return angle::Result::Continue;
}

angle::Result BufferVk::stagedUpdate(ContextVk *contextVk,
                                     const uint8_t *data,
                                     size_t size,
                                     size_t offset)
{
    // Acquire a "new" staging buffer.
    uint8_t *mapPointer = nullptr;
    ANGLE_TRY(allocStagingBuffer(contextVk, vk::MemoryCoherency::NonCoherent, size, &mapPointer));
    memcpy(mapPointer, data, size);
    ANGLE_TRY(flushStagingBuffer(contextVk, offset, size));
    mIsStagingBufferMapped = false;

    return angle::Result::Continue;
}

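// Acquires a new BufferHelper and writes the new data into it with updateBuffer().  If the update
// covers only part of the buffer, the regions before and after the updated range are preserved by
// copying them from the old buffer, either on the CPU (when the old buffer is host visible and
// idle) or with a GPU copy.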
angle::Result BufferVk::acquireAndUpdate(ContextVk *contextVk,
                                         const uint8_t *data,
                                         size_t updateSize,
                                         size_t offset,
                                         BufferUpdateType updateType)
{
    // We shouldn't get here if this is external memory.
    ASSERT(!isExternalBuffer());

    // Here we acquire a new BufferHelper and directUpdate() the new buffer.
    // If the subData size was less than the buffer's size we additionally enqueue
    // a GPU copy of the remaining regions from the old mBuffer to the new one.
    vk::BufferHelper src;
    size_t bufferSize              = static_cast<size_t>(mState.getSize());
    size_t offsetAfterSubdata      = (offset + updateSize);
    bool updateRegionBeforeSubData = mHasValidData && (offset > 0);
    bool updateRegionAfterSubData  = mHasValidData && (offsetAfterSubdata < bufferSize);

    uint8_t *srcMapPtrBeforeSubData = nullptr;
    uint8_t *srcMapPtrAfterSubData  = nullptr;
    if (updateRegionBeforeSubData || updateRegionAfterSubData)
    {
        src = std::move(mBuffer);

        // The total bytes that we need to copy from old buffer to new buffer.
        size_t copySize = bufferSize - updateSize;

        // If the buffer is host visible and the GPU is done writing to it, we use the CPU to do
        // the copy.  We need to save the source buffer pointer before we acquire a new buffer.
        if (src.isHostVisible() &&
            !src.isCurrentlyInUseForWrite(contextVk->getLastCompletedQueueSerial()) &&
            ShouldUseCPUToCopyData(contextVk, copySize, bufferSize))
        {
            uint8_t *mapPointer = nullptr;
            // The src buffer will be recycled (or released and unmapped) by acquireBufferHelper.
            ANGLE_TRY(src.map(contextVk, &mapPointer));
            ASSERT(mapPointer);
            srcMapPtrBeforeSubData = mapPointer;
            srcMapPtrAfterSubData  = mapPointer + offsetAfterSubdata;
        }
    }

    ANGLE_TRY(acquireBufferHelper(contextVk, bufferSize, updateType));
    ANGLE_TRY(updateBuffer(contextVk, data, updateSize, offset));

    constexpr int kMaxCopyRegions = 2;
    angle::FixedVector<VkBufferCopy, kMaxCopyRegions> copyRegions;

    if (updateRegionBeforeSubData)
    {
        if (srcMapPtrBeforeSubData)
        {
            ASSERT(mBuffer.isHostVisible());
            ANGLE_TRY(directUpdate(contextVk, srcMapPtrBeforeSubData, offset, 0));
        }
        else
        {
            copyRegions.push_back({src.getOffset(), mBuffer.getOffset(), offset});
        }
    }

    if (updateRegionAfterSubData)
    {
        size_t copySize = bufferSize - offsetAfterSubdata;
        if (srcMapPtrAfterSubData)
        {
            ASSERT(mBuffer.isHostVisible());
            ANGLE_TRY(directUpdate(contextVk, srcMapPtrAfterSubData, copySize, offsetAfterSubdata));
        }
        else
        {
            copyRegions.push_back({src.getOffset() + offsetAfterSubdata,
                                   mBuffer.getOffset() + offsetAfterSubdata, copySize});
        }
    }

    if (!copyRegions.empty())
    {
        ANGLE_TRY(mBuffer.copyFromBuffer(contextVk, &src, static_cast<uint32_t>(copyRegions.size()),
                                         copyRegions.data()));
        mHasBeenReferencedByGPU = true;
    }

    if (src.valid())
    {
        src.release(contextVk->getRenderer());
    }

    return angle::Result::Continue;
}

angle::Result BufferVk::setDataImpl(ContextVk *contextVk,
                                    const uint8_t *data,
                                    size_t size,
                                    size_t offset,
                                    BufferUpdateType updateType)
{
    // if the buffer is currently in use
    //     if it isn't an external buffer and the sub data size meets the threshold
    //         acquire a new BufferHelper from the pool
    //     else
    //         stage the update
    // else
    //     update the buffer directly
    if (isCurrentlyInUse(contextVk))
    {
        // If BufferVk does not have any valid data, meaning no data needs to be copied from the
        // old buffer to the new buffer when we acquire a new one, we also favor acquireAndUpdate
        // over stagedUpdate.  This can happen when the app calls glBufferData with the same size,
        // and we try to reuse the existing buffer storage.
        if (!isExternalBuffer() &&
            (!mHasValidData || ShouldAllocateNewMemoryForUpdate(
                                   contextVk, size, static_cast<size_t>(mState.getSize()))))
        {
            ANGLE_TRY(acquireAndUpdate(contextVk, data, size, offset, updateType));
        }
        else
        {
            ANGLE_TRY(stagedUpdate(contextVk, data, size, offset));
        }
    }
    else
    {
        ANGLE_TRY(updateBuffer(contextVk, data, size, offset));
    }

    // Update conversions.
    dataUpdated();

    return angle::Result::Continue;
}

ConversionBuffer *BufferVk::getVertexConversionBuffer(RendererVk *renderer,
                                                      angle::FormatID formatID,
                                                      GLuint stride,
                                                      size_t offset,
                                                      bool hostVisible)
{
    for (VertexConversionBuffer &buffer : mVertexConversionBuffers)
    {
        if (buffer.formatID == formatID && buffer.stride == stride && buffer.offset == offset)
        {
            ASSERT(buffer.data && buffer.data->valid());
            return &buffer;
        }
    }

    mVertexConversionBuffers.emplace_back(renderer, formatID, stride, offset, hostVisible);
    return &mVertexConversionBuffers.back();
}

void BufferVk::dataUpdated()
{
    for (VertexConversionBuffer &buffer : mVertexConversionBuffers)
    {
        buffer.dirty = true;
    }
    // Now we have valid data.
    mHasValidData = true;
}

void BufferVk::onDataChanged()
{
    dataUpdated();
}

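// Releases the current backing storage (if any) and suballocates a new one, with the size rounded
// up to kBufferSizeGranularity, then notifies observers so that buffer views and descriptor sets
// referring to the old storage can be refreshed.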
angle::Result BufferVk::acquireBufferHelper(ContextVk *contextVk,
                                            size_t sizeInBytes,
                                            BufferUpdateType updateType)
{
    RendererVk *renderer = contextVk->getRenderer();
    size_t size          = roundUpPow2(sizeInBytes, kBufferSizeGranularity);
    size_t alignment     = renderer->getDefaultBufferAlignment();

    if (mBuffer.valid())
    {
        mBuffer.release(renderer);
    }

    // Allocate the buffer directly.
    ANGLE_TRY(mBuffer.initSuballocation(contextVk, mMemoryTypeIndex, size, alignment));

    if (updateType == BufferUpdateType::ContentsUpdate)
    {
        // Tell the observers (front end) that a new buffer was created, so the necessary
        // dirty bits can be set. This allows the buffer views pointing to the old buffer to
        // be recreated and point to the new buffer, along with updating the descriptor sets
        // to use the new buffer.
        onStateChange(angle::SubjectMessage::InternalMemoryAllocationChanged);
    }
    else if (updateType == BufferUpdateType::StorageRedefined)
    {
        // Tell the observers (front end) that a buffer's storage has changed.
        onStateChange(angle::SubjectMessage::BufferVkStorageChanged);
    }

    return angle::Result::Continue;
}

bool BufferVk::isCurrentlyInUse(ContextVk *contextVk) const
{
    return mHasBeenReferencedByGPU &&
           mBuffer.isCurrentlyInUse(contextVk->getLastCompletedQueueSerial());
}

}  // namespace rx