//
// Copyright 2016 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// BufferVk.cpp:
//    Implements the class methods for BufferVk.
//

#include "libANGLE/renderer/vulkan/BufferVk.h"

#include "common/FixedVector.h"
#include "common/debug.h"
#include "common/mathutil.h"
#include "common/utilities.h"
#include "libANGLE/Context.h"
#include "libANGLE/renderer/vulkan/ContextVk.h"
#include "libANGLE/renderer/vulkan/RendererVk.h"
#include "libANGLE/trace.h"

namespace rx
{
VkBufferUsageFlags GetDefaultBufferUsageFlags(RendererVk *renderer)
{
    // We could potentially use multiple backing buffers for different usages.
    // For now, keep a single buffer with all relevant usage flags.
    VkBufferUsageFlags defaultBufferUsageFlags =
        VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
        VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
        VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
        VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
        VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
    if (renderer->getFeatures().supportsTransformFeedbackExtension.enabled)
    {
        defaultBufferUsageFlags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT |
                                   VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT;
    }
    return defaultBufferUsageFlags;
}

namespace
{
// Vertex attribute buffers are used as storage buffers for conversion in compute, where access to
// the buffer is made in 4-byte chunks.  Assume the size of the buffer is 4k+n where n is in
// [1, 3].  On some hardware, reading 4 bytes from address 4k returns 0, making it impossible to
// read the last n bytes.  By rounding up the buffer sizes to a multiple of 4, the problem is
// alleviated.
constexpr size_t kBufferSizeGranularity = 4;
static_assert(gl::isPow2(kBufferSizeGranularity), "use as alignment, must be power of two");
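// A minimal worked example of the rounding (assuming roundUpPow2(size, 4) behaves like
// (size + 3) & ~size_t{3}): requests of 4097, 4098, or 4099 bytes are all padded to 4100
// bytes, so a compute shader reading in 4-byte chunks can safely fetch the final chunk.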

// Start with a fairly small buffer size. We can increase this dynamically as we convert more data.
constexpr size_t kConvertedArrayBufferInitialSize = 1024 * 8;

// Buffers that have a static usage pattern will be allocated in
// device local memory to speed up access to and from the GPU.
// Buffers with a dynamic usage pattern, or that are frequently mapped,
// will request host cached memory to speed up access from the CPU.
ANGLE_INLINE VkMemoryPropertyFlags GetPreferredMemoryType(gl::BufferBinding target,
                                                          gl::BufferUsage usage)
{
    constexpr VkMemoryPropertyFlags kDeviceLocalFlags =
        (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
         VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
    constexpr VkMemoryPropertyFlags kHostCachedFlags =
        (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
         VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
    constexpr VkMemoryPropertyFlags kHostUncachedFlags =
        (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);

    if (target == gl::BufferBinding::PixelUnpack)
    {
        return kHostCachedFlags;
    }

    switch (usage)
    {
        case gl::BufferUsage::StaticCopy:
        case gl::BufferUsage::StaticDraw:
        case gl::BufferUsage::StaticRead:
            // For static usage, request device local memory.
            return kDeviceLocalFlags;
        case gl::BufferUsage::DynamicDraw:
        case gl::BufferUsage::StreamDraw:
            // For non-static usage where the CPU performs a write-only access, request
            // host uncached memory.
            return kHostUncachedFlags;
        case gl::BufferUsage::DynamicCopy:
        case gl::BufferUsage::DynamicRead:
        case gl::BufferUsage::StreamCopy:
        case gl::BufferUsage::StreamRead:
            // For all other types of usage, request host cached memory.
            return kHostCachedFlags;
        default:
            UNREACHABLE();
            return kHostCachedFlags;
    }
}
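// For example (a summary of the mapping above, not additional logic): a GL_STATIC_DRAW
// vertex buffer requests DEVICE_LOCAL | HOST_VISIBLE | HOST_COHERENT, a GL_STREAM_DRAW
// buffer requests HOST_VISIBLE | HOST_COHERENT only, and a GL_DYNAMIC_READ buffer (or any
// PixelUnpack binding) additionally requests HOST_CACHED for faster CPU reads.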

ANGLE_INLINE VkMemoryPropertyFlags GetStorageMemoryType(GLbitfield storageFlags,
                                                        bool externalBuffer)
{
    constexpr VkMemoryPropertyFlags kDeviceLocalHostVisibleFlags =
        (VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
    constexpr VkMemoryPropertyFlags kDeviceLocalHostCoherentFlags =
        (VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);

    const bool isCoherentMap   = (storageFlags & GL_MAP_COHERENT_BIT_EXT) != 0;
    const bool isPersistentMap = (storageFlags & GL_MAP_PERSISTENT_BIT_EXT) != 0;

    if (isCoherentMap || isPersistentMap || externalBuffer)
    {
        // We currently allocate coherent memory for persistently mapped buffers.
        // GL_EXT_buffer_storage allows non-coherent memory, but currently the implementation of
        // |glMemoryBarrier(CLIENT_MAPPED_BUFFER_BARRIER_BIT_EXT)| relies on the mapping being
        // coherent.
        //
        // If persistently mapped buffers ever use non-coherent memory, then said |glMemoryBarrier|
        // call must result in |vkInvalidateMappedMemoryRanges| for all persistently mapped buffers.
        return kDeviceLocalHostCoherentFlags;
    }

    return kDeviceLocalHostVisibleFlags;
}
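// For instance, glBufferStorageEXT(..., GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT_EXT) takes
// the coherent path above, while plain GL_MAP_WRITE_BIT storage requests device-local,
// host-visible (possibly non-coherent) memory.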

ANGLE_INLINE bool ShouldAllocateNewMemoryForUpdate(ContextVk *contextVk,
                                                   size_t subDataSize,
                                                   size_t bufferSize)
{
    // A sub data update with size > 50% of buffer size meets the threshold
    // to acquire a new BufferHelper from the pool.
    return contextVk->getRenderer()->getFeatures().preferCPUForBufferSubData.enabled ||
           subDataSize > (bufferSize / 2);
}
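// Worked example: updating 600 bytes of a 1000-byte buffer exceeds the 50% threshold
// (600 > 500), so a fresh BufferHelper is acquired; a 400-byte update would instead be
// staged against the existing buffer (unless preferCPUForBufferSubData is enabled).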

ANGLE_INLINE bool ShouldUseCPUToCopyData(ContextVk *contextVk, size_t copySize, size_t bufferSize)
{
    RendererVk *renderer = contextVk->getRenderer();
    // For some GPUs (e.g., ARM), we always prefer using the CPU for the copy instead of the GPU,
    // to avoid pipeline bubbles. If the GPU is currently busy and the copy size is below a
    // certain threshold, we also choose the CPU copy over the GPU to achieve better parallelism.
    return renderer->getFeatures().preferCPUForBufferSubData.enabled ||
           (renderer->isCommandQueueBusy() &&
            copySize < renderer->getMaxCopyBytesUsingCPUWhenPreservingBufferData());
}
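// Note that ShouldUseCPUToCopyData's bufferSize parameter is currently unused; the decision is
// driven by the feature flag, whether the command queue is busy, and the absolute copy-size
// threshold alone.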

ANGLE_INLINE bool IsUsageDynamic(gl::BufferUsage usage)
{
    return (usage == gl::BufferUsage::DynamicDraw || usage == gl::BufferUsage::DynamicCopy ||
            usage == gl::BufferUsage::DynamicRead);
}

angle::Result GetMemoryTypeIndex(ContextVk *contextVk,
                                 VkDeviceSize size,
                                 VkMemoryPropertyFlags memoryPropertyFlags,
                                 uint32_t *memoryTypeIndexOut)
{
    RendererVk *renderer           = contextVk->getRenderer();
    const vk::Allocator &allocator = renderer->getAllocator();

    bool persistentlyMapped = renderer->getFeatures().persistentlyMappedBuffers.enabled;
    VkBufferUsageFlags defaultBufferUsageFlags = GetDefaultBufferUsageFlags(renderer);

    VkBufferCreateInfo createInfo    = {};
    createInfo.sType                 = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    createInfo.flags                 = 0;
    createInfo.size                  = size;
    createInfo.usage                 = defaultBufferUsageFlags;
    createInfo.sharingMode           = VK_SHARING_MODE_EXCLUSIVE;
    createInfo.queueFamilyIndexCount = 0;
    createInfo.pQueueFamilyIndices   = nullptr;

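    // Note: this VkBufferCreateInfo is only a query template; no VkBuffer is created here. It is
    // assumed that findMemoryTypeIndexForBufferInfo forwards to the allocator's "find memory type
    // index for buffer info" query (vmaFindMemoryTypeIndexForBufferInfo in VMA-based builds).
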
    // Host visible is required; all other bits are preferred (i.e., optional).
    VkMemoryPropertyFlags requiredFlags =
        (memoryPropertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
    VkMemoryPropertyFlags preferredFlags =
        (memoryPropertyFlags & (~VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));

    // Check that the allocation is not too large.
    uint32_t memoryTypeIndex = 0;
    ANGLE_VK_TRY(contextVk, allocator.findMemoryTypeIndexForBufferInfo(
                                createInfo, requiredFlags, preferredFlags, persistentlyMapped,
                                &memoryTypeIndex));
    *memoryTypeIndexOut = memoryTypeIndex;

    return angle::Result::Continue;
}
}  // namespace

// ConversionBuffer implementation.
ConversionBuffer::ConversionBuffer(RendererVk *renderer,
                                   VkBufferUsageFlags usageFlags,
                                   size_t initialSize,
                                   size_t alignment,
                                   bool hostVisible)
    : dirty(true)
{
    data = std::make_unique<vk::BufferHelper>();
}

ConversionBuffer::~ConversionBuffer()
{
    ASSERT(!data || !data->valid());
}

ConversionBuffer::ConversionBuffer(ConversionBuffer &&other) = default;

// BufferVk::VertexConversionBuffer implementation.
BufferVk::VertexConversionBuffer::VertexConversionBuffer(RendererVk *renderer,
                                                         angle::FormatID formatIDIn,
                                                         GLuint strideIn,
                                                         size_t offsetIn,
                                                         bool hostVisible)
    : ConversionBuffer(renderer,
                       vk::kVertexBufferUsageFlags,
                       kConvertedArrayBufferInitialSize,
                       vk::kVertexBufferAlignment,
                       hostVisible),
      formatID(formatIDIn),
      stride(strideIn),
      offset(offsetIn)
{}

BufferVk::VertexConversionBuffer::VertexConversionBuffer(VertexConversionBuffer &&other) = default;

BufferVk::VertexConversionBuffer::~VertexConversionBuffer() = default;

// BufferVk implementation.
BufferVk::BufferVk(const gl::BufferState &state)
    : BufferImpl(state),
      mClientBuffer(nullptr),
      mMemoryTypeIndex(0),
      mMemoryPropertyFlags(0),
      mIsStagingBufferMapped(false),
      mHasValidData(false),
      mHasBeenReferencedByGPU(false)
{}

BufferVk::~BufferVk() {}

void BufferVk::destroy(const gl::Context *context)
{
    ContextVk *contextVk = vk::GetImpl(context);

    release(contextVk);
}

void BufferVk::release(ContextVk *contextVk)
{
    RendererVk *renderer = contextVk->getRenderer();
    if (mBuffer.valid())
    {
        mBuffer.release(renderer);
    }
    if (mStagingBuffer.valid())
    {
        mStagingBuffer.release(renderer);
    }

    for (ConversionBuffer &buffer : mVertexConversionBuffers)
    {
        buffer.data->release(renderer);
    }
    mVertexConversionBuffers.clear();
}

angle::Result BufferVk::setExternalBufferData(const gl::Context *context,
                                              gl::BufferBinding target,
                                              GLeglClientBufferEXT clientBuffer,
                                              size_t size,
                                              VkMemoryPropertyFlags memoryPropertyFlags)
{
    ContextVk *contextVk = vk::GetImpl(context);

    // Release and re-create the memory and buffer.
    release(contextVk);

    VkBufferCreateInfo createInfo    = {};
    createInfo.sType                 = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    createInfo.flags                 = 0;
    createInfo.size                  = size;
    createInfo.usage                 = GetDefaultBufferUsageFlags(contextVk->getRenderer());
    createInfo.sharingMode           = VK_SHARING_MODE_EXCLUSIVE;
    createInfo.queueFamilyIndexCount = 0;
    createInfo.pQueueFamilyIndices   = nullptr;

    return mBuffer.initExternal(contextVk, memoryPropertyFlags, createInfo, clientBuffer);
}

angle::Result BufferVk::setDataWithUsageFlags(const gl::Context *context,
                                              gl::BufferBinding target,
                                              GLeglClientBufferEXT clientBuffer,
                                              const void *data,
                                              size_t size,
                                              gl::BufferUsage usage,
                                              GLbitfield flags)
{
    VkMemoryPropertyFlags memoryPropertyFlags = 0;
    bool persistentMapRequired                = false;
    const bool isExternalBuffer               = clientBuffer != nullptr;

    switch (usage)
    {
        case gl::BufferUsage::InvalidEnum:
        {
            // glBufferStorage API call
            memoryPropertyFlags   = GetStorageMemoryType(flags, isExternalBuffer);
            persistentMapRequired = (flags & GL_MAP_PERSISTENT_BIT_EXT) != 0;
            break;
        }
        default:
        {
            // glBufferData API call
            memoryPropertyFlags = GetPreferredMemoryType(target, usage);
            break;
        }
    }

    if (isExternalBuffer)
    {
        ANGLE_TRY(setExternalBufferData(context, target, clientBuffer, size, memoryPropertyFlags));
        if (!mBuffer.isHostVisible())
        {
            // If the external buffer's memory does not have the host-visible property, we cannot
            // support a persistent map request.
            ANGLE_VK_CHECK(vk::GetImpl(context), !persistentMapRequired,
                           VK_ERROR_MEMORY_MAP_FAILED);
        }

        mClientBuffer = clientBuffer;

        return angle::Result::Continue;
    }
    return setDataWithMemoryType(context, target, data, size, memoryPropertyFlags,
                                 persistentMapRequired, usage);
}

angle::Result BufferVk::setData(const gl::Context *context,
                                gl::BufferBinding target,
                                const void *data,
                                size_t size,
                                gl::BufferUsage usage)
{
    // Assume host visible/coherent memory available.
    VkMemoryPropertyFlags memoryPropertyFlags = GetPreferredMemoryType(target, usage);
    return setDataWithMemoryType(context, target, data, size, memoryPropertyFlags, false, usage);
}

angle::Result BufferVk::setDataWithMemoryType(const gl::Context *context,
                                              gl::BufferBinding target,
                                              const void *data,
                                              size_t size,
                                              VkMemoryPropertyFlags memoryPropertyFlags,
                                              bool persistentMapRequired,
                                              gl::BufferUsage usage)
{
    ContextVk *contextVk = vk::GetImpl(context);

    // Reset the flag since the buffer contents are being reinitialized. If the caller passed in
    // data to fill the buffer, the flag will be updated when the data is copied to the buffer.
    mHasValidData = false;

    if (size == 0)
    {
        // Nothing to do.
        return angle::Result::Continue;
    }

    const bool bufferSizeChanged              = size != static_cast<size_t>(mState.getSize());
    const bool inUseAndRespecifiedWithoutData = (data == nullptr && isCurrentlyInUse(contextVk));

    // The entire buffer is being respecified, possibly with null data.
    // Release and init a new mBuffer with the requested size.
    if (bufferSizeChanged || inUseAndRespecifiedWithoutData)
    {
        // Release and re-create the memory and buffer.
        release(contextVk);

        mMemoryPropertyFlags = memoryPropertyFlags;
        ANGLE_TRY(GetMemoryTypeIndex(contextVk, size, memoryPropertyFlags, &mMemoryTypeIndex));

        ANGLE_TRY(acquireBufferHelper(contextVk, size, BufferUpdateType::StorageRedefined));
    }

    if (data)
    {
        // Treat full-buffer updates as SubData calls.
        BufferUpdateType updateType = bufferSizeChanged ? BufferUpdateType::StorageRedefined
                                                        : BufferUpdateType::ContentsUpdate;

        ANGLE_TRY(setDataImpl(contextVk, static_cast<const uint8_t *>(data), size, 0, updateType));
    }

    return angle::Result::Continue;
}

angle::Result BufferVk::setSubData(const gl::Context *context,
                                   gl::BufferBinding target,
                                   const void *data,
                                   size_t size,
                                   size_t offset)
{
    ASSERT(mBuffer.valid());

    ContextVk *contextVk = vk::GetImpl(context);
    ANGLE_TRY(setDataImpl(contextVk, static_cast<const uint8_t *>(data), size, offset,
                          BufferUpdateType::ContentsUpdate));

    return angle::Result::Continue;
}

angle::Result BufferVk::copySubData(const gl::Context *context,
                                    BufferImpl *source,
                                    GLintptr sourceOffset,
                                    GLintptr destOffset,
                                    GLsizeiptr size)
{
    ASSERT(mBuffer.valid());

    ContextVk *contextVk           = vk::GetImpl(context);
    BufferVk *sourceVk             = GetAs<BufferVk>(source);
    vk::BufferHelper &sourceBuffer = sourceVk->getBuffer();
    ASSERT(sourceBuffer.valid());
    VkDeviceSize sourceBufferOffset = sourceBuffer.getOffset();

    // Check for self-dependency.
    vk::CommandBufferAccess access;
    if (sourceBuffer.getBufferSerial() == mBuffer.getBufferSerial())
    {
        access.onBufferSelfCopy(&mBuffer);
    }
    else
    {
        access.onBufferTransferRead(&sourceBuffer);
        access.onBufferTransferWrite(&mBuffer);
    }

    vk::OutsideRenderPassCommandBuffer *commandBuffer;
    ANGLE_TRY(contextVk->getOutsideRenderPassCommandBuffer(access, &commandBuffer));

    // Enqueue a copy command on the GPU.
    const VkBufferCopy copyRegion = {static_cast<VkDeviceSize>(sourceOffset) + sourceBufferOffset,
                                     static_cast<VkDeviceSize>(destOffset) + mBuffer.getOffset(),
                                     static_cast<VkDeviceSize>(size)};

    commandBuffer->copyBuffer(sourceBuffer.getBuffer(), mBuffer.getBuffer(), 1, &copyRegion);
    mHasBeenReferencedByGPU = true;

    // The new destination buffer data may require a conversion for the next draw, so mark it
    // dirty.
    onDataChanged();

    return angle::Result::Continue;
}

angle::Result BufferVk::allocStagingBuffer(ContextVk *contextVk,
                                           vk::MemoryCoherency coherency,
                                           VkDeviceSize size,
                                           uint8_t **mapPtr)
{
    ASSERT(!mIsStagingBufferMapped);

    if (mStagingBuffer.valid())
    {
        if (size <= mStagingBuffer.getSize() &&
            (coherency == vk::MemoryCoherency::Coherent) == mStagingBuffer.isCoherent() &&
            !mStagingBuffer.isCurrentlyInUse(contextVk->getLastCompletedQueueSerial()))
        {
            // If the existing staging buffer is big enough and idle, just reuse it.
            *mapPtr                = mStagingBuffer.getMappedMemory();
            mIsStagingBufferMapped = true;
            return angle::Result::Continue;
        }
        mStagingBuffer.release(contextVk->getRenderer());
    }

    ANGLE_TRY(
        mStagingBuffer.allocateForCopyBuffer(contextVk, static_cast<size_t>(size), coherency));
    *mapPtr                = mStagingBuffer.getMappedMemory();
    mIsStagingBufferMapped = true;

    return angle::Result::Continue;
}

angle::Result BufferVk::flushStagingBuffer(ContextVk *contextVk,
                                           VkDeviceSize offset,
                                           VkDeviceSize size)
{
    RendererVk *renderer = contextVk->getRenderer();

    ASSERT(mIsStagingBufferMapped);
    ASSERT(mStagingBuffer.valid());

    if (!mStagingBuffer.isCoherent())
    {
        ANGLE_TRY(mStagingBuffer.flush(renderer));
    }

    // Enqueue a copy command on the GPU.
    VkBufferCopy copyRegion = {mStagingBuffer.getOffset(), mBuffer.getOffset() + offset, size};
    ANGLE_TRY(mBuffer.copyFromBuffer(contextVk, &mStagingBuffer, 1, &copyRegion));

    mHasBeenReferencedByGPU = true;

    return angle::Result::Continue;
}

angle::Result BufferVk::handleDeviceLocalBufferMap(ContextVk *contextVk,
                                                   VkDeviceSize offset,
                                                   VkDeviceSize size,
                                                   uint8_t **mapPtr)
{
    ANGLE_TRY(allocStagingBuffer(contextVk, vk::MemoryCoherency::Coherent, size, mapPtr));

    // Copy data from the device local buffer to the host visible staging buffer.
    VkBufferCopy copyRegion = {mBuffer.getOffset() + offset, mStagingBuffer.getOffset(), size};
    ANGLE_TRY(mStagingBuffer.copyFromBuffer(contextVk, &mBuffer, 1, &copyRegion));
    ANGLE_TRY(mStagingBuffer.waitForIdle(contextVk, "GPU stall due to mapping device local buffer",
                                         RenderPassClosureReason::DeviceLocalBufferMap));
    // Because the buffer is coherent, no need to call invalidate here.

    return angle::Result::Continue;
}
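// In other words, mapping a non-host-visible buffer is emulated through the staging buffer:
// the GPU copies the requested range into the staging buffer, the CPU waits for that copy to
// finish, and the caller receives a pointer into the staging memory. Any writes are copied
// back to the device local buffer later, in unmapImpl() via flushStagingBuffer().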

angle::Result BufferVk::map(const gl::Context *context, GLenum access, void **mapPtr)
{
    ASSERT(mBuffer.valid());
    ASSERT(access == GL_WRITE_ONLY_OES);

    return mapImpl(vk::GetImpl(context), GL_MAP_WRITE_BIT, mapPtr);
}

angle::Result BufferVk::mapRange(const gl::Context *context,
                                 size_t offset,
                                 size_t length,
                                 GLbitfield access,
                                 void **mapPtr)
{
    ANGLE_TRACE_EVENT0("gpu.angle", "BufferVk::mapRange");
    return mapRangeImpl(vk::GetImpl(context), offset, length, access, mapPtr);
}

angle::Result BufferVk::mapImpl(ContextVk *contextVk, GLbitfield access, void **mapPtr)
{
    return mapRangeImpl(contextVk, 0, static_cast<VkDeviceSize>(mState.getSize()), access, mapPtr);
}

angle::Result BufferVk::ghostMappedBuffer(ContextVk *contextVk,
                                          VkDeviceSize offset,
                                          VkDeviceSize length,
                                          GLbitfield access,
                                          void **mapPtr)
{
    // We shouldn't get here if this is external memory.
    ASSERT(!isExternalBuffer());

    ++contextVk->getPerfCounters().buffersGhosted;

    // If we are creating a new buffer because the GPU is using the old one as read-only, then we
    // also need to copy the contents of the previous buffer into the new buffer, in
    // case the caller only updates a portion of the new buffer.
    vk::BufferHelper src = std::move(mBuffer);

    ANGLE_TRY(acquireBufferHelper(contextVk, static_cast<size_t>(mState.getSize()),
                                  BufferUpdateType::ContentsUpdate));

    // Before returning the new buffer, map the previous buffer and copy its entire
    // contents into the new buffer.
    uint8_t *srcMapPtr = nullptr;
    uint8_t *dstMapPtr = nullptr;
    ANGLE_TRY(src.map(contextVk, &srcMapPtr));
    ANGLE_TRY(mBuffer.map(contextVk, &dstMapPtr));

    ASSERT(src.isCoherent());
    ASSERT(mBuffer.isCoherent());

    // No need to copy over [offset, offset + length); only the regions around it.
    if ((access & GL_MAP_INVALIDATE_RANGE_BIT) != 0)
    {
        if (offset != 0)
        {
            memcpy(dstMapPtr, srcMapPtr, static_cast<size_t>(offset));
        }
        size_t totalSize      = static_cast<size_t>(mState.getSize());
        size_t remainingStart = static_cast<size_t>(offset + length);
        size_t remainingSize  = totalSize - remainingStart;
        if (remainingSize != 0)
        {
            memcpy(dstMapPtr + remainingStart, srcMapPtr + remainingStart, remainingSize);
        }
    }
    else
    {
        memcpy(dstMapPtr, srcMapPtr, static_cast<size_t>(mState.getSize()));
    }

    src.release(contextVk->getRenderer());

    // Return the already-mapped pointer with the offset adjustment to avoid the call to unmap().
    *mapPtr = dstMapPtr + offset;

    return angle::Result::Continue;
}
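// "Ghosting" here is essentially buffer orphaning: when the app maps a buffer that the GPU is
// still reading, we hand it a brand new buffer (pre-filled with the old contents via CPU memcpy)
// instead of stalling. The GPU keeps reading the old buffer until it retires, after which the
// old allocation is recycled.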

angle::Result BufferVk::mapRangeImpl(ContextVk *contextVk,
                                     VkDeviceSize offset,
                                     VkDeviceSize length,
                                     GLbitfield access,
                                     void **mapPtr)
{
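    // Map strategy overview (a summary of the branches below):
    //   - GL_MAP_UNSYNCHRONIZED_BIT: map immediately.
    //   - Read-only access: wait for pending GPU writes, then map.
    //   - Write access, buffer idle or external: map directly.
    //   - Write access, buffer busy: invalidate/ghost/stage where possible, otherwise stall.
    // Non-host-visible buffers are always mapped through the staging buffer.
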
    uint8_t **mapPtrBytes = reinterpret_cast<uint8_t **>(mapPtr);

    ASSERT(mBuffer.valid());

    bool hostVisible = mBuffer.isHostVisible();

    // MAP_UNSYNCHRONIZED_BIT, so immediately map.
    if ((access & GL_MAP_UNSYNCHRONIZED_BIT) != 0)
    {
        if (hostVisible)
        {
            return mBuffer.mapWithOffset(contextVk, mapPtrBytes, static_cast<size_t>(offset));
        }
        return handleDeviceLocalBufferMap(contextVk, offset, length, mapPtrBytes);
    }

    // Read case
    if ((access & GL_MAP_WRITE_BIT) == 0)
    {
        // If the app is not going to write, all we need is to ensure the GPU writes are finished.
        // Concurrent reads from the CPU and GPU are allowed.
        if (mBuffer.isCurrentlyInUseForWrite(contextVk->getLastCompletedQueueSerial()))
        {
            // If there are pending commands for the resource, flush them.
            if (mBuffer.usedInRecordedCommands())
            {
                ANGLE_TRY(
                    contextVk->flushImpl(nullptr, RenderPassClosureReason::BufferWriteThenMap));
            }
            ANGLE_TRY(mBuffer.finishGPUWriteCommands(contextVk));
        }
        if (hostVisible)
        {
            return mBuffer.mapWithOffset(contextVk, mapPtrBytes, static_cast<size_t>(offset));
        }
        return handleDeviceLocalBufferMap(contextVk, offset, length, mapPtrBytes);
    }

    // Write case
    if (!hostVisible)
    {
        return handleDeviceLocalBufferMap(contextVk, offset, length, mapPtrBytes);
    }

    // Write case, buffer not in use.
    if (isExternalBuffer() || !isCurrentlyInUse(contextVk))
    {
        return mBuffer.mapWithOffset(contextVk, mapPtrBytes, static_cast<size_t>(offset));
    }

    // Write case, buffer in use.
    //
    // Here, we try to map the buffer, but it's busy. Instead of waiting for the GPU to
    // finish, we just allocate a new buffer if:
    // 1.) The caller has told us it doesn't care about previous contents, or
    // 2.) The GPU won't write to the buffer.

    bool rangeInvalidate = (access & GL_MAP_INVALIDATE_RANGE_BIT) != 0;
    bool entireBufferInvalidated =
        ((access & GL_MAP_INVALIDATE_BUFFER_BIT) != 0) ||
        (rangeInvalidate && offset == 0 && static_cast<VkDeviceSize>(mState.getSize()) == length);

    if (entireBufferInvalidated)
    {
        ANGLE_TRY(acquireBufferHelper(contextVk, static_cast<size_t>(mState.getSize()),
                                      BufferUpdateType::ContentsUpdate));
        return mBuffer.mapWithOffset(contextVk, mapPtrBytes, static_cast<size_t>(offset));
    }

    bool smallMapRange = (length < static_cast<VkDeviceSize>(mState.getSize()) / 2);

    if (smallMapRange && rangeInvalidate)
    {
        ANGLE_TRY(allocStagingBuffer(contextVk, vk::MemoryCoherency::NonCoherent,
                                     static_cast<size_t>(length), mapPtrBytes));
        return angle::Result::Continue;
    }

    if (!mBuffer.isCurrentlyInUseForWrite(contextVk->getLastCompletedQueueSerial()))
    {
        // This will keep the new buffer mapped and update mapPtr, so return immediately.
        return ghostMappedBuffer(contextVk, offset, length, access, mapPtr);
    }

    // Write case (worst case, buffer in use for write)
    ANGLE_TRY(mBuffer.waitForIdle(contextVk, "GPU stall due to mapping buffer in use by the GPU",
                                  RenderPassClosureReason::BufferInUseWhenSynchronizedMap));
    return mBuffer.mapWithOffset(contextVk, mapPtrBytes, static_cast<size_t>(offset));
}

angle::Result BufferVk::unmap(const gl::Context *context, GLboolean *result)
{
    ANGLE_TRY(unmapImpl(vk::GetImpl(context)));

    // This should be false if the contents have been corrupted through external means.  Vulkan
    // doesn't provide such information.
    *result = true;

    return angle::Result::Continue;
}

angle::Result BufferVk::unmapImpl(ContextVk *contextVk)
{
    ASSERT(mBuffer.valid());

    bool writeOperation = ((mState.getAccessFlags() & GL_MAP_WRITE_BIT) != 0);

    if (mIsStagingBufferMapped)
    {
        ASSERT(mStagingBuffer.valid());
        // Either the buffer is device local, or the small-range-map optimization was used.
        if (writeOperation)
        {
            ANGLE_TRY(flushStagingBuffer(contextVk, mState.getMapOffset(), mState.getMapLength()));
        }

        mIsStagingBufferMapped = false;
    }
    else
    {
        ASSERT(mBuffer.isHostVisible());
        mBuffer.unmap(contextVk->getRenderer());
    }

    if (writeOperation)
    {
        dataUpdated();
    }

    return angle::Result::Continue;
}

angle::Result BufferVk::getSubData(const gl::Context *context,
                                   GLintptr offset,
                                   GLsizeiptr size,
                                   void *outData)
{
    ASSERT(offset + size <= getSize());
    ASSERT(mBuffer.valid());
    ContextVk *contextVk = vk::GetImpl(context);
    void *mapPtr;
    ANGLE_TRY(mapRangeImpl(contextVk, offset, size, GL_MAP_READ_BIT, &mapPtr));
    memcpy(outData, mapPtr, size);
    return unmapImpl(contextVk);
}

angle::Result BufferVk::getIndexRange(const gl::Context *context,
                                      gl::DrawElementsType type,
                                      size_t offset,
                                      size_t count,
                                      bool primitiveRestartEnabled,
                                      gl::IndexRange *outRange)
{
    ContextVk *contextVk = vk::GetImpl(context);
    RendererVk *renderer = contextVk->getRenderer();

    // This is a workaround for the mock ICD not implementing buffer memory state.
    // Could be removed if https://github.com/KhronosGroup/Vulkan-Tools/issues/84 is fixed.
    if (renderer->isMockICDEnabled())
    {
        outRange->start = 0;
        outRange->end   = 0;
        return angle::Result::Continue;
    }

    ANGLE_TRACE_EVENT0("gpu.angle", "BufferVk::getIndexRange");

    void *mapPtr;
    ANGLE_TRY(mapRangeImpl(contextVk, offset, getSize(), GL_MAP_READ_BIT, &mapPtr));
    *outRange = gl::ComputeIndexRange(type, mapPtr, count, primitiveRestartEnabled);
    ANGLE_TRY(unmapImpl(contextVk));

    return angle::Result::Continue;
}

angle::Result BufferVk::updateBuffer(ContextVk *contextVk,
                                     const uint8_t *data,
                                     size_t size,
                                     size_t offset)
{
    if (mBuffer.isHostVisible())
    {
        ANGLE_TRY(directUpdate(contextVk, data, size, offset));
    }
    else
    {
        ANGLE_TRY(stagedUpdate(contextVk, data, size, offset));
    }
    return angle::Result::Continue;
}

angle::Result BufferVk::directUpdate(ContextVk *contextVk,
                                     const uint8_t *data,
                                     size_t size,
                                     size_t offset)
{
    uint8_t *mapPointer = nullptr;

    ANGLE_TRY(mBuffer.mapWithOffset(contextVk, &mapPointer, offset));
    ASSERT(mapPointer);

    memcpy(mapPointer, data, size);

    // If the buffer has dynamic usage, the intent is frequent client-side updates to the buffer.
    // Don't unmap the buffer on the CPU; we will take care of unmapping when releasing the buffer
    // to either the renderer or mBufferFreeList.
    if (!IsUsageDynamic(mState.getUsage()))
    {
        mBuffer.unmap(contextVk->getRenderer());
    }
    ASSERT(mBuffer.isCoherent());

    return angle::Result::Continue;
}

angle::Result BufferVk::stagedUpdate(ContextVk *contextVk,
                                     const uint8_t *data,
                                     size_t size,
                                     size_t offset)
{
    // Acquire a "new" staging buffer.
    uint8_t *mapPointer = nullptr;
    ANGLE_TRY(allocStagingBuffer(contextVk, vk::MemoryCoherency::NonCoherent, size, &mapPointer));
    memcpy(mapPointer, data, size);
    ANGLE_TRY(flushStagingBuffer(contextVk, offset, size));
    mIsStagingBufferMapped = false;

    return angle::Result::Continue;
}

angle::Result BufferVk::acquireAndUpdate(ContextVk *contextVk,
                                         const uint8_t *data,
                                         size_t updateSize,
                                         size_t offset,
                                         BufferUpdateType updateType)
{
    // We shouldn't get here if this is external memory.
    ASSERT(!isExternalBuffer());

    // Here we acquire a new BufferHelper and directUpdate() the new buffer.
    // If the subData size was less than the buffer's size, we additionally enqueue
    // a GPU copy of the remaining regions from the old mBuffer to the new one.
    vk::BufferHelper src;
    size_t bufferSize              = static_cast<size_t>(mState.getSize());
    size_t offsetAfterSubdata      = (offset + updateSize);
    bool updateRegionBeforeSubData = mHasValidData && (offset > 0);
    bool updateRegionAfterSubData  = mHasValidData && (offsetAfterSubdata < bufferSize);

    uint8_t *srcMapPtrBeforeSubData = nullptr;
    uint8_t *srcMapPtrAfterSubData  = nullptr;
    if (updateRegionBeforeSubData || updateRegionAfterSubData)
    {
        src = std::move(mBuffer);

        // The total number of bytes that we need to copy from the old buffer to the new one.
        size_t copySize = bufferSize - updateSize;

        // If the buffer is host visible and the GPU is done writing to it, we use the CPU to do
        // the copy. We need to save the source buffer pointer before we acquire a new buffer.
        if (src.isHostVisible() &&
            !src.isCurrentlyInUseForWrite(contextVk->getLastCompletedQueueSerial()) &&
            ShouldUseCPUToCopyData(contextVk, copySize, bufferSize))
        {
            uint8_t *mapPointer = nullptr;
            // The src buffer will be recycled (or released and unmapped) by acquireBufferHelper.
            ANGLE_TRY(src.map(contextVk, &mapPointer));
            ASSERT(mapPointer);
            srcMapPtrBeforeSubData = mapPointer;
            srcMapPtrAfterSubData  = mapPointer + offsetAfterSubdata;
        }
    }

    ANGLE_TRY(acquireBufferHelper(contextVk, bufferSize, updateType));
    ANGLE_TRY(updateBuffer(contextVk, data, updateSize, offset));

    constexpr int kMaxCopyRegions = 2;
    angle::FixedVector<VkBufferCopy, kMaxCopyRegions> copyRegions;

    if (updateRegionBeforeSubData)
    {
        if (srcMapPtrBeforeSubData)
        {
            ASSERT(mBuffer.isHostVisible());
            ANGLE_TRY(directUpdate(contextVk, srcMapPtrBeforeSubData, offset, 0));
        }
        else
        {
            copyRegions.push_back({src.getOffset(), mBuffer.getOffset(), offset});
        }
    }

    if (updateRegionAfterSubData)
    {
        size_t copySize = bufferSize - offsetAfterSubdata;
        if (srcMapPtrAfterSubData)
        {
            ASSERT(mBuffer.isHostVisible());
            ANGLE_TRY(directUpdate(contextVk, srcMapPtrAfterSubData, copySize, offsetAfterSubdata));
        }
        else
        {
            copyRegions.push_back({src.getOffset() + offsetAfterSubdata,
                                   mBuffer.getOffset() + offsetAfterSubdata, copySize});
        }
    }

    if (!copyRegions.empty())
    {
        ANGLE_TRY(mBuffer.copyFromBuffer(contextVk, &src, static_cast<uint32_t>(copyRegions.size()),
                                         copyRegions.data()));
        mHasBeenReferencedByGPU = true;
    }

    if (src.valid())
    {
        src.release(contextVk->getRenderer());
    }

    return angle::Result::Continue;
}
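// Worked example: glBufferSubData(offset = 256, size = 512) on a 1024-byte buffer acquires a
// new buffer, writes bytes [256, 768) from the user data, and preserves bytes [0, 256) and
// [768, 1024) by copying them from the old buffer (via CPU memcpy if the old buffer is host
// visible and idle, otherwise via a GPU copy with up to two VkBufferCopy regions).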

angle::Result BufferVk::setDataImpl(ContextVk *contextVk,
                                    const uint8_t *data,
                                    size_t size,
                                    size_t offset,
                                    BufferUpdateType updateType)
{
    // if the buffer is currently in use
    //     if it isn't an external buffer and the sub data size meets the threshold
    //          acquire a new BufferHelper from the pool
    //     else stage the update
    // else update the buffer directly
    if (isCurrentlyInUse(contextVk))
    {
        // If BufferVk does not have any valid data, then no data needs to be copied from the old
        // buffer to the new one when we acquire a new buffer, so we also favor acquireAndUpdate
        // over stagedUpdate. This can happen when the app calls glBufferData with the same size
        // and we try to reuse the existing buffer storage.
        if (!isExternalBuffer() &&
            (!mHasValidData || ShouldAllocateNewMemoryForUpdate(
                                   contextVk, size, static_cast<size_t>(mState.getSize()))))
        {
            ANGLE_TRY(acquireAndUpdate(contextVk, data, size, offset, updateType));
        }
        else
        {
            ANGLE_TRY(stagedUpdate(contextVk, data, size, offset));
        }
    }
    else
    {
        ANGLE_TRY(updateBuffer(contextVk, data, size, offset));
    }

    // Update conversions.
    dataUpdated();

    return angle::Result::Continue;
}

ConversionBuffer *BufferVk::getVertexConversionBuffer(RendererVk *renderer,
                                                      angle::FormatID formatID,
                                                      GLuint stride,
                                                      size_t offset,
                                                      bool hostVisible)
{
    for (VertexConversionBuffer &buffer : mVertexConversionBuffers)
    {
        if (buffer.formatID == formatID && buffer.stride == stride && buffer.offset == offset)
        {
            ASSERT(buffer.data && buffer.data->valid());
            return &buffer;
        }
    }

    mVertexConversionBuffers.emplace_back(renderer, formatID, stride, offset, hostVisible);
    return &mVertexConversionBuffers.back();
}

void BufferVk::dataUpdated()
{
    for (VertexConversionBuffer &buffer : mVertexConversionBuffers)
    {
        buffer.dirty = true;
    }
    // Now we have valid data.
    mHasValidData = true;
}

void BufferVk::onDataChanged()
{
    dataUpdated();
}

angle::Result BufferVk::acquireBufferHelper(ContextVk *contextVk,
                                            size_t sizeInBytes,
                                            BufferUpdateType updateType)
{
    RendererVk *renderer = contextVk->getRenderer();
    size_t size          = roundUpPow2(sizeInBytes, kBufferSizeGranularity);
    size_t alignment     = renderer->getDefaultBufferAlignment();

    if (mBuffer.valid())
    {
        mBuffer.release(renderer);
    }

    // Allocate the buffer directly.
    ANGLE_TRY(mBuffer.initSuballocation(contextVk, mMemoryTypeIndex, size, alignment));

    if (updateType == BufferUpdateType::ContentsUpdate)
    {
        // Tell the observers (front end) that a new buffer was created, so the necessary
        // dirty bits can be set. This allows the buffer views pointing to the old buffer to
        // be recreated and point to the new buffer, along with updating the descriptor sets
        // to use the new buffer.
        onStateChange(angle::SubjectMessage::InternalMemoryAllocationChanged);
    }
    else if (updateType == BufferUpdateType::StorageRedefined)
    {
        // Tell the observers (front end) that a buffer's storage has changed.
        onStateChange(angle::SubjectMessage::BufferVkStorageChanged);
    }

    return angle::Result::Continue;
}

bool BufferVk::isCurrentlyInUse(ContextVk *contextVk) const
{
    return mHasBeenReferencedByGPU &&
           mBuffer.isCurrentlyInUse(contextVk->getLastCompletedQueueSerial());
}

}  // namespace rx