//
// Copyright 2016 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// BufferVk.cpp:
//    Implements the class methods for BufferVk.
//

#include "libANGLE/renderer/vulkan/BufferVk.h"

#include "common/FixedVector.h"
#include "common/debug.h"
#include "common/mathutil.h"
#include "common/utilities.h"
#include "libANGLE/Context.h"
#include "libANGLE/renderer/vulkan/ContextVk.h"
#include "libANGLE/renderer/vulkan/RendererVk.h"

namespace rx
{
VkBufferUsageFlags GetDefaultBufferUsageFlags(RendererVk *renderer)
{
    // We could potentially use multiple backing buffers for different usages.
    // For now keep a single buffer with all relevant usage flags.
    VkBufferUsageFlags defaultBufferUsageFlags =
        VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
        VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
        VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
        VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
        VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
    if (renderer->getFeatures().supportsTransformFeedbackExtension.enabled)
    {
        defaultBufferUsageFlags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT |
                                   VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT;
    }
    return defaultBufferUsageFlags;
}

namespace
{
constexpr VkMemoryPropertyFlags kDeviceLocalFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
constexpr VkMemoryPropertyFlags kDeviceLocalHostCoherentFlags =
    (VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
     VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
constexpr VkMemoryPropertyFlags kHostCachedFlags =
    (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
     VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
constexpr VkMemoryPropertyFlags kHostUncachedFlags =
    (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);

// Vertex attribute buffers are used as storage buffers for conversion in compute, where access to
// the buffer is made in 4-byte chunks.  Assume the size of the buffer is 4k+n where n is in
// [1, 3].  On some hardware, reading 4 bytes from address 4k returns 0, making it impossible to
// read the last n bytes.  By rounding up the buffer sizes to a multiple of 4, the problem is
// alleviated.
constexpr size_t kBufferSizeGranularity = 4;
static_assert(gl::isPow2(kBufferSizeGranularity), "use as alignment, must be power of two");

// Start with a fairly small buffer size. We can increase this dynamically as we convert more data.
constexpr size_t kConvertedArrayBufferInitialSize = 1024 * 8;

// Buffers that have a static usage pattern will be allocated in device local memory to speed up
// access to and from the GPU.  Buffers with a dynamic usage pattern, or that are frequently
// mapped, will request host cached memory to speed up access from the CPU.
VkMemoryPropertyFlags GetPreferredMemoryType(RendererVk *renderer,
                                             gl::BufferBinding target,
                                             gl::BufferUsage usage)
{
    if (target == gl::BufferBinding::PixelUnpack)
    {
        return kHostCachedFlags;
    }

    switch (usage)
    {
        case gl::BufferUsage::StaticCopy:
        case gl::BufferUsage::StaticDraw:
        case gl::BufferUsage::StaticRead:
            // For static usage, request device local memory.
            return renderer->getFeatures().preferDeviceLocalMemoryHostVisible.enabled
                       ? kDeviceLocalHostCoherentFlags
                       : kDeviceLocalFlags;
        case gl::BufferUsage::DynamicDraw:
        case gl::BufferUsage::StreamDraw:
            // For non-static usage where the CPU performs a write-only access, request
            // host uncached memory.
            return kHostUncachedFlags;
        case gl::BufferUsage::DynamicCopy:
        case gl::BufferUsage::DynamicRead:
        case gl::BufferUsage::StreamCopy:
        case gl::BufferUsage::StreamRead:
            // For all other types of usage, request host cached memory.
            return kHostCachedFlags;
        default:
            UNREACHABLE();
            return kHostCachedFlags;
    }
}

VkMemoryPropertyFlags GetStorageMemoryType(RendererVk *renderer,
                                           GLbitfield storageFlags,
                                           bool externalBuffer)
{
    const bool hasMapAccess =
        (storageFlags & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT_EXT)) != 0;

    if (renderer->getFeatures().preferDeviceLocalMemoryHostVisible.enabled)
    {
        const bool canUpdate = (storageFlags & GL_DYNAMIC_STORAGE_BIT_EXT) != 0;
        if (canUpdate || hasMapAccess || externalBuffer)
        {
            // We currently allocate coherent memory for persistently mapped buffers.
            // GL_EXT_buffer_storage allows non-coherent memory, but currently the implementation of
            // |glMemoryBarrier(CLIENT_MAPPED_BUFFER_BARRIER_BIT_EXT)| relies on the mapping being
            // coherent.
            //
            // If persistently mapped buffers ever use non-coherent memory, then said
            // |glMemoryBarrier| call must result in |vkInvalidateMappedMemoryRanges| for all
            // persistently mapped buffers.
            return kDeviceLocalHostCoherentFlags;
        }
        return kDeviceLocalFlags;
    }

    return hasMapAccess ? kHostCachedFlags : kDeviceLocalFlags;
}

bool ShouldAllocateNewMemoryForUpdate(ContextVk *contextVk, size_t subDataSize, size_t bufferSize)
{
    // A sub data update with size > 50% of buffer size meets the threshold
    // to acquire a new BufferHelper from the pool.
    return contextVk->getRenderer()->getFeatures().preferCPUForBufferSubData.enabled ||
           subDataSize > (bufferSize / 2);
}

bool ShouldUseCPUToCopyData(ContextVk *contextVk,
                            const vk::BufferHelper &buffer,
                            size_t copySize,
                            size_t bufferSize)
{
    RendererVk *renderer = contextVk->getRenderer();

    // If the buffer is not host-visible, or if the GPU may still be writing to it, it cannot be
    // read from the CPU.
    if (!buffer.isHostVisible() || !renderer->hasResourceUseFinished(buffer.getWriteResourceUse()))
    {
        return false;
    }

    // For some GPUs (e.g. ARM) we always prefer using the CPU for the copy instead of the GPU to
    // avoid pipeline bubbles.  If the GPU is currently busy and the copy size is below a certain
    // threshold, we also choose the CPU over the GPU to achieve better parallelism.
    return renderer->getFeatures().preferCPUForBufferSubData.enabled ||
           (renderer->isCommandQueueBusy() &&
            copySize < renderer->getMaxCopyBytesUsingCPUWhenPreservingBufferData());
}

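// Returns true if an open render pass reads from the buffer but does not also write to it.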
bool RenderPassUsesBufferForReadOnly(ContextVk *contextVk, const vk::BufferHelper &buffer)
{
    if (!contextVk->hasActiveRenderPass())
    {
        return false;
    }

    vk::RenderPassCommandBufferHelper &renderPassCommands =
        contextVk->getStartedRenderPassCommands();
    return renderPassCommands.usesBuffer(buffer) && !renderPassCommands.usesBufferForWrite(buffer);
}

// If a render pass is open which uses the buffer in read-only mode, a render pass break can be
// avoided by using acquireAndUpdate.  This can be costly, however, if the update is very small,
// and is limited to platforms where a render pass break is itself costly (i.e. tile-based
// renderers).
bool ShouldAvoidRenderPassBreakOnUpdate(ContextVk *contextVk,
                                        const vk::BufferHelper &buffer,
                                        size_t bufferSize)
{
    // Only avoid breaking the render pass if the buffer is not so big that duplicating it would
    // outweigh the cost of breaking the render pass.  A value of 1KB is chosen as a temporary
    // heuristic, and can be adjusted when such a situation is encountered.
    constexpr size_t kPreferDuplicateOverRenderPassBreakMaxBufferSize = 1024;
    if (!contextVk->getFeatures().preferCPUForBufferSubData.enabled ||
        bufferSize > kPreferDuplicateOverRenderPassBreakMaxBufferSize)
    {
        return false;
    }

    return RenderPassUsesBufferForReadOnly(contextVk, buffer);
}

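// Dynamic usage hints map to the Dynamic buffer usage type; all other usage hints are treated as
// Static.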
BufferUsageType GetBufferUsageType(gl::BufferUsage usage)
{
    return (usage == gl::BufferUsage::DynamicDraw || usage == gl::BufferUsage::DynamicCopy ||
            usage == gl::BufferUsage::DynamicRead)
               ? BufferUsageType::Dynamic
               : BufferUsageType::Static;
}

angle::Result GetMemoryTypeIndex(ContextVk *contextVk,
                                 VkDeviceSize size,
                                 VkMemoryPropertyFlags memoryPropertyFlags,
                                 uint32_t *memoryTypeIndexOut)
{
    RendererVk *renderer = contextVk->getRenderer();
    const vk::Allocator &allocator = renderer->getAllocator();

    bool persistentlyMapped = renderer->getFeatures().persistentlyMappedBuffers.enabled;
    VkBufferUsageFlags defaultBufferUsageFlags = GetDefaultBufferUsageFlags(renderer);

    VkBufferCreateInfo createInfo    = {};
    createInfo.sType                 = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    createInfo.flags                 = 0;
    createInfo.size                  = size;
    createInfo.usage                 = defaultBufferUsageFlags;
    createInfo.sharingMode           = VK_SHARING_MODE_EXCLUSIVE;
    createInfo.queueFamilyIndexCount = 0;
    createInfo.pQueueFamilyIndices   = nullptr;

    // Host visible is required; all other bits are preferred (i.e. optional).
    VkMemoryPropertyFlags requiredFlags =
        (memoryPropertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
    VkMemoryPropertyFlags preferredFlags =
        (memoryPropertyFlags & (~VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));

    // Check that the allocation is not too large.
    uint32_t memoryTypeIndex = 0;
    ANGLE_VK_TRY(contextVk, allocator.findMemoryTypeIndexForBufferInfo(
                                createInfo, requiredFlags, preferredFlags, persistentlyMapped,
                                &memoryTypeIndex));
    *memoryTypeIndexOut = memoryTypeIndex;

    return angle::Result::Continue;
}

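// A data source describes a self-copy when it refers to a source buffer (rather than CPU data)
// whose serial matches the destination buffer's serial.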
bool IsSelfCopy(const BufferDataSource &dataSource, const vk::BufferHelper &destination)
{
    return dataSource.data == nullptr &&
           dataSource.buffer->getBufferSerial() == destination.getBufferSerial();
}
}  // namespace

// ConversionBuffer implementation.
ConversionBuffer::ConversionBuffer(RendererVk *renderer,
                                   VkBufferUsageFlags usageFlags,
                                   size_t initialSize,
                                   size_t alignment,
                                   bool hostVisible)
    : dirty(true)
{
    data = std::make_unique<vk::BufferHelper>();
}

ConversionBuffer::~ConversionBuffer()
{
    ASSERT(!data || !data->valid());
}

ConversionBuffer::ConversionBuffer(ConversionBuffer &&other) = default;

// BufferVk::VertexConversionBuffer implementation.
BufferVk::VertexConversionBuffer::VertexConversionBuffer(RendererVk *renderer,
                                                         angle::FormatID formatIDIn,
                                                         GLuint strideIn,
                                                         size_t offsetIn,
                                                         bool hostVisible)
    : ConversionBuffer(renderer,
                       vk::kVertexBufferUsageFlags,
                       kConvertedArrayBufferInitialSize,
                       vk::kVertexBufferAlignment,
                       hostVisible),
      formatID(formatIDIn),
      stride(strideIn),
      offset(offsetIn)
{}

BufferVk::VertexConversionBuffer::VertexConversionBuffer(VertexConversionBuffer &&other) = default;

BufferVk::VertexConversionBuffer::~VertexConversionBuffer() = default;

// BufferVk implementation.
BufferVk::BufferVk(const gl::BufferState &state)
    : BufferImpl(state),
      mClientBuffer(nullptr),
      mMemoryTypeIndex(0),
      mMemoryPropertyFlags(0),
      mIsStagingBufferMapped(false),
      mHasValidData(false),
      mIsMappedForWrite(false),
      mUsageType(BufferUsageType::Static),
      mMappedOffset(0),
      mMappedLength(0)
{}

BufferVk::~BufferVk() {}

void BufferVk::destroy(const gl::Context *context)
{
    ContextVk *contextVk = vk::GetImpl(context);

    release(contextVk);
}

void BufferVk::release(ContextVk *contextVk)
{
    RendererVk *renderer = contextVk->getRenderer();
    if (mBuffer.valid())
    {
        mBuffer.releaseBufferAndDescriptorSetCache(contextVk);
    }
    if (mStagingBuffer.valid())
    {
        mStagingBuffer.release(renderer);
    }

    for (ConversionBuffer &buffer : mVertexConversionBuffers)
    {
        buffer.data->release(renderer);
    }
    mVertexConversionBuffers.clear();
}

angle::Result BufferVk::setExternalBufferData(const gl::Context *context,
                                              gl::BufferBinding target,
                                              GLeglClientBufferEXT clientBuffer,
                                              size_t size,
                                              VkMemoryPropertyFlags memoryPropertyFlags)
{
    ContextVk *contextVk = vk::GetImpl(context);

    // Release and re-create the memory and buffer.
    release(contextVk);

    VkBufferCreateInfo createInfo    = {};
    createInfo.sType                 = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    createInfo.flags                 = 0;
    createInfo.size                  = size;
    createInfo.usage                 = GetDefaultBufferUsageFlags(contextVk->getRenderer());
    createInfo.sharingMode           = VK_SHARING_MODE_EXCLUSIVE;
    createInfo.queueFamilyIndexCount = 0;
    createInfo.pQueueFamilyIndices   = nullptr;

    return mBuffer.initExternal(contextVk, memoryPropertyFlags, createInfo, clientBuffer);
}

angle::Result BufferVk::setDataWithUsageFlags(const gl::Context *context,
                                              gl::BufferBinding target,
                                              GLeglClientBufferEXT clientBuffer,
                                              const void *data,
                                              size_t size,
                                              gl::BufferUsage usage,
                                              GLbitfield flags)
{
    ContextVk *contextVk = vk::GetImpl(context);
    VkMemoryPropertyFlags memoryPropertyFlags = 0;
    bool persistentMapRequired = false;
    const bool isExternalBuffer = clientBuffer != nullptr;

    switch (usage)
    {
        case gl::BufferUsage::InvalidEnum:
        {
            // glBufferStorage API call
            memoryPropertyFlags =
                GetStorageMemoryType(contextVk->getRenderer(), flags, isExternalBuffer);
            persistentMapRequired = (flags & GL_MAP_PERSISTENT_BIT_EXT) != 0;
            break;
        }
        default:
        {
            // glBufferData API call
            memoryPropertyFlags = GetPreferredMemoryType(contextVk->getRenderer(), target, usage);
            break;
        }
    }

    if (isExternalBuffer)
    {
        ANGLE_TRY(setExternalBufferData(context, target, clientBuffer, size, memoryPropertyFlags));
        if (!mBuffer.isHostVisible())
        {
            // If the external buffer's memory does not support the host-visible memory property,
            // we cannot support a persistent map request.
            ANGLE_VK_CHECK(contextVk, !persistentMapRequired, VK_ERROR_MEMORY_MAP_FAILED);
        }

        mClientBuffer = clientBuffer;

        return angle::Result::Continue;
    }
    return setDataWithMemoryType(context, target, data, size, memoryPropertyFlags, usage);
}

angle::Result BufferVk::setData(const gl::Context *context,
                                gl::BufferBinding target,
                                const void *data,
                                size_t size,
                                gl::BufferUsage usage)
{
    ContextVk *contextVk = vk::GetImpl(context);
    // Assume host visible/coherent memory available.
    VkMemoryPropertyFlags memoryPropertyFlags =
        GetPreferredMemoryType(contextVk->getRenderer(), target, usage);
    return setDataWithMemoryType(context, target, data, size, memoryPropertyFlags, usage);
}

angle::Result BufferVk::setDataWithMemoryType(const gl::Context *context,
                                              gl::BufferBinding target,
                                              const void *data,
                                              size_t size,
                                              VkMemoryPropertyFlags memoryPropertyFlags,
                                              gl::BufferUsage usage)
{
    ContextVk *contextVk = vk::GetImpl(context);
    RendererVk *renderer = contextVk->getRenderer();

    // Since the buffer is being entirely reinitialized, reset the valid-data flag. If the caller
    // passed in data to fill the buffer, the flag will be updated when the data is copied to the
    // buffer.
    mHasValidData = false;

    if (size == 0)
    {
        // Nothing to do.
        return angle::Result::Continue;
    }

    const BufferUsageType usageType = GetBufferUsageType(usage);
    const BufferUpdateType updateType =
        calculateBufferUpdateTypeOnFullUpdate(renderer, size, memoryPropertyFlags, usageType, data);

    if (updateType == BufferUpdateType::StorageRedefined)
    {
        mUsageType           = usageType;
        mMemoryPropertyFlags = memoryPropertyFlags;
        ANGLE_TRY(GetMemoryTypeIndex(contextVk, size, memoryPropertyFlags, &mMemoryTypeIndex));
        ANGLE_TRY(acquireBufferHelper(contextVk, size, mUsageType));
    }

    if (data != nullptr)
    {
        BufferDataSource dataSource = {};
        dataSource.data             = data;

        // Handle full-buffer updates similarly to glBufferSubData.
        ANGLE_TRY(setDataImpl(contextVk, size, dataSource, size, 0, updateType));
    }

    return angle::Result::Continue;
}

angle::Result BufferVk::setSubData(const gl::Context *context,
                                   gl::BufferBinding target,
                                   const void *data,
                                   size_t size,
                                   size_t offset)
{
    ASSERT(mBuffer.valid());

    BufferDataSource dataSource = {};
    dataSource.data             = data;

    ContextVk *contextVk = vk::GetImpl(context);
    return setDataImpl(contextVk, static_cast<size_t>(mState.getSize()), dataSource, size, offset,
                       BufferUpdateType::ContentsUpdate);
}

angle::Result BufferVk::copySubData(const gl::Context *context,
                                    BufferImpl *source,
                                    GLintptr sourceOffset,
                                    GLintptr destOffset,
                                    GLsizeiptr size)
{
    ASSERT(mBuffer.valid());

    ContextVk *contextVk = vk::GetImpl(context);
    BufferVk *sourceVk   = GetAs<BufferVk>(source);

    BufferDataSource dataSource = {};
    dataSource.buffer           = &sourceVk->getBuffer();
    dataSource.bufferOffset     = static_cast<VkDeviceSize>(sourceOffset);

    ASSERT(dataSource.buffer->valid());

    return setDataImpl(contextVk, static_cast<size_t>(mState.getSize()), dataSource, size,
                       destOffset, BufferUpdateType::ContentsUpdate);
}

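// Allocates (or reuses) a host-visible staging buffer of at least |size| bytes and returns a
// pointer to its mapped memory.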
angle::Result BufferVk::allocStagingBuffer(ContextVk *contextVk,
                                           vk::MemoryCoherency coherency,
                                           VkDeviceSize size,
                                           uint8_t **mapPtr)
{
    ASSERT(!mIsStagingBufferMapped);

    if (mStagingBuffer.valid())
    {
        if (size <= mStagingBuffer.getSize() &&
            (coherency == vk::MemoryCoherency::Coherent) == mStagingBuffer.isCoherent() &&
            contextVk->getRenderer()->hasResourceUseFinished(mStagingBuffer.getResourceUse()))
        {
            // If size is big enough and it is idle, then just reuse the existing staging buffer.
            *mapPtr                = mStagingBuffer.getMappedMemory();
            mIsStagingBufferMapped = true;
            return angle::Result::Continue;
        }
        mStagingBuffer.release(contextVk->getRenderer());
    }

    ANGLE_TRY(
        mStagingBuffer.allocateForCopyBuffer(contextVk, static_cast<size_t>(size), coherency));
    *mapPtr                = mStagingBuffer.getMappedMemory();
    mIsStagingBufferMapped = true;

    return angle::Result::Continue;
}

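// Flushes the mapped staging buffer if it is non-coherent, then enqueues a GPU copy from the
// staging buffer into the given range of mBuffer.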
angle::Result BufferVk::flushStagingBuffer(ContextVk *contextVk,
                                           VkDeviceSize offset,
                                           VkDeviceSize size)
{
    RendererVk *renderer = contextVk->getRenderer();

    ASSERT(mIsStagingBufferMapped);
    ASSERT(mStagingBuffer.valid());

    if (!mStagingBuffer.isCoherent())
    {
        ANGLE_TRY(mStagingBuffer.flush(renderer));
    }

    // Enqueue a copy command on the GPU.
    VkBufferCopy copyRegion = {mStagingBuffer.getOffset(), mBuffer.getOffset() + offset, size};
    ANGLE_TRY(mBuffer.copyFromBuffer(contextVk, &mStagingBuffer, 1, &copyRegion));

    return angle::Result::Continue;
}

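// Maps a buffer that is not host-visible by copying the requested range into a coherent,
// host-visible staging buffer and waiting for that GPU copy to complete.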
angle::Result BufferVk::handleDeviceLocalBufferMap(ContextVk *contextVk,
                                                   VkDeviceSize offset,
                                                   VkDeviceSize size,
                                                   uint8_t **mapPtr)
{
    ANGLE_TRY(allocStagingBuffer(contextVk, vk::MemoryCoherency::Coherent, size, mapPtr));

    // Copy data from device local buffer to host visible staging buffer.
    VkBufferCopy copyRegion = {mBuffer.getOffset() + offset, mStagingBuffer.getOffset(), size};
    ANGLE_TRY(mStagingBuffer.copyFromBuffer(contextVk, &mBuffer, 1, &copyRegion));
    ANGLE_TRY(mStagingBuffer.waitForIdle(contextVk, "GPU stall due to mapping device local buffer",
                                         RenderPassClosureReason::DeviceLocalBufferMap));
    // Because the buffer is coherent, no need to call invalidate here.

    return angle::Result::Continue;
}

angle::Result BufferVk::map(const gl::Context *context, GLenum access, void **mapPtr)
{
    ASSERT(mBuffer.valid());
    ASSERT(access == GL_WRITE_ONLY_OES);

    return mapImpl(vk::GetImpl(context), GL_MAP_WRITE_BIT, mapPtr);
}

angle::Result BufferVk::mapRange(const gl::Context *context,
                                 size_t offset,
                                 size_t length,
                                 GLbitfield access,
                                 void **mapPtr)
{
    ANGLE_TRACE_EVENT0("gpu.angle", "BufferVk::mapRange");
    return mapRangeImpl(vk::GetImpl(context), offset, length, access, mapPtr);
}

angle::Result BufferVk::mapImpl(ContextVk *contextVk, GLbitfield access, void **mapPtr)
{
    return mapRangeImpl(contextVk, 0, static_cast<VkDeviceSize>(mState.getSize()), access, mapPtr);
}

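// "Ghosts" the buffer: instead of stalling on the GPU's read-only use of the buffer, a new buffer
// is acquired and the old contents are copied into it on the CPU so the map can proceed
// immediately.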
angle::Result BufferVk::ghostMappedBuffer(ContextVk *contextVk,
                                          VkDeviceSize offset,
                                          VkDeviceSize length,
                                          GLbitfield access,
                                          void **mapPtr)
{
    // We shouldn't get here if it is external memory.
    ASSERT(!isExternalBuffer());

    ++contextVk->getPerfCounters().buffersGhosted;

    // If we are creating a new buffer because the GPU is using it as read-only, then we
    // also need to copy the contents of the previous buffer into the new buffer, in
    // case the caller only updates a portion of the new buffer.
    vk::BufferHelper src = std::move(mBuffer);
    ANGLE_TRY(acquireBufferHelper(contextVk, static_cast<size_t>(mState.getSize()),
                                  BufferUsageType::Dynamic));

    // Before returning the new buffer, map the previous buffer and copy its entire
    // contents into the new buffer.
    uint8_t *srcMapPtr = nullptr;
    uint8_t *dstMapPtr = nullptr;
    ANGLE_TRY(src.map(contextVk, &srcMapPtr));
    ANGLE_TRY(mBuffer.map(contextVk, &dstMapPtr));

    ASSERT(src.isCoherent());
    ASSERT(mBuffer.isCoherent());

    // No need to copy over [offset, offset + length), just around it.
    if ((access & GL_MAP_INVALIDATE_RANGE_BIT) != 0)
    {
        if (offset != 0)
        {
            memcpy(dstMapPtr, srcMapPtr, static_cast<size_t>(offset));
        }
        size_t totalSize      = static_cast<size_t>(mState.getSize());
        size_t remainingStart = static_cast<size_t>(offset + length);
        size_t remainingSize  = totalSize - remainingStart;
        if (remainingSize != 0)
        {
            memcpy(dstMapPtr + remainingStart, srcMapPtr + remainingStart, remainingSize);
        }
    }
    else
    {
        memcpy(dstMapPtr, srcMapPtr, static_cast<size_t>(mState.getSize()));
    }

    src.releaseBufferAndDescriptorSetCache(contextVk);

    // Return the already mapped pointer with the offset adjustment to avoid the call to unmap().
    *mapPtr = dstMapPtr + offset;

    return angle::Result::Continue;
}

angle::Result BufferVk::mapRangeImpl(ContextVk *contextVk,
                                     VkDeviceSize offset,
                                     VkDeviceSize length,
                                     GLbitfield access,
                                     void **mapPtr)
{
    RendererVk *renderer = contextVk->getRenderer();
    ASSERT(mBuffer.valid());

    // Record the map call parameters in case this call originates from ANGLE internals (in which
    // case the access/offset/length will be inconsistent with mState).
    mIsMappedForWrite = (access & GL_MAP_WRITE_BIT) != 0;
    mMappedOffset     = offset;
    mMappedLength     = length;

    uint8_t **mapPtrBytes = reinterpret_cast<uint8_t **>(mapPtr);
    bool hostVisible      = mBuffer.isHostVisible();

    // MAP_UNSYNCHRONIZED_BIT, so immediately map.
    if ((access & GL_MAP_UNSYNCHRONIZED_BIT) != 0)
    {
        if (hostVisible)
        {
            return mBuffer.mapWithOffset(contextVk, mapPtrBytes, static_cast<size_t>(offset));
        }
        return handleDeviceLocalBufferMap(contextVk, offset, length, mapPtrBytes);
    }

    // Read case
    if ((access & GL_MAP_WRITE_BIT) == 0)
    {
        // If the app is not going to write, all we need is to ensure the GPU writes are finished.
        // Concurrent reads from the CPU and GPU are allowed.
        if (!renderer->hasResourceUseFinished(mBuffer.getWriteResourceUse()))
        {
            // If there are unflushed write commands for the resource, flush them.
            if (contextVk->hasUnsubmittedUse(mBuffer.getWriteResourceUse()))
            {
                ANGLE_TRY(contextVk->flushImpl(nullptr, nullptr,
                                               RenderPassClosureReason::BufferWriteThenMap));
            }
            ANGLE_TRY(renderer->finishResourceUse(contextVk, mBuffer.getWriteResourceUse()));
        }
        if (hostVisible)
        {
            return mBuffer.mapWithOffset(contextVk, mapPtrBytes, static_cast<size_t>(offset));
        }
        return handleDeviceLocalBufferMap(contextVk, offset, length, mapPtrBytes);
    }

    // Write case
    if (!hostVisible)
    {
        return handleDeviceLocalBufferMap(contextVk, offset, length, mapPtrBytes);
    }

    // Write case, buffer not in use.
    if (isExternalBuffer() || !isCurrentlyInUse(contextVk->getRenderer()))
    {
        return mBuffer.mapWithOffset(contextVk, mapPtrBytes, static_cast<size_t>(offset));
    }

    // Write case, buffer in use.
    //
    // Here, we try to map the buffer, but it's busy. Instead of waiting for the GPU to
    // finish, we just allocate a new buffer if:
    // 1.) Caller has told us it doesn't care about previous contents, or
    // 2.) The GPU won't write to the buffer.

    bool rangeInvalidate = (access & GL_MAP_INVALIDATE_RANGE_BIT) != 0;
    bool entireBufferInvalidated =
        ((access & GL_MAP_INVALIDATE_BUFFER_BIT) != 0) ||
        (rangeInvalidate && offset == 0 && static_cast<VkDeviceSize>(mState.getSize()) == length);

    if (entireBufferInvalidated)
    {
        ANGLE_TRY(acquireBufferHelper(contextVk, static_cast<size_t>(mState.getSize()),
                                      BufferUsageType::Dynamic));
        return mBuffer.mapWithOffset(contextVk, mapPtrBytes, static_cast<size_t>(offset));
    }

    bool smallMapRange = (length < static_cast<VkDeviceSize>(mState.getSize()) / 2);

    if (smallMapRange && rangeInvalidate)
    {
        ANGLE_TRY(allocStagingBuffer(contextVk, vk::MemoryCoherency::NonCoherent,
                                     static_cast<size_t>(length), mapPtrBytes));
        return angle::Result::Continue;
    }

    if (renderer->hasResourceUseFinished(mBuffer.getWriteResourceUse()))
    {
        // This will keep the new buffer mapped and update mapPtr, so return immediately.
        return ghostMappedBuffer(contextVk, offset, length, access, mapPtr);
    }

    // Write case (worst case, buffer in use for write)
    ANGLE_TRY(mBuffer.waitForIdle(contextVk, "GPU stall due to mapping buffer in use by the GPU",
                                  RenderPassClosureReason::BufferInUseWhenSynchronizedMap));
    return mBuffer.mapWithOffset(contextVk, mapPtrBytes, static_cast<size_t>(offset));
}

angle::Result BufferVk::unmap(const gl::Context *context, GLboolean *result)
{
    ANGLE_TRY(unmapImpl(vk::GetImpl(context)));

    // This should be false if the contents have been corrupted through external means. Vulkan
    // doesn't provide such information.
    *result = true;

    return angle::Result::Continue;
}

angle::Result BufferVk::unmapImpl(ContextVk *contextVk)
{
    ASSERT(mBuffer.valid());

    if (mIsStagingBufferMapped)
    {
        ASSERT(mStagingBuffer.valid());
        // The buffer is device-local, or this was the small-range-map optimization.
        if (mIsMappedForWrite)
        {
            ANGLE_TRY(flushStagingBuffer(contextVk, mMappedOffset, mMappedLength));
        }

        mIsStagingBufferMapped = false;
    }
    else
    {
        ASSERT(mBuffer.isHostVisible());
        mBuffer.unmap(contextVk->getRenderer());
    }

    if (mIsMappedForWrite)
    {
        dataUpdated();
    }

    // Reset the mapping parameters.
    mIsMappedForWrite = false;
    mMappedOffset     = 0;
    mMappedLength     = 0;

    return angle::Result::Continue;
}

angle::Result BufferVk::getSubData(const gl::Context *context,
                                   GLintptr offset,
                                   GLsizeiptr size,
                                   void *outData)
{
    ASSERT(offset + size <= getSize());
    ASSERT(mBuffer.valid());
    ContextVk *contextVk = vk::GetImpl(context);
    void *mapPtr;
    ANGLE_TRY(mapRangeImpl(contextVk, offset, size, GL_MAP_READ_BIT, &mapPtr));
    memcpy(outData, mapPtr, size);
    return unmapImpl(contextVk);
}

angle::Result BufferVk::getIndexRange(const gl::Context *context,
                                      gl::DrawElementsType type,
                                      size_t offset,
                                      size_t count,
                                      bool primitiveRestartEnabled,
                                      gl::IndexRange *outRange)
{
    ContextVk *contextVk = vk::GetImpl(context);
    RendererVk *renderer = contextVk->getRenderer();

    // This is a workaround for the mock ICD not implementing buffer memory state.
    // Could be removed if https://github.com/KhronosGroup/Vulkan-Tools/issues/84 is fixed.
    if (renderer->isMockICDEnabled())
    {
        outRange->start = 0;
        outRange->end   = 0;
        return angle::Result::Continue;
    }

    ANGLE_TRACE_EVENT0("gpu.angle", "BufferVk::getIndexRange");

    void *mapPtr;
    ANGLE_TRY(mapRangeImpl(contextVk, offset, getSize(), GL_MAP_READ_BIT, &mapPtr));
    *outRange = gl::ComputeIndexRange(type, mapPtr, count, primitiveRestartEnabled);
    ANGLE_TRY(unmapImpl(contextVk));

    return angle::Result::Continue;
}

angle::Result BufferVk::updateBuffer(ContextVk *contextVk,
                                     size_t bufferSize,
                                     const BufferDataSource &dataSource,
                                     size_t updateSize,
                                     size_t updateOffset)
{
    // To copy on the CPU, the destination must be host-visible.  The source should also be either
    // a CPU pointer or a host-visible buffer that is not being written to by the GPU.
    const bool shouldCopyOnCPU =
        mBuffer.isHostVisible() &&
        (dataSource.data != nullptr ||
         ShouldUseCPUToCopyData(contextVk, *dataSource.buffer, updateSize, bufferSize));

    if (shouldCopyOnCPU)
    {
        ANGLE_TRY(directUpdate(contextVk, dataSource, updateSize, updateOffset));
    }
    else
    {
        ANGLE_TRY(stagedUpdate(contextVk, dataSource, updateSize, updateOffset));
    }
    return angle::Result::Continue;
}

angle::Result BufferVk::directUpdate(ContextVk *contextVk,
                                     const BufferDataSource &dataSource,
                                     size_t size,
                                     size_t offset)
{
    RendererVk *renderer      = contextVk->getRenderer();
    uint8_t *srcPointerMapped = nullptr;
    const uint8_t *srcPointer = nullptr;
    uint8_t *dstPointer       = nullptr;

    // Map the destination buffer.
    ASSERT(mBuffer.isHostVisible());
    ANGLE_TRY(mBuffer.mapWithOffset(contextVk, &dstPointer, offset));
    ASSERT(dstPointer);

    // If source data is coming from a buffer, map it.  If this is a self-copy, avoid
    // double-mapping the buffer.
    if (dataSource.data != nullptr)
    {
        srcPointer = static_cast<const uint8_t *>(dataSource.data);
    }
    else
    {
        ANGLE_TRY(dataSource.buffer->mapWithOffset(contextVk, &srcPointerMapped,
                                                   static_cast<size_t>(dataSource.bufferOffset)));
        srcPointer = srcPointerMapped;
    }

    memcpy(dstPointer, srcPointer, size);

    // Unmap the destination and source buffers if applicable.
    //
    // If the buffer has dynamic usage, then the intent is frequent client-side updates to the
    // buffer.  Don't CPU-unmap the buffer; we will take care of unmapping when releasing the
    // buffer to either the renderer or mBufferFreeList.
    if (GetBufferUsageType(mState.getUsage()) == BufferUsageType::Static)
    {
        mBuffer.unmap(renderer);
    }
    ASSERT(mBuffer.isCoherent());

    if (srcPointerMapped != nullptr)
    {
        dataSource.buffer->unmap(renderer);
    }

    return angle::Result::Continue;
}

angle::Result BufferVk::stagedUpdate(ContextVk *contextVk,
                                     const BufferDataSource &dataSource,
                                     size_t size,
                                     size_t offset)
{
    // If data is coming from a CPU pointer, stage it in a temporary staging buffer.
    // Otherwise, do a GPU copy directly from the given buffer.
    if (dataSource.data != nullptr)
    {
        uint8_t *mapPointer = nullptr;
        ANGLE_TRY(
            allocStagingBuffer(contextVk, vk::MemoryCoherency::NonCoherent, size, &mapPointer));
        memcpy(mapPointer, dataSource.data, size);
        ANGLE_TRY(flushStagingBuffer(contextVk, offset, size));
        mIsStagingBufferMapped = false;
    }
    else
    {
        // Check for self-dependency.
        vk::CommandBufferAccess access;
        if (dataSource.buffer->getBufferSerial() == mBuffer.getBufferSerial())
        {
            access.onBufferSelfCopy(&mBuffer);
        }
        else
        {
            access.onBufferTransferRead(dataSource.buffer);
            access.onBufferTransferWrite(&mBuffer);
        }

        vk::OutsideRenderPassCommandBuffer *commandBuffer;
        ANGLE_TRY(contextVk->getOutsideRenderPassCommandBuffer(access, &commandBuffer));

        // Enqueue a copy command on the GPU.
        const VkBufferCopy copyRegion = {dataSource.bufferOffset + dataSource.buffer->getOffset(),
                                         static_cast<VkDeviceSize>(offset) + mBuffer.getOffset(),
                                         static_cast<VkDeviceSize>(size)};

        commandBuffer->copyBuffer(dataSource.buffer->getBuffer(), mBuffer.getBuffer(), 1,
                                  &copyRegion);
    }

    return angle::Result::Continue;
}

angle::Result BufferVk::acquireAndUpdate(ContextVk *contextVk,
                                         size_t bufferSize,
                                         const BufferDataSource &dataSource,
                                         size_t updateSize,
                                         size_t updateOffset,
                                         BufferUpdateType updateType)
{
    // We shouldn't get here if this is external memory.
    ASSERT(!isExternalBuffer());
    // If StorageRedefined, we cannot use mState.getSize() to allocate a new buffer.
    ASSERT(updateType != BufferUpdateType::StorageRedefined);
    ASSERT(mBuffer.valid());
    ASSERT(mBuffer.getSize() >= bufferSize);

    // Here we acquire a new BufferHelper and directUpdate() the new buffer.
    // If the subData size was less than the buffer's size we additionally enqueue
    // a GPU copy of the remaining regions from the old mBuffer to the new one.
    vk::BufferHelper prevBuffer;
    size_t offsetAfterSubdata      = (updateOffset + updateSize);
    bool updateRegionBeforeSubData = mHasValidData && (updateOffset > 0);
    bool updateRegionAfterSubData  = mHasValidData && (offsetAfterSubdata < bufferSize);

    uint8_t *prevMapPtrBeforeSubData = nullptr;
    uint8_t *prevMapPtrAfterSubData  = nullptr;
    if (updateRegionBeforeSubData || updateRegionAfterSubData)
    {
        prevBuffer = std::move(mBuffer);

        // The total number of bytes that we need to copy from the old buffer to the new one.
        size_t copySize = bufferSize - updateSize;

        // If the buffer is host visible and the GPU is not writing to it, we use the CPU to do the
        // copy.  We need to save the source buffer pointer before we acquire a new buffer.
        if (ShouldUseCPUToCopyData(contextVk, prevBuffer, copySize, bufferSize))
        {
            uint8_t *mapPointer = nullptr;
            // prevBuffer will be recycled (or released and unmapped) by acquireBufferHelper.
            ANGLE_TRY(prevBuffer.map(contextVk, &mapPointer));
            ASSERT(mapPointer);
            prevMapPtrBeforeSubData = mapPointer;
            prevMapPtrAfterSubData  = mapPointer + offsetAfterSubdata;
        }
    }

    ANGLE_TRY(acquireBufferHelper(contextVk, bufferSize, BufferUsageType::Dynamic));
    ANGLE_TRY(updateBuffer(contextVk, bufferSize, dataSource, updateSize, updateOffset));

    constexpr int kMaxCopyRegions = 2;
    angle::FixedVector<VkBufferCopy, kMaxCopyRegions> copyRegions;

    if (updateRegionBeforeSubData)
    {
        if (prevMapPtrBeforeSubData)
        {
            BufferDataSource beforeSrc = {};
            beforeSrc.data             = prevMapPtrBeforeSubData;

            ANGLE_TRY(directUpdate(contextVk, beforeSrc, updateOffset, 0));
        }
        else
        {
            copyRegions.push_back({prevBuffer.getOffset(), mBuffer.getOffset(), updateOffset});
        }
    }

    if (updateRegionAfterSubData)
    {
        size_t copySize = bufferSize - offsetAfterSubdata;
        if (prevMapPtrAfterSubData)
        {
            BufferDataSource afterSrc = {};
            afterSrc.data             = prevMapPtrAfterSubData;

            ANGLE_TRY(directUpdate(contextVk, afterSrc, copySize, offsetAfterSubdata));
        }
        else
        {
            copyRegions.push_back({prevBuffer.getOffset() + offsetAfterSubdata,
                                   mBuffer.getOffset() + offsetAfterSubdata, copySize});
        }
    }

    if (!copyRegions.empty())
    {
        ANGLE_TRY(mBuffer.copyFromBuffer(
            contextVk, &prevBuffer, static_cast<uint32_t>(copyRegions.size()), copyRegions.data()));
    }

    if (prevBuffer.valid())
    {
        prevBuffer.releaseBufferAndDescriptorSetCache(contextVk);
    }

    return angle::Result::Continue;
}

angle::Result BufferVk::setDataImpl(ContextVk *contextVk,
                                    size_t bufferSize,
                                    const BufferDataSource &dataSource,
                                    size_t updateSize,
                                    size_t updateOffset,
                                    BufferUpdateType updateType)
{
    // if the buffer is currently in use
    //     if it isn't an external buffer and not a self-copy and sub data size meets threshold
    //         acquire a new BufferHelper from the pool
    //     else stage the update
    // else update the buffer directly
    if (isCurrentlyInUse(contextVk->getRenderer()))
    {
        // The acquire-and-update path creates a new buffer, which is sometimes more efficient
        // than trying to update the existing one.  Firstly, this is not done in the following
        // situations:
        //
        // - For external buffers, the underlying storage cannot be reallocated.
        // - If storage has just been redefined, this path is not taken because a new buffer has
        //   already been created by the caller.  Besides, this path uses mState.getSize(), which
        //   the frontend updates only after this call in situations where the storage may be
        //   redefined.  This could happen if the buffer memory is DEVICE_LOCAL and
        //   renderer->getFeatures().allocateNonZeroMemory.enabled is true.  In this case a
        //   copyToBuffer is immediately issued after allocation and isCurrentlyInUse will be true.
        // - If this is a self copy through glCopyBufferSubData, |dataSource| will contain a
        //   reference to |mBuffer|, in which case source information is lost after acquiring a
        //   new buffer.
        //
        // Additionally, this path is taken only if any of the following conditions is true:
        //
        // - If BufferVk does not have any valid data.  This means that there is no data to be
        //   copied from the old buffer to the new one after acquiring it.  This could happen when
        //   the application calls glBufferData with the same size and we reuse the existing buffer
        //   storage.
        // - If the buffer is used read-only in the current render pass.  In this case, acquiring a
        //   new buffer is preferred to avoid breaking the render pass.
        // - The update modifies a significant portion of the buffer.
        // - The preferCPUForBufferSubData feature is enabled.
        //
        const bool canAcquireAndUpdate = !isExternalBuffer() &&
                                         updateType != BufferUpdateType::StorageRedefined &&
                                         !IsSelfCopy(dataSource, mBuffer);
        if (canAcquireAndUpdate &&
            (!mHasValidData || ShouldAvoidRenderPassBreakOnUpdate(contextVk, mBuffer, bufferSize) ||
             ShouldAllocateNewMemoryForUpdate(contextVk, updateSize, bufferSize)))
        {
            ANGLE_TRY(acquireAndUpdate(contextVk, bufferSize, dataSource, updateSize, updateOffset,
                                       updateType));
        }
        else
        {
            if (canAcquireAndUpdate && RenderPassUsesBufferForReadOnly(contextVk, mBuffer))
            {
                ANGLE_VK_PERF_WARNING(contextVk, GL_DEBUG_SEVERITY_LOW,
                                      "Breaking the render pass on small upload to large buffer");
            }

            ANGLE_TRY(stagedUpdate(contextVk, dataSource, updateSize, updateOffset));
        }
    }
    else
    {
        ANGLE_TRY(updateBuffer(contextVk, bufferSize, dataSource, updateSize, updateOffset));
    }

    // Update conversions.
    dataUpdated();

    return angle::Result::Continue;
}

ConversionBuffer *BufferVk::getVertexConversionBuffer(RendererVk *renderer,
                                                      angle::FormatID formatID,
                                                      GLuint stride,
                                                      size_t offset,
                                                      bool hostVisible)
{
    for (VertexConversionBuffer &buffer : mVertexConversionBuffers)
    {
        if (buffer.formatID == formatID && buffer.stride == stride && buffer.offset == offset)
        {
            ASSERT(buffer.data && buffer.data->valid());
            return &buffer;
        }
    }

    mVertexConversionBuffers.emplace_back(renderer, formatID, stride, offset, hostVisible);
    return &mVertexConversionBuffers.back();
}

void BufferVk::dataUpdated()
{
    for (VertexConversionBuffer &buffer : mVertexConversionBuffers)
    {
        buffer.dirty = true;
    }
    // Now we have valid data.
    mHasValidData = true;
}

void BufferVk::onDataChanged()
{
    dataUpdated();
}

angle::Result BufferVk::acquireBufferHelper(ContextVk *contextVk,
                                            size_t sizeInBytes,
                                            BufferUsageType usageType)
{
    RendererVk *renderer = contextVk->getRenderer();
    size_t size          = roundUpPow2(sizeInBytes, kBufferSizeGranularity);
    size_t alignment     = renderer->getDefaultBufferAlignment();

    if (mBuffer.valid())
    {
        mBuffer.releaseBufferAndDescriptorSetCache(contextVk);
    }

    // Allocate the buffer directly.
    ANGLE_TRY(mBuffer.initSuballocation(contextVk, mMemoryTypeIndex, size, alignment, usageType));

    // Tell the observers (front end) that a new buffer was created, so the necessary
    // dirty bits can be set. This allows the buffer views pointing to the old buffer to
    // be recreated and point to the new buffer, along with updating the descriptor sets
    // to use the new buffer.
    onStateChange(angle::SubjectMessage::InternalMemoryAllocationChanged);

    return angle::Result::Continue;
}

bool BufferVk::isCurrentlyInUse(RendererVk *renderer) const
{
    return !renderer->hasResourceUseFinished(mBuffer.getResourceUse());
}

// When a buffer is being completely changed, calculate whether it's better to allocate a new
// buffer or overwrite the existing one.
BufferUpdateType BufferVk::calculateBufferUpdateTypeOnFullUpdate(
    RendererVk *renderer,
    size_t size,
    VkMemoryPropertyFlags memoryPropertyFlags,
    BufferUsageType usageType,
    const void *data) const
{
    // 0-sized updates should be no-op'd before this call.
    ASSERT(size > 0);

    // If there is no existing buffer, this cannot be a content update.
    if (!mBuffer.valid())
    {
        return BufferUpdateType::StorageRedefined;
    }

    const bool inUseAndRespecifiedWithoutData = data == nullptr && isCurrentlyInUse(renderer);
    bool redefineStorage = shouldRedefineStorage(renderer, usageType, memoryPropertyFlags, size);

    // Create a new buffer if the buffer is busy and it's being redefined without data.
    // Additionally, a new buffer is created if any of the parameters change (memory type, usage,
    // size).
    return redefineStorage || inUseAndRespecifiedWithoutData ? BufferUpdateType::StorageRedefined
                                                             : BufferUpdateType::ContentsUpdate;
}

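// Storage must be redefined if the usage type, the memory property flags, or the (aligned) size
// of the request differ from those of the existing buffer.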
bool BufferVk::shouldRedefineStorage(RendererVk *renderer,
                                     BufferUsageType usageType,
                                     VkMemoryPropertyFlags memoryPropertyFlags,
                                     size_t size) const
{
    if (mUsageType != usageType)
    {
        return true;
    }

    if (mMemoryPropertyFlags != memoryPropertyFlags)
    {
        return true;
    }

    if (size > mBuffer.getSize())
    {
        return true;
    }
    else
    {
        size_t alignment   = renderer->getDefaultBufferAlignment();
        size_t sizeInBytes = roundUpPow2(size, kBufferSizeGranularity);
        size_t alignedSize = roundUp(sizeInBytes, alignment);
        if (alignedSize != mBuffer.getSize())
        {
            return true;
        }
    }

    return false;
}
}  // namespace rx