//
// Copyright 2016 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// BufferVk.cpp:
//    Implements the class methods for BufferVk.
//

#include "libANGLE/renderer/vulkan/BufferVk.h"

#include "common/FixedVector.h"
#include "common/debug.h"
#include "common/mathutil.h"
#include "common/utilities.h"
#include "libANGLE/Context.h"
#include "libANGLE/renderer/vulkan/ContextVk.h"
#include "libANGLE/renderer/vulkan/vk_renderer.h"

namespace rx
{
VkBufferUsageFlags GetDefaultBufferUsageFlags(vk::Renderer *renderer)
{
    // We could potentially use multiple backing buffers for different usages.
    // For now keep a single buffer with all relevant usage flags.
    VkBufferUsageFlags defaultBufferUsageFlags =
        VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
        VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
        VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
        VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
        VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
    if (renderer->getFeatures().supportsTransformFeedbackExtension.enabled)
    {
        defaultBufferUsageFlags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT |
                                   VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT;
    }
    return defaultBufferUsageFlags;
}

namespace
{
constexpr VkMemoryPropertyFlags kDeviceLocalFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
constexpr VkMemoryPropertyFlags kDeviceLocalHostCoherentFlags =
    (VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
     VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
constexpr VkMemoryPropertyFlags kHostCachedFlags =
    (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
     VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
constexpr VkMemoryPropertyFlags kHostUncachedFlags =
    (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);

// Vertex attribute buffers are used as storage buffers for conversion in compute, where access to
// the buffer is made in 4-byte chunks. Assume the size of the buffer is 4k+n where n is in [1, 3].
// On some hardware, reading 4 bytes from address 4k returns 0, making it impossible to read the
// last n bytes. By rounding the buffer sizes up to a multiple of 4, the problem is alleviated.
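// For example (illustrative only): a 4k+3-byte buffer is padded to 4k+4 bytes by
// roundUpPow2(size, kBufferSizeGranularity) in acquireBufferHelper(), so the trailing bytes can
// be read as part of a complete 4-byte word.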
constexpr size_t kBufferSizeGranularity = 4;
static_assert(gl::isPow2(kBufferSizeGranularity), "use as alignment, must be power of two");

// Start with a fairly small buffer size. We can increase this dynamically as we convert more data.
constexpr size_t kConvertedArrayBufferInitialSize = 1024 * 8;

// Buffers with a static usage pattern are allocated in device-local memory to speed up access to
// and from the GPU. Buffers with a dynamic usage pattern, or that are frequently mapped, instead
// request host-cached memory to speed up access from the CPU.
VkMemoryPropertyFlags GetPreferredMemoryType(vk::Renderer *renderer,
                                             gl::BufferBinding target,
                                             gl::BufferUsage usage)
{
    if (target == gl::BufferBinding::PixelUnpack)
    {
        return kHostCachedFlags;
    }

    switch (usage)
    {
        case gl::BufferUsage::StaticCopy:
        case gl::BufferUsage::StaticDraw:
        case gl::BufferUsage::StaticRead:
            // For static usage, request device-local memory.
            return renderer->getFeatures().preferDeviceLocalMemoryHostVisible.enabled
                       ? kDeviceLocalHostCoherentFlags
                       : kDeviceLocalFlags;
        case gl::BufferUsage::DynamicDraw:
        case gl::BufferUsage::StreamDraw:
            // For non-static usage where the CPU performs a write-only access, request
            // host-uncached memory.
            return renderer->getFeatures().preferHostCachedForNonStaticBufferUsage.enabled
                       ? kHostCachedFlags
                       : kHostUncachedFlags;
        case gl::BufferUsage::DynamicCopy:
        case gl::BufferUsage::DynamicRead:
        case gl::BufferUsage::StreamCopy:
        case gl::BufferUsage::StreamRead:
            // For all other types of usage, request host-cached memory.
            return kHostCachedFlags;
        default:
            UNREACHABLE();
            return kHostCachedFlags;
    }
}

VkMemoryPropertyFlags GetStorageMemoryType(vk::Renderer *renderer,
                                           GLbitfield storageFlags,
                                           bool externalBuffer)
{
    const bool hasMapAccess =
        (storageFlags & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT_EXT)) != 0;

    if (renderer->getFeatures().preferDeviceLocalMemoryHostVisible.enabled)
    {
        const bool canUpdate = (storageFlags & GL_DYNAMIC_STORAGE_BIT_EXT) != 0;
        if (canUpdate || hasMapAccess || externalBuffer)
        {
            // We currently allocate coherent memory for persistently mapped buffers.
            // GL_EXT_buffer_storage allows non-coherent memory, but currently the implementation of
            // |glMemoryBarrier(CLIENT_MAPPED_BUFFER_BARRIER_BIT_EXT)| relies on the mapping being
            // coherent.
            //
            // If persistently mapped buffers ever use non-coherent memory, then said
            // |glMemoryBarrier| call must result in |vkInvalidateMappedMemoryRanges| for all
            // persistently mapped buffers.
            return kDeviceLocalHostCoherentFlags;
        }
        return kDeviceLocalFlags;
    }

    return hasMapAccess ? kHostCachedFlags : kDeviceLocalFlags;
}

bool ShouldAllocateNewMemoryForUpdate(ContextVk *contextVk, size_t subDataSize, size_t bufferSize)
{
    // A sub-data update whose size is more than half of the buffer size meets the threshold to
    // acquire a new BufferHelper from the pool.  The preferCPUForBufferSubData feature forces
    // this path regardless of the update size.
    return contextVk->getRenderer()->getFeatures().preferCPUForBufferSubData.enabled ||
           subDataSize > (bufferSize / 2);
}

bool ShouldUseCPUToCopyData(ContextVk *contextVk,
                            const vk::BufferHelper &buffer,
                            size_t copySize,
                            size_t bufferSize)
{
    vk::Renderer *renderer = contextVk->getRenderer();

    // If the buffer is not host-visible, or if the GPU is still writing to it, it cannot be read
    // from the CPU.
    if (!buffer.isHostVisible() || !renderer->hasResourceUseFinished(buffer.getWriteResourceUse()))
    {
        return false;
    }

    // For some GPUs (e.g. ARM), we always prefer using the CPU for the copy instead of the GPU, to
    // avoid pipeline bubbles.  If the GPU is currently busy and the copy size is below a certain
    // threshold, we use the CPU for the copy to achieve better parallelism.
    return renderer->getFeatures().preferCPUForBufferSubData.enabled ||
           (renderer->isCommandQueueBusy() &&
            copySize < renderer->getMaxCopyBytesUsingCPUWhenPreservingBufferData());
}

bool RenderPassUsesBufferForReadOnly(ContextVk *contextVk, const vk::BufferHelper &buffer)
{
    if (!contextVk->hasActiveRenderPass())
    {
        return false;
    }

    vk::RenderPassCommandBufferHelper &renderPassCommands =
        contextVk->getStartedRenderPassCommands();
    return renderPassCommands.usesBuffer(buffer) && !renderPassCommands.usesBufferForWrite(buffer);
}

// If a render pass is open that uses the buffer in read-only mode, the render pass break can be
// avoided by using acquireAndUpdate.  Duplicating the buffer can be costly, however, so this is
// only done for small buffers, and only on platforms where a render pass break is itself costly
// (i.e. tile-based renderers).
bool ShouldAvoidRenderPassBreakOnUpdate(ContextVk *contextVk,
                                        const vk::BufferHelper &buffer,
                                        size_t bufferSize)
{
    // Only avoid breaking the render pass if the buffer is small enough that duplicating it does
    // not outweigh the cost of breaking the render pass.  A value of 1KB is chosen as a temporary
    // heuristic, and can be adjusted when such a situation is encountered.
    constexpr size_t kPreferDuplicateOverRenderPassBreakMaxBufferSize = 1024;
    if (!contextVk->getFeatures().preferCPUForBufferSubData.enabled ||
        bufferSize > kPreferDuplicateOverRenderPassBreakMaxBufferSize)
    {
        return false;
    }

    return RenderPassUsesBufferForReadOnly(contextVk, buffer);
}

BufferUsageType GetBufferUsageType(gl::BufferUsage usage)
{
    return (usage == gl::BufferUsage::DynamicDraw || usage == gl::BufferUsage::DynamicCopy ||
            usage == gl::BufferUsage::DynamicRead)
               ? BufferUsageType::Dynamic
               : BufferUsageType::Static;
}

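// Note: GetMemoryTypeIndex builds a throw-away VkBufferCreateInfo with the default usage flags so
// the allocator can report which memory type index such a buffer would get; nothing is allocated
// for the BufferVk at this point.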
angle::Result GetMemoryTypeIndex(ContextVk *contextVk,
                                 VkDeviceSize size,
                                 VkMemoryPropertyFlags memoryPropertyFlags,
                                 uint32_t *memoryTypeIndexOut)
{
    vk::Renderer *renderer = contextVk->getRenderer();
    const vk::Allocator &allocator = renderer->getAllocator();

    bool persistentlyMapped = renderer->getFeatures().persistentlyMappedBuffers.enabled;
    VkBufferUsageFlags defaultBufferUsageFlags = GetDefaultBufferUsageFlags(renderer);

    VkBufferCreateInfo createInfo = {};
    createInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    createInfo.flags = 0;
    createInfo.size = size;
    createInfo.usage = defaultBufferUsageFlags;
    createInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
    createInfo.queueFamilyIndexCount = 0;
    createInfo.pQueueFamilyIndices = nullptr;

    // Host-visible is required; all other bits are preferred (i.e., optional).
    VkMemoryPropertyFlags requiredFlags =
        (memoryPropertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
    VkMemoryPropertyFlags preferredFlags =
        (memoryPropertyFlags & (~VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));

    // Check that the allocation is not too large.
    uint32_t memoryTypeIndex = 0;
    ANGLE_VK_TRY(contextVk, allocator.findMemoryTypeIndexForBufferInfo(
                                createInfo, requiredFlags, preferredFlags, persistentlyMapped,
                                &memoryTypeIndex));
    *memoryTypeIndexOut = memoryTypeIndex;

    return angle::Result::Continue;
}

bool IsSelfCopy(const BufferDataSource &dataSource, const vk::BufferHelper &destination)
{
    return dataSource.data == nullptr &&
           dataSource.buffer->getBufferSerial() == destination.getBufferSerial();
}

angle::Result CopyBuffers(ContextVk *contextVk,
                          vk::BufferHelper *srcBuffer,
                          vk::BufferHelper *dstBuffer,
                          uint32_t regionCount,
                          const VkBufferCopy *copyRegions)
{
    ASSERT(srcBuffer->valid() && dstBuffer->valid());

    // Enqueue a copy command on the GPU
    vk::CommandBufferAccess access;
    if (srcBuffer->getBufferSerial() == dstBuffer->getBufferSerial())
    {
        access.onBufferSelfCopy(srcBuffer);
    }
    else
    {
        access.onBufferTransferRead(srcBuffer);
        access.onBufferTransferWrite(dstBuffer);
    }

    vk::OutsideRenderPassCommandBuffer *commandBuffer;
    ANGLE_TRY(contextVk->getOutsideRenderPassCommandBuffer(access, &commandBuffer));

    commandBuffer->copyBuffer(srcBuffer->getBuffer(), dstBuffer->getBuffer(), regionCount,
                              copyRegions);

    return angle::Result::Continue;
}
}  // namespace

// ConversionBuffer implementation.
ConversionBuffer::ConversionBuffer(vk::Renderer *renderer,
                                   VkBufferUsageFlags usageFlags,
                                   size_t initialSize,
                                   size_t alignment,
                                   bool hostVisible)
    : dirty(true)
{
    data = std::make_unique<vk::BufferHelper>();
}

ConversionBuffer::~ConversionBuffer()
{
    ASSERT(!data || !data->valid());
}

ConversionBuffer::ConversionBuffer(ConversionBuffer &&other) = default;

// BufferVk::VertexConversionBuffer implementation.
BufferVk::VertexConversionBuffer::VertexConversionBuffer(vk::Renderer *renderer,
                                                         angle::FormatID formatIDIn,
                                                         GLuint strideIn,
                                                         size_t offsetIn,
                                                         bool hostVisible)
    : ConversionBuffer(renderer,
                       vk::kVertexBufferUsageFlags,
                       kConvertedArrayBufferInitialSize,
                       vk::kVertexBufferAlignment,
                       hostVisible),
      formatID(formatIDIn),
      stride(strideIn),
      offset(offsetIn)
{}

BufferVk::VertexConversionBuffer::VertexConversionBuffer(VertexConversionBuffer &&other) = default;

BufferVk::VertexConversionBuffer::~VertexConversionBuffer() = default;

// BufferVk implementation.
BufferVk::BufferVk(const gl::BufferState &state)
    : BufferImpl(state),
      mClientBuffer(nullptr),
      mMemoryTypeIndex(0),
      mMemoryPropertyFlags(0),
      mIsStagingBufferMapped(false),
      mHasValidData(false),
      mIsMappedForWrite(false),
      mUsageType(BufferUsageType::Static),
      mMappedOffset(0),
      mMappedLength(0)
{}

BufferVk::~BufferVk() {}

void BufferVk::destroy(const gl::Context *context)
{
    ContextVk *contextVk = vk::GetImpl(context);

    (void)release(contextVk);
}

angle::Result BufferVk::release(ContextVk *contextVk)
{
    vk::Renderer *renderer = contextVk->getRenderer();
    if (mBuffer.valid())
    {
        ANGLE_TRY(contextVk->releaseBufferAllocation(&mBuffer));
    }
    if (mStagingBuffer.valid())
    {
        mStagingBuffer.release(renderer);
    }

    for (ConversionBuffer &buffer : mVertexConversionBuffers)
    {
        buffer.data->release(renderer);
    }
    mVertexConversionBuffers.clear();

    return angle::Result::Continue;
}

angle::Result BufferVk::setExternalBufferData(const gl::Context *context,
                                              gl::BufferBinding target,
                                              GLeglClientBufferEXT clientBuffer,
                                              size_t size,
                                              VkMemoryPropertyFlags memoryPropertyFlags)
{
    ContextVk *contextVk = vk::GetImpl(context);

    // Release and re-create the memory and buffer.
    ANGLE_TRY(release(contextVk));

    VkBufferCreateInfo createInfo = {};
    createInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    createInfo.flags = 0;
    createInfo.size = size;
    createInfo.usage = GetDefaultBufferUsageFlags(contextVk->getRenderer());
    createInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
    createInfo.queueFamilyIndexCount = 0;
    createInfo.pQueueFamilyIndices = nullptr;

    return mBuffer.initExternal(contextVk, memoryPropertyFlags, createInfo, clientBuffer);
}

angle::Result BufferVk::setDataWithUsageFlags(const gl::Context *context,
                                              gl::BufferBinding target,
                                              GLeglClientBufferEXT clientBuffer,
                                              const void *data,
                                              size_t size,
                                              gl::BufferUsage usage,
                                              GLbitfield flags)
{
    ContextVk *contextVk = vk::GetImpl(context);
    VkMemoryPropertyFlags memoryPropertyFlags = 0;
    bool persistentMapRequired = false;
    const bool isExternalBuffer = clientBuffer != nullptr;

    switch (usage)
    {
        case gl::BufferUsage::InvalidEnum:
        {
            // glBufferStorage API call
            memoryPropertyFlags =
                GetStorageMemoryType(contextVk->getRenderer(), flags, isExternalBuffer);
            persistentMapRequired = (flags & GL_MAP_PERSISTENT_BIT_EXT) != 0;
            break;
        }
        default:
        {
            // glBufferData API call
            memoryPropertyFlags = GetPreferredMemoryType(contextVk->getRenderer(), target, usage);
            break;
        }
    }

    if (isExternalBuffer)
    {
        ANGLE_TRY(setExternalBufferData(context, target, clientBuffer, size, memoryPropertyFlags));
        if (!mBuffer.isHostVisible())
        {
            // If the external buffer's memory does not have the host-visible property, we cannot
            // support a persistent map request.
            ANGLE_VK_CHECK(contextVk, !persistentMapRequired, VK_ERROR_MEMORY_MAP_FAILED);
        }

        mClientBuffer = clientBuffer;

        return angle::Result::Continue;
    }
    return setDataWithMemoryType(context, target, data, size, memoryPropertyFlags, usage);
}

angle::Result BufferVk::setData(const gl::Context *context,
                                gl::BufferBinding target,
                                const void *data,
                                size_t size,
                                gl::BufferUsage usage)
{
    ContextVk *contextVk = vk::GetImpl(context);
    // Assume host visible/coherent memory available.
    VkMemoryPropertyFlags memoryPropertyFlags =
        GetPreferredMemoryType(contextVk->getRenderer(), target, usage);
    return setDataWithMemoryType(context, target, data, size, memoryPropertyFlags, usage);
}

angle::Result BufferVk::setDataWithMemoryType(const gl::Context *context,
                                              gl::BufferBinding target,
                                              const void *data,
                                              size_t size,
                                              VkMemoryPropertyFlags memoryPropertyFlags,
                                              gl::BufferUsage usage)
{
    ContextVk *contextVk = vk::GetImpl(context);
    vk::Renderer *renderer = contextVk->getRenderer();

    // Since the buffer is being entirely reinitialized, reset the valid-data flag. If the caller
    // passed in data to fill the buffer, the flag will be updated when the data is copied to the
    // buffer.
    mHasValidData = false;

    if (size == 0)
    {
        // Nothing to do.
        return angle::Result::Continue;
    }

    const BufferUsageType usageType = GetBufferUsageType(usage);
    const BufferUpdateType updateType =
        calculateBufferUpdateTypeOnFullUpdate(renderer, size, memoryPropertyFlags, usageType, data);

    if (updateType == BufferUpdateType::StorageRedefined)
    {
        mUsageType = usageType;
        mMemoryPropertyFlags = memoryPropertyFlags;
        ANGLE_TRY(GetMemoryTypeIndex(contextVk, size, memoryPropertyFlags, &mMemoryTypeIndex));
        ANGLE_TRY(acquireBufferHelper(contextVk, size, mUsageType));
    }
    else if (size != static_cast<size_t>(mState.getSize()))
    {
        if (mBuffer.onBufferUserSizeChange(renderer))
        {
            // If we have a dedicated VkBuffer created with the user's size, we have to recreate
            // that VkBuffer when the user size changes, even if the underlying storage is reused.
            // When this happens, we must notify other objects that are observing this buffer, such
            // as the vertex array. The vertex array observes the buffer's storage changes because
            // it uses the VkBuffer directly; now that the VkBuffer has changed, it needs to
            // re-process the buffer just as if the storage had been reallocated.
            onStateChange(angle::SubjectMessage::InternalMemoryAllocationChanged);
        }
    }

    if (data != nullptr)
    {
        BufferDataSource dataSource = {};
        dataSource.data = data;

        // Handle full-buffer updates similarly to glBufferSubData
        ANGLE_TRY(setDataImpl(contextVk, size, dataSource, size, 0, updateType));
    }

    return angle::Result::Continue;
}

angle::Result BufferVk::setSubData(const gl::Context *context,
                                   gl::BufferBinding target,
                                   const void *data,
                                   size_t size,
                                   size_t offset)
{
    ASSERT(mBuffer.valid());

    BufferDataSource dataSource = {};
    dataSource.data = data;

    ContextVk *contextVk = vk::GetImpl(context);
    return setDataImpl(contextVk, static_cast<size_t>(mState.getSize()), dataSource, size, offset,
                       BufferUpdateType::ContentsUpdate);
}

angle::Result BufferVk::copySubData(const gl::Context *context,
                                    BufferImpl *source,
                                    GLintptr sourceOffset,
                                    GLintptr destOffset,
                                    GLsizeiptr size)
{
    ASSERT(mBuffer.valid());

    ContextVk *contextVk = vk::GetImpl(context);
    BufferVk *sourceVk = GetAs<BufferVk>(source);

    BufferDataSource dataSource = {};
    dataSource.buffer = &sourceVk->getBuffer();
    dataSource.bufferOffset = static_cast<VkDeviceSize>(sourceOffset);

    ASSERT(dataSource.buffer->valid());

    return setDataImpl(contextVk, static_cast<size_t>(mState.getSize()), dataSource, size,
                       destOffset, BufferUpdateType::ContentsUpdate);
}

angle::Result BufferVk::allocStagingBuffer(ContextVk *contextVk,
                                           vk::MemoryCoherency coherency,
                                           VkDeviceSize size,
                                           uint8_t **mapPtr)
{
    ASSERT(!mIsStagingBufferMapped);

    if (mStagingBuffer.valid())
    {
        if (size <= mStagingBuffer.getSize() && IsCached(coherency) == mStagingBuffer.isCached() &&
            contextVk->getRenderer()->hasResourceUseFinished(mStagingBuffer.getResourceUse()))
        {
            // If size is big enough and it is idle, then just reuse the existing staging buffer
            *mapPtr = mStagingBuffer.getMappedMemory();
            mIsStagingBufferMapped = true;
            return angle::Result::Continue;
        }
        mStagingBuffer.release(contextVk->getRenderer());
    }

    ANGLE_TRY(
        contextVk->initBufferForBufferCopy(&mStagingBuffer, static_cast<size_t>(size), coherency));
    *mapPtr = mStagingBuffer.getMappedMemory();
    mIsStagingBufferMapped = true;

    return angle::Result::Continue;
}

angle::Result BufferVk::flushStagingBuffer(ContextVk *contextVk,
                                           VkDeviceSize offset,
                                           VkDeviceSize size)
{
    vk::Renderer *renderer = contextVk->getRenderer();

    ASSERT(mIsStagingBufferMapped);
    ASSERT(mStagingBuffer.valid());

    if (!mStagingBuffer.isCoherent())
    {
        ANGLE_TRY(mStagingBuffer.flush(renderer));
    }

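    // Note: VkBufferCopy fields are, in order, srcOffset, dstOffset, and copy size; here the
    // source is the staging buffer and the destination is mBuffer.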
    VkBufferCopy copyRegion = {mStagingBuffer.getOffset(), mBuffer.getOffset() + offset, size};
    ANGLE_TRY(CopyBuffers(contextVk, &mStagingBuffer, &mBuffer, 1, &copyRegion));

    return angle::Result::Continue;
}

angle::Result BufferVk::handleDeviceLocalBufferMap(ContextVk *contextVk,
                                                   VkDeviceSize offset,
                                                   VkDeviceSize size,
                                                   uint8_t **mapPtr)
{
    vk::Renderer *renderer = contextVk->getRenderer();
    ANGLE_TRY(
        allocStagingBuffer(contextVk, vk::MemoryCoherency::CachedPreferCoherent, size, mapPtr));
    ANGLE_TRY(mStagingBuffer.flush(renderer));

    // Copy data from the device-local buffer to the host-visible staging buffer.
    VkBufferCopy copyRegion = {mBuffer.getOffset() + offset, mStagingBuffer.getOffset(), size};
    ANGLE_TRY(CopyBuffers(contextVk, &mBuffer, &mStagingBuffer, 1, &copyRegion));
    ANGLE_TRY(mStagingBuffer.waitForIdle(contextVk, "GPU stall due to mapping device local buffer",
                                         RenderPassClosureReason::DeviceLocalBufferMap));
    // Since coherent memory is only preferred, we may end up with non-coherent memory.  Always
    // call invalidate here (it checks the memory flags before actually calling into the driver).
    ANGLE_TRY(mStagingBuffer.invalidate(renderer));

    return angle::Result::Continue;
}

angle::Result BufferVk::map(const gl::Context *context, GLenum access, void **mapPtr)
{
    ASSERT(mBuffer.valid());
    ASSERT(access == GL_WRITE_ONLY_OES);

    return mapImpl(vk::GetImpl(context), GL_MAP_WRITE_BIT, mapPtr);
}

angle::Result BufferVk::mapRange(const gl::Context *context,
                                 size_t offset,
                                 size_t length,
                                 GLbitfield access,
                                 void **mapPtr)
{
    ANGLE_TRACE_EVENT0("gpu.angle", "BufferVk::mapRange");
    return mapRangeImpl(vk::GetImpl(context), offset, length, access, mapPtr);
}

angle::Result BufferVk::mapImpl(ContextVk *contextVk, GLbitfield access, void **mapPtr)
{
    return mapRangeImpl(contextVk, 0, static_cast<VkDeviceSize>(mState.getSize()), access, mapPtr);
}

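// "Ghosting" a mapped buffer: when the GPU is still reading the current buffer, a fresh
// BufferHelper is acquired and the old contents are copied over on the CPU, so the app can write
// immediately without stalling.  The buffersGhosted perf counter below tracks how often this
// happens.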
angle::Result BufferVk::ghostMappedBuffer(ContextVk *contextVk,
                                          VkDeviceSize offset,
                                          VkDeviceSize length,
                                          GLbitfield access,
                                          void **mapPtr)
{
    // We shouldn't get here if it is external memory
    ASSERT(!isExternalBuffer());

    ++contextVk->getPerfCounters().buffersGhosted;

    // If we are creating a new buffer because the GPU is using it as read-only, then we
    // also need to copy the contents of the previous buffer into the new buffer, in
    // case the caller only updates a portion of the new buffer.
    vk::BufferHelper src = std::move(mBuffer);
    ANGLE_TRY(acquireBufferHelper(contextVk, static_cast<size_t>(mState.getSize()),
                                  BufferUsageType::Dynamic));

    // Before returning the new buffer, map the previous buffer and copy its entire
    // contents into the new buffer.
    uint8_t *srcMapPtr = nullptr;
    uint8_t *dstMapPtr = nullptr;
    ANGLE_TRY(src.map(contextVk, &srcMapPtr));
    ANGLE_TRY(mBuffer.map(contextVk, &dstMapPtr));

    ASSERT(src.isCoherent());
    ASSERT(mBuffer.isCoherent());

    // No need to copy over [offset, offset + length), just around it
    if ((access & GL_MAP_INVALIDATE_RANGE_BIT) != 0)
    {
        if (offset != 0)
        {
            memcpy(dstMapPtr, srcMapPtr, static_cast<size_t>(offset));
        }
        size_t totalSize = static_cast<size_t>(mState.getSize());
        size_t remainingStart = static_cast<size_t>(offset + length);
        size_t remainingSize = totalSize - remainingStart;
        if (remainingSize != 0)
        {
            memcpy(dstMapPtr + remainingStart, srcMapPtr + remainingStart, remainingSize);
        }
    }
    else
    {
        memcpy(dstMapPtr, srcMapPtr, static_cast<size_t>(mState.getSize()));
    }

    ANGLE_TRY(contextVk->releaseBufferAllocation(&src));

    // Return the already mapped pointer with the offset adjustment to avoid the call to unmap().
    *mapPtr = dstMapPtr + offset;

    return angle::Result::Continue;
}

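// Rough decision tree for mapRangeImpl, summarizing the branches below:
//   GL_MAP_UNSYNCHRONIZED_BIT       -> map immediately (through a staging buffer if the buffer is
//                                      not host-visible).
//   read-only access                -> wait for pending GPU writes to finish, then map.
//   write access, not host-visible  -> stage through handleDeviceLocalBufferMap.
//   write access, buffer idle       -> map directly.
//   write access, buffer busy:
//     entire buffer invalidated     -> acquire a new buffer and map it.
//     small invalidated sub-range   -> hand out a staging-buffer pointer, flushed at unmap time.
//     GPU only reads the buffer     -> ghost the buffer (see ghostMappedBuffer above).
//     otherwise                     -> wait for the GPU to finish (worst case), then map.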
angle::Result BufferVk::mapRangeImpl(ContextVk *contextVk,
                                     VkDeviceSize offset,
                                     VkDeviceSize length,
                                     GLbitfield access,
                                     void **mapPtr)
{
    vk::Renderer *renderer = contextVk->getRenderer();
    ASSERT(mBuffer.valid());

    // Record the map call parameters in case this call comes from ANGLE internals (in which case
    // access/offset/length will be inconsistent with mState).
    mIsMappedForWrite = (access & GL_MAP_WRITE_BIT) != 0;
    mMappedOffset = offset;
    mMappedLength = length;

    uint8_t **mapPtrBytes = reinterpret_cast<uint8_t **>(mapPtr);
    bool hostVisible = mBuffer.isHostVisible();

    // MAP_UNSYNCHRONIZED_BIT, so immediately map.
    if ((access & GL_MAP_UNSYNCHRONIZED_BIT) != 0)
    {
        if (hostVisible)
        {
            return mBuffer.mapWithOffset(contextVk, mapPtrBytes, static_cast<size_t>(offset));
        }
        return handleDeviceLocalBufferMap(contextVk, offset, length, mapPtrBytes);
    }

    // Read case
    if ((access & GL_MAP_WRITE_BIT) == 0)
    {
        // If the app is not going to write, all we need is to ensure the GPU's writes are
        // finished.  Concurrent reads from the CPU and GPU are allowed.
        if (!renderer->hasResourceUseFinished(mBuffer.getWriteResourceUse()))
        {
            // If there are unflushed write commands for the resource, flush them.
            if (contextVk->hasUnsubmittedUse(mBuffer.getWriteResourceUse()))
            {
                ANGLE_TRY(contextVk->flushImpl(nullptr, nullptr,
                                               RenderPassClosureReason::BufferWriteThenMap));
            }
            ANGLE_TRY(renderer->finishResourceUse(contextVk, mBuffer.getWriteResourceUse()));
        }
        if (hostVisible)
        {
            return mBuffer.mapWithOffset(contextVk, mapPtrBytes, static_cast<size_t>(offset));
        }
        return handleDeviceLocalBufferMap(contextVk, offset, length, mapPtrBytes);
    }

    // Write case
    if (!hostVisible)
    {
        return handleDeviceLocalBufferMap(contextVk, offset, length, mapPtrBytes);
    }

    // Write case, buffer not in use.
    if (isExternalBuffer() || !isCurrentlyInUse(contextVk->getRenderer()))
    {
        return mBuffer.mapWithOffset(contextVk, mapPtrBytes, static_cast<size_t>(offset));
    }

    // Write case, buffer in use.
    //
    // Here, we try to map the buffer, but it's busy. Instead of waiting for the GPU to
    // finish, we just allocate a new buffer if:
    // 1.) Caller has told us it doesn't care about previous contents, or
    // 2.) The GPU won't write to the buffer.

    bool rangeInvalidate = (access & GL_MAP_INVALIDATE_RANGE_BIT) != 0;
    bool entireBufferInvalidated =
        ((access & GL_MAP_INVALIDATE_BUFFER_BIT) != 0) ||
        (rangeInvalidate && offset == 0 && static_cast<VkDeviceSize>(mState.getSize()) == length);

    if (entireBufferInvalidated)
    {
        ANGLE_TRY(acquireBufferHelper(contextVk, static_cast<size_t>(mState.getSize()),
                                      BufferUsageType::Dynamic));
        return mBuffer.mapWithOffset(contextVk, mapPtrBytes, static_cast<size_t>(offset));
    }

    bool smallMapRange = (length < static_cast<VkDeviceSize>(mState.getSize()) / 2);

    if (smallMapRange && rangeInvalidate)
    {
        ANGLE_TRY(allocStagingBuffer(contextVk, vk::MemoryCoherency::CachedNonCoherent,
                                     static_cast<size_t>(length), mapPtrBytes));
        return angle::Result::Continue;
    }

    if (renderer->hasResourceUseFinished(mBuffer.getWriteResourceUse()))
    {
        // This will keep the new buffer mapped and update mapPtr, so return immediately.
        return ghostMappedBuffer(contextVk, offset, length, access, mapPtr);
    }

    // Write case (worst case, buffer in use for write)
    ANGLE_TRY(mBuffer.waitForIdle(contextVk, "GPU stall due to mapping buffer in use by the GPU",
                                  RenderPassClosureReason::BufferInUseWhenSynchronizedMap));
    return mBuffer.mapWithOffset(contextVk, mapPtrBytes, static_cast<size_t>(offset));
}

angle::Result BufferVk::unmap(const gl::Context *context, GLboolean *result)
{
    ANGLE_TRY(unmapImpl(vk::GetImpl(context)));

    // This should be false if the contents have been corrupted through external means.  Vulkan
    // doesn't provide such information.
    *result = true;

    return angle::Result::Continue;
}

angle::Result BufferVk::unmapImpl(ContextVk *contextVk)
{
    ASSERT(mBuffer.valid());

    if (mIsStagingBufferMapped)
    {
        ASSERT(mStagingBuffer.valid());
        // The buffer is device-local, or this is the small-range-map optimization.
        if (mIsMappedForWrite)
        {
            ANGLE_TRY(flushStagingBuffer(contextVk, mMappedOffset, mMappedLength));
        }

        mIsStagingBufferMapped = false;
    }
    else
    {
        ASSERT(mBuffer.isHostVisible());
        mBuffer.unmap(contextVk->getRenderer());
    }

    if (mIsMappedForWrite)
    {
        dataUpdated();
    }

    // Reset the mapping parameters
    mIsMappedForWrite = false;
    mMappedOffset = 0;
    mMappedLength = 0;

    return angle::Result::Continue;
}

angle::Result BufferVk::getSubData(const gl::Context *context,
                                   GLintptr offset,
                                   GLsizeiptr size,
                                   void *outData)
{
    ASSERT(offset + size <= getSize());
    ASSERT(mBuffer.valid());
    ContextVk *contextVk = vk::GetImpl(context);
    void *mapPtr;
    ANGLE_TRY(mapRangeImpl(contextVk, offset, size, GL_MAP_READ_BIT, &mapPtr));
    memcpy(outData, mapPtr, size);
    return unmapImpl(contextVk);
}

angle::Result BufferVk::getIndexRange(const gl::Context *context,
                                      gl::DrawElementsType type,
                                      size_t offset,
                                      size_t count,
                                      bool primitiveRestartEnabled,
                                      gl::IndexRange *outRange)
{
    ContextVk *contextVk = vk::GetImpl(context);
    vk::Renderer *renderer = contextVk->getRenderer();

    // This is a workaround for the mock ICD not implementing buffer memory state.
    // Could be removed if https://github.com/KhronosGroup/Vulkan-Tools/issues/84 is fixed.
    if (renderer->isMockICDEnabled())
    {
        outRange->start = 0;
        outRange->end = 0;
        return angle::Result::Continue;
    }

    ANGLE_TRACE_EVENT0("gpu.angle", "BufferVk::getIndexRange");

    void *mapPtr;
    ANGLE_TRY(mapRangeImpl(contextVk, offset, getSize(), GL_MAP_READ_BIT, &mapPtr));
    *outRange = gl::ComputeIndexRange(type, mapPtr, count, primitiveRestartEnabled);
    ANGLE_TRY(unmapImpl(contextVk));

    return angle::Result::Continue;
}

angle::Result BufferVk::updateBuffer(ContextVk *contextVk,
                                     size_t bufferSize,
                                     const BufferDataSource &dataSource,
                                     size_t updateSize,
                                     size_t updateOffset)
{
    // To copy on the CPU, the destination must be host-visible.  The source should also be either
    // a CPU pointer or a host-visible buffer that is not being written to by the GPU.
    const bool shouldCopyOnCPU =
        mBuffer.isHostVisible() &&
        (dataSource.data != nullptr ||
         ShouldUseCPUToCopyData(contextVk, *dataSource.buffer, updateSize, bufferSize));

    if (shouldCopyOnCPU)
    {
        ANGLE_TRY(directUpdate(contextVk, dataSource, updateSize, updateOffset));
    }
    else
    {
        ANGLE_TRY(stagedUpdate(contextVk, dataSource, updateSize, updateOffset));
    }
    return angle::Result::Continue;
}

angle::Result BufferVk::directUpdate(ContextVk *contextVk,
                                     const BufferDataSource &dataSource,
                                     size_t size,
                                     size_t offset)
{
    vk::Renderer *renderer = contextVk->getRenderer();
    uint8_t *srcPointerMapped = nullptr;
    const uint8_t *srcPointer = nullptr;
    uint8_t *dstPointer = nullptr;

    // Map the destination buffer.
    ASSERT(mBuffer.isHostVisible());
    ANGLE_TRY(mBuffer.mapWithOffset(contextVk, &dstPointer, offset));
    ASSERT(dstPointer);

    // If source data is coming from a buffer, map it.  If this is a self-copy, avoid
    // double-mapping the buffer.
    if (dataSource.data != nullptr)
    {
        srcPointer = static_cast<const uint8_t *>(dataSource.data);
    }
    else
    {
        ANGLE_TRY(dataSource.buffer->mapWithOffset(contextVk, &srcPointerMapped,
                                                   static_cast<size_t>(dataSource.bufferOffset)));
        srcPointer = srcPointerMapped;
    }

    memcpy(dstPointer, srcPointer, size);

    // External memory may end up being non-coherent.
    if (!mBuffer.isCoherent())
    {
        ANGLE_TRY(mBuffer.flush(renderer, offset, size));
    }

    // Unmap the destination and source buffers if applicable.
    //
    // If the buffer has dynamic usage, the intent is frequent client-side updates to the buffer.
    // Don't unmap the buffer on the CPU; we will take care of unmapping when releasing the buffer
    // to either the renderer or mBufferFreeList.
    if (GetBufferUsageType(mState.getUsage()) == BufferUsageType::Static)
    {
        mBuffer.unmap(renderer);
    }

    if (srcPointerMapped != nullptr)
    {
        dataSource.buffer->unmap(renderer);
    }

    return angle::Result::Continue;
}

angle::Result BufferVk::stagedUpdate(ContextVk *contextVk,
                                     const BufferDataSource &dataSource,
                                     size_t size,
                                     size_t offset)
{
    // If data is coming from a CPU pointer, stage it in a temporary staging buffer.
    // Otherwise, do a GPU copy directly from the given buffer.
    if (dataSource.data != nullptr)
    {
        uint8_t *mapPointer = nullptr;
        ANGLE_TRY(allocStagingBuffer(contextVk, vk::MemoryCoherency::CachedNonCoherent, size,
                                     &mapPointer));
        memcpy(mapPointer, dataSource.data, size);
        ANGLE_TRY(flushStagingBuffer(contextVk, offset, size));
        mIsStagingBufferMapped = false;
    }
    else
    {
        // Check for self-dependency.
        vk::CommandBufferAccess access;
        if (dataSource.buffer->getBufferSerial() == mBuffer.getBufferSerial())
        {
            access.onBufferSelfCopy(&mBuffer);
        }
        else
        {
            access.onBufferTransferRead(dataSource.buffer);
            access.onBufferTransferWrite(&mBuffer);
        }

        vk::OutsideRenderPassCommandBuffer *commandBuffer;
        ANGLE_TRY(contextVk->getOutsideRenderPassCommandBuffer(access, &commandBuffer));

        // Enqueue a copy command on the GPU.
        const VkBufferCopy copyRegion = {dataSource.bufferOffset + dataSource.buffer->getOffset(),
                                         static_cast<VkDeviceSize>(offset) + mBuffer.getOffset(),
                                         static_cast<VkDeviceSize>(size)};

        commandBuffer->copyBuffer(dataSource.buffer->getBuffer(), mBuffer.getBuffer(), 1,
                                  &copyRegion);
    }

    return angle::Result::Continue;
}

angle::Result BufferVk::acquireAndUpdate(ContextVk *contextVk,
                                         size_t bufferSize,
                                         const BufferDataSource &dataSource,
                                         size_t updateSize,
                                         size_t updateOffset,
                                         BufferUpdateType updateType)
{
    // We shouldn't get here if this is external memory
    ASSERT(!isExternalBuffer());
    // If StorageRedefined, we cannot use mState.getSize() to allocate a new buffer.
    ASSERT(updateType != BufferUpdateType::StorageRedefined);
    ASSERT(mBuffer.valid());
    ASSERT(mBuffer.getSize() >= bufferSize);

    // Here we acquire a new BufferHelper and directUpdate() the new buffer.
    // If the subData size was less than the buffer's size we additionally enqueue
    // a GPU copy of the remaining regions from the old mBuffer to the new one.
    vk::BufferHelper prevBuffer;
    size_t offsetAfterSubdata = (updateOffset + updateSize);
    bool updateRegionBeforeSubData = mHasValidData && (updateOffset > 0);
    bool updateRegionAfterSubData = mHasValidData && (offsetAfterSubdata < bufferSize);

    uint8_t *prevMapPtrBeforeSubData = nullptr;
    uint8_t *prevMapPtrAfterSubData = nullptr;
    if (updateRegionBeforeSubData || updateRegionAfterSubData)
    {
        prevBuffer = std::move(mBuffer);

        // The total bytes that we need to copy from old buffer to new buffer
        size_t copySize = bufferSize - updateSize;

        // If the buffer is host visible and the GPU is not writing to it, we use the CPU to do the
        // copy.  We need to save the source buffer pointer before we acquire a new buffer.
        if (ShouldUseCPUToCopyData(contextVk, prevBuffer, copySize, bufferSize))
        {
            uint8_t *mapPointer = nullptr;
            // prevBuffer will be recycled (or released and unmapped) by acquireBufferHelper
            ANGLE_TRY(prevBuffer.map(contextVk, &mapPointer));
            ASSERT(mapPointer);
            prevMapPtrBeforeSubData = mapPointer;
            prevMapPtrAfterSubData = mapPointer + offsetAfterSubdata;
        }
    }

    ANGLE_TRY(acquireBufferHelper(contextVk, bufferSize, BufferUsageType::Dynamic));
    ANGLE_TRY(updateBuffer(contextVk, bufferSize, dataSource, updateSize, updateOffset));

    constexpr int kMaxCopyRegions = 2;
    angle::FixedVector<VkBufferCopy, kMaxCopyRegions> copyRegions;

    if (updateRegionBeforeSubData)
    {
        if (prevMapPtrBeforeSubData)
        {
            BufferDataSource beforeSrc = {};
            beforeSrc.data = prevMapPtrBeforeSubData;

            ANGLE_TRY(directUpdate(contextVk, beforeSrc, updateOffset, 0));
        }
        else
        {
            copyRegions.push_back({prevBuffer.getOffset(), mBuffer.getOffset(), updateOffset});
        }
    }

    if (updateRegionAfterSubData)
    {
        size_t copySize = bufferSize - offsetAfterSubdata;
        if (prevMapPtrAfterSubData)
        {
            BufferDataSource afterSrc = {};
            afterSrc.data = prevMapPtrAfterSubData;

            ANGLE_TRY(directUpdate(contextVk, afterSrc, copySize, offsetAfterSubdata));
        }
        else
        {
            copyRegions.push_back({prevBuffer.getOffset() + offsetAfterSubdata,
                                   mBuffer.getOffset() + offsetAfterSubdata, copySize});
        }
    }

    if (!copyRegions.empty())
    {
        ANGLE_TRY(CopyBuffers(contextVk, &prevBuffer, &mBuffer,
                              static_cast<uint32_t>(copyRegions.size()), copyRegions.data()));
    }

    if (prevBuffer.valid())
    {
        ANGLE_TRY(contextVk->releaseBufferAllocation(&prevBuffer));
    }

    return angle::Result::Continue;
}

angle::Result BufferVk::setDataImpl(ContextVk *contextVk,
                                    size_t bufferSize,
                                    const BufferDataSource &dataSource,
                                    size_t updateSize,
                                    size_t updateOffset,
                                    BufferUpdateType updateType)
{
    // if the buffer is currently in use
    //     if it isn't an external buffer and not a self-copy and sub data size meets threshold
    //         acquire a new BufferHelper from the pool
    //     else stage the update
    // else update the buffer directly
    if (isCurrentlyInUse(contextVk->getRenderer()))
    {
        // The acquire-and-update path creates a new buffer, which is sometimes more efficient than
        // trying to update the existing one.  Firstly, this is not done in the following
        // situations:
        //
        // - For external buffers, the underlying storage cannot be reallocated.
        // - If storage has just been redefined, this path is not taken because a new buffer has
        //   already been created by the caller.  Besides, this path uses mState.getSize(), which
        //   the frontend updates only after this call in situations where the storage may be
        //   redefined.  This could happen if the buffer memory is DEVICE_LOCAL and
        //   renderer->getFeatures().allocateNonZeroMemory.enabled is true.  In this case a
        //   copyToBuffer is immediately issued after allocation and isCurrentlyInUse will be true.
        // - If this is a self copy through glCopyBufferSubData, |dataSource| will contain a
        //   reference to |mBuffer|, in which case source information is lost after acquiring a new
        //   buffer.
        //
        // Additionally, this path is taken only if either of the following conditions are true:
        //
        // - If BufferVk does not have any valid data.  This means that there is no data to be
        //   copied from the old buffer to the new one after acquiring it.  This could happen when
        //   the application calls glBufferData with the same size and we reuse the existing buffer
        //   storage.
        // - If the buffer is used read-only in the current render pass.  In this case, acquiring a
        //   new buffer is preferred to avoid breaking the render pass.
        // - The update modifies a significant portion of the buffer
        // - The preferCPUForBufferSubData feature is enabled.
        //
        const bool canAcquireAndUpdate = !isExternalBuffer() &&
                                         updateType != BufferUpdateType::StorageRedefined &&
                                         !IsSelfCopy(dataSource, mBuffer);
        if (canAcquireAndUpdate &&
            (!mHasValidData || ShouldAvoidRenderPassBreakOnUpdate(contextVk, mBuffer, bufferSize) ||
             ShouldAllocateNewMemoryForUpdate(contextVk, updateSize, bufferSize)))
        {
            ANGLE_TRY(acquireAndUpdate(contextVk, bufferSize, dataSource, updateSize, updateOffset,
                                       updateType));
        }
        else
        {
            if (canAcquireAndUpdate && RenderPassUsesBufferForReadOnly(contextVk, mBuffer))
            {
                ANGLE_VK_PERF_WARNING(contextVk, GL_DEBUG_SEVERITY_LOW,
                                      "Breaking the render pass on small upload to large buffer");
            }

            ANGLE_TRY(stagedUpdate(contextVk, dataSource, updateSize, updateOffset));
        }
    }
    else
    {
        ANGLE_TRY(updateBuffer(contextVk, bufferSize, dataSource, updateSize, updateOffset));
    }

    // Update conversions
    dataUpdated();

    return angle::Result::Continue;
}

ConversionBuffer *BufferVk::getVertexConversionBuffer(vk::Renderer *renderer,
                                                      angle::FormatID formatID,
                                                      GLuint stride,
                                                      size_t offset,
                                                      bool hostVisible)
{
    for (VertexConversionBuffer &buffer : mVertexConversionBuffers)
    {
        if (buffer.formatID == formatID && buffer.stride == stride && buffer.offset == offset)
        {
            ASSERT(buffer.data && buffer.data->valid());
            return &buffer;
        }
    }

    mVertexConversionBuffers.emplace_back(renderer, formatID, stride, offset, hostVisible);
    return &mVertexConversionBuffers.back();
}

void BufferVk::dataUpdated()
{
    for (VertexConversionBuffer &buffer : mVertexConversionBuffers)
    {
        buffer.dirty = true;
    }
    // Now we have valid data
    mHasValidData = true;
}

void BufferVk::onDataChanged()
{
    dataUpdated();
}

angle::Result BufferVk::acquireBufferHelper(ContextVk *contextVk,
                                            size_t sizeInBytes,
                                            BufferUsageType usageType)
{
    vk::Renderer *renderer = contextVk->getRenderer();
    size_t size = roundUpPow2(sizeInBytes, kBufferSizeGranularity);
    size_t alignment = renderer->getDefaultBufferAlignment();

    if (mBuffer.valid())
    {
        ANGLE_TRY(contextVk->releaseBufferAllocation(&mBuffer));
    }

    // Allocate the buffer directly
    ANGLE_TRY(
        contextVk->initBufferAllocation(&mBuffer, mMemoryTypeIndex, size, alignment, usageType));

    // Tell the observers (front end) that a new buffer was created, so the necessary
    // dirty bits can be set. This allows the buffer views pointing to the old buffer to
    // be recreated and point to the new buffer, along with updating the descriptor sets
    // to use the new buffer.
    onStateChange(angle::SubjectMessage::InternalMemoryAllocationChanged);

    return angle::Result::Continue;
}

bool BufferVk::isCurrentlyInUse(vk::Renderer *renderer) const
{
    return !renderer->hasResourceUseFinished(mBuffer.getResourceUse());
}

// When a buffer is being completely changed, calculate whether it's better to allocate a new
// buffer or overwrite the existing one.
BufferUpdateType BufferVk::calculateBufferUpdateTypeOnFullUpdate(
    vk::Renderer *renderer,
    size_t size,
    VkMemoryPropertyFlags memoryPropertyFlags,
    BufferUsageType usageType,
    const void *data) const
{
    // 0-sized updates should be no-op'd before this call.
    ASSERT(size > 0);

    // If there is no existing buffer, this cannot be a content update.
    if (!mBuffer.valid())
    {
        return BufferUpdateType::StorageRedefined;
    }

    const bool inUseAndRespecifiedWithoutData = data == nullptr && isCurrentlyInUse(renderer);
    bool redefineStorage = shouldRedefineStorage(renderer, usageType, memoryPropertyFlags, size);

    // Create a new buffer if the buffer is busy and it's being redefined without data.
    // Additionally, a new buffer is created if any of the parameters change (memory type, usage,
    // size).
    return redefineStorage || inUseAndRespecifiedWithoutData ? BufferUpdateType::StorageRedefined
                                                             : BufferUpdateType::ContentsUpdate;
}

bool BufferVk::shouldRedefineStorage(vk::Renderer *renderer,
                                     BufferUsageType usageType,
                                     VkMemoryPropertyFlags memoryPropertyFlags,
                                     size_t size) const
{
    if (mUsageType != usageType)
    {
        return true;
    }

    if (mMemoryPropertyFlags != memoryPropertyFlags)
    {
        return true;
    }

    if (size > mBuffer.getSize())
    {
        return true;
    }
    else
    {
        size_t paddedBufferSize =
            (renderer->getFeatures().padBuffersToMaxVertexAttribStride.enabled)
                ? (size + static_cast<size_t>(renderer->getMaxVertexAttribStride()))
                : size;
        size_t sizeInBytes = roundUpPow2(paddedBufferSize, kBufferSizeGranularity);
        size_t alignedSize = roundUp(sizeInBytes, renderer->getDefaultBufferAlignment());
        if (alignedSize > mBuffer.getSize())
        {
            return true;
        }
    }

    return false;
}
}  // namespace rx