//
// Copyright 2016 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// BufferVk.cpp:
//    Implements the class methods for BufferVk.
//

#include "libANGLE/renderer/vulkan/BufferVk.h"

#include "common/FixedVector.h"
#include "common/debug.h"
#include "common/mathutil.h"
#include "common/utilities.h"
#include "libANGLE/Context.h"
#include "libANGLE/renderer/vulkan/ContextVk.h"
#include "libANGLE/renderer/vulkan/vk_renderer.h"

namespace rx
{
VkBufferUsageFlags GetDefaultBufferUsageFlags(vk::Renderer *renderer)
{
    // We could potentially use multiple backing buffers for different usages.
    // For now keep a single buffer with all relevant usage flags.
    VkBufferUsageFlags defaultBufferUsageFlags =
        VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
        VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT |
        VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
        VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
        VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
    if (renderer->getFeatures().supportsTransformFeedbackExtension.enabled)
    {
        defaultBufferUsageFlags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT |
                                   VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_COUNTER_BUFFER_BIT_EXT;
    }
    return defaultBufferUsageFlags;
}

namespace
{
constexpr VkMemoryPropertyFlags kDeviceLocalFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
constexpr VkMemoryPropertyFlags kDeviceLocalHostCoherentFlags =
    (VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
     VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
constexpr VkMemoryPropertyFlags kHostCachedFlags =
    (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
     VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
constexpr VkMemoryPropertyFlags kHostUncachedFlags =
    (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
constexpr VkMemoryPropertyFlags kHostCachedNonCoherentFlags =
    (VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
// Vertex attribute buffers are used as storage buffers for conversion in compute, where access to
// the buffer is made in 4-byte chunks. Assume the size of the buffer is 4k+n where n is in [1, 3].
// On some hardware, reading 4 bytes from address 4k returns 0, making it impossible to read the
// last n bytes. By rounding up the buffer sizes to a multiple of 4, the problem is alleviated.
constexpr size_t kBufferSizeGranularity = 4;
static_assert(gl::isPow2(kBufferSizeGranularity), "use as alignment, must be power of two");

// Start with a fairly small buffer size. We can increase this dynamically as we convert more data.
constexpr size_t kConvertedArrayBufferInitialSize = 1024 * 8;

// Buffers that have a static usage pattern will be allocated in
// device-local memory to speed up access to and from the GPU.
// Buffers with a dynamic usage pattern, or that are frequently mapped,
// will request host-cached memory to speed up access from the CPU.
VkMemoryPropertyFlags GetPreferredMemoryType(vk::Renderer *renderer,
                                             gl::BufferBinding target,
                                             gl::BufferUsage usage)
{
    if (target == gl::BufferBinding::PixelUnpack)
    {
        return kHostCachedFlags;
    }

    switch (usage)
    {
        case gl::BufferUsage::StaticCopy:
        case gl::BufferUsage::StaticDraw:
        case gl::BufferUsage::StaticRead:
            // For static usage, request device-local memory.
            return renderer->getFeatures().preferDeviceLocalMemoryHostVisible.enabled
                       ? kDeviceLocalHostCoherentFlags
                       : kDeviceLocalFlags;
        case gl::BufferUsage::DynamicDraw:
        case gl::BufferUsage::StreamDraw:
            // For non-static usage where the CPU performs a write-only access, request
            // host-uncached memory.
            return renderer->getFeatures().preferHostCachedForNonStaticBufferUsage.enabled
                       ? kHostCachedFlags
                       : kHostUncachedFlags;
        case gl::BufferUsage::DynamicCopy:
        case gl::BufferUsage::DynamicRead:
        case gl::BufferUsage::StreamCopy:
        case gl::BufferUsage::StreamRead:
            // For all other types of usage, request host-cached memory.
            return renderer->getFeatures()
                           .preferCachedNoncoherentForDynamicStreamBufferUsage.enabled
                       ? kHostCachedNonCoherentFlags
                       : kHostCachedFlags;
        default:
            UNREACHABLE();
            return kHostCachedFlags;
    }
}

VkMemoryPropertyFlags GetStorageMemoryType(vk::Renderer *renderer,
                                           GLbitfield storageFlags,
                                           bool externalBuffer)
{
    const bool hasMapAccess =
        (storageFlags & (GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT_EXT)) != 0;

    if (renderer->getFeatures().preferDeviceLocalMemoryHostVisible.enabled)
    {
        const bool canUpdate = (storageFlags & GL_DYNAMIC_STORAGE_BIT_EXT) != 0;
        if (canUpdate || hasMapAccess || externalBuffer)
        {
            // We currently allocate coherent memory for persistently mapped buffers.
            // GL_EXT_buffer_storage allows non-coherent memory, but currently the implementation
            // of |glMemoryBarrier(CLIENT_MAPPED_BUFFER_BARRIER_BIT_EXT)| relies on the mapping
            // being coherent.
            //
            // If persistently mapped buffers ever use non-coherent memory, then said
            // |glMemoryBarrier| call must result in |vkInvalidateMappedMemoryRanges| for all
            // persistently mapped buffers.
            return kDeviceLocalHostCoherentFlags;
        }
        return kDeviceLocalFlags;
    }

    return hasMapAccess ? kHostCachedFlags : kDeviceLocalFlags;
}

bool ShouldAllocateNewMemoryForUpdate(ContextVk *contextVk, size_t subDataSize, size_t bufferSize)
{
    // A sub-data update with size > 50% of the buffer size meets the threshold to acquire a new
    // BufferHelper from the pool.
    size_t halfBufferSize = bufferSize / 2;
    if (subDataSize > halfBufferSize)
    {
        return true;
    }

    // If the GPU is busy, it is possible to use the CPU for updating sub-data instead, but since
    // that would need to create a duplicate of the buffer, a large enough buffer copy could
    // result in a performance regression.
    if (contextVk->getFeatures().preferCPUForBufferSubData.enabled)
    {
        // If the buffer is small enough, the cost of the barrier associated with the GPU copy
        // likely exceeds the overhead of the CPU copy. Duplicating the buffer allows the CPU to
        // write to the buffer immediately, thus avoiding the barrier that prevents parallel
        // operation.
        constexpr size_t kCpuCopyBufferSizeThreshold = 32 * 1024;
        if (bufferSize < kCpuCopyBufferSizeThreshold)
        {
            return true;
        }

        // To use the CPU for the sub-data update in larger buffers, the update should be sizable
        // enough compared to the whole buffer size. The threshold is chosen based on perf data
        // collected from Pixel devices. At 1/8 of the buffer size, the CPU overhead associated
        // with the extra data copy weighs less than the serialization caused by barriers.
        size_t subDataThreshold = bufferSize / 8;
        if (subDataSize > subDataThreshold)
        {
            return true;
        }
    }

    return false;
}

bool ShouldUseCPUToCopyData(ContextVk *contextVk,
                            const vk::BufferHelper &buffer,
                            size_t copySize,
                            size_t bufferSize)
{
    vk::Renderer *renderer = contextVk->getRenderer();

    // If the buffer is not host-visible, or if its last GPU write has not finished, it cannot be
    // read from the CPU.
    if (!buffer.isHostVisible() || !renderer->hasResourceUseFinished(buffer.getWriteResourceUse()))
    {
        return false;
    }

    // For some GPUs (e.g. ARM) we always prefer using the CPU to do the copy instead of the GPU,
    // to avoid pipeline bubbles. If the GPU is currently busy and the data copy size is less than
    // a certain threshold, we choose the CPU over the GPU for the copy to achieve better
    // parallelism.
    return renderer->getFeatures().preferCPUForBufferSubData.enabled ||
           (renderer->isCommandQueueBusy() &&
            copySize < renderer->getMaxCopyBytesUsingCPUWhenPreservingBufferData());
}

bool RenderPassUsesBufferForReadOnly(ContextVk *contextVk, const vk::BufferHelper &buffer)
{
    if (!contextVk->hasActiveRenderPass())
    {
        return false;
    }

    vk::RenderPassCommandBufferHelper &renderPassCommands =
        contextVk->getStartedRenderPassCommands();
    return renderPassCommands.usesBuffer(buffer) && !renderPassCommands.usesBufferForWrite(buffer);
}

// If a render pass is open which uses the buffer in read-only mode, a render pass break can be
// avoided by using acquireAndUpdate. This can be costly however if the update is very small, and
// is limited to platforms where a render pass break is itself costly (i.e. tile-based renderers).
bool ShouldAvoidRenderPassBreakOnUpdate(ContextVk *contextVk,
                                        const vk::BufferHelper &buffer,
                                        size_t bufferSize)
{
    // Only avoid breaking the render pass if the buffer is not so big that duplicating it would
    // outweigh the cost of breaking the render pass. A value of 1KB is temporarily chosen as a
    // heuristic, and can be adjusted when such a situation is encountered.
    constexpr size_t kPreferDuplicateOverRenderPassBreakMaxBufferSize = 1024;
    if (!contextVk->getFeatures().preferCPUForBufferSubData.enabled ||
        bufferSize > kPreferDuplicateOverRenderPassBreakMaxBufferSize)
    {
        return false;
    }

    return RenderPassUsesBufferForReadOnly(contextVk, buffer);
}

BufferUsageType GetBufferUsageType(gl::BufferUsage usage)
{
    return (usage == gl::BufferUsage::DynamicDraw || usage == gl::BufferUsage::DynamicCopy ||
            usage == gl::BufferUsage::DynamicRead)
               ? BufferUsageType::Dynamic
               : BufferUsageType::Static;
}

angle::Result GetMemoryTypeIndex(ContextVk *contextVk,
                                 VkDeviceSize size,
                                 VkMemoryPropertyFlags memoryPropertyFlags,
                                 uint32_t *memoryTypeIndexOut)
{
    vk::Renderer *renderer = contextVk->getRenderer();
    const vk::Allocator &allocator = renderer->getAllocator();

    bool persistentlyMapped = renderer->getFeatures().persistentlyMappedBuffers.enabled;
    VkBufferUsageFlags defaultBufferUsageFlags = GetDefaultBufferUsageFlags(renderer);
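
    // Note: this createInfo mirrors the default buffer creation parameters; in this helper it is
    // only used to ask the allocator for a compatible memory type index.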
    VkBufferCreateInfo createInfo = {};
    createInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    createInfo.flags = 0;
    createInfo.size = size;
    createInfo.usage = defaultBufferUsageFlags;
    createInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
    createInfo.queueFamilyIndexCount = 0;
    createInfo.pQueueFamilyIndices = nullptr;

    // Host-visible is required; all other bits are preferred (i.e., optional).
    VkMemoryPropertyFlags requiredFlags =
        (memoryPropertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
    VkMemoryPropertyFlags preferredFlags =
        (memoryPropertyFlags & (~VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT));

    // Check that the allocation is not too large.
    uint32_t memoryTypeIndex = 0;
    ANGLE_VK_TRY(contextVk, allocator.findMemoryTypeIndexForBufferInfo(
                                createInfo, requiredFlags, preferredFlags, persistentlyMapped,
                                &memoryTypeIndex));
    *memoryTypeIndexOut = memoryTypeIndex;

    return angle::Result::Continue;
}

bool IsSelfCopy(const BufferDataSource &dataSource, const vk::BufferHelper &destination)
{
    return dataSource.data == nullptr &&
           dataSource.buffer->getBufferSerial() == destination.getBufferSerial();
}

angle::Result CopyBuffers(ContextVk *contextVk,
                          vk::BufferHelper *srcBuffer,
                          vk::BufferHelper *dstBuffer,
                          uint32_t regionCount,
                          const VkBufferCopy *copyRegions)
{
    ASSERT(srcBuffer->valid() && dstBuffer->valid());

    // Enqueue a copy command on the GPU.
    vk::CommandBufferAccess access;
    if (srcBuffer->getBufferSerial() == dstBuffer->getBufferSerial())
    {
        access.onBufferSelfCopy(srcBuffer);
    }
    else
    {
        access.onBufferTransferRead(srcBuffer);
        access.onBufferTransferWrite(dstBuffer);
    }

    vk::OutsideRenderPassCommandBuffer *commandBuffer;
    ANGLE_TRY(contextVk->getOutsideRenderPassCommandBuffer(access, &commandBuffer));

    commandBuffer->copyBuffer(srcBuffer->getBuffer(), dstBuffer->getBuffer(), regionCount,
                              copyRegions);

    return angle::Result::Continue;
}
}  // namespace

// ConversionBuffer implementation.
ConversionBuffer::ConversionBuffer(vk::Renderer *renderer,
                                   VkBufferUsageFlags usageFlags,
                                   size_t initialSize,
                                   size_t alignment,
                                   bool hostVisible)
    : mEntireBufferDirty(true)
{
    mData = std::make_unique<vk::BufferHelper>();
    mDirtyRanges.reserve(32);
}

ConversionBuffer::~ConversionBuffer()
{
    ASSERT(!mData || !mData->valid());
    mDirtyRanges.clear();
}

ConversionBuffer::ConversionBuffer(ConversionBuffer &&other) = default;

// The dirty ranges may overlap or be contiguous. To reduce redundant conversion, we try to
// consolidate the dirty ranges: first sort them by the range's low end, then walk the list again,
// checking each range against the previous one and merging them when possible. The merge removes
// the overlapped area as well as reducing the number of ranges.
void ConversionBuffer::consolidateDirtyRanges()
{
    ASSERT(!mEntireBufferDirty);

    auto comp = [](const RangeDeviceSize &a, const RangeDeviceSize &b) -> bool {
        return a.low() < b.low();
    };
    std::sort(mDirtyRanges.begin(), mDirtyRanges.end(), comp);
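
    // Fold each range into the previous surviving range when they overlap or touch. Merged-away
    // entries are invalidated in place rather than erased from the vector.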
    size_t prev = 0;
    for (size_t i = 1; i < mDirtyRanges.size(); i++)
    {
        if (mDirtyRanges[prev].intersectsOrContinuous(mDirtyRanges[i]))
        {
            mDirtyRanges[prev].merge(mDirtyRanges[i]);
            mDirtyRanges[i].invalidate();
        }
        else
        {
            prev = i;
        }
    }
}

// VertexConversionBuffer implementation.
VertexConversionBuffer::VertexConversionBuffer(vk::Renderer *renderer, const CacheKey &cacheKey)
    : ConversionBuffer(renderer,
                       vk::kVertexBufferUsageFlags,
                       kConvertedArrayBufferInitialSize,
                       vk::kVertexBufferAlignment,
                       cacheKey.hostVisible),
      mCacheKey(cacheKey)
{}

VertexConversionBuffer::VertexConversionBuffer(VertexConversionBuffer &&other) = default;

VertexConversionBuffer::~VertexConversionBuffer() = default;

// BufferVk implementation.
BufferVk::BufferVk(const gl::BufferState &state)
    : BufferImpl(state),
      mClientBuffer(nullptr),
      mMemoryTypeIndex(0),
      mMemoryPropertyFlags(0),
      mIsStagingBufferMapped(false),
      mHasValidData(false),
      mIsMappedForWrite(false),
      mUsageType(BufferUsageType::Static)
{
    mMappedRange.invalidate();
}

BufferVk::~BufferVk() {}

void BufferVk::destroy(const gl::Context *context)
{
    ContextVk *contextVk = vk::GetImpl(context);

    (void)release(contextVk);
}

void BufferVk::releaseConversionBuffers(vk::Context *context)
{
    for (ConversionBuffer &buffer : mVertexConversionBuffers)
    {
        buffer.release(context);
    }
    mVertexConversionBuffers.clear();
}

angle::Result BufferVk::release(ContextVk *contextVk)
{
    if (mBuffer.valid())
    {
        ANGLE_TRY(contextVk->releaseBufferAllocation(&mBuffer));
    }
    if (mStagingBuffer.valid())
    {
        mStagingBuffer.release(contextVk);
    }

    releaseConversionBuffers(contextVk);

    return angle::Result::Continue;
}

angle::Result BufferVk::setExternalBufferData(const gl::Context *context,
                                              gl::BufferBinding target,
                                              GLeglClientBufferEXT clientBuffer,
                                              size_t size,
                                              VkMemoryPropertyFlags memoryPropertyFlags)
{
    ContextVk *contextVk = vk::GetImpl(context);

    // Release and re-create the memory and buffer.
    ANGLE_TRY(release(contextVk));

    VkBufferCreateInfo createInfo = {};
    createInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    createInfo.flags = 0;
    createInfo.size = size;
    createInfo.usage = GetDefaultBufferUsageFlags(contextVk->getRenderer());
    createInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
    createInfo.queueFamilyIndexCount = 0;
    createInfo.pQueueFamilyIndices = nullptr;

    return mBuffer.initExternal(contextVk, memoryPropertyFlags, createInfo, clientBuffer);
}

angle::Result BufferVk::setDataWithUsageFlags(const gl::Context *context,
                                              gl::BufferBinding target,
                                              GLeglClientBufferEXT clientBuffer,
                                              const void *data,
                                              size_t size,
                                              gl::BufferUsage usage,
                                              GLbitfield flags,
                                              gl::BufferStorage bufferStorage)
{
    ContextVk *contextVk = vk::GetImpl(context);
    VkMemoryPropertyFlags memoryPropertyFlags = 0;
    bool persistentMapRequired = false;
    const bool isExternalBuffer = clientBuffer != nullptr;

    if (bufferStorage == gl::BufferStorage::Immutable)
    {
        // glBufferStorage API call
        memoryPropertyFlags =
            GetStorageMemoryType(contextVk->getRenderer(), flags, isExternalBuffer);
        persistentMapRequired = (flags & GL_MAP_PERSISTENT_BIT_EXT) != 0;
    }
    else
    {
        // glBufferData API call
        memoryPropertyFlags = GetPreferredMemoryType(contextVk->getRenderer(), target, usage);
    }

    if (isExternalBuffer)
    {
        ANGLE_TRY(setExternalBufferData(context, target, clientBuffer, size, memoryPropertyFlags));
        if (!mBuffer.isHostVisible())
        {
            // If the external buffer's memory does not support the host-visible memory property,
            // we cannot support a persistent map request.
            ANGLE_VK_CHECK(contextVk, !persistentMapRequired, VK_ERROR_MEMORY_MAP_FAILED);
        }

        mClientBuffer = clientBuffer;

        return angle::Result::Continue;
    }
    return setDataWithMemoryType(context, target, data, size, memoryPropertyFlags, usage);
}

angle::Result BufferVk::setData(const gl::Context *context,
                                gl::BufferBinding target,
                                const void *data,
                                size_t size,
                                gl::BufferUsage usage)
{
    ContextVk *contextVk = vk::GetImpl(context);
    // Assume host visible/coherent memory available.
    VkMemoryPropertyFlags memoryPropertyFlags =
        GetPreferredMemoryType(contextVk->getRenderer(), target, usage);
    return setDataWithMemoryType(context, target, data, size, memoryPropertyFlags, usage);
}

angle::Result BufferVk::setDataWithMemoryType(const gl::Context *context,
                                              gl::BufferBinding target,
                                              const void *data,
                                              size_t size,
                                              VkMemoryPropertyFlags memoryPropertyFlags,
                                              gl::BufferUsage usage)
{
    ContextVk *contextVk = vk::GetImpl(context);
    vk::Renderer *renderer = contextVk->getRenderer();

    // Since the buffer is being entirely reinitialized, reset the valid-data flag. If the caller
    // passed in data to fill the buffer, the flag will be updated when the data is copied to the
    // buffer.
    mHasValidData = false;

    if (size == 0)
    {
        // Nothing to do.
        return angle::Result::Continue;
    }
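
    // The buffer is being respecified, so previously recorded dirty ranges on the conversion
    // buffers no longer apply; if new data is provided, setDataImpl() below marks everything
    // dirty again through dataUpdated().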
    if (!mVertexConversionBuffers.empty())
    {
        for (ConversionBuffer &buffer : mVertexConversionBuffers)
        {
            buffer.clearDirty();
        }
    }

    const BufferUsageType usageType = GetBufferUsageType(usage);
    const BufferUpdateType updateType =
        calculateBufferUpdateTypeOnFullUpdate(renderer, size, memoryPropertyFlags, usageType, data);

    if (updateType == BufferUpdateType::StorageRedefined)
    {
        mUsageType = usageType;
        mMemoryPropertyFlags = memoryPropertyFlags;
        ANGLE_TRY(GetMemoryTypeIndex(contextVk, size, memoryPropertyFlags, &mMemoryTypeIndex));
        ANGLE_TRY(acquireBufferHelper(contextVk, size, mUsageType));
    }
    else if (size != static_cast<size_t>(mState.getSize()))
    {
        if (mBuffer.onBufferUserSizeChange(renderer))
        {
            // If we have a dedicated VkBuffer created with the user's size, we have to recreate
            // that VkBuffer when the user size changes, even if the storage is reused. When this
            // happens, we must notify other objects that are observing this buffer, such as the
            // vertex array. The vertex array observes the buffer's storage changes because it
            // uses the VkBuffer; now that the VkBuffer has changed, it needs to re-process the
            // buffer just as if the storage had been reallocated.
            onStateChange(angle::SubjectMessage::InternalMemoryAllocationChanged);
        }
    }

    if (data != nullptr)
    {
        BufferDataSource dataSource = {};
        dataSource.data = data;

        // Handle full-buffer updates similarly to glBufferSubData.
        ANGLE_TRY(setDataImpl(contextVk, size, dataSource, size, 0, updateType));
    }

    return angle::Result::Continue;
}

angle::Result BufferVk::setSubData(const gl::Context *context,
                                   gl::BufferBinding target,
                                   const void *data,
                                   size_t size,
                                   size_t offset)
{
    ASSERT(mBuffer.valid());

    BufferDataSource dataSource = {};
    dataSource.data = data;

    ContextVk *contextVk = vk::GetImpl(context);
    return setDataImpl(contextVk, static_cast<size_t>(mState.getSize()), dataSource, size, offset,
                       BufferUpdateType::ContentsUpdate);
}

angle::Result BufferVk::copySubData(const gl::Context *context,
                                    BufferImpl *source,
                                    GLintptr sourceOffset,
                                    GLintptr destOffset,
                                    GLsizeiptr size)
{
    ASSERT(mBuffer.valid());

    ContextVk *contextVk = vk::GetImpl(context);
    BufferVk *sourceVk = GetAs<BufferVk>(source);

    BufferDataSource dataSource = {};
    dataSource.buffer = &sourceVk->getBuffer();
    dataSource.bufferOffset = static_cast<VkDeviceSize>(sourceOffset);

    ASSERT(dataSource.buffer->valid());

    return setDataImpl(contextVk, static_cast<size_t>(mState.getSize()), dataSource, size,
                       destOffset, BufferUpdateType::ContentsUpdate);
}

angle::Result BufferVk::allocStagingBuffer(ContextVk *contextVk,
                                           vk::MemoryCoherency coherency,
                                           VkDeviceSize size,
                                           uint8_t **mapPtr)
{
    ASSERT(!mIsStagingBufferMapped);

    if (mStagingBuffer.valid())
    {
        if (size <= mStagingBuffer.getSize() && IsCached(coherency) == mStagingBuffer.isCached() &&
            contextVk->getRenderer()->hasResourceUseFinished(mStagingBuffer.getResourceUse()))
        {
            // If the existing staging buffer is big enough and idle, just reuse it.
            *mapPtr = mStagingBuffer.getMappedMemory();
            mIsStagingBufferMapped = true;
            return angle::Result::Continue;
        }
        mStagingBuffer.release(contextVk);
    }

    ANGLE_TRY(
        contextVk->initBufferForBufferCopy(&mStagingBuffer, static_cast<size_t>(size), coherency));
    *mapPtr = mStagingBuffer.getMappedMemory();
    mIsStagingBufferMapped = true;

    return angle::Result::Continue;
}

angle::Result BufferVk::flushStagingBuffer(ContextVk *contextVk,
                                           VkDeviceSize offset,
                                           VkDeviceSize size)
{
    vk::Renderer *renderer = contextVk->getRenderer();

    ASSERT(mIsStagingBufferMapped);
    ASSERT(mStagingBuffer.valid());

    if (!mStagingBuffer.isCoherent())
    {
        ANGLE_TRY(mStagingBuffer.flush(renderer));
    }
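
    // VkBufferCopy is {srcOffset, dstOffset, size}: copy from the start of the staging buffer
    // into mBuffer at the requested offset.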
    VkBufferCopy copyRegion = {mStagingBuffer.getOffset(), mBuffer.getOffset() + offset, size};
    ANGLE_TRY(CopyBuffers(contextVk, &mStagingBuffer, &mBuffer, 1, &copyRegion));

    return angle::Result::Continue;
}
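
// Maps a buffer that is not host-visible by copying its contents into a host-visible staging
// buffer on the GPU and returning a pointer into that staging buffer. If the map is for write,
// the staged data is copied back into the device-local buffer when the buffer is unmapped (see
// unmapImpl() and flushStagingBuffer()).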
angle::Result BufferVk::handleDeviceLocalBufferMap(ContextVk *contextVk,
                                                   VkDeviceSize offset,
                                                   VkDeviceSize size,
                                                   uint8_t **mapPtr)
{
    vk::Renderer *renderer = contextVk->getRenderer();
    ANGLE_TRY(
        allocStagingBuffer(contextVk, vk::MemoryCoherency::CachedPreferCoherent, size, mapPtr));
    ANGLE_TRY(mStagingBuffer.flush(renderer));

    // Copy data from the device-local buffer to the host-visible staging buffer.
    VkBufferCopy copyRegion = {mBuffer.getOffset() + offset, mStagingBuffer.getOffset(), size};
    ANGLE_TRY(CopyBuffers(contextVk, &mBuffer, &mStagingBuffer, 1, &copyRegion));
    ANGLE_TRY(mStagingBuffer.waitForIdle(contextVk, "GPU stall due to mapping device local buffer",
                                         RenderPassClosureReason::DeviceLocalBufferMap));
    // Since coherent memory is only preferred, we may end up with non-coherent memory. Always
    // call invalidate here (it will check the memory flags before actually calling into the
    // driver).
    ANGLE_TRY(mStagingBuffer.invalidate(renderer));

    return angle::Result::Continue;
}

angle::Result BufferVk::mapHostVisibleBuffer(ContextVk *contextVk,
                                             VkDeviceSize offset,
                                             GLbitfield access,
                                             uint8_t **mapPtr)
{
    ANGLE_TRY(mBuffer.mapWithOffset(contextVk, mapPtr, static_cast<size_t>(offset)));

    // Invalidate non-coherent memory for the READ case.
    if (!mBuffer.isCoherent() && (access & GL_MAP_READ_BIT) != 0)
    {
        ANGLE_TRY(mBuffer.invalidate(contextVk->getRenderer()));
    }
    return angle::Result::Continue;
}

angle::Result BufferVk::map(const gl::Context *context, GLenum access, void **mapPtr)
{
    ASSERT(mBuffer.valid());
    ASSERT(access == GL_WRITE_ONLY_OES);

    return mapImpl(vk::GetImpl(context), GL_MAP_WRITE_BIT, mapPtr);
}

angle::Result BufferVk::mapRange(const gl::Context *context,
                                 size_t offset,
                                 size_t length,
                                 GLbitfield access,
                                 void **mapPtr)
{
    return mapRangeImpl(vk::GetImpl(context), offset, length, access, mapPtr);
}

angle::Result BufferVk::mapImpl(ContextVk *contextVk, GLbitfield access, void **mapPtr)
{
    return mapRangeImpl(contextVk, 0, static_cast<VkDeviceSize>(mState.getSize()), access, mapPtr);
}

angle::Result BufferVk::ghostMappedBuffer(ContextVk *contextVk,
                                          VkDeviceSize offset,
                                          VkDeviceSize length,
                                          GLbitfield access,
                                          void **mapPtr)
{
    // We shouldn't get here if this is external memory.
    ASSERT(!isExternalBuffer());

    ++contextVk->getPerfCounters().buffersGhosted;

    // If we are creating a new buffer because the GPU is using it as read-only, then we
    // also need to copy the contents of the previous buffer into the new buffer, in
    // case the caller only updates a portion of the new buffer.
    vk::BufferHelper src = std::move(mBuffer);
    ANGLE_TRY(acquireBufferHelper(contextVk, static_cast<size_t>(mState.getSize()),
                                  BufferUsageType::Dynamic));

    // Before returning the new buffer, map the previous buffer and copy its entire
    // contents into the new buffer.
    uint8_t *srcMapPtr = nullptr;
    uint8_t *dstMapPtr = nullptr;
    ANGLE_TRY(src.map(contextVk, &srcMapPtr));
    ANGLE_TRY(mBuffer.map(contextVk, &dstMapPtr));

    ASSERT(src.isCoherent());
    ASSERT(mBuffer.isCoherent());

    // No need to copy over [offset, offset + length), just the data around it.
    if ((access & GL_MAP_INVALIDATE_RANGE_BIT) != 0)
    {
        if (offset != 0)
        {
            memcpy(dstMapPtr, srcMapPtr, static_cast<size_t>(offset));
        }
        size_t totalSize = static_cast<size_t>(mState.getSize());
        size_t remainingStart = static_cast<size_t>(offset + length);
        size_t remainingSize = totalSize - remainingStart;
        if (remainingSize != 0)
        {
            memcpy(dstMapPtr + remainingStart, srcMapPtr + remainingStart, remainingSize);
        }
    }
    else
    {
        memcpy(dstMapPtr, srcMapPtr, static_cast<size_t>(mState.getSize()));
    }

    ANGLE_TRY(contextVk->releaseBufferAllocation(&src));

    // Return the already-mapped pointer with the offset adjustment to avoid the call to unmap().
    *mapPtr = dstMapPtr + offset;

    return angle::Result::Continue;
}

angle::Result BufferVk::mapRangeImpl(ContextVk *contextVk,
                                     VkDeviceSize offset,
                                     VkDeviceSize length,
                                     GLbitfield access,
                                     void **mapPtr)
{
    vk::Renderer *renderer = contextVk->getRenderer();
    ASSERT(mBuffer.valid());

    // Record the map call parameters in case this call originates from ANGLE internals (the
    // access/offset/length may be inconsistent with mState).
    mIsMappedForWrite = (access & GL_MAP_WRITE_BIT) != 0;
    mMappedRange = RangeDeviceSize(offset, offset + length);

    uint8_t **mapPtrBytes = reinterpret_cast<uint8_t **>(mapPtr);
    bool hostVisible = mBuffer.isHostVisible();

    // MAP_UNSYNCHRONIZED_BIT, so immediately map.
    if ((access & GL_MAP_UNSYNCHRONIZED_BIT) != 0)
    {
        if (hostVisible)
        {
            return mapHostVisibleBuffer(contextVk, offset, access, mapPtrBytes);
        }
        return handleDeviceLocalBufferMap(contextVk, offset, length, mapPtrBytes);
    }

    // Read case
    if ((access & GL_MAP_WRITE_BIT) == 0)
    {
        // If the app is not going to write, all we need is to ensure any GPU write is finished.
        // Concurrent reads from the CPU and GPU are allowed.
        if (!renderer->hasResourceUseFinished(mBuffer.getWriteResourceUse()))
        {
            // If there are unflushed write commands for the resource, flush them.
            if (contextVk->hasUnsubmittedUse(mBuffer.getWriteResourceUse()))
            {
                ANGLE_TRY(contextVk->flushAndSubmitCommands(
                    nullptr, nullptr, RenderPassClosureReason::BufferWriteThenMap));
            }
            ANGLE_TRY(renderer->finishResourceUse(contextVk, mBuffer.getWriteResourceUse()));
        }
        if (hostVisible)
        {
            return mapHostVisibleBuffer(contextVk, offset, access, mapPtrBytes);
        }
        return handleDeviceLocalBufferMap(contextVk, offset, length, mapPtrBytes);
    }

    // Write case
    if (!hostVisible)
    {
        return handleDeviceLocalBufferMap(contextVk, offset, length, mapPtrBytes);
    }

    // Write case, buffer not in use.
    if (isExternalBuffer() || !isCurrentlyInUse(contextVk->getRenderer()))
    {
        return mapHostVisibleBuffer(contextVk, offset, access, mapPtrBytes);
    }

    // Write case, buffer in use.
    //
    // Here, we try to map the buffer, but it's busy. Instead of waiting for the GPU to
    // finish, we just allocate a new buffer if:
    // 1.) The caller has told us it doesn't care about previous contents, or
    // 2.) The GPU won't write to the buffer.

    bool rangeInvalidate = (access & GL_MAP_INVALIDATE_RANGE_BIT) != 0;
    bool entireBufferInvalidated =
        ((access & GL_MAP_INVALIDATE_BUFFER_BIT) != 0) ||
        (rangeInvalidate && offset == 0 && static_cast<VkDeviceSize>(mState.getSize()) == length);

    if (entireBufferInvalidated)
    {
        ANGLE_TRY(acquireBufferHelper(contextVk, static_cast<size_t>(mState.getSize()),
                                      BufferUsageType::Dynamic));
        return mapHostVisibleBuffer(contextVk, offset, access, mapPtrBytes);
    }

    bool smallMapRange = (length < static_cast<VkDeviceSize>(mState.getSize()) / 2);
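
    // For a small, invalidated write range, don't touch mBuffer here: hand out a pointer into a
    // staging buffer instead, and copy it into mBuffer at unmap time (see unmapImpl()). This
    // avoids both a GPU stall and a full-buffer ghosting copy.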
    if (smallMapRange && rangeInvalidate)
    {
        ANGLE_TRY(allocStagingBuffer(contextVk, vk::MemoryCoherency::CachedNonCoherent,
                                     static_cast<size_t>(length), mapPtrBytes));
        return angle::Result::Continue;
    }

    if (renderer->hasResourceUseFinished(mBuffer.getWriteResourceUse()))
    {
        // This will keep the new buffer mapped and update mapPtr, so return immediately.
        return ghostMappedBuffer(contextVk, offset, length, access, mapPtr);
    }

    // Write case (worst case, buffer in use for write)
    ANGLE_TRY(mBuffer.waitForIdle(contextVk, "GPU stall due to mapping buffer in use by the GPU",
                                  RenderPassClosureReason::BufferInUseWhenSynchronizedMap));
    return mapHostVisibleBuffer(contextVk, offset, access, mapPtrBytes);
}

angle::Result BufferVk::unmap(const gl::Context *context, GLboolean *result)
{
    ANGLE_TRY(unmapImpl(vk::GetImpl(context)));

    // This should be false if the contents have been corrupted through external means. Vulkan
    // doesn't provide such information.
    *result = true;

    return angle::Result::Continue;
}

angle::Result BufferVk::unmapImpl(ContextVk *contextVk)
{
    ASSERT(mBuffer.valid());

    if (mIsStagingBufferMapped)
    {
        ASSERT(mStagingBuffer.valid());
        // The buffer is device-local, or this was the small-range-map optimization.
        if (mIsMappedForWrite)
        {
            ANGLE_TRY(flushStagingBuffer(contextVk, mMappedRange.low(), mMappedRange.length()));
        }

        mIsStagingBufferMapped = false;
    }
    else
    {
        ASSERT(mBuffer.isHostVisible());
        vk::Renderer *renderer = contextVk->getRenderer();
        if (!mBuffer.isCoherent())
        {
            ANGLE_TRY(mBuffer.flush(renderer));
        }
        mBuffer.unmap(renderer);
    }
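
    // If the map was for write, mark the written range dirty on the vertex conversion buffers so
    // any cached conversions get regenerated, and note that the buffer now holds valid data.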
    if (mIsMappedForWrite)
    {
        if (mMappedRange == RangeDeviceSize(0, static_cast<VkDeviceSize>(getSize())))
        {
            dataUpdated();
        }
        else
        {
            dataRangeUpdated(mMappedRange);
        }
    }

    // Reset the mapping parameters
    mIsMappedForWrite = false;
    mMappedRange.invalidate();

    return angle::Result::Continue;
}

angle::Result BufferVk::getSubData(const gl::Context *context,
                                   GLintptr offset,
                                   GLsizeiptr size,
                                   void *outData)
{
    ASSERT(offset + size <= getSize());
    ASSERT(mBuffer.valid());
    ContextVk *contextVk = vk::GetImpl(context);
    void *mapPtr;
    ANGLE_TRY(mapRangeImpl(contextVk, offset, size, GL_MAP_READ_BIT, &mapPtr));
    memcpy(outData, mapPtr, size);
    return unmapImpl(contextVk);
}

angle::Result BufferVk::getIndexRange(const gl::Context *context,
                                      gl::DrawElementsType type,
                                      size_t offset,
                                      size_t count,
                                      bool primitiveRestartEnabled,
                                      gl::IndexRange *outRange)
{
    ContextVk *contextVk = vk::GetImpl(context);
    vk::Renderer *renderer = contextVk->getRenderer();

    // This is a workaround for the mock ICD not implementing buffer memory state.
    // Could be removed if https://github.com/KhronosGroup/Vulkan-Tools/issues/84 is fixed.
    if (renderer->isMockICDEnabled())
    {
        outRange->start = 0;
        outRange->end = 0;
        return angle::Result::Continue;
    }

    ANGLE_TRACE_EVENT0("gpu.angle", "BufferVk::getIndexRange");

    void *mapPtr;
    ANGLE_TRY(mapRangeImpl(contextVk, offset, getSize(), GL_MAP_READ_BIT, &mapPtr));
    *outRange = gl::ComputeIndexRange(type, mapPtr, count, primitiveRestartEnabled);
    ANGLE_TRY(unmapImpl(contextVk));

    return angle::Result::Continue;
}

angle::Result BufferVk::updateBuffer(ContextVk *contextVk,
                                     size_t bufferSize,
                                     const BufferDataSource &dataSource,
                                     size_t updateSize,
                                     size_t updateOffset)
{
    // To copy on the CPU, the destination must be host-visible. The source should be either a
    // CPU pointer or another host-visible buffer that is not being written to by the GPU.
    const bool shouldCopyOnCPU =
        mBuffer.isHostVisible() &&
        (dataSource.data != nullptr ||
         ShouldUseCPUToCopyData(contextVk, *dataSource.buffer, updateSize, bufferSize));

    if (shouldCopyOnCPU)
    {
        ANGLE_TRY(directUpdate(contextVk, dataSource, updateSize, updateOffset));
    }
    else
    {
        ANGLE_TRY(stagedUpdate(contextVk, dataSource, updateSize, updateOffset));
    }
    return angle::Result::Continue;
}

angle::Result BufferVk::directUpdate(ContextVk *contextVk,
                                     const BufferDataSource &dataSource,
                                     size_t size,
                                     size_t offset)
{
    vk::Renderer *renderer = contextVk->getRenderer();
    uint8_t *srcPointerMapped = nullptr;
    const uint8_t *srcPointer = nullptr;
    uint8_t *dstPointer = nullptr;

    // Map the destination buffer.
    ASSERT(mBuffer.isHostVisible());
    ANGLE_TRY(mBuffer.mapWithOffset(contextVk, &dstPointer, offset));
    ASSERT(dstPointer);

    // If the source data is coming from a buffer, map it. If this is a self-copy, avoid
    // double-mapping the buffer.
    if (dataSource.data != nullptr)
    {
        srcPointer = static_cast<const uint8_t *>(dataSource.data);
    }
    else
    {
        ANGLE_TRY(dataSource.buffer->mapWithOffset(contextVk, &srcPointerMapped,
                                                   static_cast<size_t>(dataSource.bufferOffset)));
        srcPointer = srcPointerMapped;
    }

    memcpy(dstPointer, srcPointer, size);

    // External memory may end up being non-coherent.
    if (!mBuffer.isCoherent())
    {
        ANGLE_TRY(mBuffer.flush(renderer, offset, size));
    }

    // Unmap the destination and source buffers if applicable.
    //
    // If the buffer has dynamic usage then the intent is frequent client-side updates to the
    // buffer. Don't CPU-unmap the buffer; we will take care of unmapping when releasing the
    // buffer to either the renderer or mBufferFreeList.
    if (GetBufferUsageType(mState.getUsage()) == BufferUsageType::Static)
    {
        mBuffer.unmap(renderer);
    }

    if (srcPointerMapped != nullptr)
    {
        dataSource.buffer->unmap(renderer);
    }

    return angle::Result::Continue;
}

angle::Result BufferVk::stagedUpdate(ContextVk *contextVk,
                                     const BufferDataSource &dataSource,
                                     size_t size,
                                     size_t offset)
{
    // If data is coming from a CPU pointer, stage it in a temporary staging buffer.
    // Otherwise, do a GPU copy directly from the given buffer.
    if (dataSource.data != nullptr)
    {
        uint8_t *mapPointer = nullptr;
        ANGLE_TRY(allocStagingBuffer(contextVk, vk::MemoryCoherency::CachedNonCoherent, size,
                                     &mapPointer));
        memcpy(mapPointer, dataSource.data, size);
        ANGLE_TRY(flushStagingBuffer(contextVk, offset, size));
        mIsStagingBufferMapped = false;
    }
    else
    {
        // Check for self-dependency.
        vk::CommandBufferAccess access;
        if (dataSource.buffer->getBufferSerial() == mBuffer.getBufferSerial())
        {
            access.onBufferSelfCopy(&mBuffer);
        }
        else
        {
            access.onBufferTransferRead(dataSource.buffer);
            access.onBufferTransferWrite(&mBuffer);
        }

        vk::OutsideRenderPassCommandBuffer *commandBuffer;
        ANGLE_TRY(contextVk->getOutsideRenderPassCommandBuffer(access, &commandBuffer));

        // Enqueue a copy command on the GPU.
        const VkBufferCopy copyRegion = {dataSource.bufferOffset + dataSource.buffer->getOffset(),
                                         static_cast<VkDeviceSize>(offset) + mBuffer.getOffset(),
                                         static_cast<VkDeviceSize>(size)};

        commandBuffer->copyBuffer(dataSource.buffer->getBuffer(), mBuffer.getBuffer(), 1,
                                  &copyRegion);
    }

    return angle::Result::Continue;
}

angle::Result BufferVk::acquireAndUpdate(ContextVk *contextVk,
                                         size_t bufferSize,
                                         const BufferDataSource &dataSource,
                                         size_t updateSize,
                                         size_t updateOffset,
                                         BufferUpdateType updateType)
{
    // We shouldn't get here if this is external memory.
    ASSERT(!isExternalBuffer());
    // If StorageRedefined, we cannot use mState.getSize() to allocate a new buffer.
    ASSERT(updateType != BufferUpdateType::StorageRedefined);
    ASSERT(mBuffer.valid());
    ASSERT(mBuffer.getSize() >= bufferSize);

    // Here we acquire a new BufferHelper and directUpdate() the new buffer.
    // If the subData size was less than the buffer's size we additionally enqueue
    // a GPU copy of the remaining regions from the old mBuffer to the new one.
    vk::BufferHelper prevBuffer;
    size_t offsetAfterSubdata = (updateOffset + updateSize);
    bool updateRegionBeforeSubData = mHasValidData && (updateOffset > 0);
    bool updateRegionAfterSubData = mHasValidData && (offsetAfterSubdata < bufferSize);

    uint8_t *prevMapPtrBeforeSubData = nullptr;
    uint8_t *prevMapPtrAfterSubData = nullptr;
    if (updateRegionBeforeSubData || updateRegionAfterSubData)
    {
        prevBuffer = std::move(mBuffer);

        // The total number of bytes that we need to copy from the old buffer to the new one.
        size_t copySize = bufferSize - updateSize;

        // If the buffer is host-visible and the GPU is not writing to it, we use the CPU to do
        // the copy. We need to save the source buffer pointer before we acquire a new buffer.
        if (ShouldUseCPUToCopyData(contextVk, prevBuffer, copySize, bufferSize))
        {
            uint8_t *mapPointer = nullptr;
            // prevBuffer will be recycled (or released and unmapped) by acquireBufferHelper.
            ANGLE_TRY(prevBuffer.map(contextVk, &mapPointer));
            ASSERT(mapPointer);
            prevMapPtrBeforeSubData = mapPointer;
            prevMapPtrAfterSubData = mapPointer + offsetAfterSubdata;
        }
    }

    ANGLE_TRY(acquireBufferHelper(contextVk, bufferSize, BufferUsageType::Dynamic));
    ANGLE_TRY(updateBuffer(contextVk, bufferSize, dataSource, updateSize, updateOffset));

    constexpr int kMaxCopyRegions = 2;
    angle::FixedVector<VkBufferCopy, kMaxCopyRegions> copyRegions;

    if (updateRegionBeforeSubData)
    {
        if (prevMapPtrBeforeSubData)
        {
            BufferDataSource beforeSrc = {};
            beforeSrc.data = prevMapPtrBeforeSubData;

            ANGLE_TRY(directUpdate(contextVk, beforeSrc, updateOffset, 0));
        }
        else
        {
            copyRegions.push_back({prevBuffer.getOffset(), mBuffer.getOffset(), updateOffset});
        }
    }

    if (updateRegionAfterSubData)
    {
        size_t copySize = bufferSize - offsetAfterSubdata;
        if (prevMapPtrAfterSubData)
        {
            BufferDataSource afterSrc = {};
            afterSrc.data = prevMapPtrAfterSubData;

            ANGLE_TRY(directUpdate(contextVk, afterSrc, copySize, offsetAfterSubdata));
        }
        else
        {
            copyRegions.push_back({prevBuffer.getOffset() + offsetAfterSubdata,
                                   mBuffer.getOffset() + offsetAfterSubdata, copySize});
        }
    }

    if (!copyRegions.empty())
    {
        ANGLE_TRY(CopyBuffers(contextVk, &prevBuffer, &mBuffer,
                              static_cast<uint32_t>(copyRegions.size()), copyRegions.data()));
    }

    if (prevBuffer.valid())
    {
        ANGLE_TRY(contextVk->releaseBufferAllocation(&prevBuffer));
    }

    return angle::Result::Continue;
}

angle::Result BufferVk::setDataImpl(ContextVk *contextVk,
                                    size_t bufferSize,
                                    const BufferDataSource &dataSource,
                                    size_t updateSize,
                                    size_t updateOffset,
                                    BufferUpdateType updateType)
{
    // if the buffer is currently in use
    //     if it isn't an external buffer and not a self-copy and sub data size meets threshold
    //         acquire a new BufferHelper from the pool
    //     else stage the update
    // else update the buffer directly
    if (isCurrentlyInUse(contextVk->getRenderer()))
    {
        // The acquire-and-update path creates a new buffer, which is sometimes more efficient
        // than trying to update the existing one. Firstly, this is not done in the following
        // situations:
        //
        // - For external buffers, the underlying storage cannot be reallocated.
        // - If storage has just been redefined, this path is not taken because a new buffer has
        //   already been created by the caller. Besides, this path uses mState.getSize(), which
        //   the frontend updates only after this call in situations where the storage may be
        //   redefined. This could happen if the buffer memory is DEVICE_LOCAL and
        //   renderer->getFeatures().allocateNonZeroMemory.enabled is true. In this case a
        //   copyToBuffer is immediately issued after allocation and isCurrentlyInUse will be
        //   true.
        // - If this is a self-copy through glCopyBufferSubData, |dataSource| will contain a
        //   reference to |mBuffer|, in which case the source information is lost after acquiring
        //   a new buffer.
        //
        // Additionally, this path is taken only if any of the following conditions are true:
        //
        // - BufferVk does not have any valid data. This means that there is no data to be copied
        //   from the old buffer to the new one after acquiring it. This could happen when the
        //   application calls glBufferData with the same size and we reuse the existing buffer
        //   storage.
        // - The buffer is used read-only in the current render pass. In this case, acquiring a
        //   new buffer is preferred to avoid breaking the render pass.
        // - The update modifies a significant portion of the buffer.
        // - The preferCPUForBufferSubData feature is enabled.
        //
        const bool canAcquireAndUpdate = !isExternalBuffer() &&
                                         updateType != BufferUpdateType::StorageRedefined &&
                                         !IsSelfCopy(dataSource, mBuffer);
        if (canAcquireAndUpdate &&
            (!mHasValidData || ShouldAvoidRenderPassBreakOnUpdate(contextVk, mBuffer, bufferSize) ||
             ShouldAllocateNewMemoryForUpdate(contextVk, updateSize, bufferSize)))
        {
            ANGLE_TRY(acquireAndUpdate(contextVk, bufferSize, dataSource, updateSize, updateOffset,
                                       updateType));
        }
        else
        {
            if (canAcquireAndUpdate && RenderPassUsesBufferForReadOnly(contextVk, mBuffer))
            {
                ANGLE_VK_PERF_WARNING(contextVk, GL_DEBUG_SEVERITY_LOW,
                                      "Breaking the render pass on small upload to large buffer");
            }

            ANGLE_TRY(stagedUpdate(contextVk, dataSource, updateSize, updateOffset));
        }
    }
    else
    {
        ANGLE_TRY(updateBuffer(contextVk, bufferSize, dataSource, updateSize, updateOffset));
    }

    // Update conversions.
    if (updateOffset == 0 && updateSize == bufferSize)
    {
        dataUpdated();
    }
    else
    {
        dataRangeUpdated(RangeDeviceSize(updateOffset, updateOffset + updateSize));
    }

    return angle::Result::Continue;
}

VertexConversionBuffer *BufferVk::getVertexConversionBuffer(
    vk::Renderer *renderer,
    const VertexConversionBuffer::CacheKey &cacheKey)
{
    for (VertexConversionBuffer &buffer : mVertexConversionBuffers)
    {
        if (buffer.match(cacheKey))
        {
            ASSERT(buffer.valid());
            return &buffer;
        }
    }

    mVertexConversionBuffers.emplace_back(renderer, cacheKey);
    return &mVertexConversionBuffers.back();
}

void BufferVk::dataRangeUpdated(const RangeDeviceSize &range)
{
    for (VertexConversionBuffer &buffer : mVertexConversionBuffers)
    {
        buffer.addDirtyBufferRange(range);
    }
    // Now we have valid data.
    mHasValidData = true;
}

void BufferVk::dataUpdated()
{
    for (VertexConversionBuffer &buffer : mVertexConversionBuffers)
    {
        buffer.setEntireBufferDirty();
    }
    // Now we have valid data.
    mHasValidData = true;
}

void BufferVk::onDataChanged()
{
    dataUpdated();
}

angle::Result BufferVk::acquireBufferHelper(ContextVk *contextVk,
                                            size_t sizeInBytes,
                                            BufferUsageType usageType)
{
    vk::Renderer *renderer = contextVk->getRenderer();
    size_t size = roundUpPow2(sizeInBytes, kBufferSizeGranularity);
    size_t alignment = renderer->getDefaultBufferAlignment();

    if (mBuffer.valid())
    {
        ANGLE_TRY(contextVk->releaseBufferAllocation(&mBuffer));
    }

    // Allocate the buffer directly.
    ANGLE_TRY(
        contextVk->initBufferAllocation(&mBuffer, mMemoryTypeIndex, size, alignment, usageType));

    // Tell the observers (front end) that a new buffer was created, so the necessary
    // dirty bits can be set. This allows the buffer views pointing to the old buffer to
    // be recreated and point to the new buffer, along with updating the descriptor sets
    // to use the new buffer.
    onStateChange(angle::SubjectMessage::InternalMemoryAllocationChanged);

    return angle::Result::Continue;
}

bool BufferVk::isCurrentlyInUse(vk::Renderer *renderer) const
{
    return !renderer->hasResourceUseFinished(mBuffer.getResourceUse());
}

// When a buffer is being completely changed, calculate whether it's better to allocate a new
// buffer or overwrite the existing one.
BufferUpdateType BufferVk::calculateBufferUpdateTypeOnFullUpdate(
    vk::Renderer *renderer,
    size_t size,
    VkMemoryPropertyFlags memoryPropertyFlags,
    BufferUsageType usageType,
    const void *data) const
{
    // 0-sized updates should be no-op'd before this call.
    ASSERT(size > 0);

    // If there is no existing buffer, this cannot be a content update.
    if (!mBuffer.valid())
    {
        return BufferUpdateType::StorageRedefined;
    }

    const bool inUseAndRespecifiedWithoutData = data == nullptr && isCurrentlyInUse(renderer);
    bool redefineStorage = shouldRedefineStorage(renderer, usageType, memoryPropertyFlags, size);

    // Create a new buffer if the buffer is busy and it's being redefined without data.
    // Additionally, a new buffer is created if any of the parameters change (memory type, usage,
    // size).
    return redefineStorage || inUseAndRespecifiedWithoutData ? BufferUpdateType::StorageRedefined
                                                             : BufferUpdateType::ContentsUpdate;
}

bool BufferVk::shouldRedefineStorage(vk::Renderer *renderer,
                                     BufferUsageType usageType,
                                     VkMemoryPropertyFlags memoryPropertyFlags,
                                     size_t size) const
{
    if (mUsageType != usageType)
    {
        return true;
    }

    if (mMemoryPropertyFlags != memoryPropertyFlags)
    {
        return true;
    }

    if (size > mBuffer.getSize())
    {
        return true;
    }
    else
    {
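        // The requested size fits in the current allocation. Also compute the size a fresh
        // allocation would use (optionally padded to the max vertex attrib stride, then rounded
        // up to the size granularity and default alignment), and redefine the storage only if
        // that would not fit in the existing buffer either.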
        size_t paddedBufferSize =
            (renderer->getFeatures().padBuffersToMaxVertexAttribStride.enabled)
                ? (size + static_cast<size_t>(renderer->getMaxVertexAttribStride()))
                : size;
        size_t sizeInBytes = roundUpPow2(paddedBufferSize, kBufferSizeGranularity);
        size_t alignedSize = roundUp(sizeInBytes, renderer->getDefaultBufferAlignment());
        if (alignedSize > mBuffer.getSize())
        {
            return true;
        }
    }

    return false;
}
}  // namespace rx