1 //
2 // Copyright 2016 The ANGLE Project Authors. All rights reserved.
3 // Use of this source code is governed by a BSD-style license that can be
4 // found in the LICENSE file.
5 //
6 // VertexArrayVk.cpp:
7 // Implements the class methods for VertexArrayVk.
8 //
9
10 #include "libANGLE/renderer/vulkan/VertexArrayVk.h"
11
12 #include "common/debug.h"
13 #include "common/utilities.h"
14 #include "libANGLE/Context.h"
15 #include "libANGLE/renderer/vulkan/BufferVk.h"
16 #include "libANGLE/renderer/vulkan/ContextVk.h"
17 #include "libANGLE/renderer/vulkan/FramebufferVk.h"
18 #include "libANGLE/renderer/vulkan/vk_format_utils.h"
19 #include "libANGLE/renderer/vulkan/vk_renderer.h"
20 #include "libANGLE/renderer/vulkan/vk_resource.h"
21
22 namespace rx
23 {
24 namespace
25 {
26 constexpr int kStreamIndexBufferCachedIndexCount = 6;
27 constexpr int kMaxCachedStreamIndexBuffers = 4;
28 constexpr size_t kDefaultValueSize = sizeof(gl::VertexAttribCurrentValueData::Values);
29
BindingIsAligned(const angle::Format & angleFormat,VkDeviceSize offset,GLuint stride)30 ANGLE_INLINE bool BindingIsAligned(const angle::Format &angleFormat,
31 VkDeviceSize offset,
32 GLuint stride)
33 {
34 ASSERT(stride != 0);
35 GLuint mask = angleFormat.componentAlignmentMask;
36 if (mask != std::numeric_limits<GLuint>::max())
37 {
38 return ((offset & mask) == 0 && (stride & mask) == 0);
39 }
40 else
41 {
42 // To perform the GPU conversion for formats with components that aren't byte-aligned
43 // (for example, A2BGR10 or RGB10A2), one element has to be placed in 4 bytes to perform
44 // the compute shader. So, binding offset and stride has to be aligned to formatSize.
45 unsigned int formatSize = angleFormat.pixelBytes;
46 return (offset % formatSize == 0) && (stride % formatSize == 0);
47 }
48 }
49
ClientBindingAligned(const gl::VertexAttribute & attrib,GLuint stride,size_t alignment)50 ANGLE_INLINE bool ClientBindingAligned(const gl::VertexAttribute &attrib,
51 GLuint stride,
52 size_t alignment)
53 {
54 return reinterpret_cast<intptr_t>(attrib.pointer) % alignment == 0 && stride % alignment == 0;
55 }
56
ShouldCombineAttributes(vk::Renderer * renderer,const gl::VertexAttribute & attrib,const gl::VertexBinding & binding)57 bool ShouldCombineAttributes(vk::Renderer *renderer,
58 const gl::VertexAttribute &attrib,
59 const gl::VertexBinding &binding)
60 {
61 if (!renderer->getFeatures().enableMergeClientAttribBuffer.enabled)
62 {
63 return false;
64 }
65 const vk::Format &vertexFormat = renderer->getFormat(attrib.format->id);
66 return !vertexFormat.getVertexLoadRequiresConversion(false) && binding.getDivisor() == 0 &&
67 ClientBindingAligned(attrib, binding.getStride(),
68 vertexFormat.getVertexInputAlignment(false));
69 }
70
WarnOnVertexFormatConversion(ContextVk * contextVk,const vk::Format & vertexFormat,bool compressed,bool insertEventMarker)71 void WarnOnVertexFormatConversion(ContextVk *contextVk,
72 const vk::Format &vertexFormat,
73 bool compressed,
74 bool insertEventMarker)
75 {
76 if (!vertexFormat.getVertexLoadRequiresConversion(compressed))
77 {
78 return;
79 }
80
81 ANGLE_VK_PERF_WARNING(
82 contextVk, GL_DEBUG_SEVERITY_LOW,
83 "The Vulkan driver does not support vertex attribute format 0x%04X, emulating with 0x%04X",
84 vertexFormat.getIntendedFormat().glInternalFormat,
85 vertexFormat.getActualBufferFormat(compressed).glInternalFormat);
86 }
87
StreamVertexData(ContextVk * contextVk,vk::BufferHelper * dstBufferHelper,const uint8_t * srcData,size_t bytesToCopy,size_t dstOffset,size_t vertexCount,size_t srcStride,VertexCopyFunction vertexLoadFunction)88 angle::Result StreamVertexData(ContextVk *contextVk,
89 vk::BufferHelper *dstBufferHelper,
90 const uint8_t *srcData,
91 size_t bytesToCopy,
92 size_t dstOffset,
93 size_t vertexCount,
94 size_t srcStride,
95 VertexCopyFunction vertexLoadFunction)
96 {
97 vk::Renderer *renderer = contextVk->getRenderer();
98
99 // If the source pointer is null, it should not be accessed.
100 if (srcData == nullptr)
101 {
102 return angle::Result::Continue;
103 }
104
105 uint8_t *dst = dstBufferHelper->getMappedMemory() + dstOffset;
106
107 if (vertexLoadFunction != nullptr)
108 {
109 vertexLoadFunction(srcData, srcStride, vertexCount, dst);
110 }
111 else
112 {
113 memcpy(dst, srcData, bytesToCopy);
114 }
115
116 ANGLE_TRY(dstBufferHelper->flush(renderer));
117
118 return angle::Result::Continue;
119 }
120
StreamVertexDataWithDivisor(ContextVk * contextVk,vk::BufferHelper * dstBufferHelper,const uint8_t * srcData,size_t bytesToAllocate,size_t srcStride,size_t dstStride,VertexCopyFunction vertexLoadFunction,uint32_t divisor,size_t numSrcVertices)121 angle::Result StreamVertexDataWithDivisor(ContextVk *contextVk,
122 vk::BufferHelper *dstBufferHelper,
123 const uint8_t *srcData,
124 size_t bytesToAllocate,
125 size_t srcStride,
126 size_t dstStride,
127 VertexCopyFunction vertexLoadFunction,
128 uint32_t divisor,
129 size_t numSrcVertices)
130 {
131 vk::Renderer *renderer = contextVk->getRenderer();
132
133 uint8_t *dst = dstBufferHelper->getMappedMemory();
134
135 // Each source vertex is used `divisor` times before advancing. Clamp to avoid OOB reads.
136 size_t clampedSize = std::min(numSrcVertices * dstStride * divisor, bytesToAllocate);
137
138 ASSERT(clampedSize % dstStride == 0);
139 ASSERT(divisor > 0);
140
141 uint32_t srcVertexUseCount = 0;
142 for (size_t dataCopied = 0; dataCopied < clampedSize; dataCopied += dstStride)
143 {
144 vertexLoadFunction(srcData, srcStride, 1, dst);
145 srcVertexUseCount++;
146 if (srcVertexUseCount == divisor)
147 {
148 srcData += srcStride;
149 srcVertexUseCount = 0;
150 }
151 dst += dstStride;
152 }
153
154 // Satisfy robustness constraints (only if extension enabled)
155 if (contextVk->getExtensions().robustnessAny())
156 {
157 if (clampedSize < bytesToAllocate)
158 {
159 memset(dst, 0, bytesToAllocate - clampedSize);
160 }
161 }
162
163 ANGLE_TRY(dstBufferHelper->flush(renderer));
164
165 return angle::Result::Continue;
166 }
167
GetVertexCountForRange(GLint64 srcBufferBytes,uint32_t srcFormatSize,uint32_t srcVertexStride)168 size_t GetVertexCountForRange(GLint64 srcBufferBytes,
169 uint32_t srcFormatSize,
170 uint32_t srcVertexStride)
171 {
172 ASSERT(srcVertexStride != 0);
173 ASSERT(srcFormatSize != 0);
174
175 if (srcBufferBytes < srcFormatSize)
176 {
177 return 0;
178 }
179
180 size_t numVertices =
181 static_cast<size_t>(srcBufferBytes + srcVertexStride - 1) / srcVertexStride;
182 return numVertices;
183 }
184
GetVertexCount(BufferVk * srcBuffer,const gl::VertexBinding & binding,uint32_t srcFormatSize)185 size_t GetVertexCount(BufferVk *srcBuffer, const gl::VertexBinding &binding, uint32_t srcFormatSize)
186 {
187 // Bytes usable for vertex data.
188 GLint64 bytes = srcBuffer->getSize() - binding.getOffset();
189 GLuint stride = binding.getStride();
190 if (stride == 0)
191 {
192 stride = srcFormatSize;
193 }
194 return GetVertexCountForRange(bytes, srcFormatSize, stride);
195 }
196
CalculateMaxVertexCountForConversion(ContextVk * contextVk,BufferVk * srcBuffer,VertexConversionBuffer * conversion,const angle::Format & srcFormat,const angle::Format & dstFormat,size_t * maxNumVerticesOut)197 angle::Result CalculateMaxVertexCountForConversion(ContextVk *contextVk,
198 BufferVk *srcBuffer,
199 VertexConversionBuffer *conversion,
200 const angle::Format &srcFormat,
201 const angle::Format &dstFormat,
202 size_t *maxNumVerticesOut)
203 {
204 // Initialize numVertices to 0
205 *maxNumVerticesOut = 0;
206
207 unsigned srcFormatSize = srcFormat.pixelBytes;
208 unsigned dstFormatSize = dstFormat.pixelBytes;
209
210 uint32_t srcStride = conversion->getCacheKey().stride;
211 uint32_t dstStride = dstFormatSize;
212
213 ASSERT(srcStride != 0);
214 ASSERT(conversion->dirty());
215
216 // Start the range with the range from the the beginning of the buffer to the end of
217 // buffer. Then scissor it with the dirtyRange.
218 size_t srcOffset = conversion->getCacheKey().offset;
219 GLint64 srcLength = srcBuffer->getSize() - srcOffset;
220
221 // The max number of vertices from binding to the end of the buffer
222 size_t maxNumVertices = GetVertexCountForRange(srcLength, srcFormatSize, srcStride);
223 if (maxNumVertices == 0)
224 {
225 return angle::Result::Continue;
226 }
227
228 // Allocate buffer for results
229 vk::MemoryHostVisibility hostVisible = conversion->getCacheKey().hostVisible
230 ? vk::MemoryHostVisibility::Visible
231 : vk::MemoryHostVisibility::NonVisible;
232 ANGLE_TRY(contextVk->initBufferForVertexConversion(conversion, maxNumVertices * dstStride,
233 hostVisible));
234
235 // Calculate numVertices to convert
236 *maxNumVerticesOut = GetVertexCountForRange(srcLength, srcFormatSize, srcStride);
237
238 return angle::Result::Continue;
239 }
240
// Maps a dirty byte range of the source buffer onto the sub-range of the conversion that must
// be redone.  Outputs the source byte offset to start reading at, the destination byte offset
// to start writing at, and the number of vertices to convert.
void CalculateOffsetAndVertexCountForDirtyRange(BufferVk *bufferVk,
                                                VertexConversionBuffer *conversion,
                                                const angle::Format &srcFormat,
                                                const angle::Format &dstFormat,
                                                const RangeDeviceSize &dirtyRange,
                                                uint32_t *srcOffsetOut,
                                                uint32_t *dstOffsetOut,
                                                uint32_t *numVerticesOut)
{
    ASSERT(!dirtyRange.empty());
    unsigned srcFormatSize = srcFormat.pixelBytes;
    unsigned dstFormatSize = dstFormat.pixelBytes;

    // Source vertices are strided per the cache key; converted vertices are tightly packed.
    uint32_t srcStride = conversion->getCacheKey().stride;
    uint32_t dstStride = dstFormatSize;

    ASSERT(srcStride != 0);
    ASSERT(conversion->dirty());

    // Start the range with the range from the beginning of the buffer to the end of the
    // buffer. Then scissor it with the dirtyRange.
    size_t srcOffset = conversion->getCacheKey().offset;
    size_t dstOffset = 0;

    GLint64 srcLength = bufferVk->getSize() - srcOffset;

    // Adjust offset to the beginning of the dirty range: skip whole vertices that precede it,
    // since their converted values in the destination buffer are still valid.
    if (dirtyRange.low() > srcOffset)
    {
        size_t vertexCountToSkip = (static_cast<size_t>(dirtyRange.low()) - srcOffset) / srcStride;
        size_t srcBytesToSkip = vertexCountToSkip * srcStride;
        size_t dstBytesToSkip = vertexCountToSkip * dstStride;
        srcOffset += srcBytesToSkip;
        srcLength -= srcBytesToSkip;
        dstOffset += dstBytesToSkip;
    }

    // Adjust dstOffset to align to 4 bytes. The GPU convert code path always writes a uint32_t
    // and must be aligned at 4 bytes. We could possibly make it able to store at unaligned
    // uint32_t but performance will be worse than just converting a few extra vertices.
    // Stepping back whole vertices keeps src and dst offsets in correspondence.
    while ((dstOffset % 4) != 0)
    {
        dstOffset -= dstStride;
        srcOffset -= srcStride;
        srcLength += srcStride;
    }

    // Adjust length: when the dirty range ends before the end of the buffer, only convert up
    // to the end of the dirty range.
    if (dirtyRange.high() < static_cast<VkDeviceSize>(bufferVk->getSize()))
    {
        srcLength = dirtyRange.high() - srcOffset;
    }

    // Calculate numVertices to convert
    size_t numVertices = GetVertexCountForRange(srcLength, srcFormatSize, srcStride);

    *numVerticesOut = static_cast<uint32_t>(numVertices);
    *srcOffsetOut = static_cast<uint32_t>(srcOffset);
    *dstOffsetOut = static_cast<uint32_t>(dstOffset);
}
301 } // anonymous namespace
302
// Initializes all per-attribute tracking arrays to point at the context's shared empty buffer
// and precomputes which dirty bits require a graphics pipeline update.
VertexArrayVk::VertexArrayVk(ContextVk *contextVk, const gl::VertexArrayState &state)
    : VertexArrayImpl(state),
      mCurrentArrayBufferHandles{},
      mCurrentArrayBufferOffsets{},
      mCurrentArrayBufferRelativeOffsets{},
      mCurrentArrayBuffers{},
      mCurrentArrayBufferStrides{},
      mCurrentArrayBufferDivisors{},
      mCurrentElementArrayBuffer(nullptr),
      mLineLoopHelper(contextVk->getRenderer()),
      mDirtyLineLoopTranslation(true)
{
    // Until real buffers are bound, every attribute references the shared empty buffer.
    vk::BufferHelper &emptyBuffer = contextVk->getEmptyBuffer();

    mCurrentArrayBufferHandles.fill(emptyBuffer.getBuffer().getHandle());
    mCurrentArrayBufferOffsets.fill(0);
    mCurrentArrayBufferRelativeOffsets.fill(0);
    mCurrentArrayBuffers.fill(&emptyBuffer);
    mCurrentArrayBufferStrides.fill(0);
    mCurrentArrayBufferDivisors.fill(0);

    // Divisor changes always require a pipeline update; stride changes only do when the
    // dynamic-stride state is unavailable.
    mBindingDirtyBitsRequiresPipelineUpdate.set(gl::VertexArray::DIRTY_BINDING_DIVISOR);
    if (!contextVk->getFeatures().useVertexInputBindingStrideDynamicState.enabled)
    {
        mBindingDirtyBitsRequiresPipelineUpdate.set(gl::VertexArray::DIRTY_BINDING_STRIDE);
    }

    // All but DIRTY_ATTRIB_POINTER_BUFFER requires graphics pipeline update
    mAttribDirtyBitsRequiresPipelineUpdate.set(gl::VertexArray::DIRTY_ATTRIB_ENABLED);
    mAttribDirtyBitsRequiresPipelineUpdate.set(gl::VertexArray::DIRTY_ATTRIB_POINTER);
    mAttribDirtyBitsRequiresPipelineUpdate.set(gl::VertexArray::DIRTY_ATTRIB_FORMAT);
    mAttribDirtyBitsRequiresPipelineUpdate.set(gl::VertexArray::DIRTY_ATTRIB_BINDING);
}
336
~VertexArrayVk()337 VertexArrayVk::~VertexArrayVk() {}
338
destroy(const gl::Context * context)339 void VertexArrayVk::destroy(const gl::Context *context)
340 {
341 ContextVk *contextVk = vk::GetImpl(context);
342
343 for (std::unique_ptr<vk::BufferHelper> &buffer : mCachedStreamIndexBuffers)
344 {
345 buffer->release(contextVk);
346 }
347
348 mStreamedIndexData.release(contextVk);
349 mTranslatedByteIndexData.release(contextVk);
350 mTranslatedByteIndirectData.release(contextVk);
351 mLineLoopHelper.release(contextVk);
352 }
353
convertIndexBufferGPU(ContextVk * contextVk,BufferVk * bufferVk,const void * indices)354 angle::Result VertexArrayVk::convertIndexBufferGPU(ContextVk *contextVk,
355 BufferVk *bufferVk,
356 const void *indices)
357 {
358 intptr_t offsetIntoSrcData = reinterpret_cast<intptr_t>(indices);
359 size_t srcDataSize = static_cast<size_t>(bufferVk->getSize()) - offsetIntoSrcData;
360
361 // Allocate buffer for results
362 ANGLE_TRY(contextVk->initBufferForVertexConversion(&mTranslatedByteIndexData,
363 sizeof(GLushort) * srcDataSize,
364 vk::MemoryHostVisibility::NonVisible));
365 mCurrentElementArrayBuffer = mTranslatedByteIndexData.getBuffer();
366
367 vk::BufferHelper *dst = mTranslatedByteIndexData.getBuffer();
368 vk::BufferHelper *src = &bufferVk->getBuffer();
369
370 // Copy relevant section of the source into destination at allocated offset. Note that the
371 // offset returned by allocate() above is in bytes. As is the indices offset pointer.
372 UtilsVk::ConvertIndexParameters params = {};
373 params.srcOffset = static_cast<uint32_t>(offsetIntoSrcData);
374 params.dstOffset = 0;
375 params.maxIndex = static_cast<uint32_t>(bufferVk->getSize());
376
377 ANGLE_TRY(contextVk->getUtils().convertIndexBuffer(contextVk, dst, src, params));
378 mTranslatedByteIndexData.clearDirty();
379
380 return angle::Result::Continue;
381 }
382
convertIndexBufferIndirectGPU(ContextVk * contextVk,vk::BufferHelper * srcIndirectBuf,VkDeviceSize srcIndirectBufOffset,vk::BufferHelper ** indirectBufferVkOut)383 angle::Result VertexArrayVk::convertIndexBufferIndirectGPU(ContextVk *contextVk,
384 vk::BufferHelper *srcIndirectBuf,
385 VkDeviceSize srcIndirectBufOffset,
386 vk::BufferHelper **indirectBufferVkOut)
387 {
388 size_t srcDataSize = static_cast<size_t>(mCurrentElementArrayBuffer->getSize());
389 ASSERT(mCurrentElementArrayBuffer ==
390 &vk::GetImpl(getState().getElementArrayBuffer())->getBuffer());
391
392 vk::BufferHelper *srcIndexBuf = mCurrentElementArrayBuffer;
393
394 // Allocate buffer for results
395 ANGLE_TRY(contextVk->initBufferForVertexConversion(&mTranslatedByteIndexData,
396 sizeof(GLushort) * srcDataSize,
397 vk::MemoryHostVisibility::NonVisible));
398 vk::BufferHelper *dstIndexBuf = mTranslatedByteIndexData.getBuffer();
399
400 ANGLE_TRY(contextVk->initBufferForVertexConversion(&mTranslatedByteIndirectData,
401 sizeof(VkDrawIndexedIndirectCommand),
402 vk::MemoryHostVisibility::NonVisible));
403 vk::BufferHelper *dstIndirectBuf = mTranslatedByteIndirectData.getBuffer();
404
405 // Save new element array buffer
406 mCurrentElementArrayBuffer = dstIndexBuf;
407 // Tell caller what new indirect buffer is
408 *indirectBufferVkOut = dstIndirectBuf;
409
410 // Copy relevant section of the source into destination at allocated offset. Note that the
411 // offset returned by allocate() above is in bytes. As is the indices offset pointer.
412 UtilsVk::ConvertIndexIndirectParameters params = {};
413 params.srcIndirectBufOffset = static_cast<uint32_t>(srcIndirectBufOffset);
414 params.srcIndexBufOffset = 0;
415 params.dstIndexBufOffset = 0;
416 params.maxIndex = static_cast<uint32_t>(srcDataSize);
417 params.dstIndirectBufOffset = 0;
418
419 ANGLE_TRY(contextVk->getUtils().convertIndexIndirectBuffer(
420 contextVk, srcIndirectBuf, srcIndexBuf, dstIndirectBuf, dstIndexBuf, params));
421
422 mTranslatedByteIndexData.clearDirty();
423 mTranslatedByteIndirectData.clearDirty();
424
425 return angle::Result::Continue;
426 }
427
// Rewrites an indirect indexed draw so a line loop renders correctly, delegating the index
// stream translation to mLineLoopHelper.  Outputs the translated index and indirect buffers.
angle::Result VertexArrayVk::handleLineLoopIndexIndirect(ContextVk *contextVk,
                                                         gl::DrawElementsType glIndexType,
                                                         vk::BufferHelper *srcIndexBuffer,
                                                         vk::BufferHelper *srcIndirectBuffer,
                                                         VkDeviceSize indirectBufferOffset,
                                                         vk::BufferHelper **indexBufferOut,
                                                         vk::BufferHelper **indirectBufferOut)
{
    return mLineLoopHelper.streamIndicesIndirect(contextVk, glIndexType, srcIndexBuffer,
                                                 srcIndirectBuffer, indirectBufferOffset,
                                                 indexBufferOut, indirectBufferOut);
}
440
handleLineLoopIndirectDraw(const gl::Context * context,vk::BufferHelper * indirectBufferVk,VkDeviceSize indirectBufferOffset,vk::BufferHelper ** indexBufferOut,vk::BufferHelper ** indirectBufferOut)441 angle::Result VertexArrayVk::handleLineLoopIndirectDraw(const gl::Context *context,
442 vk::BufferHelper *indirectBufferVk,
443 VkDeviceSize indirectBufferOffset,
444 vk::BufferHelper **indexBufferOut,
445 vk::BufferHelper **indirectBufferOut)
446 {
447 size_t maxVertexCount = 0;
448 ContextVk *contextVk = vk::GetImpl(context);
449 const gl::AttributesMask activeAttribs =
450 context->getStateCache().getActiveBufferedAttribsMask();
451
452 const auto &attribs = mState.getVertexAttributes();
453 const auto &bindings = mState.getVertexBindings();
454
455 for (size_t attribIndex : activeAttribs)
456 {
457 const gl::VertexAttribute &attrib = attribs[attribIndex];
458 ASSERT(attrib.enabled);
459 VkDeviceSize bufSize = getCurrentArrayBuffers()[attribIndex]->getSize();
460 const gl::VertexBinding &binding = bindings[attrib.bindingIndex];
461 size_t stride = binding.getStride();
462 size_t vertexCount = static_cast<size_t>(bufSize / stride);
463 if (vertexCount > maxVertexCount)
464 {
465 maxVertexCount = vertexCount;
466 }
467 }
468 ANGLE_TRY(mLineLoopHelper.streamArrayIndirect(contextVk, maxVertexCount + 1, indirectBufferVk,
469 indirectBufferOffset, indexBufferOut,
470 indirectBufferOut));
471
472 return angle::Result::Continue;
473 }
474
// Streams client-memory index data into an internal buffer, expanding unsigned-byte indices to
// unsigned-short where the device cannot consume byte indices directly.  6-index
// unsigned-short draws (the common two-triangle quad) are served from a small cache of
// pre-created buffers.  |*bindingDirty| reports whether the element array buffer binding
// changed as a result.
angle::Result VertexArrayVk::convertIndexBufferCPU(ContextVk *contextVk,
                                                   gl::DrawElementsType indexType,
                                                   size_t indexCount,
                                                   const void *sourcePointer,
                                                   BufferBindingDirty *bindingDirty)
{
    ASSERT(!mState.getElementArrayBuffer() || indexType == gl::DrawElementsType::UnsignedByte);
    vk::Renderer *renderer = contextVk->getRenderer();
    size_t elementSize = contextVk->getVkIndexTypeSize(indexType);
    const size_t amount = elementSize * indexCount;

    // Applications commonly draw a quad as two triangles (6 indices).  Try to serve such
    // draws from pre-created BufferHelper objects to improve performance.
    if (indexCount == kStreamIndexBufferCachedIndexCount &&
        indexType == gl::DrawElementsType::UnsignedShort)
    {
        for (std::unique_ptr<vk::BufferHelper> &buffer : mCachedStreamIndexBuffers)
        {
            void *ptr = buffer->getMappedMemory();
            if (memcmp(sourcePointer, ptr, amount) == 0)
            {
                // Found a matching cached buffer, use the cached internal index buffer.
                *bindingDirty = mCurrentElementArrayBuffer == buffer.get()
                                    ? BufferBindingDirty::No
                                    : BufferBindingDirty::Yes;
                mCurrentElementArrayBuffer = buffer.get();
                return angle::Result::Continue;
            }
        }

        // If we still have capacity, cache this index buffer for future use.
        if (mCachedStreamIndexBuffers.size() < kMaxCachedStreamIndexBuffers)
        {
            std::unique_ptr<vk::BufferHelper> buffer = std::make_unique<vk::BufferHelper>();
            ANGLE_TRY(contextVk->initBufferAllocation(
                buffer.get(),
                renderer->getVertexConversionBufferMemoryTypeIndex(
                    vk::MemoryHostVisibility::Visible),
                amount, renderer->getVertexConversionBufferAlignment(), BufferUsageType::Static));
            memcpy(buffer->getMappedMemory(), sourcePointer, amount);
            ANGLE_TRY(buffer->flush(renderer));

            mCachedStreamIndexBuffers.push_back(std::move(buffer));

            *bindingDirty = BufferBindingDirty::Yes;
            mCurrentElementArrayBuffer = mCachedStreamIndexBuffers.back().get();
            return angle::Result::Continue;
        }
    }

    // Not served from the cache: stream into the reusable conversion buffer.
    ANGLE_TRY(contextVk->initBufferForVertexConversion(&mStreamedIndexData, amount,
                                                       vk::MemoryHostVisibility::Visible));
    mCurrentElementArrayBuffer = mStreamedIndexData.getBuffer();
    GLubyte *dst = mCurrentElementArrayBuffer->getMappedMemory();
    *bindingDirty = BufferBindingDirty::Yes;

    if (contextVk->shouldConvertUint8VkIndexType(indexType))
    {
        // Unsigned bytes don't have direct support in Vulkan so we have to expand the
        // memory to a GLushort.
        const GLubyte *in = static_cast<const GLubyte *>(sourcePointer);
        GLushort *expandedDst = reinterpret_cast<GLushort *>(dst);
        bool primitiveRestart = contextVk->getState().isPrimitiveRestartEnabled();

        constexpr GLubyte kUnsignedByteRestartValue = 0xFF;
        constexpr GLushort kUnsignedShortRestartValue = 0xFFFF;

        if (primitiveRestart)
        {
            for (size_t index = 0; index < indexCount; index++)
            {
                GLushort value = static_cast<GLushort>(in[index]);
                if (in[index] == kUnsignedByteRestartValue)
                {
                    // Convert from 8-bit restart value to 16-bit restart value
                    value = kUnsignedShortRestartValue;
                }
                expandedDst[index] = value;
            }
        }
        else
        {
            // Fast path for common case.
            for (size_t index = 0; index < indexCount; index++)
            {
                expandedDst[index] = static_cast<GLushort>(in[index]);
            }
        }
    }
    else
    {
        // The primitive restart value is the same for OpenGL and Vulkan,
        // so there's no need to perform any conversion.
        memcpy(dst, sourcePointer, amount);
    }

    mStreamedIndexData.clearDirty();

    return mCurrentElementArrayBuffer->flush(contextVk->getRenderer());
}
576
// We assume the buffer is completely full of the same kind of data and convert
// and/or align it as we copy it to a buffer. The assumption could be wrong
// but the alternative of copying it piecemeal on each draw would have a lot more
// overhead.

// GPU path of vertex buffer conversion: dispatches a compute-based conversion either over the
// entire buffer or over just the dirty sub-ranges, writing tightly-packed vertices into the
// conversion's destination buffer.
angle::Result VertexArrayVk::convertVertexBufferGPU(ContextVk *contextVk,
                                                    BufferVk *srcBuffer,
                                                    VertexConversionBuffer *conversion,
                                                    const angle::Format &srcFormat,
                                                    const angle::Format &dstFormat)
{
    uint32_t srcStride = conversion->getCacheKey().stride;
    // The stride must be a whole multiple of the source component size.
    ASSERT(srcStride % (srcFormat.pixelBytes / srcFormat.channelCount) == 0);

    // Size the destination buffer; an empty result means there is nothing to convert.
    size_t maxNumVertices;
    ANGLE_TRY(CalculateMaxVertexCountForConversion(contextVk, srcBuffer, conversion, srcFormat,
                                                   dstFormat, &maxNumVertices));
    if (maxNumVertices == 0)
    {
        return angle::Result::Continue;
    }

    vk::BufferHelper *srcBufferHelper = &srcBuffer->getBuffer();
    vk::BufferHelper *dstBuffer = conversion->getBuffer();

    UtilsVk::OffsetAndVertexCounts additionalOffsetVertexCounts;

    UtilsVk::ConvertVertexParameters params;
    params.srcFormat = &srcFormat;
    params.dstFormat = &dstFormat;
    params.srcStride = srcStride;
    params.vertexCount = 0;

    if (conversion->isEntireBufferDirty())
    {
        // Single dispatch covering everything from the cache key's offset onward.
        params.vertexCount = static_cast<uint32_t>(maxNumVertices);
        params.srcOffset = static_cast<uint32_t>(conversion->getCacheKey().offset);
        params.dstOffset = 0;
    }
    else
    {
        // dirtyRanges may overlap with each other. Try to do a quick merge to reduce the number of
        // dispatch calls as well as avoid redundant conversion in the overlapped area.
        conversion->consolidateDirtyRanges();

        const std::vector<RangeDeviceSize> &dirtyRanges = conversion->getDirtyBufferRanges();
        additionalOffsetVertexCounts.reserve(dirtyRanges.size());

        for (const RangeDeviceSize &dirtyRange : dirtyRanges)
        {
            if (dirtyRange.empty())
            {
                // consolidateDirtyRanges may end up with invalid range if it gets merged.
                continue;
            }

            uint32_t srcOffset, dstOffset, numVertices;
            CalculateOffsetAndVertexCountForDirtyRange(srcBuffer, conversion, srcFormat, dstFormat,
                                                       dirtyRange, &srcOffset, &dstOffset,
                                                       &numVertices);
            // The first range goes into |params|; the rest into the additional list.
            if (params.vertexCount == 0)
            {
                params.vertexCount = numVertices;
                params.srcOffset = srcOffset;
                params.dstOffset = dstOffset;
            }
            else
            {
                additionalOffsetVertexCounts.emplace_back();
                additionalOffsetVertexCounts.back().srcOffset = srcOffset;
                additionalOffsetVertexCounts.back().dstOffset = dstOffset;
                additionalOffsetVertexCounts.back().vertexCount = numVertices;
            }
        }
    }
    ANGLE_TRY(contextVk->getUtils().convertVertexBuffer(contextVk, dstBuffer, srcBufferHelper,
                                                        params, additionalOffsetVertexCounts));
    conversion->clearDirty();

    return angle::Result::Continue;
}
657
convertVertexBufferCPU(ContextVk * contextVk,BufferVk * srcBuffer,VertexConversionBuffer * conversion,const angle::Format & srcFormat,const angle::Format & dstFormat,const VertexCopyFunction vertexLoadFunction)658 angle::Result VertexArrayVk::convertVertexBufferCPU(ContextVk *contextVk,
659 BufferVk *srcBuffer,
660 VertexConversionBuffer *conversion,
661 const angle::Format &srcFormat,
662 const angle::Format &dstFormat,
663 const VertexCopyFunction vertexLoadFunction)
664 {
665 ANGLE_TRACE_EVENT0("gpu.angle", "VertexArrayVk::convertVertexBufferCpu");
666
667 size_t maxNumVertices;
668 ANGLE_TRY(CalculateMaxVertexCountForConversion(contextVk, srcBuffer, conversion, srcFormat,
669 dstFormat, &maxNumVertices));
670 if (maxNumVertices == 0)
671 {
672 return angle::Result::Continue;
673 }
674
675 uint8_t *src = nullptr;
676 ANGLE_TRY(srcBuffer->mapImpl(contextVk, GL_MAP_READ_BIT, reinterpret_cast<void **>(&src)));
677 uint32_t srcStride = conversion->getCacheKey().stride;
678
679 if (conversion->isEntireBufferDirty())
680 {
681 size_t srcOffset = conversion->getCacheKey().offset;
682 size_t dstOffset = 0;
683 const uint8_t *srcBytes = src + srcOffset;
684 size_t bytesToCopy = maxNumVertices * dstFormat.pixelBytes;
685 ANGLE_TRY(StreamVertexData(contextVk, conversion->getBuffer(), srcBytes, bytesToCopy,
686 dstOffset, maxNumVertices, srcStride, vertexLoadFunction));
687 }
688 else
689 {
690 // dirtyRanges may overlap with each other. Try to do a quick merge to avoid redundant
691 // conversion in the overlapped area.
692 conversion->consolidateDirtyRanges();
693
694 const std::vector<RangeDeviceSize> &dirtyRanges = conversion->getDirtyBufferRanges();
695 for (const RangeDeviceSize &dirtyRange : dirtyRanges)
696 {
697 if (dirtyRange.empty())
698 {
699 // consolidateDirtyRanges may end up with invalid range if it gets merged.
700 continue;
701 }
702
703 uint32_t srcOffset, dstOffset, numVertices;
704 CalculateOffsetAndVertexCountForDirtyRange(srcBuffer, conversion, srcFormat, dstFormat,
705 dirtyRange, &srcOffset, &dstOffset,
706 &numVertices);
707
708 if (numVertices > 0)
709 {
710 const uint8_t *srcBytes = src + srcOffset;
711 size_t bytesToCopy = maxNumVertices * dstFormat.pixelBytes;
712 ANGLE_TRY(StreamVertexData(contextVk, conversion->getBuffer(), srcBytes,
713 bytesToCopy, dstOffset, maxNumVertices, srcStride,
714 vertexLoadFunction));
715 }
716 }
717 }
718
719 conversion->clearDirty();
720 ANGLE_TRY(srcBuffer->unmapImpl(contextVk));
721
722 return angle::Result::Continue;
723 }
724
updateCurrentElementArrayBuffer()725 void VertexArrayVk::updateCurrentElementArrayBuffer()
726 {
727 ASSERT(mState.getElementArrayBuffer() != nullptr);
728 ASSERT(mState.getElementArrayBuffer()->getSize() > 0);
729
730 BufferVk *bufferVk = vk::GetImpl(mState.getElementArrayBuffer());
731 mCurrentElementArrayBuffer = &bufferVk->getBuffer();
732 }
733
syncState(const gl::Context * context,const gl::VertexArray::DirtyBits & dirtyBits,gl::VertexArray::DirtyAttribBitsArray * attribBits,gl::VertexArray::DirtyBindingBitsArray * bindingBits)734 angle::Result VertexArrayVk::syncState(const gl::Context *context,
735 const gl::VertexArray::DirtyBits &dirtyBits,
736 gl::VertexArray::DirtyAttribBitsArray *attribBits,
737 gl::VertexArray::DirtyBindingBitsArray *bindingBits)
738 {
739 ASSERT(dirtyBits.any());
740
741 ContextVk *contextVk = vk::GetImpl(context);
742 contextVk->getPerfCounters().vertexArraySyncStateCalls++;
743
744 const std::vector<gl::VertexAttribute> &attribs = mState.getVertexAttributes();
745 const std::vector<gl::VertexBinding> &bindings = mState.getVertexBindings();
746
747 for (auto iter = dirtyBits.begin(), endIter = dirtyBits.end(); iter != endIter; ++iter)
748 {
749 size_t dirtyBit = *iter;
750 switch (dirtyBit)
751 {
752 case gl::VertexArray::DIRTY_BIT_LOST_OBSERVATION:
753 {
754 // If vertex array was not observing while unbound, we need to check buffer's
755 // internal storage and take action if buffer storage has changed while not
756 // observing.
757 if (contextVk->getFeatures().compressVertexData.enabled ||
758 mContentsObservers->any())
759 {
760 // We may have lost buffer content change when it became non-current. In that
761 // case we always assume buffer has changed. If compressVertexData.enabled is
762 // true, it also depends on buffer usage which may have changed.
763 iter.setLaterBits(
764 gl::VertexArray::DirtyBits(mState.getBufferBindingMask().to_ulong()
765 << gl::VertexArray::DIRTY_BIT_BINDING_0));
766 }
767 else
768 {
769 for (size_t bindingIndex : mState.getBufferBindingMask())
770 {
771 const gl::Buffer *bufferGL = bindings[bindingIndex].getBuffer().get();
772 vk::BufferSerial bufferSerial = vk::GetImpl(bufferGL)->getBufferSerial();
773 for (size_t attribIndex : bindings[bindingIndex].getBoundAttributesMask())
774 {
775 if (attribs[attribIndex].enabled &&
776 (!bufferSerial.valid() ||
777 bufferSerial != mCurrentArrayBufferSerial[attribIndex]))
778 {
779 iter.setLaterBit(gl::VertexArray::DIRTY_BIT_BINDING_0 +
780 bindingIndex);
781 break;
782 }
783 }
784 }
785 }
786 break;
787 }
788
789 case gl::VertexArray::DIRTY_BIT_ELEMENT_ARRAY_BUFFER:
790 case gl::VertexArray::DIRTY_BIT_ELEMENT_ARRAY_BUFFER_DATA:
791 {
792 gl::Buffer *bufferGL = mState.getElementArrayBuffer();
793 if (bufferGL && bufferGL->getSize() > 0)
794 {
795 // Note that just updating buffer data may still result in a new
796 // vk::BufferHelper allocation.
797 updateCurrentElementArrayBuffer();
798 }
799 else
800 {
801 mCurrentElementArrayBuffer = nullptr;
802 }
803
804 mLineLoopBufferFirstIndex.reset();
805 mLineLoopBufferLastIndex.reset();
806 ANGLE_TRY(contextVk->onIndexBufferChange(mCurrentElementArrayBuffer));
807 mDirtyLineLoopTranslation = true;
808 break;
809 }
810
811 #define ANGLE_VERTEX_DIRTY_ATTRIB_FUNC(INDEX) \
812 case gl::VertexArray::DIRTY_BIT_ATTRIB_0 + INDEX: \
813 { \
814 gl::VertexArray::DirtyAttribBits dirtyAttribBitsRequiresPipelineUpdate = \
815 (*attribBits)[INDEX] & mAttribDirtyBitsRequiresPipelineUpdate; \
816 const bool bufferOnly = dirtyAttribBitsRequiresPipelineUpdate.none(); \
817 ANGLE_TRY(syncDirtyAttrib(contextVk, attribs[INDEX], \
818 bindings[attribs[INDEX].bindingIndex], INDEX, bufferOnly)); \
819 (*attribBits)[INDEX].reset(); \
820 break; \
821 }
822
823 ANGLE_VERTEX_INDEX_CASES(ANGLE_VERTEX_DIRTY_ATTRIB_FUNC)
824
825 // Since BINDING already implies DATA and ATTRIB change, we remove these here to avoid redundant
826 // processing.
827 #define ANGLE_VERTEX_DIRTY_BINDING_FUNC(INDEX) \
828 case gl::VertexArray::DIRTY_BIT_BINDING_0 + INDEX: \
829 { \
830 gl::VertexArray::DirtyBindingBits dirtyBindingBitsRequirePipelineUpdate = \
831 (*bindingBits)[INDEX] & mBindingDirtyBitsRequiresPipelineUpdate; \
832 \
833 for (size_t attribIndex : bindings[INDEX].getBoundAttributesMask()) \
834 { \
835 gl::VertexArray::DirtyAttribBits dirtyAttribBitsRequiresPipelineUpdate = \
836 (*attribBits)[attribIndex] & mAttribDirtyBitsRequiresPipelineUpdate; \
837 const bool bufferOnly = dirtyBindingBitsRequirePipelineUpdate.none() && \
838 dirtyAttribBitsRequiresPipelineUpdate.none(); \
839 ANGLE_TRY(syncDirtyAttrib(contextVk, attribs[attribIndex], bindings[INDEX], \
840 attribIndex, bufferOnly)); \
841 iter.resetLaterBit(gl::VertexArray::DIRTY_BIT_BUFFER_DATA_0 + attribIndex); \
842 iter.resetLaterBit(gl::VertexArray::DIRTY_BIT_ATTRIB_0 + attribIndex); \
843 (*attribBits)[attribIndex].reset(); \
844 } \
845 (*bindingBits)[INDEX].reset(); \
846 break; \
847 }
848
849 ANGLE_VERTEX_INDEX_CASES(ANGLE_VERTEX_DIRTY_BINDING_FUNC)
850
851 #define ANGLE_VERTEX_DIRTY_BUFFER_DATA_FUNC(INDEX) \
852 case gl::VertexArray::DIRTY_BIT_BUFFER_DATA_0 + INDEX: \
853 ANGLE_TRY(syncDirtyAttrib(contextVk, attribs[INDEX], \
854 bindings[attribs[INDEX].bindingIndex], INDEX, false)); \
855 iter.resetLaterBit(gl::VertexArray::DIRTY_BIT_ATTRIB_0 + INDEX); \
856 (*attribBits)[INDEX].reset(); \
857 break;
858
859 ANGLE_VERTEX_INDEX_CASES(ANGLE_VERTEX_DIRTY_BUFFER_DATA_FUNC)
860
861 default:
862 UNREACHABLE();
863 break;
864 }
865 }
866
867 return angle::Result::Continue;
}
869
870 #undef ANGLE_VERTEX_DIRTY_ATTRIB_FUNC
871 #undef ANGLE_VERTEX_DIRTY_BINDING_FUNC
872 #undef ANGLE_VERTEX_DIRTY_BUFFER_DATA_FUNC
873
setDefaultPackedInput(ContextVk * contextVk,size_t attribIndex,angle::FormatID * formatOut)874 ANGLE_INLINE angle::Result VertexArrayVk::setDefaultPackedInput(ContextVk *contextVk,
875 size_t attribIndex,
876 angle::FormatID *formatOut)
877 {
878 const gl::State &glState = contextVk->getState();
879 const gl::VertexAttribCurrentValueData &defaultValue =
880 glState.getVertexAttribCurrentValues()[attribIndex];
881
882 *formatOut = GetCurrentValueFormatID(defaultValue.Type);
883
884 return contextVk->onVertexAttributeChange(attribIndex, 0, 0, *formatOut, false, 0, nullptr);
885 }
886
updateActiveAttribInfo(ContextVk * contextVk)887 angle::Result VertexArrayVk::updateActiveAttribInfo(ContextVk *contextVk)
888 {
889 const std::vector<gl::VertexAttribute> &attribs = mState.getVertexAttributes();
890 const std::vector<gl::VertexBinding> &bindings = mState.getVertexBindings();
891
892 // Update pipeline cache with current active attribute info
893 for (size_t attribIndex : mState.getEnabledAttributesMask())
894 {
895 const gl::VertexAttribute &attrib = attribs[attribIndex];
896 const gl::VertexBinding &binding = bindings[attribs[attribIndex].bindingIndex];
897 const angle::FormatID format = attrib.format->id;
898
899 ANGLE_TRY(contextVk->onVertexAttributeChange(
900 attribIndex, mCurrentArrayBufferStrides[attribIndex], binding.getDivisor(), format,
901 mCurrentArrayBufferCompressed.test(attribIndex),
902 mCurrentArrayBufferRelativeOffsets[attribIndex], mCurrentArrayBuffers[attribIndex]));
903
904 mCurrentArrayBufferFormats[attribIndex] = format;
905 }
906
907 return angle::Result::Continue;
908 }
909
// Synchronizes one attribute's Vulkan-side state (handle, offset, stride, format,
// compression) with the front-end attribute/binding state.
//
// bufferOnly == true means the caller believes only the bound buffer changed, so a
// full pipeline update may be skipped; this function demotes bufferOnly to false
// whenever it detects a change that does affect the pipeline (streaming<->buffered
// switch, or a format conversion that may alter the stride).
angle::Result VertexArrayVk::syncDirtyAttrib(ContextVk *contextVk,
                                             const gl::VertexAttribute &attrib,
                                             const gl::VertexBinding &binding,
                                             size_t attribIndex,
                                             bool bufferOnly)
{
    vk::Renderer *renderer = contextVk->getRenderer();
    if (attrib.enabled)
    {
        const vk::Format &vertexFormat = renderer->getFormat(attrib.format->id);

        // Init attribute offset to the front-end value
        mCurrentArrayBufferRelativeOffsets[attribIndex] = attrib.relativeOffset;
        gl::Buffer *bufferGL = binding.getBuffer().get();
        // Emulated and/or client-side attribs will be streamed
        bool isStreamingVertexAttrib =
            (binding.getDivisor() > renderer->getMaxVertexAttribDivisor()) || (bufferGL == nullptr);
        // If we are switching between streaming and buffer mode, set bufferOnly to false since we
        // are actually changing the buffer.
        if (bufferOnly && isStreamingVertexAttrib != mStreamingVertexAttribsMask.test(attribIndex))
        {
            bufferOnly = false;
        }
        mStreamingVertexAttribsMask.set(attribIndex, isStreamingVertexAttrib);
        bool compressed = false;

        if (bufferGL)
        {
            mContentsObservers->disableForBuffer(bufferGL, static_cast<uint32_t>(attribIndex));
        }

        // Note: !isStreamingVertexAttrib implies bufferGL != nullptr (see above).
        if (!isStreamingVertexAttrib && bufferGL->getSize() > 0)
        {
            BufferVk *bufferVk = vk::GetImpl(bufferGL);
            const angle::Format &srcFormat = vertexFormat.getIntendedFormat();
            unsigned srcFormatSize = srcFormat.pixelBytes;
            // A stride of zero means tightly packed at the source format's size.
            uint32_t srcStride = binding.getStride() == 0 ? srcFormatSize : binding.getStride();
            size_t numVertices = GetVertexCount(bufferVk, binding, srcFormatSize);
            bool bindingIsAligned =
                BindingIsAligned(srcFormat, binding.getOffset() + attrib.relativeOffset, srcStride);

            // Compress static vertex data when the feature is enabled and the format supports it.
            if (renderer->getFeatures().compressVertexData.enabled &&
                gl::IsStaticBufferUsage(bufferGL->getUsage()) &&
                vertexFormat.canCompressBufferData())
            {
                compressed = true;
            }

            // A conversion pass is required when the format cannot be fetched directly or
            // when the source binding is not aligned for direct vertex fetch.
            bool needsConversion =
                numVertices > 0 &&
                (vertexFormat.getVertexLoadRequiresConversion(compressed) || !bindingIsAligned);

            if (needsConversion)
            {
                const angle::Format &dstFormat = vertexFormat.getActualBufferFormat(compressed);
                // Converted buffer is tightly packed
                uint32_t dstStride = dstFormat.pixelBytes;

                ASSERT(vertexFormat.getVertexInputAlignment(compressed) <=
                       vk::kVertexBufferAlignment);

                // Re-enable content observation: converted data must be refreshed if the
                // source buffer's contents change.
                mContentsObservers->enableForBuffer(bufferGL, static_cast<uint32_t>(attribIndex));

                WarnOnVertexFormatConversion(contextVk, vertexFormat, compressed, true);

                const VertexConversionBuffer::CacheKey cacheKey{
                    srcFormat.id, srcStride,
                    static_cast<size_t>(binding.getOffset()) + attrib.relativeOffset,
                    !bindingIsAligned, false};

                VertexConversionBuffer *conversion =
                    bufferVk->getVertexConversionBuffer(renderer, cacheKey);

                // Converted attribs are packed in their own VK buffer so offset is relative to the
                // binding and conversion's offset. The conversion buffer tries to reuse the
                // existing buffer as much as possible to reduce the amount of data that has to be
                // converted. When binding's offset changes, it will check if the new offset and
                // existing buffer's offset are a multiple of strides apart. If yes it will reuse.
                // If the new offset is larger, all existing data are still valid. If the new offset
                // is smaller it will mark the newly exposed range dirty and then rely on
                // ContextVk::initBufferForVertexConversion to decide if buffer's size is big enough
                // or not and reallocate (and mark entire buffer dirty) if needed.
                //
                // bufferVk:-----------------------------------------------------------------------
                //              |                  |
                //              |                  binding.offset + attrib.relativeOffset
                //              conversion->getCacheKey().offset
                //
                // conversion.buffer: -------------------------------------------------------------
                //                       |
                //                       dstRelativeOffset
                size_t srcRelativeOffset =
                    binding.getOffset() + attrib.relativeOffset - conversion->getCacheKey().offset;
                size_t numberOfVerticesToSkip = srcRelativeOffset / srcStride;
                size_t dstRelativeOffset = numberOfVerticesToSkip * dstStride;

                if (conversion->dirty())
                {
                    if (compressed)
                    {
                        INFO() << "Compressing vertex data in buffer " << bufferGL->id().value
                               << " from " << ToUnderlying(srcFormat.id) << " to "
                               << ToUnderlying(dstFormat.id) << ".";
                    }

                    if (bindingIsAligned)
                    {
                        // Aligned data can be converted on the GPU.
                        ANGLE_TRY(convertVertexBufferGPU(contextVk, bufferVk, conversion, srcFormat,
                                                         dstFormat));
                    }
                    else
                    {
                        ANGLE_VK_PERF_WARNING(
                            contextVk, GL_DEBUG_SEVERITY_HIGH,
                            "GPU stall due to vertex format conversion of unaligned data");

                        ANGLE_TRY(convertVertexBufferCPU(
                            contextVk, bufferVk, conversion, srcFormat, dstFormat,
                            vertexFormat.getVertexLoadFunction(compressed)));
                    }

                    // If conversion happens, the destination buffer stride may be changed,
                    // therefore an attribute change needs to be called. Note that it may trigger
                    // unnecessary vulkan PSO update when the destination buffer stride does not
                    // change, but for simplicity just make it conservative
                    bufferOnly = false;
                }

                vk::BufferHelper *bufferHelper = conversion->getBuffer();
                mCurrentArrayBuffers[attribIndex] = bufferHelper;
                mCurrentArrayBufferSerial[attribIndex] = bufferHelper->getBufferSerial();
                VkDeviceSize bufferOffset;
                mCurrentArrayBufferHandles[attribIndex] =
                    bufferHelper
                        ->getBufferForVertexArray(contextVk, bufferHelper->getSize(), &bufferOffset)
                        .getHandle();
                ASSERT(BindingIsAligned(dstFormat, bufferOffset + dstRelativeOffset, dstStride));
                mCurrentArrayBufferOffsets[attribIndex] = bufferOffset + dstRelativeOffset;
                // Relative offset was folded into the conversion; converted data is tightly packed.
                mCurrentArrayBufferRelativeOffsets[attribIndex] = 0;
                mCurrentArrayBufferStrides[attribIndex] = dstStride;
            }
            else
            {
                if (numVertices == 0)
                {
                    // Nothing to fetch: point the attribute at the context's empty buffer.
                    vk::BufferHelper &emptyBuffer = contextVk->getEmptyBuffer();

                    mCurrentArrayBuffers[attribIndex] = &emptyBuffer;
                    mCurrentArrayBufferSerial[attribIndex] = emptyBuffer.getBufferSerial();
                    mCurrentArrayBufferHandles[attribIndex] = emptyBuffer.getBuffer().getHandle();
                    mCurrentArrayBufferOffsets[attribIndex] = emptyBuffer.getOffset();
                    mCurrentArrayBufferStrides[attribIndex] = 0;
                }
                else
                {
                    // Direct fetch from the application's buffer, no conversion needed.
                    vk::BufferHelper &bufferHelper = bufferVk->getBuffer();
                    mCurrentArrayBuffers[attribIndex] = &bufferHelper;
                    mCurrentArrayBufferSerial[attribIndex] = bufferHelper.getBufferSerial();
                    VkDeviceSize bufferOffset;
                    mCurrentArrayBufferHandles[attribIndex] =
                        bufferHelper
                            .getBufferForVertexArray(contextVk, bufferVk->getSize(), &bufferOffset)
                            .getHandle();

                    // Vulkan requires the offset is within the buffer. We use robust access
                    // behaviour to reset the offset if it starts outside the buffer.
                    mCurrentArrayBufferOffsets[attribIndex] =
                        binding.getOffset() < static_cast<GLint64>(bufferVk->getSize())
                            ? binding.getOffset() + bufferOffset
                            : bufferOffset;

                    mCurrentArrayBufferStrides[attribIndex] = binding.getStride();
                }
            }
        }
        else
        {
            // Streaming attribute: actual data is uploaded at draw time (see
            // updateStreamedAttribs); bind the empty buffer for now.
            vk::BufferHelper &emptyBuffer = contextVk->getEmptyBuffer();
            mCurrentArrayBuffers[attribIndex] = &emptyBuffer;
            mCurrentArrayBufferSerial[attribIndex] = emptyBuffer.getBufferSerial();
            mCurrentArrayBufferHandles[attribIndex] = emptyBuffer.getBuffer().getHandle();
            mCurrentArrayBufferOffsets[attribIndex] = emptyBuffer.getOffset();

            // Combined (interleaved) attributes keep their client stride; otherwise the
            // streamed copy is tightly packed at the actual buffer format's size.
            bool combined = ShouldCombineAttributes(renderer, attrib, binding);
            mCurrentArrayBufferStrides[attribIndex] =
                combined ? binding.getStride()
                         : vertexFormat.getActualBufferFormat(compressed).pixelBytes;
        }

        if (bufferOnly)
        {
            // Only the buffer changed; skip the heavier attribute/pipeline update.
            ANGLE_TRY(contextVk->onVertexBufferChange(mCurrentArrayBuffers[attribIndex]));
        }
        else
        {
            const angle::FormatID format = attrib.format->id;
            ANGLE_TRY(contextVk->onVertexAttributeChange(
                attribIndex, mCurrentArrayBufferStrides[attribIndex], binding.getDivisor(), format,
                compressed, mCurrentArrayBufferRelativeOffsets[attribIndex],
                mCurrentArrayBuffers[attribIndex]));

            mCurrentArrayBufferFormats[attribIndex] = format;
            mCurrentArrayBufferCompressed[attribIndex] = compressed;
            mCurrentArrayBufferDivisors[attribIndex] = binding.getDivisor();
        }
    }
    else
    {
        // Disabled attribute: reset state and fall back to the default (current) value.
        contextVk->invalidateDefaultAttribute(attribIndex);

        // These will be filled out by the ContextVk.
        vk::BufferHelper &emptyBuffer = contextVk->getEmptyBuffer();
        mCurrentArrayBuffers[attribIndex] = &emptyBuffer;
        mCurrentArrayBufferSerial[attribIndex] = emptyBuffer.getBufferSerial();
        mCurrentArrayBufferHandles[attribIndex] = emptyBuffer.getBuffer().getHandle();
        mCurrentArrayBufferOffsets[attribIndex] = emptyBuffer.getOffset();
        mCurrentArrayBufferStrides[attribIndex] = 0;
        mCurrentArrayBufferDivisors[attribIndex] = 0;
        mCurrentArrayBufferCompressed[attribIndex] = false;
        mCurrentArrayBufferRelativeOffsets[attribIndex] = 0;

        ANGLE_TRY(setDefaultPackedInput(contextVk, attribIndex,
                                        &mCurrentArrayBufferFormats[attribIndex]));
    }

    return angle::Result::Continue;
}
1137
// Computes, for each active streamed attribute, the client-memory address range
// that must be uploaded for vertices [startVertex, endVertex), and merges
// overlapping ranges of "combinable" (interleaved) attributes so each merged
// group is uploaded with a single copy.
//
// On return:
//  - mergeRangesOut[i] holds attribute i's (possibly merged) range.
//  - mergedIndexesOut[i] names the representative attribute whose range and
//    streamed buffer attribute i shares (itself if not merged).
//  - The returned mask marks the attributes eligible for combining.
gl::AttributesMask VertexArrayVk::mergeClientAttribsRange(
    vk::Renderer *renderer,
    const gl::AttributesMask activeStreamedAttribs,
    size_t startVertex,
    size_t endVertex,
    std::array<AttributeRange, gl::MAX_VERTEX_ATTRIBS> &mergeRangesOut,
    std::array<size_t, gl::MAX_VERTEX_ATTRIBS> &mergedIndexesOut) const
{
    const std::vector<gl::VertexAttribute> &attribs = mState.getVertexAttributes();
    const std::vector<gl::VertexBinding> &bindings = mState.getVertexBindings();
    gl::AttributesMask attributeMaskCanCombine;
    angle::FixedVector<size_t, gl::MAX_VERTEX_ATTRIBS> combinedIndexes;
    for (size_t attribIndex : activeStreamedAttribs)
    {
        const gl::VertexAttribute &attrib = attribs[attribIndex];
        ASSERT(attrib.enabled);
        const gl::VertexBinding &binding = bindings[attrib.bindingIndex];
        const vk::Format &vertexFormat = renderer->getFormat(attrib.format->id);
        bool combined = ShouldCombineAttributes(renderer, attrib, binding);
        attributeMaskCanCombine.set(attribIndex, combined);
        if (combined)
        {
            combinedIndexes.push_back(attribIndex);
        }
        GLuint pixelBytes = vertexFormat.getActualBufferFormat(false).pixelBytes;
        // Combined attributes keep the client interleaved stride; others are tightly packed.
        size_t destStride = combined ? binding.getStride() : pixelBytes;
        uintptr_t startAddress = reinterpret_cast<uintptr_t>(attrib.pointer);
        mergeRangesOut[attribIndex].startAddr = startAddress;
        // End address covers the last referenced vertex's full element.
        mergeRangesOut[attribIndex].endAddr =
            startAddress + (endVertex - 1) * destStride + pixelBytes;
        // Copying starts at the first referenced vertex, not at address 0 of the attribute.
        mergeRangesOut[attribIndex].copyStartAddr =
            startAddress + startVertex * binding.getStride();
        mergedIndexesOut[attribIndex] = attribIndex;
    }
    if (attributeMaskCanCombine.none())
    {
        return attributeMaskCanCombine;
    }
    auto comp = [&mergeRangesOut](size_t a, size_t b) -> bool {
        return mergeRangesOut[a] < mergeRangesOut[b];
    };
    // Only sort combined range indexes.
    std::sort(combinedIndexes.begin(), combinedIndexes.end(), comp);
    // Merge combined range span.
    auto next = combinedIndexes.begin();
    auto cur = next++;
    while (next != combinedIndexes.end() || (cur != next))
    {
        // Cur and next overlaps: merge next into cur and move next.
        if (next != combinedIndexes.end() &&
            mergeRangesOut[*cur].endAddr >= mergeRangesOut[*next].startAddr)
        {
            mergeRangesOut[*cur].endAddr =
                std::max(mergeRangesOut[*cur].endAddr, mergeRangesOut[*next].endAddr);
            mergeRangesOut[*cur].copyStartAddr =
                std::min(mergeRangesOut[*cur].copyStartAddr, mergeRangesOut[*next].copyStartAddr);
            // Redirect next's attribute to the representative it was merged into.
            mergedIndexesOut[*next] = mergedIndexesOut[*cur];
            ++next;
        }
        else
        {
            // No overlap: advance cur.  Entries between cur and next were already merged,
            // so the new cur inherits the merged range from its predecessor.
            ++cur;
            if (cur != next)
            {
                mergeRangesOut[*cur] = mergeRangesOut[*(cur - 1)];
            }
            else if (next != combinedIndexes.end())
            {
                ++next;
            }
        }
    }
    return attributeMaskCanCombine;
}
1212
// Handle copying client attribs and/or expanding attrib buffer in case where attribute
// divisor value has to be emulated.  Streams the data for every active streamed
// attribute into per-draw Vulkan buffers and records the resulting
// handle/offset/stride in the mCurrentArrayBuffer* arrays.
angle::Result VertexArrayVk::updateStreamedAttribs(const gl::Context *context,
                                                   GLint firstVertex,
                                                   GLsizei vertexOrIndexCount,
                                                   GLsizei instanceCount,
                                                   gl::DrawElementsType indexTypeOrInvalid,
                                                   const void *indices)
{
    ContextVk *contextVk = vk::GetImpl(context);
    vk::Renderer *renderer = contextVk->getRenderer();

    const gl::AttributesMask activeAttribs =
        context->getStateCache().getActiveClientAttribsMask() |
        context->getStateCache().getActiveBufferedAttribsMask();
    const gl::AttributesMask activeStreamedAttribs = mStreamingVertexAttribsMask & activeAttribs;

    // Early return for corner case where emulated buffered attribs are not active
    if (!activeStreamedAttribs.any())
    {
        return angle::Result::Continue;
    }

    // Resolve the vertex range [startVertex, startVertex + vertexCount) referenced by the draw.
    GLint startVertex;
    size_t vertexCount;
    ANGLE_TRY(GetVertexRangeInfo(context, firstVertex, vertexOrIndexCount, indexTypeOrInvalid,
                                 indices, 0, &startVertex, &vertexCount));

    ASSERT(vertexCount > 0);
    const auto &attribs = mState.getVertexAttributes();
    const auto &bindings = mState.getVertexBindings();

    // Merge overlapping client-attribute ranges so interleaved attributes can share one
    // streamed buffer and a single copy (see mergeClientAttribsRange).
    std::array<size_t, gl::MAX_VERTEX_ATTRIBS> mergedIndexes;
    std::array<AttributeRange, gl::MAX_VERTEX_ATTRIBS> mergeRanges;
    std::array<vk::BufferHelper *, gl::MAX_VERTEX_ATTRIBS> attribBufferHelper = {};
    auto mergeAttribMask =
        mergeClientAttribsRange(renderer, activeStreamedAttribs, startVertex,
                                startVertex + vertexCount, mergeRanges, mergedIndexes);

    for (size_t attribIndex : activeStreamedAttribs)
    {
        const gl::VertexAttribute &attrib = attribs[attribIndex];
        ASSERT(attrib.enabled);
        const gl::VertexBinding &binding = bindings[attrib.bindingIndex];

        const vk::Format &vertexFormat = renderer->getFormat(attrib.format->id);
        const angle::Format &dstFormat = vertexFormat.getActualBufferFormat(false);
        GLuint pixelBytes = dstFormat.pixelBytes;

        // Streamed data is never compressed.
        const bool compressed = false;
        ASSERT(vertexFormat.getVertexInputAlignment(false) <= vk::kVertexBufferAlignment);

        vk::BufferHelper *vertexDataBuffer = nullptr;
        const uint8_t *src = static_cast<const uint8_t *>(attrib.pointer);
        const uint32_t divisor = binding.getDivisor();

        // Combined attributes keep the client-interleaved stride; others are repacked tightly.
        bool combined = mergeAttribMask.test(attribIndex);
        GLuint stride = combined ? binding.getStride() : pixelBytes;
        VkDeviceSize startOffset = 0;
        if (divisor > 0)
        {
            // Instanced attrib
            if (divisor > renderer->getMaxVertexAttribDivisor())
            {
                // Divisor will be set to 1 & so update buffer to have 1 attrib per instance
                size_t bytesToAllocate = instanceCount * stride;

                // Allocate buffer for results
                ANGLE_TRY(contextVk->allocateStreamedVertexBuffer(attribIndex, bytesToAllocate,
                                                                  &vertexDataBuffer));

                gl::Buffer *bufferGL = binding.getBuffer().get();
                if (bufferGL != nullptr)
                {
                    // Only do the data copy if src buffer is valid.
                    if (bufferGL->getSize() > 0)
                    {
                        // Map buffer to expand attribs for divisor emulation
                        BufferVk *bufferVk = vk::GetImpl(binding.getBuffer().get());
                        void *buffSrc = nullptr;
                        ANGLE_TRY(bufferVk->mapImpl(contextVk, GL_MAP_READ_BIT, &buffSrc));
                        src = reinterpret_cast<const uint8_t *>(buffSrc) + binding.getOffset();

                        uint32_t srcAttributeSize =
                            static_cast<uint32_t>(ComputeVertexAttributeTypeSize(attrib));

                        size_t numVertices = GetVertexCount(bufferVk, binding, srcAttributeSize);

                        ANGLE_TRY(StreamVertexDataWithDivisor(
                            contextVk, vertexDataBuffer, src, bytesToAllocate, binding.getStride(),
                            stride, vertexFormat.getVertexLoadFunction(compressed), divisor,
                            numVertices));

                        ANGLE_TRY(bufferVk->unmapImpl(contextVk));
                    }
                    else if (contextVk->getExtensions().robustnessAny())
                    {
                        // Satisfy robustness constraints (only if extension enabled)
                        uint8_t *dst = vertexDataBuffer->getMappedMemory();
                        memset(dst, 0, bytesToAllocate);
                    }
                }
                else
                {
                    // Client-memory source: expand directly from the client pointer.
                    size_t numVertices = instanceCount;
                    ANGLE_TRY(StreamVertexDataWithDivisor(
                        contextVk, vertexDataBuffer, src, bytesToAllocate, binding.getStride(),
                        stride, vertexFormat.getVertexLoadFunction(compressed), divisor,
                        numVertices));
                }
            }
            else
            {
                ASSERT(binding.getBuffer().get() == nullptr);
                // The divisor is natively supported: one attribute per ceil(instanceCount/divisor).
                size_t count = UnsignedCeilDivide(instanceCount, divisor);
                size_t bytesToAllocate = count * stride;

                // Allocate buffer for results
                ANGLE_TRY(contextVk->allocateStreamedVertexBuffer(attribIndex, bytesToAllocate,
                                                                  &vertexDataBuffer));

                ANGLE_TRY(StreamVertexData(contextVk, vertexDataBuffer, src, bytesToAllocate, 0,
                                           count, binding.getStride(),
                                           vertexFormat.getVertexLoadFunction(compressed)));
            }
        }
        else
        {
            ASSERT(binding.getBuffer().get() == nullptr);
            // Non-instanced client attribute: stream (or reuse) the merged range's buffer.
            size_t mergedAttribIdx = mergedIndexes[attribIndex];
            const AttributeRange &range = mergeRanges[attribIndex];
            if (attribBufferHelper[mergedAttribIdx] == nullptr)
            {
                size_t destOffset =
                    combined ? range.copyStartAddr - range.startAddr : startVertex * stride;
                size_t bytesToAllocate = range.endAddr - range.startAddr;
                ANGLE_TRY(contextVk->allocateStreamedVertexBuffer(
                    mergedAttribIdx, bytesToAllocate, &attribBufferHelper[mergedAttribIdx]));
                // A null load function requests a straight copy of the interleaved data.
                ANGLE_TRY(StreamVertexData(
                    contextVk, attribBufferHelper[mergedAttribIdx],
                    (const uint8_t *)range.copyStartAddr, bytesToAllocate - destOffset, destOffset,
                    vertexCount, binding.getStride(),
                    combined ? nullptr : vertexFormat.getVertexLoadFunction(compressed)));
            }
            vertexDataBuffer = attribBufferHelper[mergedAttribIdx];
            // For merged attributes, offset within the shared buffer is the attribute's
            // distance from the merged range's start.
            startOffset = combined ? (uintptr_t)attrib.pointer - range.startAddr : 0;
        }
        ASSERT(vertexDataBuffer != nullptr);
        mCurrentArrayBuffers[attribIndex] = vertexDataBuffer;
        mCurrentArrayBufferSerial[attribIndex] = vertexDataBuffer->getBufferSerial();
        VkDeviceSize bufferOffset;
        mCurrentArrayBufferHandles[attribIndex] =
            vertexDataBuffer
                ->getBufferForVertexArray(contextVk, vertexDataBuffer->getSize(), &bufferOffset)
                .getHandle();
        mCurrentArrayBufferOffsets[attribIndex] = bufferOffset + startOffset;
        mCurrentArrayBufferStrides[attribIndex] = stride;
        mCurrentArrayBufferDivisors[attribIndex] = divisor;
        ASSERT(BindingIsAligned(dstFormat, mCurrentArrayBufferOffsets[attribIndex],
                                mCurrentArrayBufferStrides[attribIndex]));
    }

    return angle::Result::Continue;
}
1377
handleLineLoop(ContextVk * contextVk,GLint firstVertex,GLsizei vertexOrIndexCount,gl::DrawElementsType indexTypeOrInvalid,const void * indices,vk::BufferHelper ** indexBufferOut,uint32_t * indexCountOut)1378 angle::Result VertexArrayVk::handleLineLoop(ContextVk *contextVk,
1379 GLint firstVertex,
1380 GLsizei vertexOrIndexCount,
1381 gl::DrawElementsType indexTypeOrInvalid,
1382 const void *indices,
1383 vk::BufferHelper **indexBufferOut,
1384 uint32_t *indexCountOut)
1385 {
1386 if (indexTypeOrInvalid != gl::DrawElementsType::InvalidEnum)
1387 {
1388 // Handle GL_LINE_LOOP drawElements.
1389 if (mDirtyLineLoopTranslation)
1390 {
1391 gl::Buffer *elementArrayBuffer = mState.getElementArrayBuffer();
1392
1393 if (!elementArrayBuffer)
1394 {
1395 ANGLE_TRY(mLineLoopHelper.streamIndices(
1396 contextVk, indexTypeOrInvalid, vertexOrIndexCount,
1397 reinterpret_cast<const uint8_t *>(indices), indexBufferOut, indexCountOut));
1398 }
1399 else
1400 {
1401 // When using an element array buffer, 'indices' is an offset to the first element.
1402 intptr_t offset = reinterpret_cast<intptr_t>(indices);
1403 BufferVk *elementArrayBufferVk = vk::GetImpl(elementArrayBuffer);
1404 ANGLE_TRY(mLineLoopHelper.getIndexBufferForElementArrayBuffer(
1405 contextVk, elementArrayBufferVk, indexTypeOrInvalid, vertexOrIndexCount, offset,
1406 indexBufferOut, indexCountOut));
1407 }
1408 }
1409
1410 // If we've had a drawArrays call with a line loop before, we want to make sure this is
1411 // invalidated the next time drawArrays is called since we use the same index buffer for
1412 // both calls.
1413 mLineLoopBufferFirstIndex.reset();
1414 mLineLoopBufferLastIndex.reset();
1415 return angle::Result::Continue;
1416 }
1417
1418 // Note: Vertex indexes can be arbitrarily large.
1419 uint32_t clampedVertexCount = gl::clampCast<uint32_t>(vertexOrIndexCount);
1420
1421 // Handle GL_LINE_LOOP drawArrays.
1422 size_t lastVertex = static_cast<size_t>(firstVertex + clampedVertexCount);
1423 if (!mLineLoopBufferFirstIndex.valid() || !mLineLoopBufferLastIndex.valid() ||
1424 mLineLoopBufferFirstIndex != firstVertex || mLineLoopBufferLastIndex != lastVertex)
1425 {
1426 ANGLE_TRY(mLineLoopHelper.getIndexBufferForDrawArrays(contextVk, clampedVertexCount,
1427 firstVertex, indexBufferOut));
1428
1429 mLineLoopBufferFirstIndex = firstVertex;
1430 mLineLoopBufferLastIndex = lastVertex;
1431 }
1432 else
1433 {
1434 *indexBufferOut = mLineLoopHelper.getCurrentIndexBuffer();
1435 }
1436 *indexCountOut = vertexOrIndexCount + 1;
1437
1438 return angle::Result::Continue;
1439 }
1440
updateDefaultAttrib(ContextVk * contextVk,size_t attribIndex)1441 angle::Result VertexArrayVk::updateDefaultAttrib(ContextVk *contextVk, size_t attribIndex)
1442 {
1443 if (!mState.getEnabledAttributesMask().test(attribIndex))
1444 {
1445 vk::BufferHelper *bufferHelper;
1446 ANGLE_TRY(
1447 contextVk->allocateStreamedVertexBuffer(attribIndex, kDefaultValueSize, &bufferHelper));
1448
1449 const gl::VertexAttribCurrentValueData &defaultValue =
1450 contextVk->getState().getVertexAttribCurrentValues()[attribIndex];
1451 uint8_t *ptr = bufferHelper->getMappedMemory();
1452 memcpy(ptr, &defaultValue.Values, kDefaultValueSize);
1453 ANGLE_TRY(bufferHelper->flush(contextVk->getRenderer()));
1454
1455 VkDeviceSize bufferOffset;
1456 mCurrentArrayBufferHandles[attribIndex] =
1457 bufferHelper->getBufferForVertexArray(contextVk, kDefaultValueSize, &bufferOffset)
1458 .getHandle();
1459 mCurrentArrayBufferOffsets[attribIndex] = bufferOffset;
1460 mCurrentArrayBuffers[attribIndex] = bufferHelper;
1461 mCurrentArrayBufferSerial[attribIndex] = bufferHelper->getBufferSerial();
1462 mCurrentArrayBufferStrides[attribIndex] = 0;
1463 mCurrentArrayBufferDivisors[attribIndex] = 0;
1464
1465 ANGLE_TRY(setDefaultPackedInput(contextVk, attribIndex,
1466 &mCurrentArrayBufferFormats[attribIndex]));
1467 }
1468
1469 return angle::Result::Continue;
1470 }
1471 } // namespace rx
1472