//
// Copyright 2018 The ANGLE Project Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// vk_cache_utils.h:
//    Contains the classes for the Pipeline State Object cache as well as the RenderPass cache.
//    Also contains the structures for the packed descriptions for the RenderPass and Pipeline.
//

#ifndef LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_
#define LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_

#include <deque>

#include "common/Color.h"
#include "common/FixedVector.h"
#include "common/SimpleMutex.h"
#include "common/WorkerThread.h"
#include "libANGLE/Uniform.h"
#include "libANGLE/renderer/vulkan/ShaderInterfaceVariableInfoMap.h"
#include "libANGLE/renderer/vulkan/vk_resource.h"
#include "libANGLE/renderer/vulkan/vk_utils.h"

namespace gl
{
class ProgramExecutable;
}  // namespace gl

namespace rx
{
class ShaderInterfaceVariableInfoMap;
class UpdateDescriptorSetsBuilder;

// Some descriptor set and pipeline layout constants.
//
// The set/binding assignment is done as follows:
//
// - Set 0 contains uniform blocks created to encompass default uniforms. 1 binding is used per
//   pipeline stage. Additionally, transform feedback buffers are bound from binding 2 and up.
//   For internal shaders, set 0 is used for all the needed resources.
// - Set 1 contains all textures (including texture buffers).
// - Set 2 contains all other shader resources, such as uniform and storage blocks, atomic counter
//   buffers, images and image buffers.
// - Set 3 is reserved for OpenCL.

enum class DescriptorSetIndex : uint32_t
{
    Internal = 0,               // Internal shaders
    UniformsAndXfb = Internal,  // Uniforms set index
    Texture = 1,                // Textures set index
    ShaderResource = 2,         // Other shader resources set index

    // CL specific naming for set indices
    LiteralSampler = 0,
    KernelArguments = 1,
    ModuleConstants = 2,
    Printf = 3,

    InvalidEnum = 4,
    EnumCount = InvalidEnum,
};

namespace vk
{
class Context;
class BufferHelper;
class DynamicDescriptorPool;
class SamplerHelper;
enum class ImageLayout;
class PipelineCacheAccess;
class RenderPassCommandBufferHelper;
class PackedClearValuesArray;
class AttachmentOpsArray;

using PipelineLayoutPtr = AtomicSharedPtr<PipelineLayout>;
using DescriptorSetLayoutPtr = AtomicSharedPtr<DescriptorSetLayout>;

// Packed Vk resource descriptions.
// Most Vk types use many more bits than required to represent the underlying data.
// Since ANGLE wants to cache things like RenderPasses and Pipeline State Objects using
// hashing (and also needs to check equality) we can optimize these operations by
// using fewer bits. Hence the packed types.
//
// One implementation note: these types could potentially be improved by using even
// fewer bits. For example, boolean values could be represented by a single bit instead
// of a uint8_t. However at the current time there are concerns about the portability
// of bitfield operators, and complexity issues with using bit mask operations. This is
// something we will likely want to investigate as the Vulkan implementation progresses.
//
// Second implementation note: the struct packing is also a bit fragile, and some of the
// packing requirements depend on using alignas and field ordering to get the result of
// packing nicely into the desired space. This is something we could also potentially fix
// with a redesign to use bitfields or bit mask operations.

// Enable struct padding warnings for the code below since it is used in caches.
ANGLE_ENABLE_STRUCT_PADDING_WARNINGS

enum class ResourceAccess
{
    Unused = 0x0,
    ReadOnly = 0x1,
    WriteOnly = 0x2,
    ReadWrite = ReadOnly | WriteOnly,
};

inline void UpdateAccess(ResourceAccess *oldAccess, ResourceAccess newAccess)
{
    *oldAccess = static_cast<ResourceAccess>(ToUnderlying(newAccess) | ToUnderlying(*oldAccess));
}
inline bool HasResourceWriteAccess(ResourceAccess access)
{
    return (ToUnderlying(access) & ToUnderlying(ResourceAccess::WriteOnly)) != 0;
}
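
// Illustrative usage (example only, not part of this header): merging a read
// access with a write access yields ReadWrite, which the write-access query
// then detects.
//
//   ResourceAccess access = ResourceAccess::ReadOnly;
//   UpdateAccess(&access, ResourceAccess::WriteOnly);
//   // access is now ResourceAccess::ReadWrite
//   ASSERT(HasResourceWriteAccess(access));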

enum class RenderPassLoadOp
{
    Load = VK_ATTACHMENT_LOAD_OP_LOAD,
    Clear = VK_ATTACHMENT_LOAD_OP_CLEAR,
    DontCare = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
    None,
};
enum class RenderPassStoreOp
{
    Store = VK_ATTACHMENT_STORE_OP_STORE,
    DontCare = VK_ATTACHMENT_STORE_OP_DONT_CARE,
    None,
};

enum class FramebufferFetchMode
{
    None,
    Color,
    DepthStencil,
    ColorAndDepthStencil,
};
FramebufferFetchMode GetProgramFramebufferFetchMode(const gl::ProgramExecutable *executable);
ANGLE_INLINE bool FramebufferFetchModeHasColor(FramebufferFetchMode framebufferFetchMode)
{
    static_assert(ToUnderlying(FramebufferFetchMode::Color) == 0x1);
    static_assert(ToUnderlying(FramebufferFetchMode::ColorAndDepthStencil) == 0x3);
    return (ToUnderlying(framebufferFetchMode) & 0x1) != 0;
}
ANGLE_INLINE bool FramebufferFetchModeHasDepthStencil(FramebufferFetchMode framebufferFetchMode)
{
    static_assert(ToUnderlying(FramebufferFetchMode::DepthStencil) == 0x2);
    static_assert(ToUnderlying(FramebufferFetchMode::ColorAndDepthStencil) == 0x3);
    return (ToUnderlying(framebufferFetchMode) & 0x2) != 0;
}
ANGLE_INLINE FramebufferFetchMode FramebufferFetchModeMerge(FramebufferFetchMode mode1,
                                                            FramebufferFetchMode mode2)
{
    constexpr uint32_t kNone = ToUnderlying(FramebufferFetchMode::None);
    constexpr uint32_t kColor = ToUnderlying(FramebufferFetchMode::Color);
    constexpr uint32_t kDepthStencil = ToUnderlying(FramebufferFetchMode::DepthStencil);
    constexpr uint32_t kColorAndDepthStencil =
        ToUnderlying(FramebufferFetchMode::ColorAndDepthStencil);
    static_assert(kNone == 0);
    static_assert((kColor & kColorAndDepthStencil) == kColor);
    static_assert((kDepthStencil & kColorAndDepthStencil) == kDepthStencil);
    static_assert((kColor | kDepthStencil) == kColorAndDepthStencil);

    return static_cast<FramebufferFetchMode>(ToUnderlying(mode1) | ToUnderlying(mode2));
}
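
// Illustrative usage (example only): since the enum values form a bit mask, as
// the static_asserts above verify, merging Color with DepthStencil yields
// ColorAndDepthStencil.
//
//   FramebufferFetchMode merged = FramebufferFetchModeMerge(
//       FramebufferFetchMode::Color, FramebufferFetchMode::DepthStencil);
//   // merged == FramebufferFetchMode::ColorAndDepthStencil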

// There can be a maximum of IMPLEMENTATION_MAX_DRAW_BUFFERS color and resolve attachments, plus -
// - one depth/stencil attachment
// - one depth/stencil resolve attachment
// - one fragment shading rate attachment
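// For example, with gl::IMPLEMENTATION_MAX_DRAW_BUFFERS == 8, this comes out to
// 8 color + 8 color resolve + 3 = 19 attachments.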
constexpr size_t kMaxFramebufferAttachments = gl::IMPLEMENTATION_MAX_DRAW_BUFFERS * 2 + 3;
template <typename T>
using FramebufferAttachmentArray = std::array<T, kMaxFramebufferAttachments>;
template <typename T>
using FramebufferAttachmentsVector = angle::FixedVector<T, kMaxFramebufferAttachments>;
using FramebufferAttachmentMask = angle::BitSet<kMaxFramebufferAttachments>;

constexpr size_t kMaxFramebufferNonResolveAttachments = gl::IMPLEMENTATION_MAX_DRAW_BUFFERS + 1;
template <typename T>
using FramebufferNonResolveAttachmentArray = std::array<T, kMaxFramebufferNonResolveAttachments>;
using FramebufferNonResolveAttachmentMask = angle::BitSet16<kMaxFramebufferNonResolveAttachments>;

class PackedAttachmentIndex;

class alignas(4) RenderPassDesc final
{
  public:
    RenderPassDesc();
    ~RenderPassDesc();
    RenderPassDesc(const RenderPassDesc &other);
    RenderPassDesc &operator=(const RenderPassDesc &other);

    // Set format for an enabled GL color attachment.
    void packColorAttachment(size_t colorIndexGL, angle::FormatID formatID);
    // Mark a GL color attachment index as disabled.
    void packColorAttachmentGap(size_t colorIndexGL);
    // The depth/stencil attachment must be packed last, right after the color attachments
    // (including gaps), i.e. with an index starting from |colorAttachmentRange()|.
    void packDepthStencilAttachment(angle::FormatID angleFormatID);
    void updateDepthStencilAccess(ResourceAccess access);
    // Indicate that a color attachment should have a corresponding resolve attachment.
    void packColorResolveAttachment(size_t colorIndexGL);
    // Indicate that a YUV texture is attached to the resolve attachment.
    void packYUVResolveAttachment(size_t colorIndexGL);
    // Remove the resolve attachment. Used when optimizing a blit through the resolve attachment,
    // where a resolve attachment is temporarily packed and then removed.
    void removeColorResolveAttachment(size_t colorIndexGL);
    // Indicate that a color attachment should take its data from the resolve attachment initially.
    void packColorUnresolveAttachment(size_t colorIndexGL);
    void removeColorUnresolveAttachment(size_t colorIndexGL);
    // Indicate that a depth/stencil attachment should have a corresponding resolve attachment.
    void packDepthResolveAttachment();
    void packStencilResolveAttachment();
    // Indicate that a depth/stencil attachment should take its data from the resolve attachment
    // initially.
    void packDepthUnresolveAttachment();
    void packStencilUnresolveAttachment();
    void removeDepthStencilUnresolveAttachment();

    PackedAttachmentIndex getPackedColorAttachmentIndex(size_t colorIndexGL);

    void setWriteControlMode(gl::SrgbWriteControlMode mode);

    size_t hash() const;

    // Color attachments are in [0, colorAttachmentRange()), with possible gaps.
    size_t colorAttachmentRange() const { return mColorAttachmentRange; }
    size_t depthStencilAttachmentIndex() const { return colorAttachmentRange(); }

    bool isColorAttachmentEnabled(size_t colorIndexGL) const;
    bool hasYUVResolveAttachment() const { return mIsYUVResolve; }
    bool hasDepthStencilAttachment() const;
    gl::DrawBufferMask getColorResolveAttachmentMask() const { return mColorResolveAttachmentMask; }
    bool hasColorResolveAttachment(size_t colorIndexGL) const
    {
        return mColorResolveAttachmentMask.test(colorIndexGL);
    }
    gl::DrawBufferMask getColorUnresolveAttachmentMask() const
    {
        return mColorUnresolveAttachmentMask;
    }
    bool hasColorUnresolveAttachment(size_t colorIndexGL) const
    {
        return mColorUnresolveAttachmentMask.test(colorIndexGL);
    }
    bool hasDepthStencilResolveAttachment() const { return mResolveDepth || mResolveStencil; }
    bool hasDepthResolveAttachment() const { return mResolveDepth; }
    bool hasStencilResolveAttachment() const { return mResolveStencil; }
    bool hasDepthStencilUnresolveAttachment() const { return mUnresolveDepth || mUnresolveStencil; }
    bool hasDepthUnresolveAttachment() const { return mUnresolveDepth; }
    bool hasStencilUnresolveAttachment() const { return mUnresolveStencil; }
    gl::SrgbWriteControlMode getSRGBWriteControlMode() const
    {
        return static_cast<gl::SrgbWriteControlMode>(mSrgbWriteControl);
    }

    bool isLegacyDitherEnabled() const { return mLegacyDitherEnabled; }

    void setLegacyDither(bool enabled);

    // Get the number of clearable attachments in the Vulkan render pass, i.e. after removing
    // disabled color attachments.
    size_t clearableAttachmentCount() const;
    // Get the total number of attachments in the Vulkan render pass, i.e. after removing disabled
    // color attachments.
    size_t attachmentCount() const;

    void setSamples(GLint samples) { mSamples = static_cast<uint8_t>(samples); }
    uint8_t samples() const { return mSamples; }

    void setViewCount(GLsizei viewCount) { mViewCount = static_cast<uint8_t>(viewCount); }
    uint8_t viewCount() const { return mViewCount; }

    void setFramebufferFetchMode(FramebufferFetchMode framebufferFetchMode)
    {
        SetBitField(mFramebufferFetchMode, framebufferFetchMode);
    }
    FramebufferFetchMode framebufferFetchMode() const
    {
        return static_cast<FramebufferFetchMode>(mFramebufferFetchMode);
    }
    bool hasColorFramebufferFetch() const
    {
        return FramebufferFetchModeHasColor(framebufferFetchMode());
    }
    bool hasDepthStencilFramebufferFetch() const
    {
        return FramebufferFetchModeHasDepthStencil(framebufferFetchMode());
    }

    void updateRenderToTexture(bool isRenderToTexture) { mIsRenderToTexture = isRenderToTexture; }
    bool isRenderToTexture() const { return mIsRenderToTexture; }

    void setFragmentShadingAttachment(bool value) { mHasFragmentShadingAttachment = value; }
    bool hasFragmentShadingAttachment() const { return mHasFragmentShadingAttachment; }

    angle::FormatID operator[](size_t index) const
    {
        ASSERT(index < gl::IMPLEMENTATION_MAX_DRAW_BUFFERS + 1);
        return static_cast<angle::FormatID>(mAttachmentFormats[index]);
    }

    // Start a render pass with a render pass object.
    void beginRenderPass(ErrorContext *context,
                         PrimaryCommandBuffer *primary,
                         const RenderPass &renderPass,
                         VkFramebuffer framebuffer,
                         const gl::Rectangle &renderArea,
                         VkSubpassContents subpassContents,
                         PackedClearValuesArray &clearValues,
                         const VkRenderPassAttachmentBeginInfo *attachmentBeginInfo) const;

    // Start a render pass with dynamic rendering.
    void beginRendering(ErrorContext *context,
                        PrimaryCommandBuffer *primary,
                        const gl::Rectangle &renderArea,
                        VkSubpassContents subpassContents,
                        const FramebufferAttachmentsVector<VkImageView> &attachmentViews,
                        const AttachmentOpsArray &ops,
                        PackedClearValuesArray &clearValues,
                        uint32_t layerCount) const;

    void populateRenderingInheritanceInfo(
        Renderer *renderer,
        VkCommandBufferInheritanceRenderingInfo *infoOut,
        gl::DrawBuffersArray<VkFormat> *colorFormatStorageOut) const;

    // Calculate perf counters for a dynamic rendering render pass instance. For render pass
    // objects, the perf counters are updated when creating the render pass, where access to
    // ContextVk is available.
    void updatePerfCounters(ErrorContext *context,
                            const FramebufferAttachmentsVector<VkImageView> &attachmentViews,
                            const AttachmentOpsArray &ops,
                            angle::VulkanPerfCounters *countersOut);

  private:
    uint8_t mSamples;
    uint8_t mColorAttachmentRange;

    // Multiview
    uint8_t mViewCount;

    // sRGB
    uint8_t mSrgbWriteControl : 1;

    // Framebuffer fetch, one of FramebufferFetchMode values
    uint8_t mFramebufferFetchMode : 2;

    // Depth/stencil resolve
    uint8_t mResolveDepth : 1;
    uint8_t mResolveStencil : 1;

    // Multisampled render to texture
    uint8_t mIsRenderToTexture : 1;
    uint8_t mUnresolveDepth : 1;
    uint8_t mUnresolveStencil : 1;

    // Dithering state when using VK_EXT_legacy_dithering
    uint8_t mLegacyDitherEnabled : 1;

    // external_format_resolve
    uint8_t mIsYUVResolve : 1;

    // Foveated rendering
    uint8_t mHasFragmentShadingAttachment : 1;

    // Available space for expansion.
    uint8_t mPadding2 : 5;

    // Whether each color attachment has a corresponding resolve attachment. Color resolve
    // attachments can be used to optimize resolve through glBlitFramebuffer() as well as support
    // GL_EXT_multisampled_render_to_texture and GL_EXT_multisampled_render_to_texture2.
    gl::DrawBufferMask mColorResolveAttachmentMask;

    // Whether each color attachment with a corresponding resolve attachment should be initialized
    // with said resolve attachment in an initial subpass. This is an optimization to avoid
    // loadOp=LOAD on the implicit multisampled image used with multisampled-render-to-texture
    // render targets. This operation is referred to as "unresolve".
    //
    // Unused when VK_EXT_multisampled_render_to_single_sampled is available.
    gl::DrawBufferMask mColorUnresolveAttachmentMask;

    // Color attachment formats are stored with their GL attachment indices. The depth/stencil
    // attachment formats follow the last enabled color attachment. When creating a render pass,
    // the disabled attachments are removed and the resulting attachments are packed.
    //
    // The attachment indices provided as input to various functions in this file are thus GL
    // attachment indices. These indices are marked as such, e.g. colorIndexGL. The render pass
    // (and corresponding framebuffer object) lists the packed attachments, with the corresponding
    // indices marked with Vk, e.g. colorIndexVk. The subpass attachment references create the
    // link between the two index spaces. The subpass declares attachment references with GL
    // indices (which corresponds to the location decoration of shader outputs). The attachment
    // references then contain the Vulkan indices or VK_ATTACHMENT_UNUSED.
    //
    // For example, if GL uses color attachments 0 and 3, then there are two render pass
    // attachments (indexed 0 and 1) and 4 subpass attachments:
    //
    //  - Subpass attachment 0 -> Renderpass attachment 0
    //  - Subpass attachment 1 -> VK_ATTACHMENT_UNUSED
    //  - Subpass attachment 2 -> VK_ATTACHMENT_UNUSED
    //  - Subpass attachment 3 -> Renderpass attachment 1
    //
    // The resolve attachments are packed after the non-resolve attachments. They use the same
    // formats, so they are not specified in this array.
    FramebufferNonResolveAttachmentArray<uint8_t> mAttachmentFormats;
};

bool operator==(const RenderPassDesc &lhs, const RenderPassDesc &rhs);

constexpr size_t kRenderPassDescSize = sizeof(RenderPassDesc);
static_assert(kRenderPassDescSize == 16, "Size check failed");

enum class GraphicsPipelineSubset
{
    Complete,  // Include all subsets
    Shaders,   // Include only the shader subsets, excluding vertex input and fragment output state.
};

enum class CacheLookUpFeedback
{
    None,
    Hit,
    Miss,
    LinkedDrawHit,
    LinkedDrawMiss,
    WarmUpHit,
    WarmUpMiss,
    UtilsHit,
    UtilsMiss,
};

struct PackedAttachmentOpsDesc final
{
    // RenderPassLoadOp is in range [0, 3], and RenderPassStoreOp is in range [0, 2].
    uint16_t loadOp : 2;
    uint16_t storeOp : 2;
    uint16_t stencilLoadOp : 2;
    uint16_t stencilStoreOp : 2;
    // If a corresponding resolve attachment exists, storeOp may already be DONT_CARE, and it's
    // unclear whether the attachment was invalidated or not. This information is passed along here
    // so that the resolve attachment's storeOp can be set to DONT_CARE if the attachment is
    // invalidated, and if possible removed from the list of resolve attachments altogether. Note
    // that the latter may not be possible if the render pass has multiple subpasses due to Vulkan
    // render pass compatibility rules (not an issue with dynamic rendering).
    uint16_t isInvalidated : 1;
    uint16_t isStencilInvalidated : 1;
    uint16_t padding1 : 6;

    // Layouts take values from ImageLayout, so they are small. Layouts that are possible here are
    // placed at the beginning of that enum.
    uint16_t initialLayout : 5;
    uint16_t finalLayout : 5;
    uint16_t finalResolveLayout : 5;
    uint16_t padding2 : 1;
};

static_assert(sizeof(PackedAttachmentOpsDesc) == 4, "Size check failed");
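
// Bit budget check, derived from the fields above: the first uint16_t packs
// 2 + 2 + 2 + 2 + 1 + 1 + 6 = 16 bits and the second packs 5 + 5 + 5 + 1 = 16
// bits, matching the 4 bytes asserted.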

class AttachmentOpsArray final
{
  public:
    AttachmentOpsArray();
    ~AttachmentOpsArray();
    AttachmentOpsArray(const AttachmentOpsArray &other);
    AttachmentOpsArray &operator=(const AttachmentOpsArray &other);

    const PackedAttachmentOpsDesc &operator[](PackedAttachmentIndex index) const
    {
        return mOps[index.get()];
    }
    PackedAttachmentOpsDesc &operator[](PackedAttachmentIndex index) { return mOps[index.get()]; }

    // Initialize an attachment op with all load and store operations.
    void initWithLoadStore(PackedAttachmentIndex index,
                           ImageLayout initialLayout,
                           ImageLayout finalLayout);

    void setLayouts(PackedAttachmentIndex index,
                    ImageLayout initialLayout,
                    ImageLayout finalLayout);
    void setOps(PackedAttachmentIndex index, RenderPassLoadOp loadOp, RenderPassStoreOp storeOp);
    void setStencilOps(PackedAttachmentIndex index,
                       RenderPassLoadOp loadOp,
                       RenderPassStoreOp storeOp);

    void setClearOp(PackedAttachmentIndex index);
    void setClearStencilOp(PackedAttachmentIndex index);

    size_t hash() const;

  private:
    gl::AttachmentArray<PackedAttachmentOpsDesc> mOps;
};

bool operator==(const AttachmentOpsArray &lhs, const AttachmentOpsArray &rhs);

static_assert(sizeof(AttachmentOpsArray) == 40, "Size check failed");

struct PackedAttribDesc final
{
    uint8_t format;
    uint8_t divisor;
    uint16_t offset : kAttributeOffsetMaxBits;
    uint16_t compressed : 1;
};

constexpr size_t kPackedAttribDescSize = sizeof(PackedAttribDesc);
static_assert(kPackedAttribDescSize == 4, "Size mismatch");

struct PackedVertexInputAttributes final
{
    PackedAttribDesc attribs[gl::MAX_VERTEX_ATTRIBS];

    // Component type of the corresponding input in the program. Used to adjust the format if
    // necessary. Takes values from gl::ComponentType.
    uint32_t shaderAttribComponentType;

    // Although technically stride can be any value in ES 2.0, in practice supporting stride
    // greater than MAX_USHORT should not be that helpful. Note that stride limits are
    // introduced in ES 3.1.
    // Dynamic in VK_EXT_extended_dynamic_state
    uint16_t strides[gl::MAX_VERTEX_ATTRIBS];
};

constexpr size_t kPackedVertexInputAttributesSize = sizeof(PackedVertexInputAttributes);
static_assert(kPackedVertexInputAttributesSize == 100, "Size mismatch");
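
// Size math, assuming gl::MAX_VERTEX_ATTRIBS == 16: 16 * 4 (attribs) +
// 4 (shaderAttribComponentType) + 16 * 2 (strides) = 100 bytes.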

struct PackedInputAssemblyState final
{
    struct
    {
        uint32_t topology : 4;

        // Dynamic in VK_EXT_extended_dynamic_state2
        uint32_t primitiveRestartEnable : 1;  // ds2

        // Whether dynamic state for vertex stride from VK_EXT_extended_dynamic_state can be used.
        // Used by GraphicsPipelineDesc::hash() to exclude |strides| from the hash.
        uint32_t useVertexInputBindingStrideDynamicState : 1;

        // Whether dynamic state for vertex input from VK_EXT_vertex_input_dynamic_state can be
        // used. Used by GraphicsPipelineDesc::hash() to exclude |PackedVertexInputAttributes|
        // from the hash.
        uint32_t useVertexInputDynamicState : 1;

        // Whether the pipeline is robust (vertex input copy)
        uint32_t isRobustContext : 1;
        // Whether the pipeline needs access to protected content (vertex input copy)
        uint32_t isProtectedContext : 1;

        // Which attributes are actually active in the program and should affect the pipeline.
        uint32_t programActiveAttributeLocations : gl::MAX_VERTEX_ATTRIBS;

        uint32_t padding : 23 - gl::MAX_VERTEX_ATTRIBS;
    } bits;
};

constexpr size_t kPackedInputAssemblyStateSize = sizeof(PackedInputAssemblyState);
static_assert(kPackedInputAssemblyStateSize == 4, "Size mismatch");

struct PackedStencilOpState final
{
    uint8_t fail : 4;
    uint8_t pass : 4;
    uint8_t depthFail : 4;
    uint8_t compare : 4;
};

constexpr size_t kPackedStencilOpSize = sizeof(PackedStencilOpState);
static_assert(kPackedStencilOpSize == 2, "Size check failed");

struct PackedPreRasterizationAndFragmentStates final
{
    struct
    {
        // Affecting VkPipelineViewportStateCreateInfo
        uint32_t viewportNegativeOneToOne : 1;

        // Affecting VkPipelineRasterizationStateCreateInfo
        uint32_t depthClampEnable : 1;
        uint32_t polygonMode : 2;
        // Dynamic in VK_EXT_extended_dynamic_state
        uint32_t cullMode : 4;
        uint32_t frontFace : 4;
        // Dynamic in VK_EXT_extended_dynamic_state2
        uint32_t rasterizerDiscardEnable : 1;
        uint32_t depthBiasEnable : 1;

        // Affecting VkPipelineTessellationStateCreateInfo
        uint32_t patchVertices : 6;

        // Affecting VkPipelineDepthStencilStateCreateInfo
        uint32_t depthBoundsTest : 1;
        // Dynamic in VK_EXT_extended_dynamic_state
        uint32_t depthTest : 1;
        uint32_t depthWrite : 1;
        uint32_t stencilTest : 1;
        uint32_t nonZeroStencilWriteMaskWorkaround : 1;
        // Dynamic in VK_EXT_extended_dynamic_state2
        uint32_t depthCompareOp : 4;

        // Affecting specialization constants
        uint32_t surfaceRotation : 1;

        // Whether the pipeline is robust (shader stages copy)
        uint32_t isRobustContext : 1;
        // Whether the pipeline needs access to protected content (shader stages copy)
        uint32_t isProtectedContext : 1;
    } bits;

    // Affecting specialization constants
    static_assert(gl::IMPLEMENTATION_MAX_DRAW_BUFFERS <= 8,
                  "2 bits per draw buffer is needed for dither emulation");
    uint16_t emulatedDitherControl;
    uint16_t padding;

    // Affecting VkPipelineDepthStencilStateCreateInfo
    // Dynamic in VK_EXT_extended_dynamic_state
    PackedStencilOpState front;
    PackedStencilOpState back;
};

constexpr size_t kPackedPreRasterizationAndFragmentStatesSize =
    sizeof(PackedPreRasterizationAndFragmentStates);
static_assert(kPackedPreRasterizationAndFragmentStatesSize == 12, "Size check failed");

struct PackedMultisampleAndSubpassState final
{
    struct
    {
        // Affecting VkPipelineMultisampleStateCreateInfo
        // Note: Only up to 16xMSAA is supported in the Vulkan backend.
        uint16_t sampleMask;
        // Stored as minus one so sample count 16 can fit in 4 bits.
        uint16_t rasterizationSamplesMinusOne : 4;
        uint16_t sampleShadingEnable : 1;
        uint16_t alphaToCoverageEnable : 1;
        uint16_t alphaToOneEnable : 1;
        // The subpass index affects both the shader stages and the fragment output similarly to
        // multisampled state, so they are grouped together.
        // Note: Currently only 2 subpasses possible.
        uint16_t subpass : 1;
        // 8-bit normalized instead of float to align the struct.
        uint16_t minSampleShading : 8;
    } bits;
};

constexpr size_t kPackedMultisampleAndSubpassStateSize = sizeof(PackedMultisampleAndSubpassState);
static_assert(kPackedMultisampleAndSubpassStateSize == 4, "Size check failed");

struct PackedColorBlendAttachmentState final
{
    uint16_t srcColorBlendFactor : 5;
    uint16_t dstColorBlendFactor : 5;
    uint16_t colorBlendOp : 6;
    uint16_t srcAlphaBlendFactor : 5;
    uint16_t dstAlphaBlendFactor : 5;
    uint16_t alphaBlendOp : 6;
};

constexpr size_t kPackedColorBlendAttachmentStateSize = sizeof(PackedColorBlendAttachmentState);
static_assert(kPackedColorBlendAttachmentStateSize == 4, "Size check failed");

struct PackedColorBlendState final
{
    uint8_t colorWriteMaskBits[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS / 2];
    PackedColorBlendAttachmentState attachments[gl::IMPLEMENTATION_MAX_DRAW_BUFFERS];
};

constexpr size_t kPackedColorBlendStateSize = sizeof(PackedColorBlendState);
static_assert(kPackedColorBlendStateSize == 36, "Size check failed");
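
// Size math, assuming gl::IMPLEMENTATION_MAX_DRAW_BUFFERS == 8: 8 / 2 = 4 bytes
// of write masks (4 bits per attachment) + 8 * 4 = 32 bytes of attachment
// state, for the 36 bytes asserted.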

struct PackedBlendMaskAndLogicOpState final
{
    struct
    {
        uint32_t blendEnableMask : 8;
        uint32_t logicOpEnable : 1;
        // Dynamic in VK_EXT_extended_dynamic_state2
        uint32_t logicOp : 4;

        // Whether the pipeline needs access to protected content (fragment output copy)
        uint32_t isProtectedContext : 1;

        // Output that is present in the framebuffer but is never written to in the shader. Used by
        // GL_ANGLE_robust_fragment_shader_output which defines the behavior in this case (which is
        // to mask these outputs)
        uint32_t missingOutputsMask : gl::IMPLEMENTATION_MAX_DRAW_BUFFERS;

        uint32_t padding : 18 - gl::IMPLEMENTATION_MAX_DRAW_BUFFERS;
    } bits;
};

constexpr size_t kPackedBlendMaskAndLogicOpStateSize = sizeof(PackedBlendMaskAndLogicOpState);
static_assert(kPackedBlendMaskAndLogicOpStateSize == 4, "Size check failed");

// The vertex input subset of the pipeline.
struct PipelineVertexInputState final
{
    PackedInputAssemblyState inputAssembly;
    PackedVertexInputAttributes vertex;
};

// The pre-rasterization and fragment shader subsets of the pipeline. This is excluding
// multisampled and render pass states which are shared with fragment output.
struct PipelineShadersState final
{
    PackedPreRasterizationAndFragmentStates shaders;
};

// Multisampled and render pass states.
struct PipelineSharedNonVertexInputState final
{
    PackedMultisampleAndSubpassState multisample;
    RenderPassDesc renderPass;
};

// The fragment output subset of the pipeline. This is excluding multisampled and render pass
// states which are shared with the shader subsets.
struct PipelineFragmentOutputState final
{
    PackedColorBlendState blend;
    PackedBlendMaskAndLogicOpState blendMaskAndLogic;
};

constexpr size_t kGraphicsPipelineVertexInputStateSize =
    kPackedVertexInputAttributesSize + kPackedInputAssemblyStateSize;
constexpr size_t kGraphicsPipelineShadersStateSize = kPackedPreRasterizationAndFragmentStatesSize;
constexpr size_t kGraphicsPipelineSharedNonVertexInputStateSize =
    kPackedMultisampleAndSubpassStateSize + kRenderPassDescSize;
constexpr size_t kGraphicsPipelineFragmentOutputStateSize =
    kPackedColorBlendStateSize + kPackedBlendMaskAndLogicOpStateSize;

constexpr size_t kGraphicsPipelineDescSumOfSizes =
    kGraphicsPipelineVertexInputStateSize + kGraphicsPipelineShadersStateSize +
    kGraphicsPipelineSharedNonVertexInputStateSize + kGraphicsPipelineFragmentOutputStateSize;

// Number of dirty bits in the dirty bit set.
constexpr size_t kGraphicsPipelineDirtyBitBytes = 4;
constexpr static size_t kNumGraphicsPipelineDirtyBits =
    kGraphicsPipelineDescSumOfSizes / kGraphicsPipelineDirtyBitBytes;
static_assert(kNumGraphicsPipelineDirtyBits <= 64, "Too many pipeline dirty bits");

// Set of dirty bits. Each bit represents kGraphicsPipelineDirtyBitBytes in the desc.
using GraphicsPipelineTransitionBits = angle::BitSet<kNumGraphicsPipelineDirtyBits>;
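
// Illustrative sketch (hypothetical helper, not part of this header): the
// transition bit covering a packed member is its byte offset in the description
// divided by kGraphicsPipelineDirtyBitBytes. With the subset sizes above,
// 104 + 12 + 20 + 40 = 176 bytes, so 176 / 4 = 44 bits are in use.
//
//   constexpr size_t TransitionBitForByteOffset(size_t offsetInDesc)
//   {
//       return offsetInDesc / kGraphicsPipelineDirtyBitBytes;
//   }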

GraphicsPipelineTransitionBits GetGraphicsPipelineTransitionBitsMask(GraphicsPipelineSubset subset);

// Disable padding warnings for a few helper structs that aggregate Vulkan state objects. These are
// not used as hash keys, they just simplify passing them around to functions.
ANGLE_DISABLE_STRUCT_PADDING_WARNINGS

struct GraphicsPipelineVertexInputVulkanStructs
{
    VkPipelineVertexInputStateCreateInfo vertexInputState = {};
    VkPipelineInputAssemblyStateCreateInfo inputAssemblyState = {};
    VkPipelineVertexInputDivisorStateCreateInfoEXT divisorState = {};

    // Support storage
    gl::AttribArray<VkVertexInputBindingDescription> bindingDescs;
    gl::AttribArray<VkVertexInputAttributeDescription> attributeDescs;
    gl::AttribArray<VkVertexInputBindingDivisorDescriptionEXT> divisorDesc;
};

struct GraphicsPipelineShadersVulkanStructs
{
    VkPipelineViewportStateCreateInfo viewportState = {};
    VkPipelineRasterizationStateCreateInfo rasterState = {};
    VkPipelineDepthStencilStateCreateInfo depthStencilState = {};
    VkPipelineTessellationStateCreateInfo tessellationState = {};
    VkPipelineTessellationDomainOriginStateCreateInfo domainOriginState = {};
    VkPipelineViewportDepthClipControlCreateInfoEXT depthClipControl = {};
    VkPipelineRasterizationLineStateCreateInfoEXT rasterLineState = {};
    VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provokingVertexState = {};
    VkPipelineRasterizationStateStreamCreateInfoEXT rasterStreamState = {};
    VkSpecializationInfo specializationInfo = {};

    // Support storage
    angle::FixedVector<VkPipelineShaderStageCreateInfo, 5> shaderStages;
    SpecializationConstantMap<VkSpecializationMapEntry> specializationEntries;
};

struct GraphicsPipelineSharedNonVertexInputVulkanStructs
{
    VkPipelineMultisampleStateCreateInfo multisampleState = {};

    // Support storage
    uint32_t sampleMask;
};

struct GraphicsPipelineFragmentOutputVulkanStructs
{
    VkPipelineColorBlendStateCreateInfo blendState = {};

    // Support storage
    gl::DrawBuffersArray<VkPipelineColorBlendAttachmentState> blendAttachmentState;
};

ANGLE_ENABLE_STRUCT_PADDING_WARNINGS

using GraphicsPipelineDynamicStateList = angle::FixedVector<VkDynamicState, 23>;

enum class PipelineRobustness
{
    NonRobust,
    Robust,
};

enum class PipelineProtectedAccess
{
    Unprotected,
    Protected,
};

// Context state that can affect a compute pipeline
union ComputePipelineOptions final
{
    struct
    {
        // Whether VK_EXT_pipeline_robustness should be used to make the pipeline robust. Note that
        // programs are allowed to be shared between robust and non-robust contexts, so different
        // pipelines can be created for the same compute program.
        uint8_t robustness : 1;
        // Whether VK_EXT_pipeline_protected_access should be used to make the pipeline
        // protected-only. Similar to robustness, EGL allows protected and unprotected to be in the
        // same share group.
        uint8_t protectedAccess : 1;
        uint8_t reserved : 6;  // must initialize to zero
    };
    uint8_t permutationIndex;
    static constexpr uint32_t kPermutationCount = 0x1 << 2;
};
static_assert(sizeof(ComputePipelineOptions) == 1, "Size check failed");
ComputePipelineOptions GetComputePipelineOptions(vk::PipelineRobustness robustness,
                                                 vk::PipelineProtectedAccess protectedAccess);
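
// Illustrative usage (example only): each robustness/protected-access
// combination maps to one of the kPermutationCount (4) permutation indices.
//
//   ComputePipelineOptions options = GetComputePipelineOptions(
//       vk::PipelineRobustness::Robust, vk::PipelineProtectedAccess::Unprotected);
//   ASSERT(options.permutationIndex < ComputePipelineOptions::kPermutationCount);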

// Compute Pipeline Description
class ComputePipelineDesc final
{
  public:
    void *operator new(std::size_t size);
    void operator delete(void *ptr);

    ComputePipelineDesc();
    ComputePipelineDesc(const ComputePipelineDesc &other);
    ComputePipelineDesc &operator=(const ComputePipelineDesc &other);

    ComputePipelineDesc(VkSpecializationInfo *specializationInfo,
                        vk::ComputePipelineOptions pipelineOptions);
    ~ComputePipelineDesc() = default;

    size_t hash() const;
    bool keyEqual(const ComputePipelineDesc &other) const;

    template <typename T>
    const T *getPtr() const
    {
        return reinterpret_cast<const T *>(this);
    }

    std::vector<uint32_t> getConstantIds() const { return mConstantIds; }
    std::vector<uint32_t> getConstants() const { return mConstants; }
    ComputePipelineOptions getPipelineOptions() const { return mPipelineOptions; }

  private:
    std::vector<uint32_t> mConstantIds, mConstants;
    ComputePipelineOptions mPipelineOptions = {};
    char mPadding[7] = {};
};

class PipelineHelper;

// When a graphics pipeline is created, the shaders state is either directly specified (monolithic
// pipeline) or is specified in a pipeline library. This struct encapsulates the choices.
struct GraphicsPipelineShadersInfo final
{
  public:
    GraphicsPipelineShadersInfo(const ShaderModuleMap *shaders,
                                const SpecializationConstants *specConsts)
        : mShaders(shaders), mSpecConsts(specConsts)
    {}
    GraphicsPipelineShadersInfo(vk::PipelineHelper *pipelineLibrary)
        : mPipelineLibrary(pipelineLibrary)
    {}

    vk::PipelineHelper *pipelineLibrary() const { return mPipelineLibrary; }
    bool usePipelineLibrary() const { return mPipelineLibrary != nullptr; }

  private:
    // If the shaders state should be directly specified in the final pipeline.
    const ShaderModuleMap *mShaders = nullptr;
    const SpecializationConstants *mSpecConsts = nullptr;

    // If the shaders state is provided via a pipeline library.
    vk::PipelineHelper *mPipelineLibrary = nullptr;

    friend class GraphicsPipelineDesc;
};

// State changes are applied through the update methods. Each update method can also have a
// sibling method that applies the update without marking a state transition. The non-transition
// update methods are used for internal shader pipelines. Not every non-transition update method
// is implemented yet as not every state is used in internal shaders.
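
// Illustrative usage (example only): a transition update records which packed
// bytes changed so the pipeline cache can compare only the dirty regions.
//
//   GraphicsPipelineTransitionBits transition{};
//   desc.updateCullMode(&transition, rasterState);
//   desc.updateFrontFace(&transition, rasterState, invertFrontFace);
//   // |transition| now marks the bytes of |desc| that changed.
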
class GraphicsPipelineDesc final
{
  public:
    // Use aligned allocation and free so we can use the alignas keyword.
    void *operator new(std::size_t size);
    void operator delete(void *ptr);

    GraphicsPipelineDesc();
    ~GraphicsPipelineDesc();
    GraphicsPipelineDesc(const GraphicsPipelineDesc &other);
    GraphicsPipelineDesc &operator=(const GraphicsPipelineDesc &other);

    size_t hash(GraphicsPipelineSubset subset) const;
    bool keyEqual(const GraphicsPipelineDesc &other, GraphicsPipelineSubset subset) const;

    void initDefaults(const ErrorContext *context,
                      GraphicsPipelineSubset subset,
                      PipelineRobustness contextRobustness,
                      PipelineProtectedAccess contextProtectedAccess);

    // For custom comparisons.
    template <typename T>
    const T *getPtr() const
    {
        return reinterpret_cast<const T *>(this);
    }

    VkResult initializePipeline(ErrorContext *context,
                                PipelineCacheAccess *pipelineCache,
                                GraphicsPipelineSubset subset,
                                const RenderPass &compatibleRenderPass,
                                const PipelineLayout &pipelineLayout,
                                const GraphicsPipelineShadersInfo &shaders,
                                Pipeline *pipelineOut,
                                CacheLookUpFeedback *feedbackOut) const;

    // Vertex input state. For ES 3.1 this should be separated into binding and attribute.
    void updateVertexInput(ContextVk *contextVk,
                           GraphicsPipelineTransitionBits *transition,
                           uint32_t attribIndex,
                           GLuint stride,
                           GLuint divisor,
                           angle::FormatID format,
                           bool compressed,
                           GLuint relativeOffset);
    void setVertexShaderComponentTypes(gl::AttributesMask activeAttribLocations,
                                       gl::ComponentTypeMask componentTypeMask);
    void updateVertexShaderComponentTypes(GraphicsPipelineTransitionBits *transition,
                                          gl::AttributesMask activeAttribLocations,
                                          gl::ComponentTypeMask componentTypeMask);

    // Input assembly info
    void setTopology(gl::PrimitiveMode drawMode);
    void updateTopology(GraphicsPipelineTransitionBits *transition, gl::PrimitiveMode drawMode);
    void updatePrimitiveRestartEnabled(GraphicsPipelineTransitionBits *transition,
                                       bool primitiveRestartEnabled);

    // Viewport states
    void updateDepthClipControl(GraphicsPipelineTransitionBits *transition, bool negativeOneToOne);

    // Raster states
    void updatePolygonMode(GraphicsPipelineTransitionBits *transition, gl::PolygonMode polygonMode);
    void updateCullMode(GraphicsPipelineTransitionBits *transition,
                        const gl::RasterizerState &rasterState);
    void updateFrontFace(GraphicsPipelineTransitionBits *transition,
                         const gl::RasterizerState &rasterState,
                         bool invertFrontFace);
    void updateRasterizerDiscardEnabled(GraphicsPipelineTransitionBits *transition,
                                        bool rasterizerDiscardEnabled);

    // Multisample states
    uint32_t getRasterizationSamples() const;
    void setRasterizationSamples(uint32_t rasterizationSamples);
    void updateRasterizationSamples(GraphicsPipelineTransitionBits *transition,
                                    uint32_t rasterizationSamples);
    void updateAlphaToCoverageEnable(GraphicsPipelineTransitionBits *transition, bool enable);
    void updateAlphaToOneEnable(GraphicsPipelineTransitionBits *transition, bool enable);
    void updateSampleMask(GraphicsPipelineTransitionBits *transition,
                          uint32_t maskNumber,
                          uint32_t mask);

    void updateSampleShading(GraphicsPipelineTransitionBits *transition, bool enable, float value);

    // RenderPass description.
    const RenderPassDesc &getRenderPassDesc() const { return mSharedNonVertexInput.renderPass; }

    void setRenderPassDesc(const RenderPassDesc &renderPassDesc);
    void updateRenderPassDesc(GraphicsPipelineTransitionBits *transition,
                              const angle::FeaturesVk &features,
                              const RenderPassDesc &renderPassDesc,
                              FramebufferFetchMode framebufferFetchMode);
    void setRenderPassSampleCount(GLint samples);
    void setRenderPassFramebufferFetchMode(FramebufferFetchMode framebufferFetchMode);
    bool getRenderPassColorFramebufferFetchMode() const
    {
        return mSharedNonVertexInput.renderPass.hasColorFramebufferFetch();
    }
    bool getRenderPassDepthStencilFramebufferFetchMode() const
    {
        return mSharedNonVertexInput.renderPass.hasDepthStencilFramebufferFetch();
    }

    void setRenderPassFoveation(bool isFoveated);
    bool getRenderPassFoveation() const
    {
        return mSharedNonVertexInput.renderPass.hasFragmentShadingAttachment();
    }

    void setRenderPassColorAttachmentFormat(size_t colorIndexGL, angle::FormatID formatID);

    // Blend states
    void setSingleBlend(uint32_t colorIndexGL,
                        bool enabled,
                        VkBlendOp op,
                        VkBlendFactor srcFactor,
                        VkBlendFactor dstFactor);
    void updateBlendEnabled(GraphicsPipelineTransitionBits *transition,
                            gl::DrawBufferMask blendEnabledMask);
    void updateBlendFuncs(GraphicsPipelineTransitionBits *transition,
                          const gl::BlendStateExt &blendStateExt,
                          gl::DrawBufferMask attachmentMask);
    void updateBlendEquations(GraphicsPipelineTransitionBits *transition,
                              const gl::BlendStateExt &blendStateExt,
                              gl::DrawBufferMask attachmentMask);
    void resetBlendFuncsAndEquations(GraphicsPipelineTransitionBits *transition,
                                     const gl::BlendStateExt &blendStateExt,
                                     gl::DrawBufferMask previousAttachmentsMask,
                                     gl::DrawBufferMask newAttachmentsMask);
    void setColorWriteMasks(gl::BlendStateExt::ColorMaskStorage::Type colorMasks,
                            const gl::DrawBufferMask &alphaMask,
                            const gl::DrawBufferMask &enabledDrawBuffers);
    void setSingleColorWriteMask(uint32_t colorIndexGL, VkColorComponentFlags colorComponentFlags);
    void updateColorWriteMasks(GraphicsPipelineTransitionBits *transition,
                               gl::BlendStateExt::ColorMaskStorage::Type colorMasks,
                               const gl::DrawBufferMask &alphaMask,
                               const gl::DrawBufferMask &enabledDrawBuffers);
    void updateMissingOutputsMask(GraphicsPipelineTransitionBits *transition,
                                  gl::DrawBufferMask missingOutputsMask);

    // Logic op
    void updateLogicOpEnabled(GraphicsPipelineTransitionBits *transition, bool enable);
    void updateLogicOp(GraphicsPipelineTransitionBits *transition, VkLogicOp logicOp);

    // Depth/stencil states.
    void setDepthTestEnabled(bool enabled);
    void setDepthWriteEnabled(bool enabled);
    void setDepthFunc(VkCompareOp op);
    void setDepthClampEnabled(bool enabled);
    void setStencilTestEnabled(bool enabled);
    void setStencilFrontFuncs(VkCompareOp compareOp);
    void setStencilBackFuncs(VkCompareOp compareOp);
    void setStencilFrontOps(VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp);
    void setStencilBackOps(VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp);
    void setStencilFrontWriteMask(uint8_t mask);
    void setStencilBackWriteMask(uint8_t mask);
    void updateDepthTestEnabled(GraphicsPipelineTransitionBits *transition,
                                const gl::DepthStencilState &depthStencilState,
                                const gl::Framebuffer *drawFramebuffer);
    void updateDepthFunc(GraphicsPipelineTransitionBits *transition,
                         const gl::DepthStencilState &depthStencilState);
    void updateDepthClampEnabled(GraphicsPipelineTransitionBits *transition, bool enabled);
    void updateDepthWriteEnabled(GraphicsPipelineTransitionBits *transition,
                                 const gl::DepthStencilState &depthStencilState,
                                 const gl::Framebuffer *drawFramebuffer);
    void updateStencilTestEnabled(GraphicsPipelineTransitionBits *transition,
                                  const gl::DepthStencilState &depthStencilState,
                                  const gl::Framebuffer *drawFramebuffer);
    void updateStencilFrontFuncs(GraphicsPipelineTransitionBits *transition,
                                 const gl::DepthStencilState &depthStencilState);
    void updateStencilBackFuncs(GraphicsPipelineTransitionBits *transition,
                                const gl::DepthStencilState &depthStencilState);
    void updateStencilFrontOps(GraphicsPipelineTransitionBits *transition,
                               const gl::DepthStencilState &depthStencilState);
    void updateStencilBackOps(GraphicsPipelineTransitionBits *transition,
                              const gl::DepthStencilState &depthStencilState);

    // Depth offset.
    void updatePolygonOffsetEnabled(GraphicsPipelineTransitionBits *transition, bool enabled);

    // Tessellation
    void updatePatchVertices(GraphicsPipelineTransitionBits *transition, GLuint value);

    // Subpass
    void resetSubpass(GraphicsPipelineTransitionBits *transition);
    void nextSubpass(GraphicsPipelineTransitionBits *transition);
    void setSubpass(uint32_t subpass);
    uint32_t getSubpass() const;

    void updateSurfaceRotation(GraphicsPipelineTransitionBits *transition,
                               bool isRotatedAspectRatio);
    bool getSurfaceRotation() const { return mShaders.shaders.bits.surfaceRotation; }

    void updateEmulatedDitherControl(GraphicsPipelineTransitionBits *transition, uint16_t value);
    uint32_t getEmulatedDitherControl() const { return mShaders.shaders.emulatedDitherControl; }

    bool isLegacyDitherEnabled() const
    {
        return mSharedNonVertexInput.renderPass.isLegacyDitherEnabled();
    }

    void updateNonZeroStencilWriteMaskWorkaround(GraphicsPipelineTransitionBits *transition,
                                                 bool enabled);

    void setSupportsDynamicStateForTest(bool supports)
    {
        mVertexInput.inputAssembly.bits.useVertexInputBindingStrideDynamicState = supports;
        mShaders.shaders.bits.nonZeroStencilWriteMaskWorkaround = false;
    }

    static VkFormat getPipelineVertexInputStateFormat(ErrorContext *context,
                                                      angle::FormatID formatID,
                                                      bool compressed,
                                                      const gl::ComponentType programAttribType,
                                                      uint32_t attribIndex);

    // Helpers to dump the state
    const PipelineVertexInputState &getVertexInputStateForLog() const { return mVertexInput; }
    const PipelineShadersState &getShadersStateForLog() const { return mShaders; }
    const PipelineSharedNonVertexInputState &getSharedNonVertexInputStateForLog() const
    {
        return mSharedNonVertexInput;
    }
    const PipelineFragmentOutputState &getFragmentOutputStateForLog() const
    {
        return mFragmentOutput;
    }

    bool hasPipelineProtectedAccess() const
    {
        ASSERT(mShaders.shaders.bits.isProtectedContext ==
               mVertexInput.inputAssembly.bits.isProtectedContext);
        ASSERT(mShaders.shaders.bits.isProtectedContext ==
               mFragmentOutput.blendMaskAndLogic.bits.isProtectedContext);

        return mShaders.shaders.bits.isProtectedContext;
    }

  private:
    void updateSubpass(GraphicsPipelineTransitionBits *transition, uint32_t subpass);

    const void *getPipelineSubsetMemory(GraphicsPipelineSubset subset, size_t *sizeOut) const;

    void initializePipelineVertexInputState(
        ErrorContext *context,
        GraphicsPipelineVertexInputVulkanStructs *stateOut,
        GraphicsPipelineDynamicStateList *dynamicStateListOut) const;

    void initializePipelineShadersState(
        ErrorContext *context,
        const ShaderModuleMap &shaders,
        const SpecializationConstants &specConsts,
        GraphicsPipelineShadersVulkanStructs *stateOut,
        GraphicsPipelineDynamicStateList *dynamicStateListOut) const;

    void initializePipelineSharedNonVertexInputState(
        ErrorContext *context,
        GraphicsPipelineSharedNonVertexInputVulkanStructs *stateOut,
        GraphicsPipelineDynamicStateList *dynamicStateListOut) const;

    void initializePipelineFragmentOutputState(
        ErrorContext *context,
        GraphicsPipelineFragmentOutputVulkanStructs *stateOut,
        GraphicsPipelineDynamicStateList *dynamicStateListOut) const;

    PipelineShadersState mShaders;
    PipelineSharedNonVertexInputState mSharedNonVertexInput;
    PipelineFragmentOutputState mFragmentOutput;
    PipelineVertexInputState mVertexInput;
};

// Verify the packed pipeline description has no gaps in the packing.
// This is not guaranteed by the spec, but is validated by a compile-time check.
// No gaps or padding at the end ensures that hashing and memcmp checks will not run
// into uninitialized memory regions.
constexpr size_t kGraphicsPipelineDescSize = sizeof(GraphicsPipelineDesc);
static_assert(kGraphicsPipelineDescSize == kGraphicsPipelineDescSumOfSizes, "Size mismatch");

// Values are based on data recorded here -> https://anglebug.com/42267114#comment5
constexpr size_t kDefaultDescriptorSetLayoutBindingsCount = 8;
constexpr size_t kDefaultImmutableSamplerBindingsCount = 1;
using DescriptorSetLayoutBindingVector =
    angle::FastVector<VkDescriptorSetLayoutBinding, kDefaultDescriptorSetLayoutBindingsCount>;

// A packed description of a descriptor set layout. Use similarly to RenderPassDesc and
// GraphicsPipelineDesc. Currently we only need to differentiate layouts based on sampler and ubo
// usage. In the future we could generalize this.
class DescriptorSetLayoutDesc final
{
  public:
    DescriptorSetLayoutDesc();
    ~DescriptorSetLayoutDesc();
    DescriptorSetLayoutDesc(const DescriptorSetLayoutDesc &other);
    DescriptorSetLayoutDesc &operator=(const DescriptorSetLayoutDesc &other);

    size_t hash() const;
    bool operator==(const DescriptorSetLayoutDesc &other) const;

    void addBinding(uint32_t bindingIndex,
                    VkDescriptorType descriptorType,
                    uint32_t count,
                    VkShaderStageFlags stages,
                    const Sampler *immutableSampler);

    void unpackBindings(DescriptorSetLayoutBindingVector *bindings) const;

    bool empty() const { return mDescriptorSetLayoutBindings.empty(); }

  private:
    // There is a small risk of an issue if the sampler cache is evicted but not the descriptor
    // cache: we would have an invalid handle here. Hence the proposed follow-up work:
    // TODO: https://issuetracker.google.com/issues/159156775: Have immutable sampler use serial
    union PackedDescriptorSetBinding
    {
        static constexpr uint8_t kInvalidType = 255;

        struct
        {
            uint8_t type;    // Stores a packed VkDescriptorType descriptorType.
            uint8_t stages;  // Stores a packed VkShaderStageFlags.
            uint16_t count : 15;               // Stores a packed uint32_t descriptorCount
            uint16_t hasImmutableSampler : 1;  // Whether this binding has an immutable sampler
        };
        uint32_t value;

        bool operator==(const PackedDescriptorSetBinding &other) const
        {
            return value == other.value;
        }
    };

    // 1x 32bit
    static_assert(sizeof(PackedDescriptorSetBinding) == 4, "Unexpected size");

    angle::FastVector<VkSampler, kDefaultImmutableSamplerBindingsCount> mImmutableSamplers;
    angle::FastVector<PackedDescriptorSetBinding, kDefaultDescriptorSetLayoutBindingsCount>
        mDescriptorSetLayoutBindings;

#if !defined(ANGLE_IS_64_BIT_CPU)
    ANGLE_MAYBE_UNUSED_PRIVATE_FIELD uint32_t mPadding = 0;
#endif
};
// The following are for caching descriptor set layouts. Limited to a maximum of four descriptor
// set layouts (one per DescriptorSetIndex). This can be extended in the future.
constexpr size_t kMaxDescriptorSetLayouts = ToUnderlying(DescriptorSetIndex::EnumCount);
1248
1249 union PackedPushConstantRange
1250 {
1251 struct
1252 {
        uint8_t offset;
        uint8_t size;
        uint16_t stageMask;
    };
    uint32_t value;

    bool operator==(const PackedPushConstantRange &other) const { return value == other.value; }
};

static_assert(sizeof(PackedPushConstantRange) == sizeof(uint32_t), "Unexpected Size");

template <typename T>
using DescriptorSetArray              = angle::PackedEnumMap<DescriptorSetIndex, T>;
using DescriptorSetLayoutPointerArray = DescriptorSetArray<DescriptorSetLayoutPtr>;

class PipelineLayoutDesc final
{
  public:
    PipelineLayoutDesc();
    ~PipelineLayoutDesc();
    PipelineLayoutDesc(const PipelineLayoutDesc &other);
    PipelineLayoutDesc &operator=(const PipelineLayoutDesc &rhs);

    size_t hash() const;
    bool operator==(const PipelineLayoutDesc &other) const;

    void updateDescriptorSetLayout(DescriptorSetIndex setIndex,
                                   const DescriptorSetLayoutDesc &desc);
    void updatePushConstantRange(VkShaderStageFlags stageMask, uint32_t offset, uint32_t size);

    const PackedPushConstantRange &getPushConstantRange() const { return mPushConstantRange; }

  private:
    DescriptorSetArray<DescriptorSetLayoutDesc> mDescriptorSetLayouts;
    PackedPushConstantRange mPushConstantRange;
    ANGLE_MAYBE_UNUSED_PRIVATE_FIELD uint32_t mPadding;

    // Verify the arrays are properly packed.
    static_assert(sizeof(decltype(mDescriptorSetLayouts)) ==
                      (sizeof(DescriptorSetLayoutDesc) * kMaxDescriptorSetLayouts),
                  "Unexpected size");
};

// Verify the structure is properly packed.
static_assert(sizeof(PipelineLayoutDesc) == sizeof(DescriptorSetArray<DescriptorSetLayoutDesc>) +
                                                sizeof(PackedPushConstantRange) + sizeof(uint32_t),
              "Unexpected Size");

enum class YcbcrLinearFilterSupport
{
    Unsupported,
    Supported,
};

class YcbcrConversionDesc final
{
  public:
    YcbcrConversionDesc();
    ~YcbcrConversionDesc();
    YcbcrConversionDesc(const YcbcrConversionDesc &other);
    YcbcrConversionDesc &operator=(const YcbcrConversionDesc &other);

    size_t hash() const;
    bool operator==(const YcbcrConversionDesc &other) const;

    bool valid() const { return mExternalOrVkFormat != 0; }
    void reset();
    void update(Renderer *renderer,
                uint64_t externalFormat,
                VkSamplerYcbcrModelConversion conversionModel,
                VkSamplerYcbcrRange colorRange,
                VkChromaLocation xChromaOffset,
                VkChromaLocation yChromaOffset,
                VkFilter chromaFilter,
                VkComponentMapping components,
                angle::FormatID intendedFormatID,
                YcbcrLinearFilterSupport linearFilterSupported);
    VkFilter getChromaFilter() const { return static_cast<VkFilter>(mChromaFilter); }
    bool updateChromaFilter(Renderer *renderer, VkFilter filter);
    void updateConversionModel(VkSamplerYcbcrModelConversion conversionModel);
    uint64_t getExternalFormat() const { return mIsExternalFormat ? mExternalOrVkFormat : 0; }

    angle::Result init(ErrorContext *context, SamplerYcbcrConversion *conversionOut) const;

  private:
    // If the sampler needs to convert the image content (e.g. from YUV to RGB) then
    // mExternalOrVkFormat will be non-zero. The value is either the external format
    // as returned by vkGetAndroidHardwareBufferPropertiesANDROID or a YUV VkFormat.
    // For VkSamplerYcbcrConversion, mExternalOrVkFormat along with mIsExternalFormat,
    // mConversionModel and mColorRange works as a Serial() used elsewhere in ANGLE.
    uint64_t mExternalOrVkFormat;
    // 1 bit to identify if external format is used
    uint32_t mIsExternalFormat : 1;
    // 3 bits to identify conversion model
    uint32_t mConversionModel : 3;
    // 1 bit to identify color component range
    uint32_t mColorRange : 1;
    // 1 bit to identify x chroma location
    uint32_t mXChromaOffset : 1;
    // 1 bit to identify y chroma location
    uint32_t mYChromaOffset : 1;
    // 1 bit to identify chroma filtering
    uint32_t mChromaFilter : 1;
    // 3 bits to identify R component swizzle
    uint32_t mRSwizzle : 3;
    // 3 bits to identify G component swizzle
    uint32_t mGSwizzle : 3;
    // 3 bits to identify B component swizzle
    uint32_t mBSwizzle : 3;
    // 3 bits to identify A component swizzle
    uint32_t mASwizzle : 3;
    // 1 bit for whether linear filtering is supported (independent of whether currently enabled)
    uint32_t mLinearFilterSupported : 1;
    uint32_t mPadding : 11;
    uint32_t mReserved;
};

static_assert(sizeof(YcbcrConversionDesc) == 16, "Unexpected YcbcrConversionDesc size");

// Packed sampler description for the sampler cache.
class SamplerDesc final
{
  public:
    SamplerDesc();
    SamplerDesc(ErrorContext *context,
                const gl::SamplerState &samplerState,
                bool stencilMode,
                const YcbcrConversionDesc *ycbcrConversionDesc,
                angle::FormatID intendedFormatID);
    ~SamplerDesc();

    SamplerDesc(const SamplerDesc &other);
    SamplerDesc &operator=(const SamplerDesc &rhs);

    void update(Renderer *renderer,
                const gl::SamplerState &samplerState,
                bool stencilMode,
                const YcbcrConversionDesc *ycbcrConversionDesc,
                angle::FormatID intendedFormatID);
    void reset();
    angle::Result init(ContextVk *contextVk, Sampler *sampler) const;

    size_t hash() const;
    bool operator==(const SamplerDesc &other) const;

  private:
    // 32*4 bits for floating point data.
    // Note: anisotropy enabled is implicitly determined by maxAnisotropy and caps.
    float mMipLodBias;
    float mMaxAnisotropy;
    float mMinLod;
    float mMaxLod;

    // 16*8 bits to uniquely identify a YCbCr conversion sampler.
    YcbcrConversionDesc mYcbcrConversionDesc;

    // 16 bits for modes + states.
    // 1 bit per filter (only 2 possible values in GL: linear/nearest)
    uint16_t mMagFilter : 1;
    uint16_t mMinFilter : 1;
    uint16_t mMipmapMode : 1;

    // 3 bits per address mode (5 possible values)
    uint16_t mAddressModeU : 3;
    uint16_t mAddressModeV : 3;
    uint16_t mAddressModeW : 3;

    // 1 bit for compare enabled (2 possible values)
    uint16_t mCompareEnabled : 1;

    // 3 bits for compare op. (8 possible values)
    uint16_t mCompareOp : 3;

    // Values from angle::ColorGeneric::Type. Float is 0 and others are 1.
    uint16_t mBorderColorType : 1;

    uint16_t mPadding : 15;

    // 16*8 bits for BorderColor
    angle::ColorF mBorderColor;

    // 32 bits reserved for future use.
    uint32_t mReserved;
};

static_assert(sizeof(SamplerDesc) == 56, "Unexpected SamplerDesc size");

// Disable warnings about struct padding.
ANGLE_DISABLE_STRUCT_PADDING_WARNINGS

struct GraphicsPipelineTransition
{
    GraphicsPipelineTransition();
    GraphicsPipelineTransition(const GraphicsPipelineTransition &other);
    GraphicsPipelineTransition(GraphicsPipelineTransitionBits bits,
                               const GraphicsPipelineDesc *desc,
                               PipelineHelper *pipeline);

    GraphicsPipelineTransitionBits bits;
    const GraphicsPipelineDesc *desc;
    PipelineHelper *target;
};

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition() = default;

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition(
    const GraphicsPipelineTransition &other) = default;

ANGLE_INLINE GraphicsPipelineTransition::GraphicsPipelineTransition(
    GraphicsPipelineTransitionBits bits,
    const GraphicsPipelineDesc *desc,
    PipelineHelper *pipeline)
    : bits(bits), desc(desc), target(pipeline)
{}

ANGLE_INLINE bool GraphicsPipelineTransitionMatch(GraphicsPipelineTransitionBits bitsA,
                                                  GraphicsPipelineTransitionBits bitsB,
                                                  const GraphicsPipelineDesc &descA,
                                                  const GraphicsPipelineDesc &descB)
{
    if (bitsA != bitsB)
        return false;

    // We currently mask over 4 bytes of the pipeline description with each dirty bit.
    // We could consider using 8 bytes and a mask of 32 bits. This would make some parts
    // of the code faster. The for loop below would scan over twice as many bits per iteration.
    // But there may be more collisions between the same dirty bit masks leading to different
    // transitions. Thus there may be additional cost when applications use many transitions.
    // We should revisit this in the future and investigate using different bit widths.
    static_assert(sizeof(uint32_t) == kGraphicsPipelineDirtyBitBytes, "Size mismatch");

    const uint32_t *rawPtrA = descA.getPtr<uint32_t>();
    const uint32_t *rawPtrB = descB.getPtr<uint32_t>();

    for (size_t dirtyBit : bitsA)
    {
        if (rawPtrA[dirtyBit] != rawPtrB[dirtyBit])
            return false;
    }

    return true;
}
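
// Illustrative sketch (not part of the API): with the 4-byte dirty-bit granularity asserted
// above, a transition that only changed, say, word 3 of the packed description is verified by
// comparing just that word:
//
//   GraphicsPipelineTransitionBits bits;
//   bits.set(3);  // hypothetical index of the 4-byte word that changed
//   // GraphicsPipelineTransitionMatch(bits, bits, descA, descB) then reduces to
//   // descA.getPtr<uint32_t>()[3] == descB.getPtr<uint32_t>()[3].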

// A class that encapsulates the vk::PipelineCache and associated mutex. The mutex may be nullptr
// if synchronization is not necessary.
class PipelineCacheAccess
{
  public:
    PipelineCacheAccess()  = default;
    ~PipelineCacheAccess() = default;

    void init(const vk::PipelineCache *pipelineCache, angle::SimpleMutex *mutex)
    {
        mPipelineCache = pipelineCache;
        mMutex         = mutex;
    }

    VkResult createGraphicsPipeline(vk::ErrorContext *context,
                                    const VkGraphicsPipelineCreateInfo &createInfo,
                                    vk::Pipeline *pipelineOut);
    VkResult createComputePipeline(vk::ErrorContext *context,
                                   const VkComputePipelineCreateInfo &createInfo,
                                   vk::Pipeline *pipelineOut);

    VkResult getCacheData(vk::ErrorContext *context, size_t *cacheSize, void *cacheData);

    void merge(Renderer *renderer, const vk::PipelineCache &pipelineCache);

    bool isThreadSafe() const { return mMutex != nullptr; }

  private:
    std::unique_lock<angle::SimpleMutex> getLock();

    const vk::PipelineCache *mPipelineCache = nullptr;
    angle::SimpleMutex *mMutex = nullptr;
};
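
// Illustrative usage sketch (hypothetical |renderer| accessors, not part of this header):
//
//   vk::PipelineCacheAccess access;
//   access.init(&renderer->getPipelineCache(), &renderer->getPipelineCacheMutex());
//   ASSERT(access.isThreadSafe());
//   VkResult result = access.createGraphicsPipeline(context, createInfo, &pipeline);
//
// Passing a nullptr mutex is valid when the cache is only reachable from one thread, in which
// case the create calls skip locking entirely.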

// Monolithic pipeline creation tasks are created as soon as a pipeline is created out of
// libraries. However, they are not immediately posted to the worker queue to allow pacing. On
// each use of a pipeline, an attempt is made to post the task.
class CreateMonolithicPipelineTask : public ErrorContext, public angle::Closure
{
  public:
    CreateMonolithicPipelineTask(Renderer *renderer,
                                 const PipelineCacheAccess &pipelineCache,
                                 const PipelineLayout &pipelineLayout,
                                 const ShaderModuleMap &shaders,
                                 const SpecializationConstants &specConsts,
                                 const GraphicsPipelineDesc &desc);

    // The compatible render pass is set only when the task is ready to run. This is because the
    // render pass cache may have been cleared since the task was created (e.g. to accommodate
    // framebuffer fetch). Such render pass cache clears ensure there are no active tasks, so it's
    // safe to hold on to this pointer for the brief period between task post and completion.
    //
    // Not applicable to dynamic rendering.
    const RenderPassDesc &getRenderPassDesc() const { return mDesc.getRenderPassDesc(); }
    void setCompatibleRenderPass(const RenderPass *compatibleRenderPass);

    void operator()() override;

    VkResult getResult() const { return mResult; }
    Pipeline &getPipeline() { return mPipeline; }
    CacheLookUpFeedback getFeedback() const { return mFeedback; }

    void handleError(VkResult result,
                     const char *file,
                     const char *function,
                     unsigned int line) override;

  private:
    // Input to pipeline creation
    PipelineCacheAccess mPipelineCache;
    const RenderPass *mCompatibleRenderPass;
    const PipelineLayout &mPipelineLayout;
    const ShaderModuleMap &mShaders;
    SpecializationConstants mSpecConsts;
    GraphicsPipelineDesc mDesc;

    // Results
    VkResult mResult;
    Pipeline mPipeline;
    CacheLookUpFeedback mFeedback;
};

class WaitableMonolithicPipelineCreationTask
{
  public:
    ~WaitableMonolithicPipelineCreationTask();

    void setTask(std::shared_ptr<CreateMonolithicPipelineTask> &&task) { mTask = std::move(task); }
    void setRenderPass(const RenderPass *compatibleRenderPass)
    {
        mTask->setCompatibleRenderPass(compatibleRenderPass);
    }
    void onSchedule(const std::shared_ptr<angle::WaitableEvent> &waitableEvent)
    {
        mWaitableEvent = waitableEvent;
    }
    void reset()
    {
        mWaitableEvent.reset();
        mTask.reset();
    }

    bool isValid() const { return mTask.get() != nullptr; }
    bool isPosted() const { return mWaitableEvent.get() != nullptr; }
    bool isReady() { return mWaitableEvent->isReady(); }
    void wait() { return mWaitableEvent->wait(); }

    std::shared_ptr<CreateMonolithicPipelineTask> getTask() const { return mTask; }

  private:
    std::shared_ptr<angle::WaitableEvent> mWaitableEvent;
    std::shared_ptr<CreateMonolithicPipelineTask> mTask;
};
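
// Illustrative pacing sketch (assumed call sites; the actual scheduling goes through the share
// group, which owns the worker pool):
//
//   WaitableMonolithicPipelineCreationTask waitable;
//   waitable.setTask(std::move(task));  // created eagerly, not yet posted
//   ...
//   if (waitable.isValid() && !waitable.isPosted())
//   {
//       waitable.setRenderPass(compatibleRenderPass);
//       waitable.onSchedule(workerPool->postWorkerTask(waitable.getTask()));
//   }
//   else if (waitable.isPosted() && waitable.isReady())
//   {
//       // Harvest the monolithic pipeline from waitable.getTask()->getPipeline().
//   }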

class PipelineHelper final : public Resource
{
  public:
    PipelineHelper();
    ~PipelineHelper() override;
    inline explicit PipelineHelper(Pipeline &&pipeline, CacheLookUpFeedback feedback);
    PipelineHelper &operator=(PipelineHelper &&other);

    void destroy(VkDevice device);
    void release(ErrorContext *context);

    bool valid() const { return mPipeline.valid(); }
    const Pipeline &getPipeline() const { return mPipeline; }

    // Get the pipeline. If there is a monolithic pipeline creation task pending, scheduling it is
    // attempted. If that task is done, the pipeline is replaced with the results and the old
    // pipeline released.
    angle::Result getPreferredPipeline(ContextVk *contextVk, const Pipeline **pipelineOut);

    ANGLE_INLINE bool findTransition(GraphicsPipelineTransitionBits bits,
                                     const GraphicsPipelineDesc &desc,
                                     PipelineHelper **pipelineOut) const
    {
        // Search could be improved using sorting or hashing.
        for (const GraphicsPipelineTransition &transition : mTransitions)
        {
            if (GraphicsPipelineTransitionMatch(transition.bits, bits, *transition.desc, desc))
            {
                *pipelineOut = transition.target;
                return true;
            }
        }

        return false;
    }

    void addTransition(GraphicsPipelineTransitionBits bits,
                       const GraphicsPipelineDesc *desc,
                       PipelineHelper *pipeline);

    const std::vector<GraphicsPipelineTransition> &getTransitions() const { return mTransitions; }

    void setComputePipeline(Pipeline &&pipeline, CacheLookUpFeedback feedback)
    {
        ASSERT(!mPipeline.valid());
        mPipeline = std::move(pipeline);

        ASSERT(mCacheLookUpFeedback == CacheLookUpFeedback::None);
        mCacheLookUpFeedback = feedback;
    }
    CacheLookUpFeedback getCacheLookUpFeedback() const { return mCacheLookUpFeedback; }

    void setLinkedLibraryReferences(vk::PipelineHelper *shadersPipeline);

    void retainInRenderPass(RenderPassCommandBufferHelper *renderPassCommands);

    void setMonolithicPipelineCreationTask(std::shared_ptr<CreateMonolithicPipelineTask> &&task)
    {
        mMonolithicPipelineCreationTask.setTask(std::move(task));
    }

  private:
    void reset();

    std::vector<GraphicsPipelineTransition> mTransitions;
    Pipeline mPipeline;
    CacheLookUpFeedback mCacheLookUpFeedback           = CacheLookUpFeedback::None;
    CacheLookUpFeedback mMonolithicCacheLookUpFeedback = CacheLookUpFeedback::None;

    // The list of pipeline helpers that were referenced when creating a linked pipeline. These
    // pipelines must be kept alive, so their serial is updated at the same time as this object.
    // The shaders pipeline is the only library so far.
    PipelineHelper *mLinkedShaders = nullptr;

    // If pipeline libraries are used and monolithic pipelines are created in parallel, this is the
    // temporary library created (previously in |mPipeline|) that is now replaced by the monolithic
    // one. It is not immediately garbage collected when replaced, because there is currently a bug
    // with that. http://anglebug.com/42266335
    Pipeline mLinkedPipelineToRelease;

    // An async task to create a monolithic pipeline. Only used if the pipeline was originally
    // created as a linked library. The |getPipeline()| call will attempt to schedule this task
    // through the share group, which manages and paces these tasks. Once the task results are
    // ready, |mPipeline| is released and replaced by the result of this task.
    WaitableMonolithicPipelineCreationTask mMonolithicPipelineCreationTask;
};
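
// Illustrative sketch of the transition cache in use (assumed surrounding names; see ContextVk
// for the real call site):
//
//   PipelineHelper *nextPipeline = nullptr;
//   if (!currentPipeline->findTransition(dirtyBits, desc, &nextPipeline))
//   {
//       // Miss: fetch |nextPipeline| from the full pipeline cache, then memoize the edge so
//       // the next identical state change is a short scan over transitions instead of a
//       // full-description hash and compare.
//       currentPipeline->addTransition(dirtyBits, cachedDescPtr, nextPipeline);
//   }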

class FramebufferHelper : public Resource
{
  public:
    FramebufferHelper();
    ~FramebufferHelper() override;

    FramebufferHelper(FramebufferHelper &&other);
    FramebufferHelper &operator=(FramebufferHelper &&other);

    angle::Result init(ErrorContext *context, const VkFramebufferCreateInfo &createInfo);
    void destroy(Renderer *renderer);
    void release(ContextVk *contextVk);

    bool valid() { return mFramebuffer.valid(); }

    const Framebuffer &getFramebuffer() const
    {
        ASSERT(mFramebuffer.valid());
        return mFramebuffer;
    }

    Framebuffer &getFramebuffer()
    {
        ASSERT(mFramebuffer.valid());
        return mFramebuffer;
    }

  private:
    // Vulkan object.
    Framebuffer mFramebuffer;
};

ANGLE_INLINE PipelineHelper::PipelineHelper(Pipeline &&pipeline, CacheLookUpFeedback feedback)
    : mPipeline(std::move(pipeline)), mCacheLookUpFeedback(feedback)
{}

ANGLE_INLINE PipelineHelper &PipelineHelper::operator=(PipelineHelper &&other)
{
    ASSERT(!mPipeline.valid());

    std::swap(mPipeline, other.mPipeline);
    mCacheLookUpFeedback = other.mCacheLookUpFeedback;

    return *this;
}

struct ImageSubresourceRange
{
    // GL max is 1000 (fits in 10 bits).
    uint32_t level : 10;
    // Max 31 levels (2 ** 5 - 1). Can store levelCount-1 if we need to save another bit.
    uint32_t levelCount : 5;
    // Implementation max is 4096 (12 bits).
    uint32_t layer : 12;
    // One of vk::LayerMode values. If 0, it means all layers. Otherwise it's the count of layers
    // which is usually 1, except for multiview in which case it can be up to
    // gl::IMPLEMENTATION_MAX_2D_ARRAY_TEXTURE_LAYERS.
    uint32_t layerMode : 3;
    // For reads: Values are either ImageViewColorspace::Linear or ImageViewColorspace::SRGB
    uint32_t readColorspace : 1;
    // For writes: Values are either ImageViewColorspace::Linear or ImageViewColorspace::SRGB
    uint32_t writeColorspace : 1;

    static_assert(gl::IMPLEMENTATION_MAX_TEXTURE_LEVELS < (1 << 5),
                  "Not enough bits for level count");
    static_assert(gl::IMPLEMENTATION_MAX_2D_ARRAY_TEXTURE_LAYERS <= (1 << 12),
                  "Not enough bits for layer index");
    static_assert(gl::IMPLEMENTATION_ANGLE_MULTIVIEW_MAX_VIEWS <= (1 << 3),
                  "Not enough bits for layer count");
};

static_assert(sizeof(ImageSubresourceRange) == sizeof(uint32_t), "Size mismatch");

inline bool operator==(const ImageSubresourceRange &a, const ImageSubresourceRange &b)
{
    return a.level == b.level && a.levelCount == b.levelCount && a.layer == b.layer &&
           a.layerMode == b.layerMode && a.readColorspace == b.readColorspace &&
           a.writeColorspace == b.writeColorspace;
}

constexpr ImageSubresourceRange kInvalidImageSubresourceRange = {0, 0, 0, 0, 0, 0};
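
// Example (illustrative values): a view of mip levels [2, 5) of all layers packs into a single
// 32-bit key, which is what makes these ranges cheap to hash and compare:
//
//   ImageSubresourceRange range = {};
//   range.level                 = 2;  // base mip level
//   range.levelCount            = 3;  // levels 2, 3 and 4
//   range.layer                 = 0;
//   range.layerMode             = 0;  // 0 means all layers
//   static_assert(sizeof(range) == sizeof(uint32_t), "fits in one word");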

struct ImageOrBufferViewSubresourceSerial
{
    ImageOrBufferViewSerial viewSerial;
    ImageSubresourceRange subresource;
};

inline bool operator==(const ImageOrBufferViewSubresourceSerial &a,
                       const ImageOrBufferViewSubresourceSerial &b)
{
    return a.viewSerial == b.viewSerial && a.subresource == b.subresource;
}

constexpr ImageOrBufferViewSubresourceSerial kInvalidImageOrBufferViewSubresourceSerial = {
    kInvalidImageOrBufferViewSerial, kInvalidImageSubresourceRange};

// Always starts with array element zero, with descriptorCount descriptors.
struct WriteDescriptorDesc
{
    uint8_t binding;              // Redundant: determined by the containing WriteDesc array.
    uint8_t descriptorCount;      // Number of array elements in this descriptor write.
    uint8_t descriptorType;       // Packed VkDescriptorType.
    uint8_t descriptorInfoIndex;  // Base index into an array of DescriptorInfoDescs.
};

static_assert(sizeof(WriteDescriptorDesc) == 4, "Size mismatch");

struct DescriptorInfoDesc
{
    uint32_t samplerOrBufferSerial;
    uint32_t imageViewSerialOrOffset;
    uint32_t imageLayoutOrRange;
    uint32_t imageSubresourceRange;
};

static_assert(sizeof(DescriptorInfoDesc) == 16, "Size mismatch");
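
// Sketch of how the two structs cooperate (illustrative values): a write of 3 array elements at
// binding 1 is described once, and its per-element data lands at consecutive info indices.
//
//   WriteDescriptorDesc write = {};
//   write.binding             = 1;
//   write.descriptorCount     = 3;  // array elements [0, 3)
//   write.descriptorInfoIndex = 4;  // per-element infos stored at indices 4, 5 and 6
//   // DescriptorInfoDesc entries 4..6 then hold the serials/offsets for each array element.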

// Generic description of a descriptor set. Used as a key when indexing descriptor set caches. The
// key storage is an angle::FastVector. Beyond a certain fixed size we'll end up using heap memory
// to store keys. Currently we specialize the structure for three use cases: uniforms, textures,
// and other shader resources. Because of the way the specialization works we can't currently cache
// programs that use some types of resources.
static constexpr size_t kFastDescriptorSetDescLimit = 8;

struct DescriptorDescHandles
{
    VkBuffer buffer;
    VkSampler sampler;
    VkImageView imageView;
    VkBufferView bufferView;
};

class WriteDescriptorDescs
{
  public:
    void reset()
    {
        mDescs.clear();
        mDynamicDescriptorSetCount = 0;
        mCurrentInfoIndex          = 0;
    }

    void updateShaderBuffers(const ShaderInterfaceVariableInfoMap &variableInfoMap,
                             const std::vector<gl::InterfaceBlock> &blocks,
                             VkDescriptorType descriptorType);

    void updateAtomicCounters(const ShaderInterfaceVariableInfoMap &variableInfoMap,
                              const std::vector<gl::AtomicCounterBuffer> &atomicCounterBuffers);

    void updateImages(const gl::ProgramExecutable &executable,
                      const ShaderInterfaceVariableInfoMap &variableInfoMap);

    void updateInputAttachments(const gl::ProgramExecutable &executable,
                                const ShaderInterfaceVariableInfoMap &variableInfoMap,
                                FramebufferVk *framebufferVk);

    void updateExecutableActiveTextures(const ShaderInterfaceVariableInfoMap &variableInfoMap,
                                        const gl::ProgramExecutable &executable);

    void updateDefaultUniform(gl::ShaderBitSet shaderTypes,
                              const ShaderInterfaceVariableInfoMap &variableInfoMap,
                              const gl::ProgramExecutable &executable);

    void updateTransformFeedbackWrite(const ShaderInterfaceVariableInfoMap &variableInfoMap,
                                      const gl::ProgramExecutable &executable);

    void updateDynamicDescriptorsCount();

    size_t size() const { return mDescs.size(); }
    bool empty() const { return mDescs.size() == 0; }

    const WriteDescriptorDesc &operator[](uint32_t bindingIndex) const
    {
        return mDescs[bindingIndex];
    }

    size_t getTotalDescriptorCount() const { return mCurrentInfoIndex; }
    size_t getDynamicDescriptorSetCount() const { return mDynamicDescriptorSetCount; }

  private:
    bool hasWriteDescAtIndex(uint32_t bindingIndex) const
    {
        return bindingIndex < mDescs.size() && mDescs[bindingIndex].descriptorCount > 0;
    }

    void incrementDescriptorCount(uint32_t bindingIndex, uint32_t count)
    {
        // Validate we have no subsequent writes.
        ASSERT(hasWriteDescAtIndex(bindingIndex));
        mDescs[bindingIndex].descriptorCount += count;
    }

    void updateWriteDesc(uint32_t bindingIndex,
                         VkDescriptorType descriptorType,
                         uint32_t descriptorCount);

    // After a preliminary minimum size, use heap memory.
    angle::FastMap<WriteDescriptorDesc, kFastDescriptorSetDescLimit> mDescs;
    size_t mDynamicDescriptorSetCount = 0;
    uint32_t mCurrentInfoIndex        = 0;
};
std::ostream &operator<<(std::ostream &os, const WriteDescriptorDescs &desc);

class DescriptorSetDesc
{
  public:
    DescriptorSetDesc()  = default;
    ~DescriptorSetDesc() = default;

    DescriptorSetDesc(const DescriptorSetDesc &other) : mDescriptorInfos(other.mDescriptorInfos) {}

    DescriptorSetDesc &operator=(const DescriptorSetDesc &other)
    {
        mDescriptorInfos = other.mDescriptorInfos;
        return *this;
    }

    size_t hash() const;

    size_t size() const { return mDescriptorInfos.size(); }
    void resize(size_t count) { mDescriptorInfos.resize(count); }

    size_t getKeySizeBytes() const { return mDescriptorInfos.size() * sizeof(DescriptorInfoDesc); }

    bool operator==(const DescriptorSetDesc &other) const
    {
        return mDescriptorInfos.size() == other.mDescriptorInfos.size() &&
               memcmp(mDescriptorInfos.data(), other.mDescriptorInfos.data(),
                      mDescriptorInfos.size() * sizeof(DescriptorInfoDesc)) == 0;
    }

    DescriptorInfoDesc &getInfoDesc(uint32_t infoDescIndex)
    {
        return mDescriptorInfos[infoDescIndex];
    }

    const DescriptorInfoDesc &getInfoDesc(uint32_t infoDescIndex) const
    {
        return mDescriptorInfos[infoDescIndex];
    }

    void updateDescriptorSet(Renderer *renderer,
                             const WriteDescriptorDescs &writeDescriptorDescs,
                             UpdateDescriptorSetsBuilder *updateBuilder,
                             const DescriptorDescHandles *handles,
                             VkDescriptorSet descriptorSet) const;

  private:
    // After a preliminary minimum size, use heap memory.
    angle::FastVector<DescriptorInfoDesc, kFastDescriptorSetDescLimit> mDescriptorInfos;
};
std::ostream &operator<<(std::ostream &os, const DescriptorSetDesc &desc);

class DescriptorPoolHelper;

// SharedDescriptorSetCacheKey.
// Because a DescriptorSet must be associated with a pool, we define a structure that wraps both.
class DescriptorSetDescAndPool final
{
  public:
    DescriptorSetDescAndPool() : mPool(nullptr) {}
    DescriptorSetDescAndPool(const DescriptorSetDesc &desc, DynamicDescriptorPool *pool)
        : mDesc(desc), mPool(pool)
    {}
    DescriptorSetDescAndPool(DescriptorSetDescAndPool &&other)
        : mDesc(other.mDesc), mPool(other.mPool)
    {
        other.mPool = nullptr;
    }
    ~DescriptorSetDescAndPool() { ASSERT(!valid()); }
    void destroy(VkDevice /*device*/) { mPool = nullptr; }

    void destroyCachedObject(Renderer *renderer);
    void releaseCachedObject(ContextVk *contextVk) { UNREACHABLE(); }
    void releaseCachedObject(Renderer *renderer);
    bool valid() const { return mPool != nullptr; }
    const DescriptorSetDesc &getDesc() const
    {
        ASSERT(valid());
        return mDesc;
    }
    bool operator==(const DescriptorSetDescAndPool &other) const
    {
        return mDesc == other.mDesc && mPool == other.mPool;
    }

    // Returns true if the key/value can be found in the cache.
    bool hasValidCachedObject(ContextVk *contextVk) const;

  private:
    DescriptorSetDesc mDesc;
    DynamicDescriptorPool *mPool;
};
using SharedDescriptorSetCacheKey = SharedPtr<DescriptorSetDescAndPool>;
ANGLE_INLINE const SharedDescriptorSetCacheKey
CreateSharedDescriptorSetCacheKey(const DescriptorSetDesc &desc, DynamicDescriptorPool *pool)
{
    return SharedDescriptorSetCacheKey::MakeShared(VK_NULL_HANDLE, desc, pool);
}
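
// Illustrative sketch: the shared key keeps the desc and its owning pool together so either
// side can invalidate the cache entry (assumed |pool| lifetime is managed by the caller):
//
//   SharedDescriptorSetCacheKey key = CreateSharedDescriptorSetCacheKey(desc, pool);
//   ASSERT(key->valid());
//   // When the pool goes away, destroy()/destroyCachedObject() clears the pool pointer so
//   // other holders of the SharedPtr observe that the cached set is gone.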

constexpr VkDescriptorType kStorageBufferDescriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;

// Manages a descriptor set desc with a few helper routines and also stores object handles.
class DescriptorSetDescBuilder final
{
  public:
    DescriptorSetDescBuilder();
    DescriptorSetDescBuilder(size_t descriptorCount);
    ~DescriptorSetDescBuilder();

    DescriptorSetDescBuilder(const DescriptorSetDescBuilder &other);
    DescriptorSetDescBuilder &operator=(const DescriptorSetDescBuilder &other);

    const DescriptorSetDesc &getDesc() const { return mDesc; }

    void resize(size_t descriptorCount)
    {
        mDesc.resize(descriptorCount);
        mHandles.resize(descriptorCount);
        mDynamicOffsets.resize(descriptorCount);
    }

    // Specific helpers for uniforms/xfb descriptors.
    void updateUniformBuffer(uint32_t shaderIndex,
                             const WriteDescriptorDescs &writeDescriptorDescs,
                             const BufferHelper &bufferHelper,
                             VkDeviceSize bufferRange);

    void updateTransformFeedbackBuffer(const Context *context,
                                       const ShaderInterfaceVariableInfoMap &variableInfoMap,
                                       const WriteDescriptorDescs &writeDescriptorDescs,
                                       uint32_t xfbBufferIndex,
                                       const BufferHelper &bufferHelper,
                                       VkDeviceSize bufferOffset,
                                       VkDeviceSize bufferRange);

    void updateUniformsAndXfb(Context *context,
                              const gl::ProgramExecutable &executable,
                              const WriteDescriptorDescs &writeDescriptorDescs,
                              const BufferHelper *currentUniformBuffer,
                              const BufferHelper &emptyBuffer,
                              bool activeUnpaused,
                              TransformFeedbackVk *transformFeedbackVk);

    // Specific helpers for shader resource descriptors.
    template <typename CommandBufferT>
    void updateOneShaderBuffer(Context *context,
                               CommandBufferT *commandBufferHelper,
                               const ShaderInterfaceVariableInfoMap &variableInfoMap,
                               const gl::BufferVector &buffers,
                               const gl::InterfaceBlock &block,
                               uint32_t bufferIndex,
                               VkDescriptorType descriptorType,
                               VkDeviceSize maxBoundBufferRange,
                               const BufferHelper &emptyBuffer,
                               const WriteDescriptorDescs &writeDescriptorDescs,
                               const GLbitfield memoryBarrierBits);
    template <typename CommandBufferT>
    void updateShaderBuffers(Context *context,
                             CommandBufferT *commandBufferHelper,
                             const gl::ProgramExecutable &executable,
                             const ShaderInterfaceVariableInfoMap &variableInfoMap,
                             const gl::BufferVector &buffers,
                             const std::vector<gl::InterfaceBlock> &blocks,
                             VkDescriptorType descriptorType,
                             VkDeviceSize maxBoundBufferRange,
                             const BufferHelper &emptyBuffer,
                             const WriteDescriptorDescs &writeDescriptorDescs,
                             const GLbitfield memoryBarrierBits);
    template <typename CommandBufferT>
    void updateAtomicCounters(Context *context,
                              CommandBufferT *commandBufferHelper,
                              const gl::ProgramExecutable &executable,
                              const ShaderInterfaceVariableInfoMap &variableInfoMap,
                              const gl::BufferVector &buffers,
                              const std::vector<gl::AtomicCounterBuffer> &atomicCounterBuffers,
                              const VkDeviceSize requiredOffsetAlignment,
                              const BufferHelper &emptyBuffer,
                              const WriteDescriptorDescs &writeDescriptorDescs);
    angle::Result updateImages(Context *context,
                               const gl::ProgramExecutable &executable,
                               const ShaderInterfaceVariableInfoMap &variableInfoMap,
                               const gl::ActiveTextureArray<TextureVk *> &activeImages,
                               const std::vector<gl::ImageUnit> &imageUnits,
                               const WriteDescriptorDescs &writeDescriptorDescs);
    angle::Result updateInputAttachments(vk::Context *context,
                                         const gl::ProgramExecutable &executable,
                                         const ShaderInterfaceVariableInfoMap &variableInfoMap,
                                         FramebufferVk *framebufferVk,
                                         const WriteDescriptorDescs &writeDescriptorDescs);

    // Specialized update for textures.
    void updatePreCacheActiveTextures(Context *context,
                                      const gl::ProgramExecutable &executable,
                                      const gl::ActiveTextureArray<TextureVk *> &textures,
                                      const gl::SamplerBindingVector &samplers);

    void updateDescriptorSet(Renderer *renderer,
                             const WriteDescriptorDescs &writeDescriptorDescs,
                             UpdateDescriptorSetsBuilder *updateBuilder,
                             VkDescriptorSet descriptorSet) const;

    const uint32_t *getDynamicOffsets() const { return mDynamicOffsets.data(); }
    size_t getDynamicOffsetsSize() const { return mDynamicOffsets.size(); }

  private:
    void updateInputAttachment(Context *context,
                               uint32_t binding,
                               VkImageLayout layout,
                               const vk::ImageView *imageView,
                               ImageOrBufferViewSubresourceSerial serial,
                               const WriteDescriptorDescs &writeDescriptorDescs);

    void setEmptyBuffer(uint32_t infoDescIndex,
                        VkDescriptorType descriptorType,
                        const BufferHelper &emptyBuffer);

    DescriptorSetDesc mDesc;
    angle::FastVector<DescriptorDescHandles, kFastDescriptorSetDescLimit> mHandles;
    angle::FastVector<uint32_t, kFastDescriptorSetDescLimit> mDynamicOffsets;
};

// In the FramebufferDesc object:
//  - Depth/stencil serial is at index 0
//  - Color serials are at indices [1, gl::IMPLEMENTATION_MAX_DRAW_BUFFERS]
//  - Depth/stencil resolve attachment is at index gl::IMPLEMENTATION_MAX_DRAW_BUFFERS+1
//  - Resolve attachments are at indices [gl::IMPLEMENTATION_MAX_DRAW_BUFFERS+2,
//                                        gl::IMPLEMENTATION_MAX_DRAW_BUFFERS*2+1]
//  - Fragment shading rate attachment serial is at index
//    (gl::IMPLEMENTATION_MAX_DRAW_BUFFERS*2+1)+1
constexpr size_t kFramebufferDescDepthStencilIndex = 0;
constexpr size_t kFramebufferDescColorIndexOffset  = kFramebufferDescDepthStencilIndex + 1;
constexpr size_t kFramebufferDescDepthStencilResolveIndexOffset =
    kFramebufferDescColorIndexOffset + gl::IMPLEMENTATION_MAX_DRAW_BUFFERS;
constexpr size_t kFramebufferDescColorResolveIndexOffset =
    kFramebufferDescDepthStencilResolveIndexOffset + 1;
constexpr size_t kFramebufferDescFragmentShadingRateAttachmentIndexOffset =
    kFramebufferDescColorResolveIndexOffset + gl::IMPLEMENTATION_MAX_DRAW_BUFFERS;
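
// Worked example, assuming gl::IMPLEMENTATION_MAX_DRAW_BUFFERS == 8: depth/stencil sits at
// index 0, colors at [1, 8], the depth/stencil resolve at 9, color resolves at [10, 17] and
// the fragment shading rate attachment at 18. For instance, the resolve serial of color
// attachment 2 would be mSerials[kFramebufferDescColorResolveIndexOffset + 2], i.e.
// mSerials[12].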

// Enable struct padding warnings for the code below since it is used in caches.
ANGLE_ENABLE_STRUCT_PADDING_WARNINGS

class FramebufferDesc
{
  public:
    FramebufferDesc();
    ~FramebufferDesc();

    FramebufferDesc(const FramebufferDesc &other);
    FramebufferDesc &operator=(const FramebufferDesc &other);

    void updateColor(uint32_t index, ImageOrBufferViewSubresourceSerial serial);
    void updateColorResolve(uint32_t index, ImageOrBufferViewSubresourceSerial serial);
    void updateUnresolveMask(FramebufferNonResolveAttachmentMask unresolveMask);
    void updateDepthStencil(ImageOrBufferViewSubresourceSerial serial);
    void updateDepthStencilResolve(ImageOrBufferViewSubresourceSerial serial);
    ANGLE_INLINE void setWriteControlMode(gl::SrgbWriteControlMode mode)
    {
        mSrgbWriteControlMode = static_cast<uint16_t>(mode);
    }
    void updateIsMultiview(bool isMultiview) { mIsMultiview = isMultiview; }
    size_t hash() const;

    bool operator==(const FramebufferDesc &other) const;

    uint32_t attachmentCount() const;

    ImageOrBufferViewSubresourceSerial getColorImageViewSerial(uint32_t index)
    {
        ASSERT(kFramebufferDescColorIndexOffset + index < mSerials.size());
        return mSerials[kFramebufferDescColorIndexOffset + index];
    }

    FramebufferNonResolveAttachmentMask getUnresolveAttachmentMask() const;
    ANGLE_INLINE gl::SrgbWriteControlMode getWriteControlMode() const
    {
        return (mSrgbWriteControlMode == 1) ? gl::SrgbWriteControlMode::Linear
                                            : gl::SrgbWriteControlMode::Default;
    }

    void updateLayerCount(uint32_t layerCount);
    uint32_t getLayerCount() const { return mLayerCount; }
    void setColorFramebufferFetchMode(bool hasColorFramebufferFetch);
    bool hasColorFramebufferFetch() const { return mHasColorFramebufferFetch; }

    bool isMultiview() const { return mIsMultiview; }

    void updateRenderToTexture(bool isRenderToTexture);

    void updateFragmentShadingRate(ImageOrBufferViewSubresourceSerial serial);
    bool hasFragmentShadingRateAttachment() const;

    // Used by SharedFramebufferCacheKey
    void destroy(VkDevice /*device*/) { SetBitField(mIsValid, 0); }
    void destroyCachedObject(Renderer *renderer);
    void releaseCachedObject(Renderer *renderer) { UNREACHABLE(); }
    void releaseCachedObject(ContextVk *contextVk);
    bool valid() const { return mIsValid; }
    bool hasValidCachedObject(ContextVk *contextVk) const;

  private:
    void reset();
    void update(uint32_t index, ImageOrBufferViewSubresourceSerial serial);

    // Note: this is an exclusive index. If there is one index it will be "1".
    // Maximum value is 18
    uint16_t mMaxIndex : 5;

    // Whether the render pass has input attachments or not.
    // Note that depth/stencil framebuffer fetch is only implemented for dynamic rendering, and so
    // does not interact with this class.
    uint16_t mHasColorFramebufferFetch : 1;
    static_assert(gl::IMPLEMENTATION_MAX_FRAMEBUFFER_LAYERS < (1 << 9) - 1,
                  "Not enough bits for mLayerCount");

    uint16_t mLayerCount : 9;

    uint16_t mSrgbWriteControlMode : 1;

    // If the render pass contains an initial subpass to unresolve a number of attachments, the
    // subpass description is derived from the following mask, specifying which attachments need
    // to be unresolved. Includes both color and depth/stencil attachments.
    uint16_t mUnresolveAttachmentMask : kMaxFramebufferNonResolveAttachments;

    // Whether this is a multisampled-render-to-single-sampled framebuffer. Only used when using
    // VK_EXT_multisampled_render_to_single_sampled. Only one bit is used and the rest is padding.
    uint16_t mIsRenderToTexture : 14 - kMaxFramebufferNonResolveAttachments;

    uint16_t mIsMultiview : 1;
    // Used by SharedFramebufferCacheKey to indicate if this cache key is valid or not.
    uint16_t mIsValid : 1;

    FramebufferAttachmentArray<ImageOrBufferViewSubresourceSerial> mSerials;
};

constexpr size_t kFramebufferDescSize = sizeof(FramebufferDesc);
static_assert(kFramebufferDescSize == 156, "Size check failed");

// Disable warnings about struct padding.
ANGLE_DISABLE_STRUCT_PADDING_WARNINGS

using SharedFramebufferCacheKey = SharedPtr<FramebufferDesc>;
ANGLE_INLINE const SharedFramebufferCacheKey
CreateSharedFramebufferCacheKey(const FramebufferDesc &desc)
{
    return SharedFramebufferCacheKey::MakeShared(VK_NULL_HANDLE, desc);
}

// The SamplerHelper allows a Sampler to be coupled with a serial.
// Must be included before we declare SamplerCache.
class SamplerHelper final : angle::NonCopyable
{
  public:
    SamplerHelper() = default;
    ~SamplerHelper() { ASSERT(!valid()); }

    explicit SamplerHelper(SamplerHelper &&samplerHelper);
    SamplerHelper &operator=(SamplerHelper &&rhs);

    angle::Result init(ErrorContext *context, const VkSamplerCreateInfo &createInfo);
    angle::Result init(ContextVk *contextVk, const SamplerDesc &desc);
    void destroy(VkDevice device) { mSampler.destroy(device); }
    void destroy() { ASSERT(!valid()); }
    bool valid() const { return mSampler.valid(); }
    const Sampler &get() const { return mSampler; }
    SamplerSerial getSamplerSerial() const { return mSamplerSerial; }

  private:
    Sampler mSampler;
    SamplerSerial mSamplerSerial;
};

using SharedSamplerPtr = SharedPtr<SamplerHelper>;

class RenderPassHelper final : angle::NonCopyable
{
  public:
    RenderPassHelper();
    ~RenderPassHelper();

    RenderPassHelper(RenderPassHelper &&other);
    RenderPassHelper &operator=(RenderPassHelper &&other);

    void destroy(VkDevice device);
    void release(ContextVk *contextVk);

    const RenderPass &getRenderPass() const;
    RenderPass &getRenderPass();

    const RenderPassPerfCounters &getPerfCounters() const;
    RenderPassPerfCounters &getPerfCounters();

  private:
    RenderPass mRenderPass;
    RenderPassPerfCounters mPerfCounters;
};

// Helper class that manages the lifetime of various cache objects so that a cache entry can be
// destroyed when one of its components becomes invalid.
template <class SharedCacheKeyT>
class SharedCacheKeyManager
{
  public:
    SharedCacheKeyManager() = default;
    ~SharedCacheKeyManager() { ASSERT(empty()); }
    // Store the pointer to the cache key and retain it.
    void addKey(const SharedCacheKeyT &key);
    // Iterate over the descriptor array and release the descriptor and cache.
    void releaseKeys(ContextVk *contextVk);
    void releaseKeys(Renderer *renderer);
    // Iterate over the descriptor array and destroy the descriptor and cache.
    void destroyKeys(Renderer *renderer);
    void clear();

    // The following APIs are expected to be used for assertions only.
    bool empty() const { return mSharedCacheKeys.empty(); }
    bool allValidEntriesAreCached(ContextVk *contextVk) const;

  private:
    size_t updateEmptySlotBits();
    void addKeyImpl(const SharedCacheKeyT &key);

    bool containsKeyWithOwnerEqual(const SharedCacheKeyT &key) const;
    void assertAllEntriesDestroyed() const;

    // Tracks an array of cache keys with refcounting. Note this owns one refcount of each
    // SharedCacheKeyT object.
    std::deque<SharedCacheKeyT> mSharedCacheKeys;
    SharedCacheKeyT mLastAddedSharedCacheKey;

    // To speed up searching for an available slot in mSharedCacheKeys, we use a bitset to track
    // available (i.e., empty) slots.
    static constexpr size_t kInvalidSlot  = -1;
    static constexpr size_t kSlotBitCount = 64;
    using SlotBitMask                     = angle::BitSet64<kSlotBitCount>;
    std::vector<SlotBitMask> mEmptySlotBits;
};
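
// Illustrative lifetime sketch (assumed call sites): an object that lands in a cache adds the
// shared key to its manager; when the object is destroyed, releasing the keys evicts every
// cache entry that referenced it.
//
//   SharedCacheKeyManager<SharedFramebufferCacheKey> manager;
//   manager.addKey(sharedFramebufferKey);  // retains one refcount on the key
//   ...
//   manager.releaseKeys(contextVk);        // releases keys and the cached framebuffers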

using FramebufferCacheManager = SharedCacheKeyManager<SharedFramebufferCacheKey>;
template <>
void FramebufferCacheManager::addKey(const SharedFramebufferCacheKey &key);

using DescriptorSetCacheManager = SharedCacheKeyManager<SharedDescriptorSetCacheKey>;
template <>
void DescriptorSetCacheManager::addKey(const SharedDescriptorSetCacheKey &key);
}  // namespace vk
}  // namespace rx

// Introduce std::hash for the above classes.
namespace std
{
template <>
struct hash<rx::vk::RenderPassDesc>
{
    size_t operator()(const rx::vk::RenderPassDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::AttachmentOpsArray>
{
    size_t operator()(const rx::vk::AttachmentOpsArray &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::DescriptorSetLayoutDesc>
{
    size_t operator()(const rx::vk::DescriptorSetLayoutDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::PipelineLayoutDesc>
{
    size_t operator()(const rx::vk::PipelineLayoutDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::ImageSubresourceRange>
{
    size_t operator()(const rx::vk::ImageSubresourceRange &key) const
    {
        return *reinterpret_cast<const uint32_t *>(&key);
    }
};

template <>
struct hash<rx::vk::DescriptorSetDesc>
{
    size_t operator()(const rx::vk::DescriptorSetDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::FramebufferDesc>
{
    size_t operator()(const rx::vk::FramebufferDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::YcbcrConversionDesc>
{
    size_t operator()(const rx::vk::YcbcrConversionDesc &key) const { return key.hash(); }
};

template <>
struct hash<rx::vk::SamplerDesc>
{
    size_t operator()(const rx::vk::SamplerDesc &key) const { return key.hash(); }
};

// See Resource Serial types defined in vk_utils.h.
#define ANGLE_HASH_VK_SERIAL(Type)                               \
    template <>                                                  \
    struct hash<rx::vk::Type##Serial>                            \
    {                                                            \
        size_t operator()(const rx::vk::Type##Serial &key) const \
        {                                                        \
            return key.getValue();                               \
        }                                                        \
    };

ANGLE_VK_SERIAL_OP(ANGLE_HASH_VK_SERIAL)

}  // namespace std

namespace rx
{
// Cache types for various Vulkan objects
enum class VulkanCacheType
{
    CompatibleRenderPass,
    RenderPassWithOps,
    GraphicsPipeline,
    ComputePipeline,
    PipelineLayout,
    Sampler,
    SamplerYcbcrConversion,
    DescriptorSetLayout,
    DriverUniformsDescriptors,
    TextureDescriptors,
    UniformsAndXfbDescriptors,
    ShaderResourcesDescriptors,
    Framebuffer,
    DescriptorMetaCache,
    EnumCount
};

// Base class for all caches. Provides cache hit and miss counters.
class CacheStats final : angle::NonCopyable
{
  public:
    CacheStats() { reset(); }
    ~CacheStats() {}

    CacheStats(const CacheStats &rhs)
        : mHitCount(rhs.mHitCount), mMissCount(rhs.mMissCount), mSize(rhs.mSize)
    {}

    CacheStats &operator=(const CacheStats &rhs)
    {
        mHitCount  = rhs.mHitCount;
        mMissCount = rhs.mMissCount;
        mSize      = rhs.mSize;
        return *this;
    }

    ANGLE_INLINE void hit() { mHitCount++; }
    ANGLE_INLINE void miss() { mMissCount++; }
    ANGLE_INLINE void incrementSize() { mSize++; }
    ANGLE_INLINE void decrementSize() { mSize--; }
    ANGLE_INLINE void missAndIncrementSize()
    {
        mMissCount++;
        mSize++;
    }
    ANGLE_INLINE void accumulate(const CacheStats &stats)
    {
        mHitCount += stats.mHitCount;
        mMissCount += stats.mMissCount;
        mSize += stats.mSize;
    }

    uint32_t getHitCount() const { return mHitCount; }
    uint32_t getMissCount() const { return mMissCount; }

    ANGLE_INLINE double getHitRatio() const
    {
        if (mHitCount + mMissCount == 0)
        {
            return 0;
        }
        else
        {
            return static_cast<double>(mHitCount) / (mHitCount + mMissCount);
        }
    }

    ANGLE_INLINE uint32_t getSize() const { return mSize; }
    ANGLE_INLINE void setSize(uint32_t size) { mSize = size; }

    void reset()
    {
        mHitCount  = 0;
        mMissCount = 0;
        mSize      = 0;
    }

    void resetHitAndMissCount()
    {
        mHitCount  = 0;
        mMissCount = 0;
    }

    void accumulateCacheStats(VulkanCacheType cacheType, const CacheStats &cacheStats)
    {
        mHitCount += cacheStats.getHitCount();
        mMissCount += cacheStats.getMissCount();
    }

  private:
    uint32_t mHitCount;
    uint32_t mMissCount;
    uint32_t mSize;
};
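
// Example bookkeeping (illustrative): a lookup that misses and populates the cache counts once
// against the hit ratio and once against the size.
//
//   CacheStats stats;
//   stats.hit();                   // 1 hit
//   stats.missAndIncrementSize();  // 1 miss, size 1
//   // stats.getHitRatio() == 0.5, stats.getSize() == 1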

template <VulkanCacheType CacheType>
class HasCacheStats : angle::NonCopyable
{
  public:
    template <typename Accumulator>
    void accumulateCacheStats(Accumulator *accum)
    {
        accum->accumulateCacheStats(CacheType, mCacheStats);
        mCacheStats.reset();
    }

    void getCacheStats(CacheStats *accum) const { accum->accumulate(mCacheStats); }

  protected:
    HasCacheStats()          = default;
    virtual ~HasCacheStats() = default;

    CacheStats mCacheStats;
};

using VulkanCacheStats = angle::PackedEnumMap<VulkanCacheType, CacheStats>;

// FramebufferVk Cache
class FramebufferCache final : angle::NonCopyable
{
  public:
    FramebufferCache() = default;
    ~FramebufferCache() { ASSERT(mPayload.empty()); }

    void destroy(vk::Renderer *renderer);

    bool get(ContextVk *contextVk, const vk::FramebufferDesc &desc, vk::Framebuffer &framebuffer);
    void insert(ContextVk *contextVk,
                const vk::FramebufferDesc &desc,
                vk::FramebufferHelper &&framebufferHelper);
    void erase(ContextVk *contextVk, const vk::FramebufferDesc &desc);

    size_t getSize() const { return mPayload.size(); }
    bool empty() const { return mPayload.empty(); }

  private:
    angle::HashMap<vk::FramebufferDesc, vk::FramebufferHelper> mPayload;
    CacheStats mCacheStats;
};

// TODO(jmadill): Add cache trimming/eviction.
class RenderPassCache final : angle::NonCopyable
{
  public:
    RenderPassCache();
    ~RenderPassCache();

    void destroy(ContextVk *contextVk);
    void clear(ContextVk *contextVk);

    ANGLE_INLINE angle::Result getCompatibleRenderPass(ContextVk *contextVk,
                                                       const vk::RenderPassDesc &desc,
                                                       const vk::RenderPass **renderPassOut)
    {
        auto outerIt = mPayload.find(desc);
        if (outerIt != mPayload.end())
        {
            InnerCache &innerCache = outerIt->second;
            ASSERT(!innerCache.empty());

            // Find the first element and return it.
            *renderPassOut = &innerCache.begin()->second.getRenderPass();
            mCompatibleRenderPassCacheStats.hit();
            return angle::Result::Continue;
        }

        mCompatibleRenderPassCacheStats.missAndIncrementSize();
        return addCompatibleRenderPass(contextVk, desc, renderPassOut);
    }

    angle::Result getRenderPassWithOps(ContextVk *contextVk,
                                       const vk::RenderPassDesc &desc,
                                       const vk::AttachmentOpsArray &attachmentOps,
                                       const vk::RenderPass **renderPassOut);

    static void InitializeOpsForCompatibleRenderPass(const vk::RenderPassDesc &desc,
                                                     vk::AttachmentOpsArray *opsOut);
    static angle::Result MakeRenderPass(vk::ErrorContext *context,
                                        const vk::RenderPassDesc &desc,
                                        const vk::AttachmentOpsArray &ops,
                                        vk::RenderPass *renderPass,
                                        vk::RenderPassPerfCounters *renderPassCounters);

  private:
    angle::Result getRenderPassWithOpsImpl(ContextVk *contextVk,
                                           const vk::RenderPassDesc &desc,
                                           const vk::AttachmentOpsArray &attachmentOps,
                                           bool updatePerfCounters,
                                           const vk::RenderPass **renderPassOut);

    angle::Result addCompatibleRenderPass(ContextVk *contextVk,
                                          const vk::RenderPassDesc &desc,
                                          const vk::RenderPass **renderPassOut);

    // Use a two-layer caching scheme. The top level matches the "compatible" RenderPass elements.
    // The second layer caches the attachment load/store ops and initial/final layout.
    // `std::unordered_map` is used (rather than `angle::HashMap`) to retain pointer stability.
    using InnerCache = std::unordered_map<vk::AttachmentOpsArray, vk::RenderPassHelper>;
    using OuterCache = std::unordered_map<vk::RenderPassDesc, InnerCache>;

    OuterCache mPayload;
    CacheStats mCompatibleRenderPassCacheStats;
    CacheStats mRenderPassWithOpsCacheStats;
};
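
// Illustrative two-level lookup (assumed |contextVk| and descriptions): compatibility only
// needs the outer map, while a specific load/store-op combination also consults the inner map
// keyed by AttachmentOpsArray.
//
//   const vk::RenderPass *compatible = nullptr;
//   ANGLE_TRY(cache.getCompatibleRenderPass(contextVk, desc, &compatible));  // any ops work
//   const vk::RenderPass *exact = nullptr;
//   ANGLE_TRY(cache.getRenderPassWithOps(contextVk, desc, ops, &exact));     // exact ops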

enum class PipelineSource
{
    // Pipeline created when warming up the program's pipeline cache
    WarmUp,
    // Monolithic pipeline created at draw time
    Draw,
    // Pipeline created at draw time by linking partial pipeline libraries
    DrawLinked,
    // Pipeline created for UtilsVk
    Utils,
    // Pipeline created at dispatch time
    Dispatch
};

struct ComputePipelineDescHash
{
    size_t operator()(const rx::vk::ComputePipelineDesc &key) const { return key.hash(); }
};
struct GraphicsPipelineDescCompleteHash
{
    size_t operator()(const rx::vk::GraphicsPipelineDesc &key) const
    {
        return key.hash(vk::GraphicsPipelineSubset::Complete);
    }
};
struct GraphicsPipelineDescShadersHash
{
    size_t operator()(const rx::vk::GraphicsPipelineDesc &key) const
    {
        return key.hash(vk::GraphicsPipelineSubset::Shaders);
    }
};

struct ComputePipelineDescKeyEqual
{
    size_t operator()(const rx::vk::ComputePipelineDesc &first,
                      const rx::vk::ComputePipelineDesc &second) const
    {
        return first.keyEqual(second);
    }
};
struct GraphicsPipelineDescCompleteKeyEqual
{
    size_t operator()(const rx::vk::GraphicsPipelineDesc &first,
                      const rx::vk::GraphicsPipelineDesc &second) const
    {
        return first.keyEqual(second, vk::GraphicsPipelineSubset::Complete);
    }
};
struct GraphicsPipelineDescShadersKeyEqual
{
    size_t operator()(const rx::vk::GraphicsPipelineDesc &first,
                      const rx::vk::GraphicsPipelineDesc &second) const
    {
        return first.keyEqual(second, vk::GraphicsPipelineSubset::Shaders);
    }
};

// Derive the KeyEqual and GraphicsPipelineSubset enum from the Hash struct
template <typename Hash>
struct GraphicsPipelineCacheTypeHelper
{
    using KeyEqual                                      = GraphicsPipelineDescCompleteKeyEqual;
    static constexpr vk::GraphicsPipelineSubset kSubset = vk::GraphicsPipelineSubset::Complete;
};

template <>
struct GraphicsPipelineCacheTypeHelper<GraphicsPipelineDescShadersHash>
{
    using KeyEqual                                      = GraphicsPipelineDescShadersKeyEqual;
    static constexpr vk::GraphicsPipelineSubset kSubset = vk::GraphicsPipelineSubset::Shaders;
};

// Compute Pipeline Cache implementation
// TODO(aannestrand): Add cache trimming/eviction.
// http://anglebug.com/391672281
class ComputePipelineCache final : HasCacheStats<rx::VulkanCacheType::ComputePipeline>
{
  public:
    ComputePipelineCache() = default;
    ~ComputePipelineCache() override { ASSERT(mPayload.empty()); }

    void destroy(vk::ErrorContext *context);
    void release(vk::ErrorContext *context);

    angle::Result getOrCreatePipeline(vk::ErrorContext *context,
                                      vk::PipelineCacheAccess *pipelineCache,
                                      const vk::PipelineLayout &pipelineLayout,
                                      vk::ComputePipelineOptions &pipelineOptions,
                                      PipelineSource source,
                                      vk::PipelineHelper **pipelineOut,
                                      const char *shaderName,
                                      VkSpecializationInfo *specializationInfo,
                                      const vk::ShaderModuleMap &shaderModuleMap);

  private:
    angle::Result createPipeline(vk::ErrorContext *context,
                                 vk::PipelineCacheAccess *pipelineCache,
                                 const vk::PipelineLayout &pipelineLayout,
                                 vk::ComputePipelineOptions &pipelineOptions,
                                 PipelineSource source,
                                 const char *shaderName,
                                 const vk::ShaderModule &shaderModule,
                                 VkSpecializationInfo *specializationInfo,
                                 const vk::ComputePipelineDesc &desc,
                                 vk::PipelineHelper **pipelineOut);

    std::unordered_map<vk::ComputePipelineDesc,
                       vk::PipelineHelper,
                       ComputePipelineDescHash,
                       ComputePipelineDescKeyEqual>
        mPayload;
};

// TODO(jmadill): Add cache trimming/eviction.
template <typename Hash>
class GraphicsPipelineCache final : public HasCacheStats<VulkanCacheType::GraphicsPipeline>
{
  public:
    GraphicsPipelineCache() = default;
    ~GraphicsPipelineCache() override { ASSERT(mPayload.empty()); }

    void destroy(vk::ErrorContext *context);
    void release(vk::ErrorContext *context);

    void populate(const vk::GraphicsPipelineDesc &desc,
                  vk::Pipeline &&pipeline,
                  vk::PipelineHelper **pipelineHelperOut);

    // Get a pipeline from the cache, if it exists
    ANGLE_INLINE bool getPipeline(const vk::GraphicsPipelineDesc &desc,
                                  const vk::GraphicsPipelineDesc **descPtrOut,
                                  vk::PipelineHelper **pipelineOut)
    {
        auto item = mPayload.find(desc);
        if (item == mPayload.end())
        {
            return false;
        }

        *descPtrOut  = &item->first;
        *pipelineOut = &item->second;

        mCacheStats.hit();

        return true;
    }

    angle::Result createPipeline(vk::ErrorContext *context,
                                 vk::PipelineCacheAccess *pipelineCache,
                                 const vk::RenderPass &compatibleRenderPass,
                                 const vk::PipelineLayout &pipelineLayout,
                                 const vk::GraphicsPipelineShadersInfo &shaders,
                                 PipelineSource source,
                                 const vk::GraphicsPipelineDesc &desc,
                                 const vk::GraphicsPipelineDesc **descPtrOut,
                                 vk::PipelineHelper **pipelineOut);

    // Helper for VulkanPipelineCachePerf that resets the object without destroying any object.
    void reset() { mPayload.clear(); }

  private:
    void addToCache(PipelineSource source,
                    const vk::GraphicsPipelineDesc &desc,
                    vk::Pipeline &&pipeline,
                    vk::CacheLookUpFeedback feedback,
                    const vk::GraphicsPipelineDesc **descPtrOut,
                    vk::PipelineHelper **pipelineOut);

    using KeyEqual = typename GraphicsPipelineCacheTypeHelper<Hash>::KeyEqual;
    std::unordered_map<vk::GraphicsPipelineDesc, vk::PipelineHelper, Hash, KeyEqual> mPayload;
};
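
// Illustrative lookup-or-create sketch (assumed surrounding objects; mirrors the miss path of
// the transition cache described earlier):
//
//   const vk::GraphicsPipelineDesc *descPtr = nullptr;
//   vk::PipelineHelper *pipeline            = nullptr;
//   if (!cache.getPipeline(desc, &descPtr, &pipeline))
//   {
//       ANGLE_TRY(cache.createPipeline(context, &pipelineCacheAccess, renderPass, layout,
//                                      shaders, PipelineSource::Draw, desc, &descPtr,
//                                      &pipeline));
//   }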
2803
2804 using CompleteGraphicsPipelineCache = GraphicsPipelineCache<GraphicsPipelineDescCompleteHash>;
2805 using ShadersGraphicsPipelineCache = GraphicsPipelineCache<GraphicsPipelineDescShadersHash>;
2806
2807 class DescriptorSetLayoutCache final : angle::NonCopyable
2808 {
2809 public:
2810 DescriptorSetLayoutCache();
2811 ~DescriptorSetLayoutCache();
2812
2813 void destroy(vk::Renderer *renderer);
2814
2815 angle::Result getDescriptorSetLayout(vk::ErrorContext *context,
2816 const vk::DescriptorSetLayoutDesc &desc,
2817 vk::DescriptorSetLayoutPtr *descriptorSetLayoutOut);
2818
2819 // Helpers for white box tests
2820 size_t getCacheHitCount() const { return mCacheStats.getHitCount(); }
2821 size_t getCacheMissCount() const { return mCacheStats.getMissCount(); }
2822
2823 private:
2824 mutable angle::SimpleMutex mMutex;
2825 std::unordered_map<vk::DescriptorSetLayoutDesc, vk::DescriptorSetLayoutPtr> mPayload;
2826 CacheStats mCacheStats;
2827 };
2828
2829 class PipelineLayoutCache final : public HasCacheStats<VulkanCacheType::PipelineLayout>
2830 {
2831 public:
2832 PipelineLayoutCache();
2833 ~PipelineLayoutCache() override;
2834
2835 void destroy(vk::Renderer *renderer);
2836
2837 angle::Result getPipelineLayout(vk::ErrorContext *context,
2838 const vk::PipelineLayoutDesc &desc,
2839 const vk::DescriptorSetLayoutPointerArray &descriptorSetLayouts,
2840 vk::PipelineLayoutPtr *pipelineLayoutOut);
2841
2842 private:
2843 mutable angle::SimpleMutex mMutex;
2844 std::unordered_map<vk::PipelineLayoutDesc, vk::PipelineLayoutPtr> mPayload;
2845 };
2846
2847 class SamplerCache final : public HasCacheStats<VulkanCacheType::Sampler>
2848 {
2849 public:
2850 SamplerCache();
2851 ~SamplerCache() override;
2852
2853 void destroy(vk::Renderer *renderer);
2854
2855 angle::Result getSampler(ContextVk *contextVk,
2856 const vk::SamplerDesc &desc,
2857 vk::SharedSamplerPtr *samplerOut);
2858
2859 private:
2860 std::unordered_map<vk::SamplerDesc, vk::SharedSamplerPtr> mPayload;
2861 };
2862
// SamplerYcbcrConversion cache, with separate maps for external (YUV) formats and regular Vulkan
// formats.
2864 class SamplerYcbcrConversionCache final
2865 : public HasCacheStats<VulkanCacheType::SamplerYcbcrConversion>
2866 {
2867 public:
2868 SamplerYcbcrConversionCache();
2869 ~SamplerYcbcrConversionCache() override;
2870
2871 void destroy(vk::Renderer *renderer);
2872
2873 angle::Result getSamplerYcbcrConversion(vk::ErrorContext *context,
2874 const vk::YcbcrConversionDesc &ycbcrConversionDesc,
2875 VkSamplerYcbcrConversion *vkSamplerYcbcrConversionOut);
2876
2877 private:
2878 using SamplerYcbcrConversionMap =
2879 std::unordered_map<vk::YcbcrConversionDesc, vk::SamplerYcbcrConversion>;
2880 SamplerYcbcrConversionMap mExternalFormatPayload;
2881 SamplerYcbcrConversionMap mVkFormatPayload;
2882 };
2883
2884 // Descriptor Set Cache
2885 template <typename T>
2886 class DescriptorSetCache final : angle::NonCopyable
2887 {
2888 public:
2889 DescriptorSetCache() = default;
2890 ~DescriptorSetCache() { ASSERT(mPayload.empty()); }
2891
2892 DescriptorSetCache(DescriptorSetCache &&other) : DescriptorSetCache()
2893 {
2894 *this = std::move(other);
2895 }
2896
2897 DescriptorSetCache &operator=(DescriptorSetCache &&other)
2898 {
2899 std::swap(mPayload, other.mPayload);
2900 return *this;
2901 }
2902
2903 void clear() { mPayload.clear(); }
2904
2905 bool getDescriptorSet(const vk::DescriptorSetDesc &desc, T *descriptorSetOut) const
2906 {
2907 auto iter = mPayload.find(desc);
2908 if (iter != mPayload.end())
2909 {
2910 *descriptorSetOut = iter->second;
2911 return true;
2912 }
2913 return false;
2914 }
2915
2916 void insertDescriptorSet(const vk::DescriptorSetDesc &desc, const T &descriptorSetHelper)
2917 {
2918 mPayload.emplace(desc, descriptorSetHelper);
2919 }
2920
2921 bool eraseDescriptorSet(const vk::DescriptorSetDesc &desc, T *descriptorSetOut)
2922 {
2923 auto iter = mPayload.find(desc);
2924 if (iter != mPayload.end())
2925 {
2926 *descriptorSetOut = std::move(iter->second);
2927 mPayload.erase(iter);
2928 return true;
2929 }
2930 return false;
2931 }
2932
2933 bool eraseDescriptorSet(const vk::DescriptorSetDesc &desc)
2934 {
2935 auto iter = mPayload.find(desc);
2936 if (iter != mPayload.end())
2937 {
2938 mPayload.erase(iter);
2939 return true;
2940 }
2941 return false;
2942 }
2943
2944 size_t getTotalCacheSize() const { return mPayload.size(); }
2945
2946 size_t getTotalCacheKeySizeBytes() const
2947 {
2948 size_t totalSize = 0;
2949 for (const auto &iter : mPayload)
2950 {
2951 const vk::DescriptorSetDesc &desc = iter.first;
2952 totalSize += desc.getKeySizeBytes();
2953 }
2954 return totalSize;
2955 }
2956 bool empty() const { return mPayload.empty(); }
2957
2958 private:
2959 angle::HashMap<vk::DescriptorSetDesc, T> mPayload;
2960 };
2961
2962 // There is 1 default uniform binding used per stage.
2963 constexpr uint32_t kReservedPerStageDefaultUniformBindingCount = 1;
2964
2965 class UpdateDescriptorSetsBuilder final : angle::NonCopyable
2966 {
2967 public:
2968 UpdateDescriptorSetsBuilder();
2969 ~UpdateDescriptorSetsBuilder();
2970
2971 VkDescriptorBufferInfo *allocDescriptorBufferInfos(uint32_t count)
2972 {
2973 return mDescriptorBufferInfos.allocate(count);
2974 }
2975 VkDescriptorImageInfo *allocDescriptorImageInfos(uint32_t count)
2976 {
2977 return mDescriptorImageInfos.allocate(count);
2978 }
2979 VkWriteDescriptorSet *allocWriteDescriptorSets(uint32_t count)
2980 {
2981 return mWriteDescriptorSets.allocate(count);
2982 }
2983 VkBufferView *allocBufferViews(uint32_t count) { return mBufferViews.allocate(count); }
2984
2985 VkDescriptorBufferInfo &allocDescriptorBufferInfo() { return *allocDescriptorBufferInfos(1); }
2986 VkDescriptorImageInfo &allocDescriptorImageInfo() { return *allocDescriptorImageInfos(1); }
2987 VkWriteDescriptorSet &allocWriteDescriptorSet() { return *allocWriteDescriptorSets(1); }
2988 VkBufferView &allocBufferView() { return *allocBufferViews(1); }
2989
2990 // Returns the number of written descriptor sets.
2991 uint32_t flushDescriptorSetUpdates(VkDevice device);
2992
2993 private:
    // Manages the storage for VkDescriptorBufferInfo and VkDescriptorImageInfo. The overall
    // storage is not required to be contiguous, but the memory returned by a single allocate()
    // call must be. The actual storage grows as needed.
2997 template <typename T>
2998 class DescriptorInfoAllocator : angle::NonCopyable
2999 {
3000 public:
3001 void init(uint32_t initialVectorCapacity)
3002 {
3003 mVectorCapacity = initialVectorCapacity;
3004 mDescriptorInfos.emplace_back();
3005 mDescriptorInfos.back().reserve(mVectorCapacity);
3006 mCurrentVector = mDescriptorInfos.begin();
3007 mTotalSize = 0;
3008 }
3009 void clear()
3010 {
3011 mDescriptorInfos.resize(1);
3012 mDescriptorInfos.front().clear();
            // Grow the first vector's capacity so it can hold everything that was allocated in
            // the previous round.
3014 mVectorCapacity = std::max(mTotalSize, mVectorCapacity);
3015 mDescriptorInfos.front().reserve(mVectorCapacity);
3016 mCurrentVector = mDescriptorInfos.begin();
3017 mTotalSize = 0;
3018 }
3019 T *allocate(uint32_t count);
3020
3021 bool empty() const { return mTotalSize == 0; }
3022
3023 protected:
3024 uint32_t mVectorCapacity = 16;
3025 std::deque<std::vector<T>> mDescriptorInfos;
3026 typename std::deque<std::vector<T>>::iterator mCurrentVector;
        uint32_t mTotalSize = 0;
3028 };
3029
3030 class WriteDescriptorSetAllocator final : public DescriptorInfoAllocator<VkWriteDescriptorSet>
3031 {
3032 public:
3033 uint32_t updateDescriptorSets(VkDevice device) const;
3034 };
3035
3036 DescriptorInfoAllocator<VkDescriptorBufferInfo> mDescriptorBufferInfos;
3037 DescriptorInfoAllocator<VkDescriptorImageInfo> mDescriptorImageInfos;
3038 DescriptorInfoAllocator<VkBufferView> mBufferViews;
3039 WriteDescriptorSetAllocator mWriteDescriptorSets;
3040 };
3041
3042 } // namespace rx
3043
3044 #endif // LIBANGLE_RENDERER_VULKAN_VK_CACHE_UTILS_H_
3045